8047931: Remove unused medialib code
author vadim
Fri, 13 May 2016 11:31:05 +0300
changeset 38415 acea5f7d354b
parent 38414 3e22d8fd4912
child 38416 dd0b515bc286
8047931: Remove unused medialib code
Reviewed-by: bae
jdk/make/lib/Awt2dLibraries.gmk
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffineEdge.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageCheck.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColorTrue2Index.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColormap.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv2x2_f.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_32nw.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8ext.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_D64nw.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16ext.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16nw.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BC.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BL.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv_f.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1_U8.c
jdk/src/java.desktop/share/native/libmlib_image/mlib_image.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_image_blend_proto.h
jdk/src/java.desktop/share/native/libmlib_image/mlib_image_proto.h
jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy.c
jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy_blk.s
jdk/src/java.desktop/unix/native/libawt/awt/medialib/vis_asi.h
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffineIndex_BC.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_S16.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_U16.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.h
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_1.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_43.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_f.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.h
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_34.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv.h
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_16nw.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_8nw.c
jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv_8nw.c
--- a/jdk/make/lib/Awt2dLibraries.gmk	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/make/lib/Awt2dLibraries.gmk	Fri May 13 11:31:05 2016 +0300
@@ -96,10 +96,7 @@
       mlib_c_ImageAffine_BL.c \
       mlib_c_ImageAffine_BL_S16.c \
       mlib_c_ImageAffine_BL_U16.c \
-      mlib_c_ImageAffineIndex_BC.c \
-      mlib_c_ImageAffineIndex_BL.c \
       mlib_c_ImageAffine_NN.c \
-      mlib_c_ImageBlendTable.c \
       mlib_c_ImageConvClearEdge.c \
       mlib_c_ImageConvCopyEdge.c \
       mlib_c_ImageConv_f.c \
@@ -107,14 +104,6 @@
       mlib_c_ImageCopy.c \
       mlib_c_ImageLookUp.c \
       mlib_c_ImageLookUp_f.c \
-      mlib_v_ImageChannelExtract.c \
-      mlib_v_ImageChannelExtract_f.c \
-      mlib_v_ImageChannelInsert_34.c \
-      mlib_v_ImageChannelInsert.c \
-      mlib_v_ImageConvIndex3_8_16nw.c \
-      mlib_v_ImageConvIndex3_8_8nw.c \
-      mlib_v_ImageCopy.c \
-      mlib_v_ImageCopy_blk.s \
       #
 
   LIBMLIB_IMAGE_V_CFLAGS += $(filter-out -DMLIB_NO_LIBSUNMATH, $(BUILD_LIBMLIB_CFLAGS))
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.c	Fri May 13 11:31:05 2016 +0300
@@ -73,7 +73,6 @@
  */
 
 #include "mlib_ImageCheck.h"
-#include "mlib_ImageColormap.h"
 #include "mlib_ImageAffine.h"
 
 
@@ -125,18 +124,6 @@
 };
 
 /***************************************************************/
-const type_affine_i_fun mlib_AffineFunArr_bc_i[] = {
-  mlib_ImageAffineIndex_U8_U8_3CH_BC,
-  mlib_ImageAffineIndex_U8_U8_4CH_BC,
-  mlib_ImageAffineIndex_S16_U8_3CH_BC,
-  mlib_ImageAffineIndex_S16_U8_4CH_BC,
-  mlib_ImageAffineIndex_U8_S16_3CH_BC,
-  mlib_ImageAffineIndex_U8_S16_4CH_BC,
-  mlib_ImageAffineIndex_S16_S16_3CH_BC,
-  mlib_ImageAffineIndex_S16_S16_4CH_BC
-};
-
-/***************************************************************/
 #ifdef i386 /* do not perform the coping by mlib_d64 data type for x86 */
 #define MAX_T_IND  2
 #else
@@ -148,8 +135,7 @@
                                       const mlib_image *src,
                                       const mlib_d64   *mtx,
                                       mlib_filter      filter,
-                                      mlib_edge        edge,
-                                      const void       *colormap)
+                                      mlib_edge        edge)
 {
   mlib_affine_param param[1];
   mlib_status res;
@@ -213,18 +199,6 @@
   else
     return MLIB_FAILURE; /* unknown image type */
 
-  if (colormap != NULL && filter != MLIB_NEAREST) {
-    if (t_ind != 0 && t_ind != 1)
-      return MLIB_FAILURE;
-
-    if (mlib_ImageGetLutType(colormap) == MLIB_SHORT)
-      t_ind += 2;
-    t_ind = 2 * t_ind;
-
-    if (mlib_ImageGetLutChannels(colormap) == 4)
-      t_ind++;
-  }
-
   if (type == MLIB_BIT) {
     mlib_s32 s_bitoff = mlib_ImageGetBitOffset(src);
     mlib_s32 d_bitoff = mlib_ImageGetBitOffset(dst);
@@ -253,25 +227,13 @@
 
       case MLIB_BILINEAR:
 
-        if (colormap != NULL) {
-          res = mlib_AffineFunArr_bl_i[t_ind] (param, colormap);
-        }
-        else {
-          res = mlib_AffineFunArr_bl[4 * t_ind + (nchan - 1)] (param);
-        }
-
+        res = mlib_AffineFunArr_bl[4 * t_ind + (nchan - 1)] (param);
         break;
 
       case MLIB_BICUBIC:
       case MLIB_BICUBIC2:
 
-        if (colormap != NULL) {
-          res = mlib_AffineFunArr_bc_i[t_ind] (param, colormap);
-        }
-        else {
-          res = mlib_AffineFunArr_bc[4 * t_ind + (nchan - 1)] (param);
-        }
-
+        res = mlib_AffineFunArr_bc[4 * t_ind + (nchan - 1)] (param);
         break;
     }
 
@@ -303,7 +265,7 @@
 
     switch (edge) {
       case MLIB_EDGE_DST_FILL_ZERO:
-        mlib_ImageAffineEdgeZero(param, param_e, colormap);
+        mlib_ImageAffineEdgeZero(param, param_e);
         break;
 
       case MLIB_EDGE_OP_NEAREST:
@@ -313,10 +275,10 @@
       case MLIB_EDGE_SRC_EXTEND:
 
         if (filter == MLIB_BILINEAR) {
-          res = mlib_ImageAffineEdgeExtend_BL(param, param_e, colormap);
+          res = mlib_ImageAffineEdgeExtend_BL(param, param_e);
         }
         else {
-          res = mlib_ImageAffineEdgeExtend_BC(param, param_e, colormap);
+          res = mlib_ImageAffineEdgeExtend_BC(param, param_e);
         }
 
         break;
@@ -355,7 +317,7 @@
     return MLIB_FAILURE;
   }
 
-  return mlib_ImageAffine_alltypes(dst, src, mtx, filter, edge, NULL);
+  return mlib_ImageAffine_alltypes(dst, src, mtx, filter, edge);
 }
 
 /***************************************************************/
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.h	Fri May 13 11:31:05 2016 +0300
@@ -162,99 +162,25 @@
 
 /***************************************************************/
 void mlib_ImageAffineEdgeZero(mlib_affine_param *param,
-                              mlib_affine_param *param_e,
-                              const void        *colormap);
+                              mlib_affine_param *param_e);
 
 void mlib_ImageAffineEdgeNearest(mlib_affine_param *param,
                                  mlib_affine_param *param_e);
 
 mlib_status mlib_ImageAffineEdgeExtend_BL(mlib_affine_param *param,
-                                          mlib_affine_param *param_e,
-                                          const void        *colormap);
+                                          mlib_affine_param *param_e);
 
 mlib_status mlib_ImageAffineEdgeExtend_BC(mlib_affine_param *param,
-                                          mlib_affine_param *param_e,
-                                          const void        *colormap);
-
-mlib_status mlib_ImageAffineEdgeExtend_BC2(mlib_affine_param *param,
-                                           mlib_affine_param *param_e,
-                                           const void        *colormap);
-
-/***************************************************************/
-typedef mlib_status (*type_affine_i_fun)(mlib_affine_param *param, const void *colormap);
-
-mlib_status mlib_ImageAffine_u8_u8_i_bl(mlib_affine_param *param,
-                                        const void        *colormap);
-mlib_status mlib_ImageAffine_u8_s16_i_bl(mlib_affine_param *param,
-                                         const void        *colormap);
-mlib_status mlib_ImageAffine_s16_u8_i_bl(mlib_affine_param *param,
-                                         const void        *colormap);
-mlib_status mlib_ImageAffine_s16_s16_i_bl(mlib_affine_param *param,
-                                          const void        *colormap);
-
-mlib_status mlib_ImageAffine_u8_u8_i_bc(mlib_affine_param *param,
-                                        const void        *colormap);
-mlib_status mlib_ImageAffine_u8_s16_i_bc(mlib_affine_param *param,
-                                         const void        *colormap);
-mlib_status mlib_ImageAffine_s16_u8_i_bc(mlib_affine_param *param,
-                                         const void        *colormap);
-mlib_status mlib_ImageAffine_s16_s16_i_bc(mlib_affine_param *param,
-                                          const void        *colormap);
-
-void mlib_ImageAffineEdgeZeroIndex(mlib_affine_param *param,
-                                   mlib_affine_param *param_e,
-                                   const void        *colormap);
-
-void mlib_ImageAffineEdgeExtendIndex_BL(mlib_affine_param *param,
-                                        mlib_affine_param *param_e,
-                                        const void        *colormap);
-
-void mlib_ImageAffineEdgeExtendIndex_BC(mlib_affine_param *param,
-                                        mlib_affine_param *param_e,
-                                        const void        *colormap);
-
-void mlib_ImageAffineEdgeExtendIndex_BC2(mlib_affine_param *param,
-                                         mlib_affine_param *param_e,
-                                         const void        *colormap);
-
-/***************************************************************/
-#define PROT_AFFINEINDEX_BC(ITYPE, LTYPE, NCHAN)                                                 \
-  mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BC(mlib_affine_param *param, \
-                                                                       const void        *colormap)
-
-PROT_AFFINEINDEX_BC(U8, U8, 3);
-PROT_AFFINEINDEX_BC(U8, S16, 3);
-PROT_AFFINEINDEX_BC(U8, U8, 4);
-PROT_AFFINEINDEX_BC(U8, S16, 4);
-PROT_AFFINEINDEX_BC(S16, U8, 3);
-PROT_AFFINEINDEX_BC(S16, S16, 3);
-PROT_AFFINEINDEX_BC(S16, U8, 4);
-PROT_AFFINEINDEX_BC(S16, S16, 4);
-
-/***************************************************************/
-#define PROT_AFFINEINDEX_BL(ITYPE, LTYPE, NCHAN)                                                 \
-  mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BL(mlib_affine_param *param, \
-                                                                       const void        *colormap)
-
-PROT_AFFINEINDEX_BL(U8, U8, 3);
-PROT_AFFINEINDEX_BL(U8, S16, 3);
-PROT_AFFINEINDEX_BL(U8, U8, 4);
-PROT_AFFINEINDEX_BL(U8, S16, 4);
-PROT_AFFINEINDEX_BL(S16, U8, 3);
-PROT_AFFINEINDEX_BL(S16, S16, 3);
-PROT_AFFINEINDEX_BL(S16, U8, 4);
-PROT_AFFINEINDEX_BL(S16, S16, 4);
+                                          mlib_affine_param *param_e);
 
 /***************************************************************/
 mlib_status mlib_ImageAffine_alltypes(mlib_image       *dst,
                                       const mlib_image *src,
                                       const mlib_d64   *mtx,
                                       mlib_filter      filter,
-                                      mlib_edge        edge,
-                                      const void       *colormap);
+                                      mlib_edge        edge);
 
 /***************************************************************/
-extern const type_affine_i_fun mlib_AffineFunArr_bl_i[];
 extern const type_affine_fun mlib_AffineFunArr_nn[];
 extern const type_affine_fun mlib_AffineFunArr_bl[];
 extern const type_affine_fun mlib_AffineFunArr_bc[];
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffineEdge.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffineEdge.c	Fri May 13 11:31:05 2016 +0300
@@ -73,7 +73,6 @@
  */
 
 #include "mlib_image.h"
-#include "mlib_ImageColormap.h"
 #include "mlib_ImageAffine.h"
 
 /***************************************************************/
@@ -218,97 +217,6 @@
   }
 
 /***************************************************************/
-#define LUT(k, ind) plut[channels*sp[ind] + k]
-
-/***************************************************************/
-#define MLIB_EDGE_INDEX(ITYPE, DTYPE, size)                             \
-  for (j = 0; j < size; j++) {                                          \
-    ySrc = ((Y - 32768) >> MLIB_SHIFT);                                 \
-    xSrc = ((X - 32768) >> MLIB_SHIFT);                                 \
-                                                                        \
-    t = ((X - 32768) & MLIB_MASK) * scale;                              \
-    u = ((Y - 32768) & MLIB_MASK) * scale;                              \
-                                                                        \
-    xDelta = (((xSrc + 1 - srcWidth )) >> MLIB_SIGN_SHIFT) & 1;         \
-    yDelta = (((ySrc + 1 - srcHeight)) >> MLIB_SIGN_SHIFT) & srcStride; \
-                                                                        \
-    xFlag = (xSrc >> (MLIB_SIGN_SHIFT - MLIB_SHIFT));                   \
-    xSrc = xSrc + (1 & xFlag);                                          \
-    xDelta = xDelta &~ xFlag;                                           \
-                                                                        \
-    yFlag = (ySrc >> (MLIB_SIGN_SHIFT - MLIB_SHIFT));                   \
-    ySrc = ySrc + (1 & yFlag);                                          \
-    yDelta = yDelta &~ yFlag;                                           \
-                                                                        \
-    sp = (ITYPE*)lineAddr[ySrc] + xSrc;                                 \
-                                                                        \
-    for (k = 0; k < channels; k++) {                                    \
-      a00  = LUT(k, 0);                                                 \
-      a01  = LUT(k, xDelta);                                            \
-      a10  = LUT(k, yDelta);                                            \
-      a11  = LUT(k, yDelta + xDelta);                                   \
-      pix0 = (a00 * (1 - t) + a01 * t) * (1 - u) +                      \
-             (a10 * (1 - t) + a11 * t) * u;                             \
-                                                                        \
-      pbuff[k] = (mlib_s32)pix0;                                        \
-    }                                                                   \
-    pbuff += channels;                                                  \
-                                                                        \
-    X += dX;                                                            \
-    Y += dY;                                                            \
-  }
-
-/***************************************************************/
-#define MLIB_EDGE_INDEX_u8i(ITYPE, Left, Right) {                              \
-  mlib_u8  *pbuff = buff;                                                      \
-                                                                               \
-  size = Right - Left;                                                         \
-                                                                               \
-  MLIB_EDGE_INDEX(ITYPE, mlib_u8, size);                                       \
-                                                                               \
-  dp = (ITYPE*)data + Left;                                                    \
-                                                                               \
-  if (channels == 3) {                                                         \
-    if (sizeof(ITYPE) == 1) {                                                  \
-      mlib_ImageColorTrue2IndexLine_U8_U8_3 (buff, (void*)dp, size, colormap); \
-    } else {                                                                   \
-      mlib_ImageColorTrue2IndexLine_U8_S16_3(buff, (void*)dp, size, colormap); \
-    }                                                                          \
-  } else {                                                                     \
-    if (sizeof(ITYPE) == 1) {                                                  \
-      mlib_ImageColorTrue2IndexLine_U8_U8_4 (buff, (void*)dp, size, colormap); \
-    } else {                                                                   \
-      mlib_ImageColorTrue2IndexLine_U8_S16_4(buff, (void*)dp, size, colormap); \
-    }                                                                          \
-  }                                                                            \
-}
-
-/***************************************************************/
-#define MLIB_EDGE_INDEX_s16i(ITYPE, Left, Right) {                              \
-  mlib_s16 *pbuff = buff;                                                       \
-                                                                                \
-  size = Right - Left;                                                          \
-                                                                                \
-  MLIB_EDGE_INDEX(ITYPE, mlib_s16, size);                                       \
-                                                                                \
-  dp = (ITYPE*)data + Left;                                                     \
-                                                                                \
-  if (channels == 3) {                                                          \
-    if (sizeof(ITYPE) == 1) {                                                   \
-      mlib_ImageColorTrue2IndexLine_S16_U8_3 (buff, (void*)dp, size, colormap); \
-    } else {                                                                    \
-      mlib_ImageColorTrue2IndexLine_S16_S16_3(buff, (void*)dp, size, colormap); \
-    }                                                                           \
-  } else {                                                                      \
-    if (sizeof(ITYPE) == 1) {                                                   \
-      mlib_ImageColorTrue2IndexLine_S16_U8_4 (buff, (void*)dp, size, colormap); \
-    } else {                                                                    \
-      mlib_ImageColorTrue2IndexLine_S16_S16_4(buff, (void*)dp, size, colormap); \
-    }                                                                           \
-  }                                                                             \
-}
-
-/***************************************************************/
 #define GET_FLT_TBL(X, xf0, xf1, xf2, xf3)                      \
   filterpos = ((X - 32768) >> flt_shift) & flt_mask;            \
   fptr = (mlib_f32 *) ((mlib_u8 *)flt_tbl + filterpos);         \
@@ -424,47 +332,6 @@
   MLIB_EDGE_BC_LINE(TYPE, Left, Right, GET_FLT_BC2)
 
 /***************************************************************/
-#define MLIB_EDGE_INDEX_BC(ITYPE, DTYPE, size)                  \
-  for (j = 0; j < size; j++) {                                  \
-    GET_FLT_TBL(X, xf0, xf1, xf2, xf3);                         \
-    GET_FLT_TBL(Y, yf0, yf1, yf2, yf3);                         \
-                                                                \
-    CALC_SRC_POS(X, Y, 1, srcStride);                           \
-                                                                \
-    sp = (ITYPE*)lineAddr[ySrc] + xSrc;                         \
-                                                                \
-    for (k = 0; k < channels; k++) {                            \
-      c0 = LUT(k, yDelta0 + xDelta0) * xf0 +                    \
-           LUT(k, yDelta0          ) * xf1 +                    \
-           LUT(k, yDelta0 + xDelta1) * xf2 +                    \
-           LUT(k, yDelta0 + xDelta2) * xf3;                     \
-                                                                \
-      c1 = LUT(k, xDelta0) * xf0 +                              \
-           LUT(k, 0      ) * xf1 +                              \
-           LUT(k, xDelta1) * xf2 +                              \
-           LUT(k, xDelta2) * xf3;                               \
-                                                                \
-      c2 = LUT(k, yDelta1 + xDelta0) * xf0 +                    \
-           LUT(k, yDelta1          ) * xf1 +                    \
-           LUT(k, yDelta1 + xDelta1) * xf2 +                    \
-           LUT(k, yDelta1 + xDelta2) * xf3;                     \
-                                                                \
-      c3 = LUT(k, yDelta2 + xDelta0) * xf0 +                    \
-           LUT(k, yDelta2          ) * xf1 +                    \
-           LUT(k, yDelta2 + xDelta1) * xf2 +                    \
-           LUT(k, yDelta2 + xDelta2) * xf3;                     \
-                                                                \
-      val0 = c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3;                 \
-                                                                \
-      SAT##DTYPE(pbuff[k], val0);                               \
-    }                                                           \
-    pbuff += channels;                                          \
-                                                                \
-    X += dX;                                                    \
-    Y += dY;                                                    \
-  }
-
-/***************************************************************/
 #define MLIB_PROCESS_EDGES_ZERO(TYPE) {                         \
   TYPE *dp, *dstLineEnd;                                        \
                                                                 \
@@ -586,16 +453,11 @@
 
 /***************************************************************/
 void mlib_ImageAffineEdgeZero(mlib_affine_param *param,
-                              mlib_affine_param *param_e,
-                              const void        *colormap)
+                              mlib_affine_param *param_e)
 {
   GET_EDGE_PARAMS_ZERO();
   mlib_s32 zero = 0;
 
-  if (colormap != NULL) {
-    zero = mlib_ImageGetLutOffset(colormap);
-  }
-
   switch (type) {
     case MLIB_BYTE:
       MLIB_PROCESS_EDGES_ZERO(mlib_u8);
@@ -654,8 +516,7 @@
 
 /***************************************************************/
 mlib_status mlib_ImageAffineEdgeExtend_BL(mlib_affine_param *param,
-                                          mlib_affine_param *param_e,
-                                          const void        *colormap)
+                                          mlib_affine_param *param_e)
 {
   GET_EDGE_PARAMS();
   mlib_d64 scale = 1.0 / (mlib_d64) MLIB_PREC;
@@ -663,79 +524,6 @@
   mlib_d64 t, u, pix0;
   mlib_d64 a00, a01, a10, a11;
 
-  if (colormap != NULL) {
-    mlib_s32 max_xsize = param_e->max_xsize;
-    mlib_type ltype = mlib_ImageGetLutType(colormap);
-    mlib_d64 *plut = (mlib_d64 *) mlib_ImageGetLutDoubleData(colormap);
-    void *buff;
-
-    channels = mlib_ImageGetLutChannels(colormap);
-    plut -= channels * mlib_ImageGetLutOffset(colormap);
-
-    if (max_xsize == 0) {
-      return MLIB_SUCCESS;
-    }
-
-    if (ltype == MLIB_BYTE) {
-      buff = mlib_malloc(channels * max_xsize);
-    }
-    else if (ltype == MLIB_SHORT) {
-      buff = mlib_malloc(channels * max_xsize * sizeof(mlib_s16));
-    } else {
-      /* Unsupported type of lookup table. Report a failure */
-      return MLIB_FAILURE;
-    }
-
-    if (buff == NULL)
-      return MLIB_FAILURE;
-
-    switch (ltype) {
-      case MLIB_BYTE:
-        switch (type) {
-          case MLIB_BYTE:
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_u8);
-            break;
-
-          case MLIB_SHORT:
-            srcStride >>= 1;
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_s16);
-            break;
-        default:
-          /* Incompatible image type. Ignore it for now. */
-          break;
-        }
-
-        break;
-
-      case MLIB_SHORT:
-        switch (type) {
-          case MLIB_BYTE:
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_u8);
-            break;
-
-          case MLIB_SHORT:
-            srcStride >>= 1;
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_s16);
-            break;
-        default:
-          /* Incompatible image type. Ignore it for now. */
-          break;
-        }
-
-        break;
-    default:
-      /* Unsupported type of lookup table.
-       * Can not be here due to check on line 685,
-       * so just ignore it.
-       */
-      break;
-    }
-
-    mlib_free(buff);
-
-    return MLIB_SUCCESS;
-  }
-
   switch (type) {
     case MLIB_BYTE:
       MLIB_PROCESS_EDGES(MLIB_EDGE_BL, mlib_u8);
@@ -775,12 +563,8 @@
 }
 
 /***************************************************************/
-#undef  MLIB_EDGE_INDEX
-#define MLIB_EDGE_INDEX MLIB_EDGE_INDEX_BC
-
 mlib_status mlib_ImageAffineEdgeExtend_BC(mlib_affine_param *param,
-                                          mlib_affine_param *param_e,
-                                          const void        *colormap)
+                                          mlib_affine_param *param_e)
 {
   GET_EDGE_PARAMS();
   mlib_d64 scale = 1.0 / (mlib_d64) MLIB_PREC;
@@ -789,7 +573,6 @@
   mlib_d64 xf0, xf1, xf2, xf3;
   mlib_d64 yf0, yf1, yf2, yf3;
   mlib_d64 c0, c1, c2, c3, val0;
-  mlib_type ltype;
   mlib_filter filter = param->filter;
   mlib_f32 *fptr;
   mlib_f32 const *flt_tbl;
@@ -798,9 +581,7 @@
   mlib_s32 yDelta0, yDelta1, yDelta2;
   mlib_d64 sat;
 
-  ltype = (colormap != NULL) ? mlib_ImageGetLutType(colormap) : type;
-
-  if (ltype == MLIB_BYTE) {
+  if (type == MLIB_BYTE) {
     flt_shift = FLT_SHIFT_U8;
     flt_mask = FLT_MASK_U8;
     flt_tbl = (filter == MLIB_BICUBIC) ? mlib_filters_u8f_bc : mlib_filters_u8f_bc2;
@@ -813,78 +594,6 @@
     sat = (mlib_d64) 0x7FFF8000;                           /* saturation for U16 */
   }
 
-  if (colormap != NULL) {
-    mlib_s32 max_xsize = param_e->max_xsize;
-    mlib_d64 *plut = (mlib_d64 *) mlib_ImageGetLutDoubleData(colormap);
-    void *buff;
-
-    channels = mlib_ImageGetLutChannels(colormap);
-    plut -= channels * mlib_ImageGetLutOffset(colormap);
-
-    if (max_xsize == 0) {
-      return MLIB_SUCCESS;
-    }
-
-    if (ltype == MLIB_BYTE) {
-      buff = mlib_malloc(channels * max_xsize);
-    }
-    else if (ltype == MLIB_SHORT) {
-      buff = mlib_malloc(channels * max_xsize * sizeof(mlib_s16));
-    } else {
-      /* Unsupported type of lookup table. */
-      return MLIB_FAILURE;
-    }
-
-    if (buff == NULL)
-      return MLIB_FAILURE;
-
-    switch (ltype) {
-      case MLIB_BYTE:
-        switch (type) {
-          case MLIB_BYTE:
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_u8);
-            break;
-
-          case MLIB_SHORT:
-            srcStride >>= 1;
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_s16);
-            break;
-        default:
-          /* Ignore incomatible image type. */
-          break;
-        }
-
-        break;
-
-      case MLIB_SHORT:
-        switch (type) {
-          case MLIB_BYTE:
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_u8);
-            break;
-
-          case MLIB_SHORT:
-            srcStride >>= 1;
-            MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_s16);
-            break;
-        default:
-          /* Ignore incomatible image type. */
-          break;
-        }
-
-        break;
-
-    default:
-      /* Unsupported type of lookup table.
-       * Can not be here due to check on line 836,
-       * so just ignore it.
-       */
-      break;
-    }
-
-    mlib_free(buff);
-
-    return MLIB_SUCCESS;
-  }
 
   switch (type) {
     case MLIB_BYTE:
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageCheck.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageCheck.h	Fri May 13 11:31:05 2016 +0300
@@ -82,12 +82,6 @@
     return MLIB_FAILURE;                                        \
   }
 
-#define MLIB_IMAGE_AND_COLORMAP_ARE_COMPAT(image,colormap)                 \
-  if ((mlib_ImageGetChannels(image) != mlib_ImageGetLutChannels(colormap)) \
-    || (mlib_ImageGetLutType(colormap) != mlib_ImageGetType(image))) {     \
-    return MLIB_FAILURE;                                                   \
-  }
-
 #define MLIB_IMAGE_GET_ALL_PARAMS(image, type, nchan, width, height, stride, pdata) \
   type   = mlib_ImageGetType(image);                                                \
   nchan  = mlib_ImageGetChannels(image);                                            \
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColorTrue2Index.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4256 +0,0 @@
-/*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * FUNCTION
- *      mlib_ImageColorTrue2Index - convert a true color image to an indexed
- *                                  color image
- *
- * SYNOPSIS
- *      mlib_status mlib_ImageColorTrue2Index(mlib_image       *dst,
- *                                            const mlib_image *src,
- *                                            const void       *colormap)
- *
- * ARGUMENTS
- *      colormap  Internal data structure for inverse color mapping.
- *      dst       Pointer to destination image.
- *      src       Pointer to source image.
- *
- * DESCRIPTION
- *      Convert a true color image to a pseudo color image with the method
- *      of finding the nearest matched lut entry for each pixel.
- *
- *      The src can be an MLIB_BYTE or MLIB_SHORT image with 3 or 4 channels.
- *      The dst must be a 1-channel MLIB_BYTE or MLIB_SHORT image.
- *
- *      The lut might have either 3 or 4 channels. The type of the lut can be
- *      one of the following:
- *              MLIB_BYTE in, MLIB_BYTE out (i.e., BYTE-to-BYTE)
- *              MLIB_BYTE in, MLIB_SHORT out (i.e., BYTE-to-SHORT)
- *              MLIB_SHORT in, MLIB_SHORT out (i.e., SHORT-to-SHORT)
- *              MLIB_SHORT in, MLIB_BYTE out (i.e., SHORT-to-BYTE)
- *
- *      The src image and the lut must have same number of channels.
- */
-
-#include "mlib_image.h"
-#include "mlib_ImageColormap.h"
-#include "mlib_ImageCheck.h"
-
-/***************************************************************/
-
-/*#define USE_VIS_CODE*/
-
-#ifdef USE_VIS_CODE
-#include "vis_proto.h"
-#define VIS_ALIGNADDR(X, Y)  vis_alignaddr((void *)(X), (Y))
-#endif
-
-/***************************************************************/
-
-#define LUT_BYTE_COLORS_3CHANNELS  1000
-#define LUT_BYTE_COLORS_4CHANNELS  3000
-#define LUT_SHORT_COLORS_3CHANNELS 1000
-#define LUT_SHORT_COLORS_4CHANNELS 1000
-
-/***************************************************************/
-
-#define MAIN_COLORTRUE2INDEX_LOOP( FROM_TYPE, TO_TYPE, NCHANNELS )       \
-  for( y = 0; y < height; y++ )                                          \
-  {                                                                      \
-    mlib_ImageColorTrue2IndexLine_##FROM_TYPE##_##TO_TYPE##_##NCHANNELS( \
-      sdata, ddata, width, colormap );                                   \
-                                                                         \
-    sdata += sstride;                                                    \
-    ddata += dstride;                                                    \
-  }
-
-/***************************************************************/
-
-#define COLOR_CUBE_U8_3_SEARCH( TABLE_POINTER_TYPE, SHIFT, STEP ) \
-{                                                                 \
-  const mlib_u8 *c0, *c1, *c2;                                    \
-  TABLE_POINTER_TYPE *table = s->table;                           \
-  mlib_s32 bits = s->bits;                                        \
-  mlib_s32 nbits = 8 - bits;                                      \
-  mlib_s32 mask = ~( ( 1 << nbits ) - 1 );                        \
-  mlib_s32 j;                                                     \
-                                                                  \
-  c0 = src + SHIFT;                                               \
-  c1 = src + 1 + SHIFT;                                           \
-  c2 = src + 2 + SHIFT;                                           \
-                                                                  \
-  switch( bits )                                                  \
-  {                                                               \
-    case 1:                                                       \
-    case 2:                                                       \
-    {                                                             \
-      mlib_s32 bits0 = 8 - bits;                                  \
-      mlib_s32 bits1 = bits0 - bits;                              \
-      mlib_s32 bits2 = bits1 - bits;                              \
-                                                                  \
-      for( j = 0; j < length; j++ )                               \
-      {                                                           \
-        dst[ j ] = table[ ( ( *c0 & mask ) >> bits2 ) |           \
-          ( ( *c1 & mask ) >> bits1 ) |                           \
-          ( ( *c2 & mask ) >> bits0 ) ];                          \
-                                                                  \
-        c0 += STEP;                                               \
-        c1 += STEP;                                               \
-        c2 += STEP;                                               \
-      }                                                           \
-      break;                                                      \
-    }                                                             \
-    case 3:                                                       \
-    {                                                             \
-      for( j = 0; j < length; j++ )                               \
-      {                                                           \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 1 ) |               \
-          ( ( *c1 & mask ) >> 2 ) |                               \
-          ( ( *c2 & mask ) >> 5 ) ];                              \
-                                                                  \
-        c0 += STEP;                                               \
-        c1 += STEP;                                               \
-        c2 += STEP;                                               \
-      }                                                           \
-      break;                                                      \
-    }                                                             \
-    case 4:                                                       \
-    {                                                             \
-      for( j = 0; j < length; j++ )                               \
-      {                                                           \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 4 ) |               \
-          ( *c1 & mask ) |                                        \
-          ( ( *c2 & mask ) >> 4 ) ];                              \
-                                                                  \
-        c0 += STEP;                                               \
-        c1 += STEP;                                               \
-        c2 += STEP;                                               \
-      }                                                           \
-      break;                                                      \
-    }                                                             \
-    case 5:                                                       \
-    case 6:                                                       \
-    case 7:                                                       \
-    {                                                             \
-      mlib_s32 bits0 = 8 - bits;                                  \
-      mlib_s32 bits1 = bits * 2 - 8;                              \
-      mlib_s32 bits2 = bits1 + bits;                              \
-                                                                  \
-      for( j = 0; j < length; j++ )                               \
-      {                                                           \
-        dst[ j ] = table[ ( ( *c0 & mask ) << bits2 ) |           \
-          ( ( *c1 & mask ) << bits1 ) |                           \
-          ( ( *c2 & mask ) >> bits0 ) ];                          \
-                                                                  \
-        c0 += STEP;                                               \
-        c1 += STEP;                                               \
-        c2 += STEP;                                               \
-      }                                                           \
-      break;                                                      \
-    }                                                             \
-    case 8:                                                       \
-    {                                                             \
-      for( j = 0; j < length; j++ )                               \
-      {                                                           \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 16 ) |              \
-          ( ( *c1 & mask ) << 8 ) |                               \
-          ( *c2 & mask ) ];                                       \
-                                                                  \
-        c0 += STEP;                                               \
-        c1 += STEP;                                               \
-        c2 += STEP;                                               \
-      }                                                           \
-      break;                                                      \
-    }                                                             \
-  }                                                               \
-}
-
-/***************************************************************/
-#define COLOR_CUBE_U8_4_SEARCH( TABLE_TYPE )                    \
-{                                                               \
-  const mlib_u8 *c0, *c1, *c2, *c3;                             \
-  TABLE_TYPE *table = s->table;                                 \
-  mlib_s32 bits = s->bits;                                      \
-  mlib_s32 nbits = 8 - bits;                                    \
-  mlib_s32 mask = ~( ( 1 << nbits ) - 1 );                      \
-  mlib_s32 j;                                                   \
-                                                                \
-  c0 = src;                                                     \
-  c1 = src + 1;                                                 \
-  c2 = src + 2;                                                 \
-  c3 = src + 3;                                                 \
-                                                                \
-  switch( bits )                                                \
-  {                                                             \
-    case 1:                                                     \
-    {                                                           \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( ( *c0 & mask ) >> 4 ) |             \
-          ( ( *c1 & mask ) >> 5 ) |                             \
-          ( ( *c2 & mask ) >> 6 ) |                             \
-          ( ( *c3 & mask ) >> 7 ) ];                            \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-      }                                                         \
-      break;                                                    \
-    }                                                           \
-    case 2:                                                     \
-    {                                                           \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( *c0 & mask ) |                      \
-          ( ( *c1 & mask ) >> 2 ) |                             \
-          ( ( *c2 & mask ) >> 4 ) |                             \
-          ( ( *c3 & mask ) >> 6 ) ];                            \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-          }                                                     \
-      break;                                                    \
-    }                                                           \
-    case 3:                                                     \
-    {                                                           \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 4 ) |             \
-          ( ( *c1 & mask ) << 1 ) |                             \
-          ( ( *c2 & mask ) >> 2 ) |                             \
-          ( ( *c3 & mask ) >> 5 ) ];                            \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-      }                                                         \
-      break;                                                    \
-    }                                                           \
-    case 4:                                                     \
-    {                                                           \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 8 ) |             \
-          ( ( *c1 & mask ) << 4 ) |                             \
-          ( *c2 & mask ) |                                      \
-          ( ( *c3 & mask ) >> 4 ) ];                            \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-      }                                                         \
-      break;                                                    \
-    }                                                           \
-    case 5:                                                     \
-    case 6:                                                     \
-    {                                                           \
-      mlib_s32 bits3 = bits * 4 - 8;                            \
-      mlib_s32 bits2 = bits3 - bits;                            \
-      mlib_s32 bits1 = bits2 - bits;                            \
-      mlib_s32 bits0 = 8 - bits;                                \
-                                                                \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( ( *c0 & mask ) << bits3 ) |         \
-          ( ( *c1 & mask ) << bits2 ) |                         \
-          ( ( *c2 & mask ) << bits1 ) |                         \
-          ( ( *c3 & mask ) >> bits0 ) ];                        \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-      }                                                         \
-      break;                                                    \
-    }                                                           \
-    case 7:                                                     \
-    {                                                           \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 20 ) |            \
-          ( ( *c1 & mask ) << 13 ) |                            \
-          ( ( *c2 & mask ) << 6 ) |                             \
-          ( ( *c3 & mask ) >> 1 ) ];                            \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-      }                                                         \
-      break;                                                    \
-    }                                                           \
-    case 8: /* will never be called */                          \
-    {                                                           \
-      for( j = 0; j < length; j++ )                             \
-      {                                                         \
-        dst[ j ] = table[ ( ( *c0 & mask ) << 24 ) |            \
-          ( ( *c1 & mask ) << 16 ) |                            \
-          ( ( *c2 & mask ) << 8 ) |                             \
-          ( *c3 & mask ) ];                                     \
-                                                                \
-        c0 += 4;                                                \
-        c1 += 4;                                                \
-        c2 += 4;                                                \
-        c3 += 4;                                                \
-      }                                                         \
-      break;                                                    \
-    }                                                           \
-  }                                                             \
-}
-
-/***************************************************************/
-#define COLOR_CUBE_S16_3_SEARCH( TABLE_TYPE, SHIFT, STEP )                 \
-{                                                                          \
-  const mlib_s16 *c0, *c1, *c2;                                            \
-  mlib_s32 bits = s->bits;                                                 \
-  mlib_s32 nbits = 16 - bits;                                              \
-  mlib_s32 mask = ~( ( 1 << nbits ) - 1 );                                 \
-  TABLE_TYPE *table = s->table;                                            \
-  mlib_s32 j;                                                              \
-                                                                           \
-  c0 = src + SHIFT;                                                        \
-  c1 = src + 1 + SHIFT;                                                    \
-  c2 = src + 2 + SHIFT;                                                    \
-                                                                           \
-  switch( bits )                                                           \
-  {                                                                        \
-    case 1:                                                                \
-    case 2:                                                                \
-    case 3:                                                                \
-    case 4:                                                                \
-    case 5:                                                                \
-    {                                                                      \
-      mlib_s32 bits0 = 16 - bits;                                          \
-      mlib_s32 bits1 = bits0 - bits;                                       \
-      mlib_s32 bits2 = bits1 - bits;                                       \
-                                                                           \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) >> bits2 ) | \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> bits1 ) |                 \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits0 ) ];                \
-                                                                           \
-        c0 += STEP;                                                        \
-        c1 += STEP;                                                        \
-        c2 += STEP;                                                        \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 6:                                                                \
-    case 7:                                                                \
-    {                                                                      \
-      mlib_s32 bits0 = 16 - bits;                                          \
-      mlib_s32 bits1 = bits0 - bits;                                       \
-      mlib_s32 bits2 = bits * 3 - 16;                                      \
-                                                                           \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << bits2 ) | \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> bits1 ) |                 \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits0 ) ];                \
-                                                                           \
-        c0 += STEP;                                                        \
-        c1 += STEP;                                                        \
-        c2 += STEP;                                                        \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 8:                                                                \
-    {                                                                      \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << 8 ) |     \
-          ( ( *c1 - MLIB_S16_MIN ) & mask ) |                              \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> 8 ) ];                    \
-                                                                           \
-        c0 += STEP;                                                        \
-        c1 += STEP;                                                        \
-        c2 += STEP;                                                        \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 9:                                                                \
-    case 10:                                                               \
-    {                                                                      \
-      mlib_s32 bits0 = 16 - bits;                                          \
-      mlib_s32 bits1 = 2 * bits - 16;                                      \
-      mlib_s32 bits2 = bits1 + bits;                                       \
-                                                                           \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << bits2 ) | \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) << bits1 ) |                 \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits0 ) ];                \
-                                                                           \
-        c0 += STEP;                                                        \
-        c1 += STEP;                                                        \
-        c2 += STEP;                                                        \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    /* Other cases may not be considered as the table size will be more    \
-       than 2^32 */                                                        \
-  }                                                                        \
-}
-
-/***************************************************************/
-#define COLOR_CUBE_S16_4_SEARCH( TABLE_TYPE )                              \
-{                                                                          \
-  const mlib_s16 *c0, *c1, *c2, *c3;                                       \
-  TABLE_TYPE *table = s->table;                                            \
-  mlib_s32 bits = s->bits;                                                 \
-  mlib_s32 nbits = 16 - bits;                                              \
-  mlib_s32 mask = ~( ( 1 << nbits ) - 1 );                                 \
-  mlib_s32 j;                                                              \
-                                                                           \
-  c0 = src;                                                                \
-  c1 = src + 1;                                                            \
-  c2 = src + 2;                                                            \
-  c3 = src + 3;                                                            \
-                                                                           \
-  switch( bits )                                                           \
-  {                                                                        \
-    case 1:                                                                \
-    case 2:                                                                \
-    case 3:                                                                \
-    {                                                                      \
-      mlib_s32 bits0 = 16 - bits;                                          \
-      mlib_s32 bits1 = bits0 - bits;                                       \
-      mlib_s32 bits2 = bits1 - bits;                                       \
-      mlib_s32 bits3 = bits2 - bits;                                       \
-                                                                           \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) >> bits3 ) | \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> bits2 ) |                 \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits1 ) |                 \
-          ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> bits0 ) ];                \
-                                                                           \
-        c0 += 4;                                                           \
-        c1 += 4;                                                           \
-        c2 += 4;                                                           \
-        c3 += 4;                                                           \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 4:                                                                \
-    {                                                                      \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( *c0 - MLIB_S16_MIN ) & mask ) |              \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> 4 ) |                     \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> 8 ) |                     \
-          ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> 12 ) ];                   \
-                                                                           \
-        c0 += 4;                                                           \
-        c1 += 4;                                                           \
-        c2 += 4;                                                           \
-        c3 += 4;                                                           \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 5:                                                                \
-    {                                                                      \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << 4 ) |     \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> 1 ) |                     \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> 6 ) |                     \
-          ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> 11 ) ];                   \
-                                                                           \
-        c0 += 4;                                                           \
-        c1 += 4;                                                           \
-        c2 += 4;                                                           \
-        c3 += 4;                                                           \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 6:                                                                \
-    case 7:                                                                \
-    {                                                                      \
-      mlib_s32 bits0 = 16 - bits;                                          \
-      mlib_s32 bits1 = bits0 - bits;                                       \
-      mlib_s32 bits3 = bits * 4 - 16;                                      \
-      mlib_s32 bits2 = bits3 - bits;                                       \
-                                                                           \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << bits3 ) | \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) << bits2 ) |                 \
-          ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits1 ) |                 \
-          ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> bits0 ) ];                \
-                                                                           \
-        c0 += 4;                                                           \
-        c1 += 4;                                                           \
-        c2 += 4;                                                           \
-        c3 += 4;                                                           \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    case 8:                                                                \
-    {                                                                      \
-      for( j = 0; j < length; j++ )                                        \
-      {                                                                    \
-        dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << 16 ) |    \
-          ( ( ( *c1 - MLIB_S16_MIN ) & mask ) << 8 ) |                     \
-          ( ( *c2 - MLIB_S16_MIN ) & mask ) |                              \
-          ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> 8 ) ];                    \
-                                                                           \
-        c0 += 4;                                                           \
-        c1 += 4;                                                           \
-        c2 += 4;                                                           \
-        c3 += 4;                                                           \
-      }                                                                    \
-      break;                                                               \
-    }                                                                      \
-    /* Other cases may not be considered as the table size will be more    \
-       than 2^32 */                                                        \
-  }                                                                        \
-}
-
-/***************************************************************
- * BINARY_TREE_SEARCH_RIGHT( POSITION, COLOR_MAX, SHIFT )
- *
- * Statement macro: expands to a braced block that may set the
- * `continue_up` flag of the enclosing scope to 1.  It uses these
- * names from the place where it is expanded: distance, position[],
- * current_size, c[] and continue_up.
- *
- *   POSITION  - index used for both position[] and c[]
- *   COLOR_MAX - value that position[ POSITION ] + current_size is
- *               compared with; on equality the flag is never set
- *   SHIFT     - right shift applied to the squared difference
- *               before it is compared with `distance`
- *
- * The flag is set when `distance` is greater than or equal to the
- * shifted square of
- *   ( position[ POSITION ] + current_size - c[ POSITION ] )
- * and position[ POSITION ] + current_size differs from COLOR_MAX.
- *
- * NOTE(review): presumably this asks whether the best match found
- * so far could still be beaten by something beyond the upper edge
- * of the current cell on this axis - confirm against the caller.
- *
- * The arguments are substituted without extra parentheses, so
- * callers should pass simple expressions.
- ***************************************************************/
-#define BINARY_TREE_SEARCH_RIGHT( POSITION, COLOR_MAX, SHIFT )  \
-{                                                               \
-  if( ( distance >= ( ( ( position[ POSITION ] + current_size - \
-    c[ POSITION ] ) * ( position[ POSITION ] + current_size -   \
-    c[ POSITION ] ) ) >> SHIFT ) ) &&                           \
-    ( position[ POSITION ] + current_size != COLOR_MAX ) )      \
-    continue_up = 1;                                            \
-}
-
-/***************************************************************
- * BINARY_TREE_EXPLORE_RIGHT_3( POSITION, COLOR_MAX, IMAGE_TYPE,
- *   FIRST_NEIBOUR, SECOND_NEIBOUR, SUBSTRACTION, SHIFT )
- *
- * Statement macro for the three-channel case.  It examines the
- * sibling slot qq = q ^ ( 1 << POSITION ) of `node` and may lower
- * `distance` / replace `found_color`.  It uses these names from the
- * place where it is expanded: distance, found_color, position[],
- * current_size, c[], p, q, node, pass, check_neibours[],
- * check_corner and continue_up.
- *
- *   POSITION       - index into position[] and c[]; also the bit
- *                    toggled in q to obtain qq
- *   COLOR_MAX      - upper bound used in the edge tests below
- *   IMAGE_TYPE     - pasted into the names of the helpers
- *                    mlib_search_quadrant_part_to_left_<T>_3 and
- *                    mlib_search_quadrant_<T>_3
- *   FIRST_NEIBOUR,
- *   SECOND_NEIBOUR - indices of the two check_neibours[] counters
- *                    that are incremented
- *   SUBSTRACTION   - subtracted from every p[ k ][ color ] value
- *                    before it is passed to FIND_DISTANCE_3
- *   SHIFT          - right shift applied to squared differences
- *                    and forwarded to FIND_DISTANCE_3
- *
- * Nothing happens unless `distance` is >= the shifted square of
- * ( position[ POSITION ] + current_size - c[ POSITION ] ).
- * Otherwise one of two modes is chosen:
- *
- *   partial - counters += 1; if slot qq is not a color cell but
- *             has a child, the child is searched with the
- *             ..._part_to_left_... helper, bounded by
- *             position[ POSITION ] + current_size.
- *   whole   - counters += 2, continue_up = 1; a child in slot qq
- *             is searched with the plain ..._search_quadrant_...
- *             helper.
- *
- * When `distance` is below the shifted square of
- * ( COLOR_MAX - c[ POSITION ] ), partial mode is used if it is
- * also below the shifted square of
- * ( position[ POSITION ] + current_size * 2 - c[ POSITION ] ),
- * whole mode otherwise.  When it is not below, partial mode is
- * used only if position[ POSITION ] + current_size * 2 equals
- * COLOR_MAX, whole mode otherwise.
- *
- * In both modes, if bit qq of node->tag is set, the slot holds a
- * color index: its distance to c[] is computed with
- * FIND_DISTANCE_3 and kept only if strictly smaller than
- * `distance`.  "The counters" are check_neibours[ FIRST_NEIBOUR ],
- * check_neibours[ SECOND_NEIBOUR ] and check_corner.
- ***************************************************************/
-#define BINARY_TREE_EXPLORE_RIGHT_3( POSITION, COLOR_MAX, IMAGE_TYPE,    \
-  FIRST_NEIBOUR, SECOND_NEIBOUR, SUBSTRACTION, SHIFT )                   \
-{                                                                        \
-  if( distance >= ( ( ( position[ POSITION ] + current_size -            \
-    c[ POSITION ] ) * ( position[ POSITION ] +                           \
-      current_size - c[ POSITION ] ) ) >> SHIFT ) )                      \
-  {                                                                      \
-    if( distance < ( ( ( COLOR_MAX - c[ POSITION ] ) *                   \
-      ( COLOR_MAX - c[ POSITION ] ) ) >> SHIFT ) )                       \
-    {                                                                    \
-      if( distance < ( ( ( position[ POSITION ] +                        \
-        current_size * 2 - c[ POSITION ] ) *                             \
-        ( position[ POSITION ] + current_size * 2 -                      \
-          c[ POSITION ] ) ) >> SHIFT ) )                                 \
-      {                                                                  \
-        /* Check only a part of quadrant */                              \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_corner += 1;                                               \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_3(          \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] + current_size, pass - 1, POSITION ); \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_corner += 2;                                               \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_3(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p );         \
-      }                                                                  \
-    }                                                                    \
-    else /* Cell is on the edge of the space */                          \
-    {                                                                    \
-      if( position[ POSITION ] + current_size * 2 ==                     \
-        COLOR_MAX )                                                      \
-      {                                                                  \
-        /* Check only a part of quadrant */                              \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_corner += 1;                                               \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_3(          \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] + current_size,                       \
-              pass - 1, POSITION );                                      \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_corner += 2;                                               \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_3(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p );         \
-      }                                                                  \
-    }                                                                    \
-  }                                                                      \
-}
-
-/***************************************************************
- * BINARY_TREE_EXPLORE_RIGHT_4( POSITION, COLOR_MAX, IMAGE_TYPE,
- *   FIRST_NEIBOUR, SECOND_NEIBOUR, THIRD_NEIBOUR, SUBSTRACTION,
- *   SHIFT )
- *
- * Statement macro for the four-channel case.  It examines the
- * sibling slot qq = q ^ ( 1 << POSITION ) of `node` and may lower
- * `distance` / replace `found_color`.  It uses these names from the
- * place where it is expanded: distance, found_color, position[],
- * current_size, c[], p, q, node, pass, check_neibours[] and
- * continue_up.  Unlike the macro's own partial / whole logic, no
- * check_corner counter is touched here; three check_neibours[]
- * entries are incremented instead.
- *
- *   POSITION       - index into position[] and c[]; also the bit
- *                    toggled in q to obtain qq
- *   COLOR_MAX      - upper bound used in the edge tests below
- *   IMAGE_TYPE     - pasted into the names of the helpers
- *                    mlib_search_quadrant_part_to_left_<T>_4 and
- *                    mlib_search_quadrant_<T>_4
- *   FIRST_NEIBOUR,
- *   SECOND_NEIBOUR,
- *   THIRD_NEIBOUR  - indices of the three check_neibours[]
- *                    counters that are incremented
- *   SUBSTRACTION   - subtracted from every p[ k ][ color ] value
- *                    before it is passed to FIND_DISTANCE_4
- *   SHIFT          - right shift applied to squared differences
- *                    and forwarded to FIND_DISTANCE_4
- *
- * Nothing happens unless `distance` is >= the shifted square of
- * ( position[ POSITION ] + current_size - c[ POSITION ] ).
- * Otherwise one of two modes is chosen:
- *
- *   partial - counters += 1; if slot qq is not a color cell but
- *             has a child, the child is searched with the
- *             ..._part_to_left_... helper, bounded by
- *             position[ POSITION ] + current_size.
- *   whole   - counters += 2, continue_up = 1; a child in slot qq
- *             is searched with the plain ..._search_quadrant_...
- *             helper.
- *
- * When `distance` is below the shifted square of
- * ( COLOR_MAX - c[ POSITION ] ), partial mode is used if it is
- * also below the shifted square of
- * ( position[ POSITION ] + current_size * 2 - c[ POSITION ] ),
- * whole mode otherwise.  When it is not below, partial mode is
- * used only if position[ POSITION ] + current_size * 2 equals
- * COLOR_MAX, whole mode otherwise.
- *
- * In both modes, if bit qq of node->tag is set, the slot holds a
- * color index: its distance to c[] is computed with
- * FIND_DISTANCE_4 and kept only if strictly smaller than
- * `distance`.
- ***************************************************************/
-#define BINARY_TREE_EXPLORE_RIGHT_4( POSITION, COLOR_MAX, IMAGE_TYPE,    \
-  FIRST_NEIBOUR, SECOND_NEIBOUR, THIRD_NEIBOUR, SUBSTRACTION, SHIFT )    \
-{                                                                        \
-  if( distance >= ( ( ( position[ POSITION ] + current_size -            \
-    c[ POSITION ] ) * ( position[ POSITION ] +                           \
-      current_size - c[ POSITION ] ) ) >> SHIFT ) )                      \
-  {                                                                      \
-    if( distance < ( ( ( COLOR_MAX - c[ POSITION ] ) *                   \
-      ( COLOR_MAX - c[ POSITION ] ) ) >> SHIFT ) )                       \
-    {                                                                    \
-      if( distance < ( ( ( position[ POSITION ] +                        \
-        current_size * 2 - c[ POSITION ] ) *                             \
-        ( position[ POSITION ] + current_size * 2 -                      \
-          c[ POSITION ] ) ) >> SHIFT ) )                                 \
-      {                                                                  \
-        /* Check only a part of quadrant */                              \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 1;                            \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_4(          \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] + current_size, pass - 1, POSITION ); \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 2;                            \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_4(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \
-      }                                                                  \
-    }                                                                    \
-    else /* Cell is on the edge of the space */                          \
-    {                                                                    \
-      if( position[ POSITION ] + current_size * 2 ==                     \
-        COLOR_MAX )                                                      \
-      {                                                                  \
-        /* Check only a part of quadrant */                              \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 1;                            \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_4(          \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] + current_size,                       \
-              pass - 1, POSITION );                                      \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 2;                            \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_4(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \
-      }                                                                  \
-    }                                                                    \
-  }                                                                      \
-}
-
-/***************************************************************
- * BINARY_TREE_SEARCH_LEFT( POSITION, SHIFT )
- *
- * Statement macro: expands to a braced block that may set the
- * `continue_up` flag of the enclosing scope to 1.  It uses these
- * names from the place where it is expanded: distance, position[],
- * c[] and continue_up.
- *
- *   POSITION - index used for both position[] and c[]
- *   SHIFT    - right shift applied to the squared difference
- *              before it is compared with `distance`
- *
- * The flag is set when `distance` is strictly greater than the
- * shifted square of ( position[ POSITION ] - c[ POSITION ] ) and
- * position[ POSITION ] is non-zero.  Note the strict comparison
- * here, whereas BINARY_TREE_SEARCH_RIGHT uses >=.
- *
- * NOTE(review): presumably the non-zero test means "the cell does
- * not start at the lower edge of the color space" - confirm
- * against the caller.
- ***************************************************************/
-#define BINARY_TREE_SEARCH_LEFT( POSITION, SHIFT )                \
-{                                                                 \
-  if( ( distance > ( ( ( position[ POSITION ] - c[ POSITION ] ) * \
-    ( position[ POSITION ] - c[ POSITION ] ) ) >> SHIFT ) )  &&   \
-    position[ POSITION ] )                                        \
-    continue_up = 1;                                              \
-}
-
-/***************************************************************/
-#define BINARY_TREE_EXPLORE_LEFT_3( POSITION, IMAGE_TYPE,                \
-  FIRST_NEIBOUR, SECOND_NEIBOUR, SUBSTRACTION, SHIFT )                   \
-{                                                                        \
-  if( distance >                                                         \
-    ( ( ( c[ POSITION ] - position[ POSITION ] ) *                       \
-    ( c[ POSITION ] - position[ POSITION ] ) ) >> SHIFT ) )              \
-  {                                                                      \
-    if( distance <= ( ( c[ POSITION ] * c[ POSITION ] ) >> SHIFT ) )     \
-    {                                                                    \
-      if( distance <= ( ( ( c[ POSITION ] + current_size -               \
-        position[ POSITION ] ) *                                         \
-        ( c[ POSITION ] + current_size -                                 \
-          position[ POSITION ] ) ) >> SHIFT ) )                          \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_corner += 1;                                               \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_3(         \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] - current_size, pass - 1, POSITION ); \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_corner += 2;                                               \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_3(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p );         \
-      }                                                                  \
-    }                                                                    \
-    else                                                                 \
-    {                                                                    \
-      if( !( position[ POSITION ] - current_size ) )                     \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_corner += 1;                                               \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_3(         \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] - current_size, pass - 1, POSITION ); \
-      }                                                                  \
-      else                                                               \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_corner += 2;                                               \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_3(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p );         \
-      }                                                                  \
-    }                                                                    \
-  }                                                                      \
-}
-
-/***************************************************************
- * BINARY_TREE_EXPLORE_LEFT_4( POSITION, IMAGE_TYPE, FIRST_NEIBOUR,
- *   SECOND_NEIBOUR, THIRD_NEIBOUR, SUBSTRACTION, SHIFT )
- *
- * Statement-block macro of the 4-channel search. It examines the
- * cell qq = q ^ ( 1 << POSITION ) of the current node, i.e. the
- * cell whose number differs from q only in bit POSITION.
- *
- * The macro is not self-contained. The expansion site has to
- * provide: distance, found_color, c, p, position, current_size,
- * q, node, pass, check_neibours and continue_up.
- *
- * Parameters:
- *   POSITION      - index used for c[ ] and position[ ], and the
- *                   bit that is flipped in q
- *   IMAGE_TYPE    - pasted into the callee names
- *                   mlib_search_quadrant_<IMAGE_TYPE>_4 and
- *                   mlib_search_quadrant_part_to_right_<IMAGE_TYPE>_4
- *   FIRST_NEIBOUR, SECOND_NEIBOUR, THIRD_NEIBOUR
- *                 - indices of the check_neibours[ ] counters
- *                   that are incremented
- *   SUBSTRACTION  - subtracted from every palette entry
- *                   p[ k ][ index ] before it is handed to
- *                   FIND_DISTANCE_4
- *   SHIFT         - right shift applied to the squared values
- *                   compared with distance; also forwarded as the
- *                   last argument of FIND_DISTANCE_4
- *
- * Flow:
- *   Nothing happens unless distance is greater than
- *   ( c[ POSITION ] - position[ POSITION ] )^2 >> SHIFT.
- *
- *   "Partial" handling (counters += 1, the child is passed to the
- *   ..._part_to_right_... function together with
- *   position[ POSITION ] - current_size, pass - 1 and POSITION)
- *   is chosen when either
- *     distance <= c[ POSITION ]^2 >> SHIFT  and
- *     distance <= ( c[ POSITION ] + current_size -
- *                   position[ POSITION ] )^2 >> SHIFT,
- *   or
- *     distance >  c[ POSITION ]^2 >> SHIFT  and
- *     position[ POSITION ] equals current_size.
- *
- *   Otherwise "whole" handling is used (counters += 2,
- *   continue_up = 1, the child is passed to
- *   mlib_search_quadrant_<IMAGE_TYPE>_4).
- *
- *   In both cases, if bit qq of node->tag is set the cell holds a
- *   palette index in node->contents.index[ qq ]; its distance is
- *   computed and found_color / distance are replaced when it is
- *   smaller. If the bit is clear and node->contents.quadrants[ qq ]
- *   is non-null, the selected search function is called and its
- *   result is stored in distance (found_color is passed by address).
- ***************************************************************/
-#define BINARY_TREE_EXPLORE_LEFT_4( POSITION, IMAGE_TYPE,                \
-  FIRST_NEIBOUR, SECOND_NEIBOUR, THIRD_NEIBOUR, SUBSTRACTION, SHIFT )    \
-{                                                                        \
-  if( distance >                                                         \
-    ( ( ( c[ POSITION ] - position[ POSITION ] ) *                       \
-    ( c[ POSITION ] - position[ POSITION ] ) ) >> SHIFT ) )              \
-  {                                                                      \
-    if( distance <= ( ( c[ POSITION ] * c[ POSITION ] ) >> SHIFT ) )     \
-    {                                                                    \
-      if( distance <= ( ( ( c[ POSITION ] + current_size -               \
-        position[ POSITION ] ) *                                         \
-        ( c[ POSITION ] + current_size -                                 \
-          position[ POSITION ] ) ) >> SHIFT ) )                          \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 1;                            \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_4(         \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] - current_size, pass - 1, POSITION ); \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 2;                            \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_4(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \
-      }                                                                  \
-    }                                                                    \
-    else /* distance > ( c[ POSITION ]^2 >> SHIFT ) */                   \
-    {                                                                    \
-      if( !( position[ POSITION ] - current_size ) )                     \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 1;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 1;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 1;                            \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Only a part of quadrant needs checking */                   \
-          distance =                                                     \
-            mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_4(         \
-              node->contents.quadrants[ qq ],                            \
-              distance, &found_color, c, p,                              \
-              position[ POSITION ] - current_size, pass - 1, POSITION ); \
-      }                                                                  \
-      else /* Check whole quadrant */                                    \
-      {                                                                  \
-        mlib_s32 qq = q ^ ( 1 << POSITION );                             \
-                                                                         \
-        check_neibours[ FIRST_NEIBOUR ] += 2;                            \
-        check_neibours[ SECOND_NEIBOUR ] += 2;                           \
-        check_neibours[ THIRD_NEIBOUR ] += 2;                            \
-        continue_up = 1;                                                 \
-        if( node->tag & ( 1 << qq ) )                                    \
-        {                                                                \
-          /* Here is another color cell.                                 \
-             Check the distance */                                       \
-          mlib_s32 new_found_color =                                     \
-            node->contents.index[ qq ];                                  \
-          mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],                \
-            p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ],            \
-            p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ],            \
-            p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ],            \
-            p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT );           \
-                                                                         \
-          if( newdistance < distance )                                   \
-          {                                                              \
-            found_color = new_found_color;                               \
-            distance = newdistance;                                      \
-          }                                                              \
-        }                                                                \
-        else if( node->contents.quadrants[ qq ] )                        \
-          /* Here is a full node. Just explore it */                     \
-          distance = mlib_search_quadrant_##IMAGE_TYPE##_4(              \
-            node->contents.quadrants[ qq ],                              \
-            distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \
-      }                                                                  \
-    }                                                                    \
-  }                                                                      \
-}
-
-/***************************************************************
- * CHECK_QUADRANT_U8_3( qq )
- *
- * Statement-block macro: tests cell qq of the current node
- * against the best match found so far (3-channel, U8 variant).
- *
- * Relies on these names at the expansion site: node, c, p,
- * distance and found_color.
- *
- *   - Bit qq of node->tag set: node->contents.index[ qq ] is
- *     taken as a palette index; its distance to c[ 0..2 ] is
- *     computed with FIND_DISTANCE_3 (last argument 0, the slot
- *     where the other explore macros pass SHIFT) and, when
- *     strictly smaller than distance, replaces found_color and
- *     distance.
- *   - Otherwise, if node->contents.quadrants[ qq ] is non-null,
- *     mlib_search_quadrant_U8_3 is called on that child and its
- *     result is stored in distance.
- *   - Otherwise nothing is done.
- *
- * NOTE(review): qq is expanded unparenthesized in 1 << qq and is
- * evaluated several times; pass a plain variable.
- ***************************************************************/
-#define CHECK_QUADRANT_U8_3( qq )                               \
-{                                                               \
-  if( node->tag & ( 1 << qq ) )                                 \
-  {                                                             \
-    /* Here is another color cell. Check the distance */        \
-    mlib_s32 new_found_color = node->contents.index[ qq ];      \
-    mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ],             \
-      p[ 0 ][ new_found_color ], c[ 1 ],                        \
-      p[ 1 ][ new_found_color ], c[ 2 ],                        \
-      p[ 2 ][ new_found_color ], 0 );                           \
-                                                                \
-    if( newdistance < distance )                                \
-    {                                                           \
-      found_color = new_found_color;                            \
-      distance = newdistance;                                   \
-    }                                                           \
-  }                                                             \
-  else if( node->contents.quadrants[ qq ] )                     \
-    /* Here is a full node. Just explore it all */              \
-    distance = mlib_search_quadrant_U8_3(                       \
-      node->contents.quadrants[ qq ], distance, &found_color,   \
-      c[ 0 ], c[ 1 ], c[ 2 ], p );                              \
-/* Else there is just an empty cell */                          \
-}
-
-/***************************************************************
- * CHECK_QUADRANT_S16_3( qq )
- *
- * Statement-block macro: tests cell qq of the current node
- * against the best match found so far (3-channel, S16 variant).
- *
- * Relies on these names at the expansion site: node, c, p,
- * distance and found_color.
- *
- *   - Bit qq of node->tag set: node->contents.index[ qq ] is
- *     taken as a palette index. MLIB_S16_MIN is subtracted from
- *     each of the three palette components and the results are
- *     kept in mlib_u32 temporaries; FIND_DISTANCE_3 is then
- *     invoked with 2 as its last argument. A strictly smaller
- *     result replaces found_color and distance.
- *   - Otherwise, if node->contents.quadrants[ qq ] is non-null,
- *     mlib_search_quadrant_S16_3 is called on that child and its
- *     result is stored in distance.
- *   - Otherwise nothing is done.
- *
- * NOTE(review): qq is expanded unparenthesized in 1 << qq and is
- * evaluated several times; pass a plain variable.
- ***************************************************************/
-#define CHECK_QUADRANT_S16_3( qq )                              \
-{                                                               \
-  if( node->tag & ( 1 << qq ) )                                 \
-  {                                                             \
-    /* Here is another color cell. Check the distance */        \
-    mlib_s32 new_found_color = node->contents.index[ qq ];      \
-    mlib_u32 palc0, palc1, palc2, newdistance;                  \
-                                                                \
-    palc0 = p[ 0 ][ new_found_color ] - MLIB_S16_MIN;           \
-    palc1 = p[ 1 ][ new_found_color ] - MLIB_S16_MIN;           \
-    palc2 = p[ 2 ][ new_found_color ] - MLIB_S16_MIN;           \
-                                                                \
-    newdistance = FIND_DISTANCE_3( c[ 0 ], palc0,               \
-      c[ 1 ], palc1,                                            \
-      c[ 2 ], palc2, 2 );                                       \
-                                                                \
-    if( newdistance < distance )                                \
-    {                                                           \
-      found_color = new_found_color;                            \
-      distance = newdistance;                                   \
-    }                                                           \
-  }                                                             \
-  else if( node->contents.quadrants[ qq ] )                     \
-    /* Here is a full node. Just explore it all */              \
-    distance = mlib_search_quadrant_S16_3(                      \
-      node->contents.quadrants[ qq ], distance, &found_color,   \
-      c[ 0 ], c[ 1 ], c[ 2 ], p );                              \
-/* Else there is just an empty cell */                          \
-}
-
-/***************************************************************/
-#define BINARY_TREE_SEARCH_3( SOURCE_IMAGE, POINTER_TYPE, BITS,              \
-  COLOR_MAX, SUBTRACTION, POINTER_SHIFT, STEP, SHIFT )                       \
-{                                                                            \
-  const POINTER_TYPE *channels[ 3 ], *p[ 3 ];                                \
-  mlib_u32 c[ 3 ];                                                           \
-  mlib_s32 j;                                                                \
-                                                                             \
-  p[ 0 ] = s->lut[ 0 ];                                                      \
-  p[ 1 ] = s->lut[ 1 ];                                                      \
-  p[ 2 ] = s->lut[ 2 ];                                                      \
-  channels[ 0 ] = src + POINTER_SHIFT;                                       \
-  channels[ 1 ] = src + 1 + POINTER_SHIFT;                                   \
-  channels[ 2 ] = src + 2 + POINTER_SHIFT;                                   \
-                                                                             \
-  for( j = 0; j < length; j++ )                                              \
-  {                                                                          \
-    mlib_s32 pass = BITS - 1;                                                \
-    mlib_u32 position[ 3 ] = { 0, 0, 0 };                                    \
-    mlib_s32 we_found_it = 0;                                                \
-    struct lut_node_3 *node = s->table;                                      \
-    /* Stack pointer pointers to the first free element of stack. */         \
-    /* The node we are in is in the `node' */                                \
-    struct                                                                   \
-    {                                                                        \
-      struct lut_node_3 *node;                                               \
-      mlib_s32 q;                                                            \
-    } stack[ BITS ];                                                         \
-    mlib_s32 stack_pointer = 0;                                              \
-                                                                             \
-    c[ 0 ] = *channels[ 0 ] - SUBTRACTION;                                   \
-    c[ 1 ] = *channels[ 1 ] - SUBTRACTION;                                   \
-    c[ 2 ] = *channels[ 2 ] - SUBTRACTION;                                   \
-                                                                             \
-    do                                                                       \
-    {                                                                        \
-      mlib_s32 q;                                                            \
-      mlib_u32 current_size = 1 << pass;                                     \
-                                                                             \
-      q = ( ( c[ 0 ] >> pass ) & 1 ) |                                       \
-        ( ( ( c[ 1 ] << 1 ) >> pass ) & 2 ) |                                \
-        ( ( ( c[ 2 ] << 2 ) >> pass ) & 4 );                                 \
-                                                                             \
-      position[ 0 ] |= c[ 0 ] & current_size;                                \
-      position[ 1 ] |= c[ 1 ] & current_size;                                \
-      position[ 2 ] |= c[ 2 ] & current_size;                                \
-                                                                             \
-      if( node->tag & ( 1 << q ) )                                           \
-      {                                                                      \
-        /*                                                                   \
-          Here is a cell with one color. We need to be sure it's             \
-          the one that is the closest to our color                           \
-        */                                                                   \
-        mlib_s32 palindex = node->contents.index[ q ];                       \
-        mlib_u32 palc[ 3 ];                                                  \
-        mlib_s32 identical;                                                  \
-                                                                             \
-        palc[ 0 ] = p[ 0 ][ palindex ] - SUBTRACTION;                        \
-        palc[ 1 ] = p[ 1 ][ palindex ] - SUBTRACTION;                        \
-        palc[ 2 ] = p[ 2 ][ palindex ] - SUBTRACTION;                        \
-                                                                             \
-        identical = ( palc[ 0 ] - c[ 0 ] ) | ( palc[ 1 ] - c[ 1 ] ) |        \
-          ( palc[ 2 ] - c[ 2 ] );                                            \
-                                                                             \
-        if( !identical || BITS - pass == bits )                              \
-        {                                                                    \
-          /* Oh, here it is :) */                                            \
-          dst[ j ] = palindex + s->offset;                                   \
-          we_found_it = 1;                                                   \
-        }                                                                    \
-        else                                                                 \
-        {                                                                    \
-          mlib_u32 distance;                                                 \
-          /* First index is the channel, second is the number of the         \
-             side */                                                         \
-          mlib_s32 found_color;                                              \
-          mlib_s32 continue_up;                                              \
-                                                                             \
-          distance = FIND_DISTANCE_3( c[ 0 ], palc[ 0 ],                     \
-            c[ 1 ], palc[ 1 ], c[ 2 ], palc[ 2 ], SHIFT );                   \
-          found_color = palindex;                                            \
-                                                                             \
-          do                                                                 \
-          {                                                                  \
-            mlib_s32 check_corner;                                           \
-                                                                             \
-            /*                                                               \
-              Neibours are enumerated in a cicle:                            \
-              0 - between quadrants 0 and 1,                                 \
-              1 - between quadrants 1 and 2 and                              \
-              2 - between quadrants 2 and 0                                  \
-            */                                                               \
-            mlib_s32 check_neibours[ 3 ];                                    \
-                                                                             \
-            /*                                                               \
-              Others are three two neibour quadrants                         \
-                                                                             \
-              Side number is [ <number of the coordinate >][ <the bit        \
-              in the quadrant number of the corner, corresponding to         \
-              this coordinate> ], e.g. 2 is 0..010b, so the sides it has     \
-              near are:                                                      \
-              [ 0 (coordinate number) ][ 0 (bit 0 in the number) ]           \
-              [ 1 (coordinate number) ][ 1 (bit 1 in the number) ]           \
-                                                                             \
-              Now we can look in the three nearest quadrants. Do             \
-              we really need it ? Check it.                                  \
-            */                                                               \
-                                                                             \
-            check_corner = check_neibours[ 0 ] = check_neibours[ 1 ] =       \
-              check_neibours[ 2 ] = 0;                                       \
-            continue_up = 0;                                                 \
-                                                                             \
-            if( q & 1 )                                                      \
-            {                                                                \
-              BINARY_TREE_EXPLORE_LEFT_3( 0, SOURCE_IMAGE, 2, 0,             \
-                SUBTRACTION, SHIFT );                                        \
-            }                                                                \
-            else                                                             \
-            {                                                                \
-              BINARY_TREE_EXPLORE_RIGHT_3( 0, COLOR_MAX, SOURCE_IMAGE, 2, 0, \
-                SUBTRACTION, SHIFT );                                        \
-            }                                                                \
-                                                                             \
-            if( q & 2 )                                                      \
-            {                                                                \
-              BINARY_TREE_EXPLORE_LEFT_3( 1, SOURCE_IMAGE, 0, 1,             \
-                SUBTRACTION, SHIFT );                                        \
-            }                                                                \
-            else                                                             \
-            {                                                                \
-              BINARY_TREE_EXPLORE_RIGHT_3( 1, COLOR_MAX, SOURCE_IMAGE, 0, 1, \
-                SUBTRACTION, SHIFT );                                        \
-            }                                                                \
-                                                                             \
-            if( q & 4 )                                                      \
-            {                                                                \
-              BINARY_TREE_EXPLORE_LEFT_3( 2, SOURCE_IMAGE, 1, 2,             \
-                SUBTRACTION, SHIFT );                                        \
-            }                                                                \
-            else                                                             \
-            {                                                                \
-              BINARY_TREE_EXPLORE_RIGHT_3( 2, COLOR_MAX, SOURCE_IMAGE, 1, 2, \
-                SUBTRACTION, SHIFT );                                        \
-            }                                                                \
-                                                                             \
-            if( check_neibours[ 0 ] >= 2 )                                   \
-            {                                                                \
-              mlib_s32 qq = q ^ 3;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                       \
-            }                                                                \
-                                                                             \
-            if( check_neibours[ 1 ] >= 2 )                                   \
-            {                                                                \
-              mlib_s32 qq = q ^ 6;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                       \
-            }                                                                \
-                                                                             \
-            if( check_neibours[ 2 ] >= 2 )                                   \
-            {                                                                \
-              mlib_s32 qq = q ^ 5;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                       \
-            }                                                                \
-                                                                             \
-            if( check_corner >= 3 )                                          \
-            {                                                                \
-              mlib_s32 qq = q ^ 7;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                       \
-            }                                                                \
-                                                                             \
-            if( q & 1 )                                                      \
-            {                                                                \
-              BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT );               \
-            }                                                                \
-            else                                                             \
-            {                                                                \
-              BINARY_TREE_SEARCH_LEFT( 0, SHIFT );                           \
-            }                                                                \
-                                                                             \
-            if( q & 2 )                                                      \
-            {                                                                \
-              BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT );               \
-            }                                                                \
-            else                                                             \
-            {                                                                \
-              BINARY_TREE_SEARCH_LEFT( 1, SHIFT );                           \
-            }                                                                \
-                                                                             \
-            if( q & 4 )                                                      \
-            {                                                                \
-              BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT );               \
-            }                                                                \
-            else                                                             \
-            {                                                                \
-              BINARY_TREE_SEARCH_LEFT( 2, SHIFT );                           \
-            }                                                                \
-                                                                             \
-            position[ 0 ] &= ~( c[ 0 ] & current_size );                     \
-            position[ 1 ] &= ~( c[ 1 ] & current_size );                     \
-            position[ 2 ] &= ~( c[ 2 ] & current_size );                     \
-                                                                             \
-            current_size <<= 1;                                              \
-                                                                             \
-            pass++;                                                          \
-                                                                             \
-            stack_pointer--;                                                 \
-            q = stack[ stack_pointer ].q;                                    \
-            node = stack[ stack_pointer ].node;                              \
-          } while( continue_up );                                            \
-                                                                             \
-          dst[ j ] = found_color + s->offset;                                \
-                                                                             \
-          we_found_it = 1;                                                   \
-        }                                                                    \
-      }                                                                      \
-      else if( node->contents.quadrants[ q ] )                               \
-      {                                                                      \
-        /* Descend one level */                                              \
-        stack[ stack_pointer ].node = node;                                  \
-        stack[ stack_pointer++ ].q = q;                                      \
-        node = node->contents.quadrants[ q ];                                \
-      }                                                                      \
-      else                                                                   \
-      {                                                                      \
-        /* Found the empty quadrant. Look around */                          \
-        mlib_u32 distance = MLIB_U32_MAX;                                    \
-        mlib_s32 found_color;                                                \
-        mlib_s32 continue_up;                                                \
-                                                                             \
-        /*                                                                   \
-          As we had come to this level, it is warranted that there           \
-          are other points on this level near the empty quadrant             \
-        */                                                                   \
-        do                                                                   \
-        {                                                                    \
-          mlib_s32 check_corner;                                             \
-          mlib_s32 check_neibours[ 3 ];                                      \
-                                                                             \
-          check_corner = check_neibours[ 0 ] = check_neibours[ 1 ] =         \
-            check_neibours[ 2 ] = 0;                                         \
-          continue_up = 0;                                                   \
-                                                                             \
-          if( q & 1 )                                                        \
-          {                                                                  \
-            BINARY_TREE_EXPLORE_LEFT_3( 0, SOURCE_IMAGE, 2, 0,               \
-              SUBTRACTION, SHIFT );                                          \
-          }                                                                  \
-          else                                                               \
-          {                                                                  \
-            BINARY_TREE_EXPLORE_RIGHT_3( 0, COLOR_MAX, SOURCE_IMAGE, 2, 0,   \
-              SUBTRACTION, SHIFT );                                          \
-          }                                                                  \
-                                                                             \
-          if( q & 2 )                                                        \
-          {                                                                  \
-            BINARY_TREE_EXPLORE_LEFT_3( 1, SOURCE_IMAGE, 0, 1,               \
-              SUBTRACTION, SHIFT );                                          \
-          }                                                                  \
-          else                                                               \
-          {                                                                  \
-            BINARY_TREE_EXPLORE_RIGHT_3( 1, COLOR_MAX, SOURCE_IMAGE, 0, 1,   \
-              SUBTRACTION, SHIFT );                                          \
-          }                                                                  \
-                                                                             \
-          if( q & 4 )                                                        \
-          {                                                                  \
-            BINARY_TREE_EXPLORE_LEFT_3( 2, SOURCE_IMAGE, 1, 2,               \
-              SUBTRACTION, SHIFT );                                          \
-          }                                                                  \
-          else                                                               \
-          {                                                                  \
-            BINARY_TREE_EXPLORE_RIGHT_3( 2, COLOR_MAX, SOURCE_IMAGE, 1, 2,   \
-              SUBTRACTION, SHIFT );                                          \
-          }                                                                  \
-                                                                             \
-          if( check_neibours[ 0 ] >= 2 )                                     \
-          {                                                                  \
-            mlib_s32 qq = q ^ 3;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                         \
-          }                                                                  \
-                                                                             \
-          if( check_neibours[ 1 ] >= 2 )                                     \
-          {                                                                  \
-            mlib_s32 qq = q ^ 6;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                         \
-          }                                                                  \
-                                                                             \
-          if( check_neibours[ 2 ] >= 2 )                                     \
-          {                                                                  \
-            mlib_s32 qq = q ^ 5;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                         \
-          }                                                                  \
-                                                                             \
-          if( check_corner >= 3 )                                            \
-          {                                                                  \
-            mlib_s32 qq = q ^ 7;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq );                         \
-          }                                                                  \
-                                                                             \
-          if( q & 1 )                                                        \
-          {                                                                  \
-            BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT );                 \
-          }                                                                  \
-          else                                                               \
-          {                                                                  \
-            BINARY_TREE_SEARCH_LEFT( 0, SHIFT );                             \
-          }                                                                  \
-                                                                             \
-          if( q & 2 )                                                        \
-          {                                                                  \
-            BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT );                 \
-          }                                                                  \
-          else                                                               \
-          {                                                                  \
-            BINARY_TREE_SEARCH_LEFT( 1, SHIFT );                             \
-          }                                                                  \
-                                                                             \
-          if( q & 4 )                                                        \
-          {                                                                  \
-            BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT );                 \
-          }                                                                  \
-          else                                                               \
-          {                                                                  \
-            BINARY_TREE_SEARCH_LEFT( 2, SHIFT );                             \
-          }                                                                  \
-                                                                             \
-          position[ 0 ] &= ~( c[ 0 ] & current_size );                       \
-          position[ 1 ] &= ~( c[ 1 ] & current_size );                       \
-          position[ 2 ] &= ~( c[ 2 ] & current_size );                       \
-                                                                             \
-          current_size <<= 1;                                                \
-                                                                             \
-          pass++;                                                            \
-                                                                             \
-          stack_pointer--;                                                   \
-          q = stack[ stack_pointer ].q;                                      \
-          node = stack[ stack_pointer ].node;                                \
-        } while( continue_up );                                              \
-                                                                             \
-        dst[ j ] = found_color + s->offset;                                  \
-        we_found_it = 1;                                                     \
-      }                                                                      \
-                                                                             \
-      pass--;                                                                \
-                                                                             \
-    } while( !we_found_it );                                                 \
-                                                                             \
-    channels[ 0 ] += STEP;                                                   \
-    channels[ 1 ] += STEP;                                                   \
-    channels[ 2 ] += STEP;                                                   \
-  }                                                                          \
-}
-
-/***************************************************************
- * CHECK_QUADRANT_U8_4( qq )
- *
- * Statement macro: inspects child number qq of the tree node
- * currently held in `node` and, if that child yields a palette
- * entry closer to the searched colour, updates the running
- * best match.
- *
- * It is not self-contained. The expansion site must provide:
- *   node        - pointer to the current tree node (its tag,
- *                 contents.index[] and contents.quadrants[]
- *                 members are read)
- *   c[ 0..3 ]   - the four channel values being searched for
- *   p[ 0..3 ]   - per-channel palette tables, indexed by
- *                 palette index
- *   distance    - best distance found so far (read and written)
- *   found_color - palette index of the best match (written)
- *
- * Three cases are handled:
- *   1. Bit qq of node->tag is set: contents.index[ qq ] is used
- *      as a palette index, its distance to c[] is computed with
- *      FIND_DISTANCE_4 and the best match is replaced only when
- *      the new distance is strictly smaller.
- *   2. Otherwise, contents.quadrants[ qq ] is non-null: the
- *      search is delegated to mlib_search_quadrant_U8_4, which
- *      receives the current distance and &found_color and whose
- *      return value becomes the new distance.
- *   3. Otherwise nothing is done.
- *
- * NOTE(review): the trailing 0 passed to FIND_DISTANCE_4 sits in
- * the position where BINARY_TREE_SEARCH_4 passes its SHIFT
- * parameter, so it is presumably a shift amount - confirm
- * against the FIND_DISTANCE_4 definition.
- *
- * qq is pasted unparenthesised and expanded several times, so
- * pass a plain variable or constant without side effects.
- ***************************************************************/
-#define CHECK_QUADRANT_U8_4( qq )                               \
-{                                                               \
-  if( node->tag & ( 1 << qq ) )                                 \
-  {                                                             \
-    /* Here is another color cell. Check the distance */        \
-    mlib_s32 new_found_color = node->contents.index[ qq ];      \
-    mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ],             \
-      p[ 0 ][ new_found_color ], c[ 1 ],                        \
-      p[ 1 ][ new_found_color ], c[ 2 ],                        \
-      p[ 2 ][ new_found_color ], c[ 3 ],                        \
-      p[ 3 ][ new_found_color ], 0 );                           \
-                                                                \
-    if( newdistance < distance )                                \
-    {                                                           \
-      found_color = new_found_color;                            \
-      distance = newdistance;                                   \
-    }                                                           \
-  }                                                             \
-  else if( node->contents.quadrants[ qq ] )                     \
-    /* Here is a full node. Just explore it all */              \
-    distance = mlib_search_quadrant_U8_4(                       \
-      node->contents.quadrants[ qq ], distance, &found_color,   \
-      c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p );                      \
-/* Else there is just an empty cell */                          \
-}
-
-/***************************************************************
- * CHECK_QUADRANT_S16_4( qq )
- *
- * Statement macro, the S16 counterpart of CHECK_QUADRANT_U8_4:
- * inspects child number qq of the tree node held in `node` and
- * updates the running best match if that child yields a closer
- * palette entry.
- *
- * It is not self-contained. The expansion site must provide:
- *   node        - pointer to the current tree node (its tag,
- *                 contents.index[] and contents.quadrants[]
- *                 members are read)
- *   c[ 0..3 ]   - the four channel values being searched for
- *   p[ 0..3 ]   - per-channel palette tables, indexed by
- *                 palette index
- *   distance    - best distance found so far (read and written)
- *   found_color - palette index of the best match (written)
- *
- * Three cases are handled:
- *   1. Bit qq of node->tag is set: contents.index[ qq ] is used
- *      as a palette index. Each palette channel has
- *      MLIB_S16_MIN subtracted and is stored in an mlib_u32
- *      before FIND_DISTANCE_4 compares it with c[]; the best
- *      match is replaced only when the new distance is strictly
- *      smaller.
- *   2. Otherwise, contents.quadrants[ qq ] is non-null: the
- *      search is delegated to mlib_search_quadrant_S16_4, which
- *      receives the current distance and &found_color and whose
- *      return value becomes the new distance. The raw table p
- *      is passed through, not the rebased values.
- *   3. Otherwise nothing is done.
- *
- * NOTE(review): c[] is presumably already rebased by the same
- * MLIB_S16_MIN offset at the expansion site - confirm against
- * the caller. The trailing 2 passed to FIND_DISTANCE_4 sits in
- * the position where BINARY_TREE_SEARCH_4 passes its SHIFT
- * parameter, so it is presumably a shift amount - confirm
- * against the FIND_DISTANCE_4 definition.
- *
- * qq is pasted unparenthesised and expanded several times, so
- * pass a plain variable or constant without side effects.
- ***************************************************************/
-#define CHECK_QUADRANT_S16_4( qq )                              \
-{                                                               \
-  if( node->tag & ( 1 << qq ) )                                 \
-  {                                                             \
-    /* Here is another color cell. Check the distance */        \
-    mlib_s32 new_found_color = node->contents.index[ qq ];      \
-    mlib_u32 palc0, palc1, palc2, palc3, newdistance;           \
-                                                                \
-    palc0 = p[ 0 ][ new_found_color ] - MLIB_S16_MIN;           \
-    palc1 = p[ 1 ][ new_found_color ] - MLIB_S16_MIN;           \
-    palc2 = p[ 2 ][ new_found_color ] - MLIB_S16_MIN;           \
-    palc3 = p[ 3 ][ new_found_color ] - MLIB_S16_MIN;           \
-                                                                \
-    newdistance = FIND_DISTANCE_4( c[ 0 ], palc0,               \
-      c[ 1 ], palc1,                                            \
-      c[ 2 ], palc2,                                            \
-      c[ 3 ], palc3, 2 );                                       \
-                                                                \
-    if( newdistance < distance )                                \
-    {                                                           \
-      found_color = new_found_color;                            \
-      distance = newdistance;                                   \
-    }                                                           \
-  }                                                             \
-  else if( node->contents.quadrants[ qq ] )                     \
-    /* Here is a full node. Just explore it all */              \
-    distance = mlib_search_quadrant_S16_4(                      \
-      node->contents.quadrants[ qq ], distance, &found_color,   \
-      c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p );                      \
-/* Else there is just an empty cell */                          \
-}
-
-/***************************************************************/
-#define BINARY_TREE_SEARCH_4( SOURCE_IMAGE, POINTER_TYPE, BITS,               \
-  COLOR_MAX, SUBTRACTION, SHIFT )                                             \
-{                                                                             \
-  const POINTER_TYPE *channels[ 4 ], *p[ 4 ];                                 \
-  mlib_u32 c[ 4 ];                                                            \
-  mlib_s32 j;                                                                 \
-                                                                              \
-  p[ 0 ] = s->lut[ 0 ];                                                       \
-  p[ 1 ] = s->lut[ 1 ];                                                       \
-  p[ 2 ] = s->lut[ 2 ];                                                       \
-  p[ 3 ] = s->lut[ 3 ];                                                       \
-  channels[ 0 ] = src;                                                        \
-  channels[ 1 ] = src + 1;                                                    \
-  channels[ 2 ] = src + 2;                                                    \
-  channels[ 3 ] = src + 3;                                                    \
-                                                                              \
-  for( j = 0; j < length; j++ )                                               \
-  {                                                                           \
-    mlib_s32 pass = BITS - 1;                                                 \
-    mlib_u32 position[ 4 ] = { 0, 0, 0, 0 };                                  \
-    mlib_s32 we_found_it = 0;                                                 \
-    struct lut_node_4 *node = s->table;                                       \
-    /* Stack pointer pointers to the first free element of stack. */          \
-    /* The node we are in is in the `node' */                                 \
-    struct                                                                    \
-    {                                                                         \
-      struct lut_node_4 *node;                                                \
-      mlib_s32 q;                                                             \
-    } stack[ BITS ];                                                          \
-    mlib_s32 stack_pointer = 0;                                               \
-                                                                              \
-    c[ 0 ] = *channels[ 0 ] - SUBTRACTION;                                    \
-    c[ 1 ] = *channels[ 1 ] - SUBTRACTION;                                    \
-    c[ 2 ] = *channels[ 2 ] - SUBTRACTION;                                    \
-    c[ 3 ] = *channels[ 3 ] - SUBTRACTION;                                    \
-                                                                              \
-    do                                                                        \
-    {                                                                         \
-      mlib_s32 q;                                                             \
-      mlib_u32 current_size = 1 << pass;                                      \
-                                                                              \
-      q = ( ( c[ 0 ] >> pass ) & 1 ) |                                        \
-        ( ( ( c[ 1 ] << 1 ) >> pass ) & 2 ) |                                 \
-        ( ( ( c[ 2 ] << 2 ) >> pass ) & 4 ) |                                 \
-        ( ( ( c[ 3 ] << 3 ) >> pass ) & 8 );                                  \
-                                                                              \
-      position[ 0 ] |= c[ 0 ] & current_size;                                 \
-      position[ 1 ] |= c[ 1 ] & current_size;                                 \
-      position[ 2 ] |= c[ 2 ] & current_size;                                 \
-      position[ 3 ] |= c[ 3 ] & current_size;                                 \
-                                                                              \
-      if( node->tag & ( 1 << q ) )                                            \
-      {                                                                       \
-        /*                                                                    \
-          Here is a cell with one color. We need to be sure it's              \
-          the one that is the closest to our color                            \
-        */                                                                    \
-        mlib_s32 palindex = node->contents.index[ q ];                        \
-        mlib_u32 palc[ 4 ];                                                   \
-        mlib_s32 identical;                                                   \
-                                                                              \
-        palc[ 0 ] = p[ 0 ][ palindex ] - SUBTRACTION;                         \
-        palc[ 1 ] = p[ 1 ][ palindex ] - SUBTRACTION;                         \
-        palc[ 2 ] = p[ 2 ][ palindex ] - SUBTRACTION;                         \
-        palc[ 3 ] = p[ 3 ][ palindex ] - SUBTRACTION;                         \
-                                                                              \
-        identical = ( palc[ 0 ] - c[ 0 ] ) | ( palc[ 1 ] - c[ 1 ] ) |         \
-          ( palc[ 2 ] - c[ 2 ] ) | ( palc[ 3 ] - c[ 3 ] );                    \
-                                                                              \
-        if( !identical || BITS - pass == bits )                               \
-        {                                                                     \
-          /* Oh, here it is :) */                                             \
-          dst[ j ] = palindex + s->offset;                                    \
-          we_found_it = 1;                                                    \
-        }                                                                     \
-        else                                                                  \
-        {                                                                     \
-          mlib_u32 distance;                                                  \
-          /* First index is the channel, second is the number of the          \
-             side */                                                          \
-          mlib_s32 found_color;                                               \
-          mlib_s32 continue_up;                                               \
-                                                                              \
-          distance = FIND_DISTANCE_4( c[ 0 ], palc[ 0 ],                      \
-            c[ 1 ], palc[ 1 ], c[ 2 ], palc[ 2 ], c[ 3 ], palc[ 3 ], SHIFT ); \
-          found_color = palindex;                                             \
-                                                                              \
-          do                                                                  \
-          {                                                                   \
-            mlib_s32 check_corner;                                            \
-            mlib_s32 check_neibours[ 6 ];                                     \
-            mlib_s32 check_far_neibours[ 4 ];                                 \
-                                                                              \
-            /*                                                                \
-              Check neibours: quadrants that are different by 2 bits          \
-              from the quadrant, that we are in:                              \
-              3 -  0                                                          \
-              5 -  1                                                          \
-              6 -  2                                                          \
-              9 -  3                                                          \
-              10 - 4                                                          \
-              12 - 5                                                          \
-              Far quadrants: different by 3 bits:                             \
-              7  - 0                                                          \
-              11 - 1                                                          \
-              13 - 2                                                          \
-              14 - 3                                                          \
-            */                                                                \
-                                                                              \
-            check_neibours[ 0 ] = check_neibours[ 1 ] =                       \
-              check_neibours[ 2 ] = check_neibours[ 3 ] =                     \
-              check_neibours[ 4 ] = check_neibours[ 5 ] = 0;                  \
-            continue_up = 0;                                                  \
-                                                                              \
-            if( q & 1 )                                                       \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_LEFT_4( 0, SOURCE_IMAGE, 0, 1, 3,           \
-                SUBTRACTION, SHIFT );                                         \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_RIGHT_4( 0, COLOR_MAX, SOURCE_IMAGE,        \
-                0, 1, 3, SUBTRACTION, SHIFT );                                \
-            }                                                                 \
-                                                                              \
-            if( q & 2 )                                                       \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_LEFT_4( 1, SOURCE_IMAGE, 0, 2, 4,           \
-                SUBTRACTION, SHIFT );                                         \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_RIGHT_4( 1, COLOR_MAX, SOURCE_IMAGE,        \
-                0, 2, 4, SUBTRACTION, SHIFT );                                \
-            }                                                                 \
-                                                                              \
-            if( q & 4 )                                                       \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_LEFT_4( 2, SOURCE_IMAGE, 1, 2, 5,           \
-                SUBTRACTION, SHIFT );                                         \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_RIGHT_4( 2, COLOR_MAX, SOURCE_IMAGE,        \
-                1, 2, 5, SUBTRACTION, SHIFT );                                \
-            }                                                                 \
-                                                                              \
-            if( q & 8 )                                                       \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_LEFT_4( 3, SOURCE_IMAGE, 3, 4, 5,           \
-                SUBTRACTION, SHIFT );                                         \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_EXPLORE_RIGHT_4( 3, COLOR_MAX, SOURCE_IMAGE,        \
-                3, 4, 5, SUBTRACTION, SHIFT );                                \
-            }                                                                 \
-                                                                              \
-            check_far_neibours[ 0 ] = check_neibours[ 0 ] +                   \
-              check_neibours[ 1 ] + check_neibours[ 2 ];                      \
-            check_far_neibours[ 1 ] = check_neibours[ 0 ] +                   \
-              check_neibours[ 3 ] + check_neibours[ 4 ];                      \
-            check_far_neibours[ 2 ] = check_neibours[ 1 ] +                   \
-              check_neibours[ 3 ] + check_neibours[ 5 ];                      \
-            check_far_neibours[ 3 ] = check_neibours[ 2 ] +                   \
-              check_neibours[ 4 ] + check_neibours[ 5 ];                      \
-                                                                              \
-            check_corner = check_far_neibours[ 0 ] +                          \
-              check_far_neibours[ 1 ] +                                       \
-              check_far_neibours[ 2 ] +                                       \
-              check_far_neibours[ 3 ];                                        \
-                                                                              \
-            if( check_neibours[ 0 ] >= 2 )                                    \
-            {                                                                 \
-              mlib_s32 qq = q ^ 3;                                            \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_neibours[ 1 ] >= 2 )                                    \
-            {                                                                 \
-              mlib_s32 qq = q ^ 5;                                            \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_neibours[ 2 ] >= 2 )                                    \
-            {                                                                 \
-              mlib_s32 qq = q ^ 6;                                            \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_neibours[ 3 ] >= 2 )                                    \
-            {                                                                 \
-              mlib_s32 qq = q ^ 9;                                            \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_neibours[ 4 ] >= 2 )                                    \
-            {                                                                 \
-              mlib_s32 qq = q ^ 10;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_neibours[ 5 ] >= 2 )                                    \
-            {                                                                 \
-              mlib_s32 qq = q ^ 12;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_far_neibours[ 0 ] >= 3 )                                \
-            {                                                                 \
-              mlib_s32 qq = q ^ 7;                                            \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_far_neibours[ 1 ] >= 3 )                                \
-            {                                                                 \
-              mlib_s32 qq = q ^ 11;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_far_neibours[ 2 ] >= 3 )                                \
-            {                                                                 \
-              mlib_s32 qq = q ^ 13;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_far_neibours[ 3 ] >= 3 )                                \
-            {                                                                 \
-              mlib_s32 qq = q ^ 14;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( check_corner >= 4 )                                           \
-            {                                                                 \
-              mlib_s32 qq = q ^ 15;                                           \
-              CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                        \
-            }                                                                 \
-                                                                              \
-            if( q & 1 )                                                       \
-            {                                                                 \
-              BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT );                \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_SEARCH_LEFT( 0, SHIFT );                            \
-            }                                                                 \
-                                                                              \
-            if( q & 2 )                                                       \
-            {                                                                 \
-              BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT );                \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_SEARCH_LEFT( 1, SHIFT );                            \
-            }                                                                 \
-                                                                              \
-            if( q & 4 )                                                       \
-            {                                                                 \
-              BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT );                \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_SEARCH_LEFT( 2, SHIFT );                            \
-            }                                                                 \
-                                                                              \
-            if( q & 8 )                                                       \
-            {                                                                 \
-              BINARY_TREE_SEARCH_RIGHT( 3, COLOR_MAX, SHIFT );                \
-            }                                                                 \
-            else                                                              \
-            {                                                                 \
-              BINARY_TREE_SEARCH_LEFT( 3, SHIFT );                            \
-            }                                                                 \
-                                                                              \
-            position[ 0 ] &= ~( c[ 0 ] & current_size );                      \
-            position[ 1 ] &= ~( c[ 1 ] & current_size );                      \
-            position[ 2 ] &= ~( c[ 2 ] & current_size );                      \
-            position[ 3 ] &= ~( c[ 3 ] & current_size );                      \
-                                                                              \
-            current_size <<= 1;                                               \
-                                                                              \
-            pass++;                                                           \
-                                                                              \
-            stack_pointer--;                                                  \
-            q = stack[ stack_pointer ].q;                                     \
-            node = stack[ stack_pointer ].node;                               \
-          } while( continue_up );                                             \
-                                                                              \
-          dst[ j ] = found_color + s->offset;                                 \
-          we_found_it = 1;                                                    \
-        }                                                                     \
-      }                                                                       \
-      else if( node->contents.quadrants[ q ] )                                \
-      {                                                                       \
-        /* Descend one level */                                               \
-        stack[ stack_pointer ].node = node;                                   \
-        stack[ stack_pointer++ ].q = q;                                       \
-        node = node->contents.quadrants[ q ];                                 \
-      }                                                                       \
-      else                                                                    \
-      {                                                                       \
-        /* Found the empty quadrant. Look around */                           \
-        mlib_u32 distance = MLIB_U32_MAX;                                     \
-        mlib_s32 found_color;                                                 \
-        mlib_s32 continue_up;                                                 \
-                                                                              \
-        /*                                                                    \
-          As we had come to this level, it is warranted that there            \
-          are other points on this level near the empty quadrant              \
-        */                                                                    \
-        do                                                                    \
-        {                                                                     \
-          mlib_s32 check_corner;                                              \
-          mlib_s32 check_neibours[ 6 ];                                       \
-          mlib_s32 check_far_neibours[ 4 ];                                   \
-                                                                              \
-          /*                                                                  \
-            Check neibours: quadrants that are different by 2 bits            \
-            from the quadrant, that we are in:                                \
-            3 -  0                                                            \
-            5 -  1                                                            \
-            6 -  2                                                            \
-            9 -  3                                                            \
-            10 - 4                                                            \
-            12 - 5                                                            \
-            Far quadrants: different by 3 bits:                               \
-            7  - 0                                                            \
-            11 - 1                                                            \
-            13 - 2                                                            \
-            14 - 3                                                            \
-          */                                                                  \
-                                                                              \
-          check_neibours[ 0 ] = check_neibours[ 1 ] =                         \
-            check_neibours[ 2 ] = check_neibours[ 3 ] =                       \
-            check_neibours[ 4 ] = check_neibours[ 5 ] = 0;                    \
-          continue_up = 0;                                                    \
-                                                                              \
-          if( q & 1 )                                                         \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_LEFT_4( 0, SOURCE_IMAGE, 0, 1, 3,             \
-              SUBTRACTION, SHIFT );                                           \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_RIGHT_4( 0, COLOR_MAX, SOURCE_IMAGE,          \
-              0, 1, 3, SUBTRACTION, SHIFT );                                  \
-          }                                                                   \
-                                                                              \
-          if( q & 2 )                                                         \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_LEFT_4( 1, SOURCE_IMAGE, 0, 2, 4,             \
-              SUBTRACTION, SHIFT );                                           \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_RIGHT_4( 1, COLOR_MAX, SOURCE_IMAGE,          \
-              0, 2, 4, SUBTRACTION, SHIFT );                                  \
-          }                                                                   \
-                                                                              \
-          if( q & 4 )                                                         \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_LEFT_4( 2, SOURCE_IMAGE, 1, 2, 5,             \
-              SUBTRACTION, SHIFT );                                           \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_RIGHT_4( 2, COLOR_MAX, SOURCE_IMAGE,          \
-              1, 2, 5, SUBTRACTION, SHIFT );                                  \
-          }                                                                   \
-                                                                              \
-          if( q & 8 )                                                         \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_LEFT_4( 3, SOURCE_IMAGE, 3, 4, 5,             \
-              SUBTRACTION, SHIFT );                                           \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_EXPLORE_RIGHT_4( 3, COLOR_MAX, SOURCE_IMAGE,          \
-              3, 4, 5, SUBTRACTION, SHIFT );                                  \
-          }                                                                   \
-                                                                              \
-          check_far_neibours[ 0 ] = check_neibours[ 0 ] +                     \
-            check_neibours[ 1 ] + check_neibours[ 2 ];                        \
-          check_far_neibours[ 1 ] = check_neibours[ 0 ] +                     \
-            check_neibours[ 3 ] + check_neibours[ 4 ];                        \
-          check_far_neibours[ 2 ] = check_neibours[ 1 ] +                     \
-            check_neibours[ 3 ] + check_neibours[ 5 ];                        \
-          check_far_neibours[ 3 ] = check_neibours[ 2 ] +                     \
-            check_neibours[ 4 ] + check_neibours[ 5 ];                        \
-                                                                              \
-          check_corner = check_far_neibours[ 0 ] +                            \
-            check_far_neibours[ 1 ] +                                         \
-            check_far_neibours[ 2 ] +                                         \
-            check_far_neibours[ 3 ];                                          \
-                                                                              \
-          if( check_neibours[ 0 ] >= 2 )                                      \
-          {                                                                   \
-            mlib_s32 qq = q ^ 3;                                              \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_neibours[ 1 ] >= 2 )                                      \
-          {                                                                   \
-            mlib_s32 qq = q ^ 5;                                              \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_neibours[ 2 ] >= 2 )                                      \
-          {                                                                   \
-            mlib_s32 qq = q ^ 6;                                              \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_neibours[ 3 ] >= 2 )                                      \
-          {                                                                   \
-            mlib_s32 qq = q ^ 9;                                              \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_neibours[ 4 ] >= 2 )                                      \
-          {                                                                   \
-            mlib_s32 qq = q ^ 10;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_neibours[ 5 ] >= 2 )                                      \
-          {                                                                   \
-            mlib_s32 qq = q ^ 12;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_far_neibours[ 0 ] >= 3 )                                  \
-          {                                                                   \
-            mlib_s32 qq = q ^ 7;                                              \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_far_neibours[ 1 ] >= 3 )                                  \
-          {                                                                   \
-            mlib_s32 qq = q ^ 11;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_far_neibours[ 2 ] >= 3 )                                  \
-          {                                                                   \
-            mlib_s32 qq = q ^ 13;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_far_neibours[ 3 ] >= 3 )                                  \
-          {                                                                   \
-            mlib_s32 qq = q ^ 14;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( check_corner >= 4 )                                             \
-          {                                                                   \
-            mlib_s32 qq = q ^ 15;                                             \
-            CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq );                          \
-          }                                                                   \
-                                                                              \
-          if( q & 1 )                                                         \
-          {                                                                   \
-            BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT );                  \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_SEARCH_LEFT( 0, SHIFT );                              \
-          }                                                                   \
-                                                                              \
-          if( q & 2 )                                                         \
-          {                                                                   \
-            BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT );                  \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_SEARCH_LEFT( 1, SHIFT );                              \
-          }                                                                   \
-                                                                              \
-          if( q & 4 )                                                         \
-          {                                                                   \
-            BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT );                  \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_SEARCH_LEFT( 2, SHIFT );                              \
-          }                                                                   \
-                                                                              \
-          if( q & 8 )                                                         \
-          {                                                                   \
-            BINARY_TREE_SEARCH_RIGHT( 3, COLOR_MAX, SHIFT );                  \
-          }                                                                   \
-          else                                                                \
-          {                                                                   \
-            BINARY_TREE_SEARCH_LEFT( 3, SHIFT );                              \
-          }                                                                   \
-                                                                              \
-          position[ 0 ] &= ~( c[ 0 ] & current_size );                        \
-          position[ 1 ] &= ~( c[ 1 ] & current_size );                        \
-          position[ 2 ] &= ~( c[ 2 ] & current_size );                        \
-          position[ 3 ] &= ~( c[ 3 ] & current_size );                        \
-                                                                              \
-          current_size <<= 1;                                                 \
-                                                                              \
-          pass++;                                                             \
-                                                                              \
-          stack_pointer--;                                                    \
-          q = stack[ stack_pointer ].q;                                       \
-          node = stack[ stack_pointer ].node;                                 \
-        } while( continue_up );                                               \
-                                                                              \
-        dst[ j ] = found_color + s->offset;                                   \
-        we_found_it = 1;                                                      \
-      }                                                                       \
-                                                                              \
-      pass--;                                                                 \
-                                                                              \
-    } while( !we_found_it );                                                  \
-                                                                              \
-    channels[ 0 ] += 4;                                                       \
-    channels[ 1 ] += 4;                                                       \
-    channels[ 2 ] += 4;                                                       \
-    channels[ 3 ] += 4;                                                       \
-  }                                                                           \
-}
-
-/***************************************************************
- * FIND_NEAREST_U8_3_C( SHIFT, STEP )
- *
- * Expands to local declarations followed by a loop that, for each
- * of `length` pixels, finds the lookup-table entry with the
- * smallest squared distance to the pixel's three components and
- * writes the result to dst[ i ].
- *
- *   SHIFT - offset of the first of the three components in a pixel
- *   STEP  - number of src elements between consecutive pixels
- *
- * Names taken from the expanding scope: s (supplies the LUT
- * offset, lutlength and the double-precision LUT), src, dst and
- * length. Because the expansion starts with declarations it can
- * appear only once per scope.
- *
- * The double LUT is read as three consecutive values per entry
- * ( double_lut[ 3 * k + c ] ).
- *
- * The inner loop is pipelined: pass k sums the squared deltas that
- * were loaded for entry k - 1 and, interleaved with that, loads the
- * deltas for entry k. k_min is therefore a 1-based position, and
- * `offset` is the LUT offset minus 1 so that k_min + offset is the
- * 0-based position plus the LUT offset.
- *
- * The last pass ( k == entries ) still loads
- * double_lut[ 3 * entries .. 3 * entries + 2 ]; those values are
- * never scored. NOTE(review): presumably the table holds one spare
- * entry so this read stays in bounds - confirm against the code
- * that allocates it.
- *
- * The minimum is tracked without a branch: dist is truncated to
- * mlib_s32, diff = dist - min_dist, and mask = diff >> 31 is used
- * as an all-ones/zero selector, so min_dist and k_min change only
- * when diff is negative (ties keep the earlier entry).
- * NOTE(review): assumes mlib_s32 is 32 bits wide and that >> on a
- * negative value is an arithmetic shift - confirm for the target
- * compilers.
- ***************************************************************/
-#define FIND_NEAREST_U8_3_C( SHIFT, STEP )                      \
-  mlib_s32 i, k, k_min, min_dist, diff, mask;                   \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;            \
-  mlib_s32 entries = s -> lutlength;                            \
-  mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s );       \
-  mlib_d64 col0, col1, col2;                                    \
-  mlib_d64 dist, len0, len1, len2;                              \
-  /* One output index per pixel. */                             \
-  for ( i = 0; i < length; i++ ) {                              \
-    col0 = src[ STEP * i + SHIFT ];                             \
-    col1 = src[ STEP * i + 1 + SHIFT ];                         \
-    col2 = src[ STEP * i + 2 + SHIFT ];                         \
-    min_dist = MLIB_S32_MAX;                                    \
-    k_min = 1;                                                  \
-    len0 = double_lut[ 0 ] - col0;                              \
-    len1 = double_lut[ 1 ] - col1;                              \
-    len2 = double_lut[ 2 ] - col2;                              \
-    /* Pass k scores entry k - 1 and preloads entry k. */       \
-    for ( k = 1; k <= entries; k++ ) {                          \
-      dist = len0 * len0;                                       \
-      len0 = double_lut[ 3 * k ] - col0;                        \
-      dist += len1 * len1;                                      \
-      len1 = double_lut[ 3 * k + 1 ] - col1;                    \
-      dist += len2 * len2;                                      \
-      len2 = double_lut[ 3 * k + 2 ] - col2;                    \
-      diff = ( mlib_s32 )dist - min_dist;                       \
-      mask = diff >> 31;                                        \
-      min_dist += diff & mask;                                  \
-      k_min += ( k - k_min ) & mask;                            \
-    }                                                           \
-    /* offset is LUT offset - 1: k_min is 1-based. */           \
-    dst[ i ] = k_min + offset;                                  \
-  }
-
-/***************************************************************/
-#define FIND_NEAREST_U8_4_C                                     \
-  mlib_s32 i, k, k_min, min_dist, diff, mask;                   \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;            \
-  mlib_s32 entries = s -> lutlength;                            \
-  mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s );       \
-  mlib_d64 col0, col1, col2, col3;                              \
-  mlib_d64 dist, len0, len1, len2, len3;                        \
-                                                                \
-  for ( i = 0; i < length; i++ ) {                              \
-    col0 = src[ 4 * i ];                                        \
-    col1 = src[ 4 * i + 1 ];                                    \
-    col2 = src[ 4 * i + 2 ];                                    \
-    col3 = src[ 4 * i + 3 ];                                    \
-    min_dist = MLIB_S32_MAX;                                    \
-    k_min = 1;                                                  \
-    len0 = double_lut[ 0 ] - col0;                              \
-    len1 = double_lut[ 1 ] - col1;                              \
-    len2 = double_lut[ 2 ] - col2;                              \
-    len3 = double_lut[ 3 ] - col3;                              \
-                                                                \
-    for ( k = 1; k <= entries; k++ ) {                          \
-      dist = len0 * len0;                                       \
-      len0 =  double_lut[ 4 * k ] - col0;                       \
-      dist += len1 * len1;                                      \
-      len1 = double_lut[ 4 * k + 1 ] - col1;                    \
-      dist += len2 * len2;                                      \
-      len2 =  double_lut[ 4 * k + 2 ] - col2;                   \
-      dist += len3 * len3;                                      \
-      len3 =  double_lut[ 4 * k + 3 ] - col3;                   \
-      diff = ( mlib_s32 )dist - min_dist;                       \
-      mask = diff >> 31;                                        \
-      min_dist += diff & mask;                                  \
-      k_min += ( k - k_min ) & mask;                            \
-    }                                                           \
-                                                                \
-    dst[ i ] = k_min + offset;                                  \
-  }
-
-/***************************************************************/
-#define FSQR_S16_HI(dsrc)                                                   \
-  vis_fpadd32( vis_fmuld8ulx16( vis_read_hi( dsrc ), vis_read_hi( dsrc ) ), \
-    vis_fmuld8sux16( vis_read_hi( dsrc ), vis_read_hi( dsrc ) ) )
-
-/***************************************************************/
-#define FSQR_S16_LO(dsrc)                                                  \
-  vis_fpadd32( vis_fmuld8ulx16( vis_read_lo( dsrc ), vis_read_lo( dsrc) ), \
-    vis_fmuld8sux16( vis_read_lo( dsrc ), vis_read_lo( dsrc ) ) )
-
-/***************************************************************/
-#define FIND_NEAREST_U8_3                                             \
-{                                                                     \
-  mlib_d64 *dpsrc, dsrc, dsrc1, ddist, ddist1, ddist2, ddist3;        \
-  mlib_d64 dcolor, dind, dres, dres1, dpind[1], dpmin[1];             \
-  mlib_d64 done = vis_to_double_dup( 1 ),                             \
-           dmax = vis_to_double_dup( MLIB_S32_MAX );                  \
-  mlib_f32 *lut = ( mlib_f32 * )mlib_ImageGetLutNormalTable( s );     \
-  mlib_f32 fone = vis_to_float( 0x100 );                              \
-  mlib_s32 i, k, mask;                                                \
-  mlib_s32 gsr[1];                                                    \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;                  \
-  mlib_s32 entries = s->lutlength;                                    \
-                                                                      \
-  gsr[0] = vis_read_gsr();                                            \
-  for( i = 0; i <= ( length-2 ); i += 2 )                             \
-  {                                                                   \
-    dpsrc = VIS_ALIGNADDR( src, -1 );                                 \
-    src += 6;                                                         \
-    dsrc = dpsrc[ 0 ];                                                \
-    dsrc1 = dpsrc[ 1 ];                                               \
-    dsrc1 = vis_faligndata( dsrc, dsrc1 );                            \
-    dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );              \
-    VIS_ALIGNADDR( dpsrc, 3 );                                        \
-    dsrc1 = vis_faligndata( dsrc1, dsrc1 );                           \
-    dsrc1 = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );             \
-    dpind[ 0 ] = dind = done;                                         \
-    dpmin[ 0 ] = dmax;                                                \
-    dcolor = vis_fmul8x16al( lut[ 0 ], fone );                        \
-    for( k = 1; k <= entries; k++ )                                   \
-    {                                                                 \
-      ddist1 = vis_fpsub16( dcolor, dsrc );                           \
-      ddist = FSQR_S16_HI( ddist1 );                                  \
-      ddist1 = FSQR_S16_LO( ddist1 );                                 \
-      dres = vis_fpadd32( ddist, ddist1 );                            \
-      ddist3 = vis_fpsub16( dcolor, dsrc1 );                          \
-      ddist2 = FSQR_S16_HI( ddist3 );                                 \
-      ddist3 = FSQR_S16_LO( ddist3 );                                 \
-      dres1 = vis_fpadd32( ddist2, ddist3 );                          \
-      dcolor = vis_fmul8x16al( lut[ k ], fone );                      \
-      dres = vis_freg_pair(                                           \
-        vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ),     \
-        vis_fpadd32s( vis_read_hi( dres1 ), vis_read_lo( dres1 ) ) ); \
-      mask = vis_fcmplt32( dres, dpmin[ 0 ] );                        \
-      vis_pst_32( dind, ( void * )dpind, mask );                      \
-      dind = vis_fpadd32( dind, done );                               \
-      vis_pst_32( dres, ( void * )dpmin, mask );                      \
-    }                                                                 \
-    dst[ i ] = ( ( mlib_s32 * )dpind )[ 0 ] + offset;                 \
-    dst[ i + 1 ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset;              \
-  }                                                                   \
-  if( i < length )                                                    \
-  {                                                                   \
-    dpsrc = VIS_ALIGNADDR( src, -1 );                                 \
-    dsrc = dpsrc[ 0 ];                                                \
-    dsrc1 = dpsrc[ 1 ];                                               \
-    dsrc1 = vis_faligndata( dsrc, dsrc1 );                            \
-    dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );              \
-    dpind[ 0 ] = dind = done;                                         \
-    dpmin[ 0 ] = dmax;                                                \
-    for( k = 0; k < entries; k++ )                                    \
-    {                                                                 \
-      dcolor = vis_fmul8x16al( lut[ k ], fone );                      \
-      ddist1 = vis_fpsub16( dcolor, dsrc );                           \
-      ddist = FSQR_S16_HI( ddist1 );                                  \
-      ddist1 = FSQR_S16_LO( ddist1 );                                 \
-      dres = vis_fpadd32( ddist, ddist1 );                            \
-      dres = vis_write_lo( dres,                                      \
-        vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ) );   \
-      mask = vis_fcmplt32( dres, dpmin[ 0 ] );                        \
-      vis_pst_32( dind, ( void * )dpind, mask );                      \
-      dind = vis_fpadd32( dind, done );                               \
-      vis_pst_32( dres, ( void * )dpmin, mask );                      \
-    }                                                                 \
-    dst[ i ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset;                  \
-  }                                                                   \
-  vis_write_gsr(gsr[0]);                                              \
-}
-
-/***************************************************************/
-#define FIND_NEAREST_U8_3_IN4                                         \
-{                                                                     \
-  mlib_d64 *dpsrc, dsrc, dsrc1, ddist, ddist1, ddist2, ddist3;        \
-  mlib_d64 dcolor, dind, dres, dres1, dpind[1], dpmin[1];             \
-  mlib_d64 done = vis_to_double_dup( 1 ),                             \
-           dmax = vis_to_double_dup( MLIB_S32_MAX );                  \
-  mlib_f32 *lut = ( mlib_f32 * )mlib_ImageGetLutNormalTable( s );     \
-  mlib_f32 fone = vis_to_float( 0x100 );                              \
-  mlib_s32 i, k, mask, gsr[1];                                        \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;                  \
-  mlib_s32 entries = s->lutlength;                                    \
-                                                                      \
-  gsr[0] = vis_read_gsr();                                            \
-  dpsrc = VIS_ALIGNADDR( src, 0 );                                    \
-  for( i = 0; i <= ( length-2 ); i += 2 )                             \
-  {                                                                   \
-    dsrc = dpsrc[ 0 ];                                                \
-    dsrc1 = dpsrc[ 1 ];                                               \
-    dsrc1 = vis_faligndata( dsrc, dsrc1 );                            \
-    dpsrc++;                                                          \
-    dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );              \
-    dsrc1 = vis_fmul8x16al( vis_read_lo( dsrc1 ), fone );             \
-    dpind[ 0 ] = dind = done;                                         \
-    dpmin[ 0 ] = dmax;                                                \
-    dcolor = vis_fmul8x16al( lut[ 0 ], fone );                        \
-    for( k = 1; k <= entries; k++ )                                   \
-    {                                                                 \
-      ddist1 = vis_fpsub16( dcolor, dsrc );                           \
-      ddist = FSQR_S16_HI( ddist1 );                                  \
-      ddist1 = FSQR_S16_LO( ddist1 );                                 \
-      dres = vis_fpadd32( ddist, ddist1 );                            \
-      ddist3 = vis_fpsub16( dcolor, dsrc1 );                          \
-      ddist2 = FSQR_S16_HI( ddist3 );                                 \
-      ddist3 = FSQR_S16_LO( ddist3 );                                 \
-      dres1 = vis_fpadd32( ddist2, ddist3 );                          \
-      dcolor = vis_fmul8x16al( lut[ k ], fone );                      \
-      dres = vis_freg_pair(                                           \
-        vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ),     \
-        vis_fpadd32s( vis_read_hi( dres1 ), vis_read_lo( dres1 ) ) ); \
-      mask = vis_fcmplt32( dres, dpmin[ 0 ] );                        \
-      vis_pst_32( dind, ( void * )dpind, mask );                      \
-      dind = vis_fpadd32( dind, done );                               \
-      vis_pst_32( dres, ( void * )dpmin, mask );                      \
-    }                                                                 \
-    dst[ i ] = ( ( mlib_s32 * )dpind )[ 0 ] + offset;                 \
-    dst[ i + 1 ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset;              \
-  }                                                                   \
-  if( i < length )                                                    \
-  {                                                                   \
-    dsrc = dpsrc[ 0 ];                                                \
-    dsrc1 = dpsrc[ 1 ];                                               \
-    dsrc1 = vis_faligndata( dsrc, dsrc1 );                            \
-    dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );              \
-    dpind[ 0 ] = dind = done;                                         \
-    dpmin[ 0 ] = dmax;                                                \
-    for( k = 0; k < entries; k++ )                                    \
-    {                                                                 \
-      dcolor = vis_fmul8x16al( lut[ k ], fone );                      \
-      ddist1 = vis_fpsub16( dcolor, dsrc );                           \
-      ddist = FSQR_S16_HI( ddist1 );                                  \
-      ddist1 = FSQR_S16_LO( ddist1 );                                 \
-      dres = vis_fpadd32( ddist, ddist1 );                            \
-      dres = vis_write_lo( dres,                                      \
-        vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ) );   \
-      mask = vis_fcmplt32( dres, dpmin[ 0 ] );                        \
-      vis_pst_32( dind, ( void * )dpind, mask );                      \
-      dind = vis_fpadd32( dind, done );                               \
-      vis_pst_32( dres, ( void * )dpmin, mask );                      \
-    }                                                                 \
-    dst[ i ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset;                  \
-  }                                                                   \
-  vis_write_gsr(gsr[0]);                                              \
-}
-
-/***************************************************************/
-#define FIND_NEAREST_U8_4                                             \
-{                                                                     \
-  mlib_d64 *dpsrc, dsrc, dsrc1, ddist, ddist1, ddist2, ddist3;        \
-  mlib_d64 dcolor, dind, dres, dres1, dpind[ 1 ], dpmin[ 1 ];         \
-  mlib_d64 done = vis_to_double_dup( 1 ),                             \
-           dmax = vis_to_double_dup( MLIB_S32_MAX );                  \
-  mlib_f32 *lut = ( mlib_f32 * )mlib_ImageGetLutNormalTable( s );     \
-  mlib_f32 fone = vis_to_float( 0x100 );                              \
-  mlib_s32 i, k, mask, gsr[1];                                        \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;                  \
-  mlib_s32 entries = s->lutlength;                                    \
-                                                                      \
-  gsr[0] = vis_read_gsr();                                            \
-  dpsrc = VIS_ALIGNADDR( src, 0 );                                    \
-  for( i = 0; i <= ( length-2 ); i += 2 )                             \
-  {                                                                   \
-    dsrc = dpsrc[ 0 ];                                                \
-    dsrc1 = dpsrc[ 1 ];                                               \
-    dsrc1 = vis_faligndata( dsrc, dsrc1 );                            \
-    dpsrc++;                                                          \
-    dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );              \
-    dsrc1 = vis_fmul8x16al( vis_read_lo( dsrc1 ), fone );             \
-    dpind[ 0 ] = dind = done;                                         \
-    dpmin[ 0 ] = dmax;                                                \
-    dcolor = vis_fmul8x16al(lut[0], fone);                            \
-    for( k = 1; k <= entries; k++ )                                   \
-    {                                                                 \
-      ddist1 = vis_fpsub16( dcolor, dsrc );                           \
-      ddist = FSQR_S16_HI( ddist1 );                                  \
-      ddist1 = FSQR_S16_LO( ddist1 );                                 \
-      dres = vis_fpadd32( ddist, ddist1 );                            \
-      ddist3 = vis_fpsub16( dcolor, dsrc1 );                          \
-      ddist2 = FSQR_S16_HI( ddist3 );                                 \
-      ddist3 = FSQR_S16_LO( ddist3 );                                 \
-      dres1 = vis_fpadd32( ddist2, ddist3 );                          \
-      dcolor = vis_fmul8x16al( lut[ k ], fone );                      \
-      dres = vis_freg_pair(                                           \
-        vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ),     \
-        vis_fpadd32s( vis_read_hi( dres1 ), vis_read_lo( dres1 ) ) ); \
-      mask = vis_fcmplt32( dres, dpmin[ 0 ] );                        \
-      vis_pst_32( dind, ( void * )dpind, mask );                      \
-      dind = vis_fpadd32( dind, done );                               \
-      vis_pst_32( dres, ( void * )dpmin, mask );                      \
-    }                                                                 \
-    dst[ i ] = ( ( mlib_s32 * )dpind )[ 0 ] + offset;                 \
-    dst[ i + 1 ] = ( ( mlib_s32 * )dpind )[ 1 ] + offset;             \
-  }                                                                   \
-  if( i < length )                                                    \
-  {                                                                   \
-    dsrc = dpsrc[ 0 ];                                                \
-    dsrc1 = dpsrc[ 1 ];                                               \
-    dsrc1 = vis_faligndata( dsrc, dsrc1 );                            \
-    dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone );              \
-    dpind[ 0 ] = dind = done;                                         \
-    dpmin[ 0 ] = dmax;                                                \
-    for( k = 0; k < entries; k++ )                                    \
-    {                                                                 \
-      dcolor = vis_fmul8x16al( lut[ k ], fone );                      \
-      ddist1 = vis_fpsub16( dcolor, dsrc );                           \
-      ddist = FSQR_S16_HI( ddist1 );                                  \
-      ddist1 = FSQR_S16_LO( ddist1 );                                 \
-      dres = vis_fpadd32( ddist, ddist1 );                            \
-      dres = vis_write_lo( dres,                                      \
-        vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ) );   \
-      mask = vis_fcmplt32( dres, dpmin[ 0 ] );                        \
-      vis_pst_32( dind, ( void * )dpind, mask );                      \
-      dind = vis_fpadd32( dind, done );                               \
-      vis_pst_32( dres, ( void * )dpmin, mask );                      \
-    }                                                                 \
-    dst[ i ] = ( ( mlib_s32 * )dpind )[ 1 ] + offset;                 \
-  }                                                                   \
-  vis_write_gsr(gsr[0]);                                              \
-}
-
-/***************************************************************/
-#define FIND_NEAREST_S16_3( SHIFT, STEP )                       \
-  mlib_s32 i, k, k_min, min_dist, diff, mask;                   \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;            \
-  mlib_s32 entries = s->lutlength;                              \
-  mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s );       \
-  mlib_d64 col0, col1, col2;                                    \
-  mlib_d64 dist, len0, len1, len2;                              \
-                                                                \
-  for( i = 0; i < length; i++ )                                 \
-  {                                                             \
-    col0 = src[ STEP * i + SHIFT ];                             \
-    col1 = src[ STEP * i + 1 + SHIFT ];                         \
-    col2 = src[ STEP * i + 2 + SHIFT ];                         \
-    min_dist = MLIB_S32_MAX;                                    \
-    k_min = 1;                                                  \
-    len0 = double_lut[ 0 ] - col0;                              \
-    len1 = double_lut[ 1 ] - col1;                              \
-    len2 = double_lut[ 2 ] - col2;                              \
-    for( k = 1; k <= entries; k++ )                             \
-    {                                                           \
-      dist = len0 * len0;                                       \
-      len0 = double_lut[ 3 * k ] - col0;                        \
-      dist += len1 * len1;                                      \
-      len1 = double_lut[ 3 * k + 1 ] - col1;                    \
-      dist += len2 * len2;                                      \
-      len2 = double_lut[ 3 * k + 2 ] - col2;                    \
-      diff = ( mlib_s32 )( dist * 0.125 ) - min_dist;           \
-      mask = diff >> 31;                                        \
-      min_dist += diff & mask;                                  \
-      k_min += ( k - k_min ) & mask;                            \
-    }                                                           \
-    dst[ i ] = k_min + offset;                                  \
-  }
-
-/***************************************************************/
-#define FIND_NEAREST_S16_4                                      \
-  mlib_s32 i, k, k_min, min_dist, diff, mask;                   \
-  mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1;            \
-  mlib_s32 entries = s->lutlength;                              \
-  mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s );       \
-  mlib_d64 col0, col1, col2, col3;                              \
-  mlib_d64 dist, len0, len1, len2, len3;                        \
-                                                                \
-  for( i = 0; i < length; i++ )                                 \
-  {                                                             \
-    col0 = src[ 4 * i ];                                        \
-    col1 = src[ 4 * i + 1 ];                                    \
-    col2 = src[ 4 * i + 2 ];                                    \
-    col3 = src[ 4 * i + 3 ];                                    \
-    min_dist = MLIB_S32_MAX;                                    \
-    k_min = 1;                                                  \
-    len0 = double_lut[ 0 ] - col0;                              \
-    len1 = double_lut[ 1 ] - col1;                              \
-    len2 = double_lut[ 2 ] - col2;                              \
-    len3 = double_lut[ 3 ] - col3;                              \
-    for( k = 1; k <= entries; k++ )                             \
-    {                                                           \
-      dist = len0 * len0;                                       \
-      len0 =  double_lut[ 4 * k ] - col0;                       \
-      dist += len1 * len1;                                      \
-      len1 = double_lut[ 4 * k + 1 ] - col1;                    \
-      dist += len2 * len2;                                      \
-      len2 =  double_lut[ 4 * k + 2 ] - col2;                   \
-      dist += len3 * len3;                                      \
-      len3 =  double_lut[ 4 * k + 3 ] - col3;                   \
-      diff = ( mlib_s32 )( dist * 0.125 ) - min_dist;           \
-      mask = diff >> 31;                                        \
-      min_dist += diff & mask;                                  \
-      k_min += ( k - k_min ) & mask;                            \
-    }                                                           \
-    dst[ i ] = k_min + offset;                                  \
-  }
-
-/***************************************************************/
-mlib_status mlib_ImageColorTrue2Index(mlib_image       *dst,
-                                      const mlib_image *src,
-                                      const void       *colormap)
-{
-  mlib_s32 y, width, height, sstride, dstride, schann;
-  mlib_colormap *s = (mlib_colormap *)colormap;
-  mlib_s32 channels;
-  mlib_type stype, dtype;
-
-  MLIB_IMAGE_CHECK(src);
-  MLIB_IMAGE_CHECK(dst);
-  MLIB_IMAGE_SIZE_EQUAL(src, dst);
-  MLIB_IMAGE_HAVE_CHAN(dst, 1);
-
-  if (!colormap)
-    return MLIB_NULLPOINTER;
-
-  channels = s->channels;
-  stype = mlib_ImageGetType(src);
-  dtype = mlib_ImageGetType(dst);
-  width = mlib_ImageGetWidth(src);
-  height = mlib_ImageGetHeight(src);
-  sstride = mlib_ImageGetStride(src);
-  dstride = mlib_ImageGetStride(dst);
-  schann = mlib_ImageGetChannels(src);
-
-  if (stype != s->intype || dtype != s->outtype)
-    return MLIB_FAILURE;
-
-  if (channels != schann)
-    return MLIB_FAILURE;
-
-  switch (stype) {
-    case MLIB_BYTE:
-      {
-        mlib_u8 *sdata = mlib_ImageGetData(src);
-
-        switch (dtype) {
-          case MLIB_BYTE:
-            {
-              mlib_u8 *ddata = mlib_ImageGetData(dst);
-
-              switch (channels) {
-                case 3:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(U8, U8, 3);
-                    return MLIB_SUCCESS;
-                  }
-
-                case 4:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(U8, U8, 4);
-                    return MLIB_SUCCESS;
-                  }
-
-                default:
-                  return MLIB_FAILURE;
-              }
-            }
-
-          case MLIB_SHORT:
-            {
-              mlib_s16 *ddata = mlib_ImageGetData(dst);
-
-              dstride /= 2;
-              switch (channels) {
-                case 3:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(U8, S16, 3);
-                    return MLIB_SUCCESS;
-                  }
-
-                case 4:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(U8, S16, 4);
-                    return MLIB_SUCCESS;
-                  }
-
-                default:
-                  return MLIB_FAILURE;
-              }
-            }
-        default:
-          /* Unsupported type of destination image */
-          return MLIB_FAILURE;
-        }
-      }
-
-    case MLIB_SHORT:
-      {
-        mlib_s16 *sdata = mlib_ImageGetData(src);
-
-        sstride /= 2;
-        switch (dtype) {
-          case MLIB_BYTE:
-            {
-              mlib_u8 *ddata = mlib_ImageGetData(dst);
-
-              switch (channels) {
-                case 3:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(S16, U8, 3);
-                    return MLIB_SUCCESS;
-                  }
-
-                case 4:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(S16, U8, 4);
-                    return MLIB_SUCCESS;
-                  }
-
-                default:
-                  return MLIB_FAILURE;
-              }
-            }
-
-          case MLIB_SHORT:
-            {
-              mlib_s16 *ddata = mlib_ImageGetData(dst);
-
-              dstride /= 2;
-              switch (channels) {
-                case 3:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(S16, S16, 3);
-                    return MLIB_SUCCESS;
-                  }
-
-                case 4:
-                  {
-                    MAIN_COLORTRUE2INDEX_LOOP(S16, S16, 4);
-                    return MLIB_SUCCESS;
-                  }
-
-                default:
-                  return MLIB_FAILURE;
-              }
-            }
-        default:
-          /* Unsupported type of destination image */
-          return MLIB_FAILURE;
-        }
-      }
-
-    default:
-      return MLIB_FAILURE;
-  }
-}
-
-/***************************************************************/
-mlib_u32 mlib_search_quadrant_U8_3(struct lut_node_3 *node,
-                                   mlib_u32          distance,
-                                    mlib_s32    *found_color,
-                                   mlib_u32          c0,
-                                   mlib_u32          c1,
-                                   mlib_u32          c2,
-                                   const mlib_u8     **base)
-{
-  mlib_s32 i;
-
-  for (i = 0; i < 8; i++) {
-
-    if (node->tag & (1 << i)) {
-      /* Here is alone color cell. Check the distance */
-      mlib_s32 newindex = node->contents.index[i];
-      mlib_u32 newpalc0, newpalc1, newpalc2;
-      mlib_u32 newdistance;
-
-      newpalc0 = base[0][newindex];
-      newpalc1 = base[1][newindex];
-      newpalc2 = base[2][newindex];
-      newdistance = FIND_DISTANCE_3(c0, newpalc0, c1, newpalc1, c2, newpalc2, 0);
-
-      if (distance > newdistance) {
-        *found_color = newindex;
-        distance = newdistance;
-      }
-    }
-    else if (node->contents.quadrants[i])
-      distance =
-        mlib_search_quadrant_U8_3(node->contents.quadrants[i], distance,
-                                  found_color, c0, c1, c2, base);
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Nearest-color search of a 3-channel U8 tree node that prunes
- * along one channel, visiting only half of the node's children
- * while the other half cannot contain a closer entry.
- *
- *   node        - tree node whose 8 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..2]
- *   base        - per-channel palette arrays, base[0..2]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..2); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * NOTE(review): FIND_DISTANCE_3 is defined outside this hunk; it
- * is compared against a squared length here, so it presumably
- * yields a squared distance - confirm.
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_left_U8_3(struct lut_node_3 *node,
-                                                mlib_u32          distance,
-                                                 mlib_s32    *found_color,
-                                                const mlib_u32    *c,
-                                                const mlib_u8     **base,
-                                                mlib_u32          position,
-                                                mlib_s32          pass,
-                                                mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[3][4] = { /* row = dir_bit; child slots whose bit dir_bit is clear */
-    {0, 2, 4, 6},
-    {0, 1, 4, 5},
-    {0, 1, 2, 3}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance < (position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) { /* Search half of quadrant */
-    for (i = 0; i < 4; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_left_U8_3(node->contents.quadrants[qq],
-                                                 distance, found_color, c, base,
-                                                 position, pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 8; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_left_U8_3(node->contents.quadrants[i],
-                                                   distance, found_color, c,
-                                                   base,
-                                                   position + current_size,
-                                                   pass - 1, dir_bit);
-        else
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_U8_3(node->contents.quadrants[i], distance,
-                                      found_color, c[0], c[1], c[2], base);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Mirror of the "part_to_left" search for a 3-channel U8 tree
- * node: prunes along channel dir_bit, but the half searched first
- * is the one whose child index has bit dir_bit set.
- *
- *   node        - tree node whose 8 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..2]
- *   base        - per-channel palette arrays, base[0..2]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..2); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * Note the threshold comparison here is "<=", whereas the left
- * variant uses "<".
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_right_U8_3(struct lut_node_3 *node,
-                                                 mlib_u32          distance,
-                                                  mlib_s32    *found_color,
-                                                 const mlib_u32    *c,
-                                                 const mlib_u8     **base,
-                                                 mlib_u32          position,
-                                                 mlib_s32          pass,
-                                                 mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[3][4] = { /* row = dir_bit; child slots whose bit dir_bit is set */
-    {1, 3, 5, 7},
-    {2, 3, 6, 7},
-    {4, 5, 6, 7}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance <= (c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) { /* Search half of quadrant */
-    for (i = 0; i < 4; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_right_U8_3(node->contents.quadrants[qq],
-                                                  distance, found_color, c,
-                                                  base, position + current_size,
-                                                  pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 8; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_U8_3(node->contents.quadrants[i], distance,
-                                      found_color, c[0], c[1], c[2], base);
-        else
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_right_U8_3(node->contents.quadrants[i],
-                                                    distance, found_color, c,
-                                                    base, position, pass - 1, dir_bit);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Exhaustive nearest-color search of a 3-channel S16 tree node.
- *
- * Every one of the 8 child slots is visited: a slot whose tag bit
- * is set holds a palette index that is compared directly, any
- * other non-NULL slot is a child node that is searched
- * recursively.
- *
- *   node        - tree node to search
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c0, c1, c2  - color being matched
- *   base        - per-channel S16 palette arrays, base[0..2]
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * NOTE(review): FIND_DISTANCE_3 is defined outside this hunk; its
- * last argument is 2 here and 0 in the U8 variants - presumably a
- * scaling shift, confirm.
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_S16_3(struct lut_node_3 *node,
-                                    mlib_u32          distance,
-                                     mlib_s32    *found_color,
-                                    mlib_u32          c0,
-                                    mlib_u32          c1,
-                                    mlib_u32          c2,
-                                    const mlib_s16    **base)
-{
-  mlib_s32 i;
-
-  for (i = 0; i < 8; i++) {
-
-    if (node->tag & (1 << i)) {
-      /* Tag bit set: this slot holds a single palette index. Check the distance */
-      mlib_s32 newindex = node->contents.index[i];
-      mlib_u32 newpalc0, newpalc1, newpalc2;
-      mlib_u32 newdistance;
-
-      /* Palette samples are offset by -MLIB_S16_MIN before the comparison */
-      newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-      newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-      newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-      newdistance = FIND_DISTANCE_3(c0, newpalc0, c1, newpalc1, c2, newpalc2, 2);
-
-      if (distance > newdistance) {
-        *found_color = newindex;
-        distance = newdistance;
-      }
-    }
-    else if (node->contents.quadrants[i])
-      distance =
-        mlib_search_quadrant_S16_3(node->contents.quadrants[i], distance,
-                                   found_color, c0, c1, c2, base);
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Nearest-color search of a 3-channel S16 tree node that prunes
- * along one channel, visiting only half of the node's children
- * while the other half cannot contain a closer entry.
- *
- *   node        - tree node whose 8 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..2]
- *   base        - per-channel S16 palette arrays, base[0..2]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..2); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * The squared threshold is shifted right by 2 before comparison,
- * matching the trailing 2 passed to FIND_DISTANCE_3 (macro defined
- * outside this hunk; presumably the same scaling - confirm).
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_left_S16_3(struct lut_node_3 *node,
-                                                 mlib_u32          distance,
-                                                  mlib_s32    *found_color,
-                                                 const mlib_u32    *c,
-                                                 const mlib_s16    **base,
-                                                 mlib_u32          position,
-                                                 mlib_s32          pass,
-                                                 mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[3][4] = { /* row = dir_bit; child slots whose bit dir_bit is clear */
-    {0, 2, 4, 6},
-    {0, 1, 4, 5},
-    {0, 1, 2, 3}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance < (((position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) >> 2)) { /* Search half of quadrant */
-    for (i = 0; i < 4; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_left_S16_3(node->contents.quadrants[qq],
-                                                  distance, found_color, c,
-                                                  base, position, pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 8; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_left_S16_3(node->contents.quadrants[i],
-                                                    distance, found_color, c,
-                                                    base,
-                                                    position + current_size,
-                                                    pass - 1, dir_bit);
-        else
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_S16_3(node->contents.quadrants[i], distance,
-                                       found_color, c[0], c[1], c[2], base);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Mirror of the "part_to_left" search for a 3-channel S16 tree
- * node: prunes along channel dir_bit, but the half searched first
- * is the one whose child index has bit dir_bit set.
- *
- *   node        - tree node whose 8 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..2]
- *   base        - per-channel S16 palette arrays, base[0..2]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..2); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * The threshold comparison is "<=" (the left variant uses "<") and
- * the squared threshold is shifted right by 2, matching the
- * trailing 2 passed to FIND_DISTANCE_3.
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_right_S16_3(struct lut_node_3 *node,
-                                                  mlib_u32          distance,
-                                                   mlib_s32    *found_color,
-                                                  const mlib_u32    *c,
-                                                  const mlib_s16    **base,
-                                                  mlib_u32          position,
-                                                  mlib_s32          pass,
-                                                  mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[3][4] = { /* row = dir_bit; child slots whose bit dir_bit is set */
-    {1, 3, 5, 7},
-    {2, 3, 6, 7},
-    {4, 5, 6, 7}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance <= (((c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) >> 2)) { /* Search half of quadrant */
-    for (i = 0; i < 4; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_right_S16_3(node->contents.quadrants[qq],
-                                                   distance, found_color, c,
-                                                   base,
-                                                   position + current_size,
-                                                   pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 8; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_S16_3(node->contents.quadrants[i], distance,
-                                       found_color, c[0], c[1], c[2], base);
-        else
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_right_S16_3(node->contents.
-                                                     quadrants[i], distance,
-                                                     found_color, c, base,
-                                                     position, pass - 1, dir_bit);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Exhaustive nearest-color search of a 4-channel U8 tree node.
- *
- * Every one of the 16 child slots is visited: a slot whose tag
- * bit is set holds a palette index that is compared directly, any
- * other non-NULL slot is a child node that is searched
- * recursively.
- *
- *   node           - tree node to search
- *   distance       - best distance found so far
- *   found_color    - receives the palette index each time a
- *                    closer entry is found
- *   c0, c1, c2, c3 - color being matched
- *   base           - per-channel U8 palette arrays, base[0..3]
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * NOTE(review): FIND_DISTANCE_4 is defined outside this hunk;
- * presumably it yields a squared distance - confirm.
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_U8_4(struct lut_node_4 *node,
-                                   mlib_u32          distance,
-                                    mlib_s32    *found_color,
-                                   mlib_u32          c0,
-                                   mlib_u32          c1,
-                                   mlib_u32          c2,
-                                   mlib_u32          c3,
-                                   const mlib_u8     **base)
-{
-  mlib_s32 i;
-
-  for (i = 0; i < 16; i++) {
-
-    if (node->tag & (1 << i)) {
-      /* Tag bit set: this slot holds a single palette index. Check the distance */
-      mlib_s32 newindex = node->contents.index[i];
-      mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-      mlib_u32 newdistance;
-
-      newpalc0 = base[0][newindex];
-      newpalc1 = base[1][newindex];
-      newpalc2 = base[2][newindex];
-      newpalc3 = base[3][newindex];
-      newdistance = FIND_DISTANCE_4(c0, newpalc0,
-                                    c1, newpalc1, c2, newpalc2, c3, newpalc3, 0);
-
-      if (distance > newdistance) {
-        *found_color = newindex;
-        distance = newdistance;
-      }
-    }
-    else if (node->contents.quadrants[i])
-      distance =
-        mlib_search_quadrant_U8_4(node->contents.quadrants[i], distance,
-                                  found_color, c0, c1, c2, c3, base);
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Nearest-color search of a 4-channel U8 tree node that prunes
- * along one channel, visiting only half of the node's children
- * while the other half cannot contain a closer entry.
- *
- *   node        - tree node whose 16 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..3]
- *   base        - per-channel U8 palette arrays, base[0..3]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..3); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_left_U8_4(struct lut_node_4 *node,
-                                                mlib_u32          distance,
-                                                 mlib_s32    *found_color,
-                                                const mlib_u32    *c,
-                                                const mlib_u8     **base,
-                                                mlib_u32          position,
-                                                mlib_s32          pass,
-                                                mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[4][8] = { /* row = dir_bit; child slots whose bit dir_bit is clear */
-    {0, 2, 4, 6, 8, 10, 12, 14},
-    {0, 1, 4, 5, 8, 9, 12, 13},
-    {0, 1, 2, 3, 8, 9, 10, 11},
-    {0, 1, 2, 3, 4, 5, 6, 7}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance < (position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) { /* Search half of quadrant */
-    for (i = 0; i < 8; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newpalc3 = base[3][newindex];
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_left_U8_4(node->contents.quadrants[qq],
-                                                 distance, found_color, c, base,
-                                                 position, pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 16; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newpalc3 = base[3][newindex];
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_left_U8_4(node->contents.quadrants[i],
-                                                   distance, found_color, c,
-                                                   base,
-                                                   position + current_size,
-                                                   pass - 1, dir_bit);
-        else
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_U8_4(node->contents.quadrants[i], distance,
-                                      found_color, c[0], c[1], c[2], c[3], base);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Mirror of the "part_to_left" search for a 4-channel U8 tree
- * node: prunes along channel dir_bit, but the half searched first
- * is the one whose child index has bit dir_bit set.
- *
- *   node        - tree node whose 16 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..3]
- *   base        - per-channel U8 palette arrays, base[0..3]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..3); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * Note the threshold comparison here is "<=", whereas the left
- * variant uses "<".
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_right_U8_4(struct lut_node_4 *node,
-                                                 mlib_u32          distance,
-                                                  mlib_s32    *found_color,
-                                                 const mlib_u32    *c,
-                                                 const mlib_u8     **base,
-                                                 mlib_u32          position,
-                                                 mlib_s32          pass,
-                                                 mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[4][8] = { /* row = dir_bit; child slots whose bit dir_bit is set */
-    {1, 3, 5, 7, 9, 11, 13, 15},
-    {2, 3, 6, 7, 10, 11, 14, 15},
-    {4, 5, 6, 7, 12, 13, 14, 15},
-    {8, 9, 10, 11, 12, 13, 14, 15}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance <= (c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) { /* Search half of quadrant */
-    for (i = 0; i < 8; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newpalc3 = base[3][newindex];
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_right_U8_4(node->contents.quadrants[qq],
-                                                  distance, found_color, c,
-                                                  base, position + current_size,
-                                                  pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 16; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex];
-        newpalc1 = base[1][newindex];
-        newpalc2 = base[2][newindex];
-        newpalc3 = base[3][newindex];
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_U8_4(node->contents.quadrants[i], distance,
-                                      found_color, c[0], c[1], c[2], c[3], base);
-        else
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_right_U8_4(node->contents.quadrants[i],
-                                                    distance, found_color, c,
-                                                    base, position, pass - 1, dir_bit);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Exhaustive nearest-color search of a 4-channel S16 tree node.
- *
- * Every one of the 16 child slots is visited: a slot whose tag
- * bit is set holds a palette index that is compared directly, any
- * other non-NULL slot is a child node that is searched
- * recursively.
- *
- *   node           - tree node to search
- *   distance       - best distance found so far
- *   found_color    - receives the palette index each time a
- *                    closer entry is found
- *   c0, c1, c2, c3 - color being matched
- *   base           - per-channel S16 palette arrays, base[0..3]
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * NOTE(review): FIND_DISTANCE_4 is defined outside this hunk; its
- * last argument is 2 here and 0 in the U8 variants - presumably a
- * scaling shift, confirm.
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_S16_4(struct lut_node_4 *node,
-                                    mlib_u32          distance,
-                                     mlib_s32    *found_color,
-                                    mlib_u32          c0,
-                                    mlib_u32          c1,
-                                    mlib_u32          c2,
-                                    mlib_u32          c3,
-                                    const mlib_s16    **base)
-{
-  mlib_s32 i;
-
-  for (i = 0; i < 16; i++) {
-
-    if (node->tag & (1 << i)) {
-      /* Tag bit set: this slot holds a single palette index. Check the distance */
-      mlib_s32 newindex = node->contents.index[i];
-      mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-      mlib_u32 newdistance;
-
-      /* Palette samples are offset by -MLIB_S16_MIN before the comparison */
-      newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-      newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-      newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-      newpalc3 = base[3][newindex] - MLIB_S16_MIN;
-      newdistance = FIND_DISTANCE_4(c0, newpalc0,
-                                    c1, newpalc1, c2, newpalc2, c3, newpalc3, 2);
-
-      if (distance > newdistance) {
-        *found_color = newindex;
-        distance = newdistance;
-      }
-    }
-    else if (node->contents.quadrants[i])
-      distance =
-        mlib_search_quadrant_S16_4(node->contents.quadrants[i], distance,
-                                   found_color, c0, c1, c2, c3, base);
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Nearest-color search of a 4-channel S16 tree node that prunes
- * along one channel, visiting only half of the node's children
- * while the other half cannot contain a closer entry.
- *
- *   node        - tree node whose 16 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..3]
- *   base        - per-channel S16 palette arrays, base[0..3]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..3); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * The squared threshold is shifted right by 2 before comparison,
- * matching the trailing 2 passed to FIND_DISTANCE_4 (macro defined
- * outside this hunk; presumably the same scaling - confirm).
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_left_S16_4(struct lut_node_4 *node,
-                                                 mlib_u32          distance,
-                                                  mlib_s32    *found_color,
-                                                 const mlib_u32    *c,
-                                                 const mlib_s16    **base,
-                                                 mlib_u32          position,
-                                                 mlib_s32          pass,
-                                                 mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[4][8] = { /* row = dir_bit; child slots whose bit dir_bit is clear */
-    {0, 2, 4, 6, 8, 10, 12, 14},
-    {0, 1, 4, 5, 8, 9, 12, 13},
-    {0, 1, 2, 3, 8, 9, 10, 11},
-    {0, 1, 2, 3, 4, 5, 6, 7}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance < (((position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) >> 2)) { /* Search half of quadrant */
-    for (i = 0; i < 8; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newpalc3 = base[3][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_left_S16_4(node->contents.quadrants[qq],
-                                                  distance, found_color, c,
-                                                  base, position, pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 16; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newpalc3 = base[3][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_left_S16_4(node->contents.quadrants[i],
-                                                    distance, found_color, c,
-                                                    base,
-                                                    position + current_size,
-                                                    pass - 1, dir_bit);
-        else
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_S16_4(node->contents.quadrants[i], distance,
-                                       found_color, c[0], c[1], c[2], c[3], base);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************
- * Mirror of the "part_to_left" search for a 4-channel S16 tree
- * node: prunes along channel dir_bit, but the half searched first
- * is the one whose child index has bit dir_bit set.
- *
- *   node        - tree node whose 16 child slots are examined
- *   distance    - best distance found so far
- *   found_color - receives the palette index each time a closer
- *                 entry is found
- *   c           - color being matched, c[0..3]
- *   base        - per-channel S16 palette arrays, base[0..3]
- *   position    - offset along channel dir_bit used in the
- *                 threshold test (presumably the node's origin on
- *                 that axis - confirm against the caller)
- *   pass        - current_size is computed as 1 << pass
- *   dir_bit     - channel index (0..3); also the bit of the child
- *                 index that selects the half
- *
- * Returns the best distance after the search (never larger than
- * the distance passed in).
- *
- * The threshold comparison is "<=" (the left variant uses "<") and
- * the squared threshold is shifted right by 2, matching the
- * trailing 2 passed to FIND_DISTANCE_4.
- ***************************************************************/
-mlib_u32 mlib_search_quadrant_part_to_right_S16_4(struct lut_node_4 *node,
-                                                  mlib_u32          distance,
-                                                   mlib_s32    *found_color,
-                                                  const mlib_u32    *c,
-                                                  const mlib_s16    **base,
-                                                  mlib_u32          position,
-                                                  mlib_s32          pass,
-                                                  mlib_s32          dir_bit)
-{
-  mlib_u32 current_size = 1 << pass;
-  mlib_s32 i;
-  static mlib_s32 opposite_quadrants[4][8] = { /* row = dir_bit; child slots whose bit dir_bit is set */
-    {1, 3, 5, 7, 9, 11, 13, 15},
-    {2, 3, 6, 7, 10, 11, 14, 15},
-    {4, 5, 6, 7, 12, 13, 14, 15},
-    {8, 9, 10, 11, 12, 13, 14, 15}
-  };
-
-/* Search only the quadrant's half until it becomes necessary to
-  check the whole quadrant */
-
-  if (distance <= (((c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) >> 2)) { /* Search half of quadrant */
-    for (i = 0; i < 8; i++) {
-      mlib_s32 qq = opposite_quadrants[dir_bit][i];
-
-      if (node->tag & (1 << qq)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[qq];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newpalc3 = base[3][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[qq])
-        distance =
-          mlib_search_quadrant_part_to_right_S16_4(node->contents.quadrants[qq],
-                                                   distance, found_color, c,
-                                                   base,
-                                                   position + current_size,
-                                                   pass - 1, dir_bit);
-    }
-  }
-  else {                                    /* Search whole quadrant */
-
-    mlib_s32 mask = 1 << dir_bit;
-
-    for (i = 0; i < 16; i++) {
-
-      if (node->tag & (1 << i)) {
-        /* Tag bit set: this slot holds a single palette index. Check the distance */
-        mlib_s32 newindex = node->contents.index[i];
-        mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3;
-        mlib_u32 newdistance;
-
-        newpalc0 = base[0][newindex] - MLIB_S16_MIN;
-        newpalc1 = base[1][newindex] - MLIB_S16_MIN;
-        newpalc2 = base[2][newindex] - MLIB_S16_MIN;
-        newpalc3 = base[3][newindex] - MLIB_S16_MIN;
-        newdistance = FIND_DISTANCE_4(c[0], newpalc0,
-                                      c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2);
-
-        if (distance > newdistance) {
-          *found_color = newindex;
-          distance = newdistance;
-        }
-      }
-      else if (node->contents.quadrants[i]) {
-
-        if (i & mask)
-          /* Here we should check all */
-          distance =
-            mlib_search_quadrant_S16_4(node->contents.quadrants[i], distance,
-                                       found_color, c[0], c[1], c[2], c[3], base);
-        else
-          /* This quadrant may require partial checking */
-          distance =
-            mlib_search_quadrant_part_to_right_S16_4(node->contents.
-                                                     quadrants[i], distance,
-                                                     found_color, c, base,
-                                                     position, pass - 1, dir_bit);
-      }
-    }
-  }
-
-  return distance;
-}
-
-/***************************************************************/
-
-#define TAB_SIZE_mlib_u8   256
-#define TAB_SIZE_mlib_s16 1024
-
-#define SRC_mlib_u8(i)    src[i]
-#define SRC_mlib_s16(i)   (((mlib_u16*)src)[i] >> 6)
-
-/***************************************************************/
-
-#define DIMENSIONS_SEARCH_3(STYPE, DTYPE, STEP)                 \
-{                                                               \
-  DTYPE  *tab0 = ((mlib_colormap *)state)->table;               \
-  DTYPE  *tab1 = tab0 + TAB_SIZE_##STYPE;                       \
-  DTYPE  *tab2 = tab1 + TAB_SIZE_##STYPE;                       \
-  mlib_s32 i;                                                   \
-                                                                \
-  for (i = 0; i < length; i++) {                                \
-    dst[i] = tab0[SRC_##STYPE(0)] + tab1[SRC_##STYPE(1)] +      \
-             tab2[SRC_##STYPE(2)];                              \
-    src += STEP;                                                \
-  }                                                             \
-}
-
-/***************************************************************/
-
-#define DIMENSIONS_SEARCH_4(STYPE, DTYPE)                       \
-{                                                               \
-  DTYPE  *tab0 = ((mlib_colormap *)state)->table;               \
-  DTYPE  *tab1 = tab0 + TAB_SIZE_##STYPE;                       \
-  DTYPE  *tab2 = tab1 + TAB_SIZE_##STYPE;                       \
-  DTYPE  *tab3 = tab2 + TAB_SIZE_##STYPE;                       \
-  mlib_s32 i;                                                   \
-                                                                \
-  for (i = 0; i < length; i++) {                                \
-    dst[i] = tab0[SRC_##STYPE(0)] + tab1[SRC_##STYPE(1)] +      \
-             tab2[SRC_##STYPE(2)] + tab3[SRC_##STYPE(3)];       \
-    src += 4;                                                   \
-  }                                                             \
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_U8_U8_3(const mlib_u8 *src,
-                                           mlib_u8       *dst,
-                                           mlib_s32      length,
-                                           const void    *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-
-  switch (s->method) {
-#if LUT_BYTE_COLORS_3CHANNELS <= 256
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        mlib_s32 bits = s->bits;
-        BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0, 3, 0);
-      }
-      break;
-
-#endif /* LUT_BYTE_COLORS_3CHANNELS <= 256 */
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        COLOR_CUBE_U8_3_SEARCH(mlib_u8, 0, 3);
-      }
-      break;
-
-    case LUT_STUPID_SEARCH:
-      {
-#ifdef USE_VIS_CODE
-        FIND_NEAREST_U8_3;
-#else
-        FIND_NEAREST_U8_3_C(0, 3);
-#endif
-      }
-      break;
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_3(mlib_u8, mlib_u8, 3)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4(const mlib_u8 *src,
-                                                mlib_u8       *dst,
-                                                mlib_s32      length,
-                                                const void    *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-
-  switch (s->method) {
-#if LUT_BYTE_COLORS_3CHANNELS <= 256
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        mlib_s32 bits = s->bits;
-        BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 1, 4, 0);
-        break;
-      }
-
-#endif /* LUT_BYTE_COLORS_3CHANNELS <= 256 */
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        COLOR_CUBE_U8_3_SEARCH(mlib_u8, 1, 4);
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-#ifdef USE_VIS_CODE
-        FIND_NEAREST_U8_3_IN4;
-#else
-        FIND_NEAREST_U8_3_C(1, 4);
-#endif
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      src++;
-      DIMENSIONS_SEARCH_3(mlib_u8, mlib_u8, 4)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_U8_U8_4(const mlib_u8 *src,
-                                           mlib_u8       *dst,
-                                           mlib_s32      length,
-                                           const void    *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-
-  switch (s->method) {
-#if LUT_BYTE_COLORS_4CHANNELS <= 256
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        mlib_s32 bits = s->bits;
-        BINARY_TREE_SEARCH_4(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0);
-        break;
-      }
-
-#endif /* LUT_BYTE_COLORS_4CHANNELS <= 256 */
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        COLOR_CUBE_U8_4_SEARCH(mlib_u8);
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-#ifdef USE_VIS_CODE
-        FIND_NEAREST_U8_4;
-#else
-        FIND_NEAREST_U8_4_C;
-#endif
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_4(mlib_u8, mlib_u8)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_U8_S16_3(const mlib_u8 *src,
-                                            mlib_s16      *dst,
-                                            mlib_s32      length,
-                                            const void    *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-  mlib_s32 bits = s->bits;
-
-  switch (s->method) {
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0, 3, 0);
-        break;
-      }
-
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        switch (s->indexsize) {
-          case 1:
-            {
-              COLOR_CUBE_U8_3_SEARCH(mlib_u8, 0, 3);
-              break;
-            }
-
-          case 2:
-            {
-              COLOR_CUBE_U8_3_SEARCH(mlib_s16, 0, 3);
-              break;
-            }
-        }
-
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-#ifdef USE_VIS_CODE
-        FIND_NEAREST_U8_3;
-#else
-        FIND_NEAREST_U8_3_C(0, 3);
-#endif
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_3(mlib_u8, mlib_s16, 3)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4(const mlib_u8 *src,
-                                                 mlib_s16      *dst,
-                                                 mlib_s32      length,
-                                                 const void    *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-  mlib_s32 bits = s->bits;
-
-  switch (s->method) {
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 1, 4, 0);
-        break;
-      }
-
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        switch (s->indexsize) {
-          case 1:
-            {
-              COLOR_CUBE_U8_3_SEARCH(mlib_u8, 1, 4);
-              break;
-            }
-
-          case 2:
-            {
-              COLOR_CUBE_U8_3_SEARCH(mlib_s16, 1, 4);
-              break;
-            }
-        }
-
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-#ifdef USE_VIS_CODE
-        FIND_NEAREST_U8_3_IN4;
-#else
-        FIND_NEAREST_U8_3_C(1, 4);
-#endif
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      src++;
-      DIMENSIONS_SEARCH_3(mlib_u8, mlib_s16, 4)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_U8_S16_4(const mlib_u8 *src,
-                                            mlib_s16      *dst,
-                                            mlib_s32      length,
-                                            const void    *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-  mlib_s32 bits = s->bits;
-
-  switch (s->method) {
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        BINARY_TREE_SEARCH_4(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0);
-        break;
-      }
-
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        switch (s->indexsize) {
-          case 1:
-            {
-              COLOR_CUBE_U8_4_SEARCH(mlib_u8);
-              break;
-            }
-
-          case 2:
-            {
-              COLOR_CUBE_U8_4_SEARCH(mlib_s16);
-              break;
-            }
-        }
-
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-#ifdef USE_VIS_CODE
-        FIND_NEAREST_U8_4;
-#else
-        FIND_NEAREST_U8_4_C;
-#endif
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_4(mlib_u8, mlib_s16)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_S16_S16_3(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       length,
-                                             const void     *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-  mlib_s32 bits = s->bits;
-
-  switch (s->method) {
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2),
-                             MLIB_S16_MIN, 0, 3, 2);
-        break;
-      }
-
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        switch (s->indexsize) {
-          case 1:
-            {
-              COLOR_CUBE_S16_3_SEARCH(mlib_u8, 0, 3);
-              break;
-            }
-
-          case 2:
-            {
-              COLOR_CUBE_S16_3_SEARCH(mlib_s16, 0, 3);
-              break;
-            }
-        }
-
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-        FIND_NEAREST_S16_3(0, 3);
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_3(mlib_s16, mlib_s16, 3)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4(const mlib_s16 *src,
-                                                  mlib_s16       *dst,
-                                                  mlib_s32       length,
-                                                  const void     *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-  mlib_s32 bits = s->bits;
-
-  switch (s->method) {
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2),
-                             MLIB_S16_MIN, 1, 4, 2);
-        break;
-      }
-
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        switch (s->indexsize) {
-          case 1:
-            {
-              COLOR_CUBE_S16_3_SEARCH(mlib_u8, 1, 4);
-              break;
-            }
-
-          case 2:
-            {
-              COLOR_CUBE_S16_3_SEARCH(mlib_s16, 1, 4);
-              break;
-            }
-        }
-
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-        FIND_NEAREST_S16_3(1, 4);
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      src++;
-      DIMENSIONS_SEARCH_3(mlib_s16, mlib_s16, 4)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_S16_S16_4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       length,
-                                             const void     *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-  mlib_s32 bits = s->bits;
-
-  switch (s->method) {
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        BINARY_TREE_SEARCH_4(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2),
-                             MLIB_S16_MIN, 2);
-        break;
-      }
-
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        switch (s->indexsize) {
-          case 1:
-            {
-              COLOR_CUBE_S16_4_SEARCH(mlib_u8);
-              break;
-            }
-
-          case 2:
-            {
-              COLOR_CUBE_S16_4_SEARCH(mlib_s16);
-              break;
-            }
-        }
-
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-        FIND_NEAREST_S16_4;
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_4(mlib_s16, mlib_s16)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_S16_U8_3(const mlib_s16 *src,
-                                            mlib_u8        *dst,
-                                            mlib_s32       length,
-                                            const void     *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-
-  switch (s->method) {
-#if LUT_SHORT_COLORS_3CHANNELS <= 256
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        mlib_s32 bits = s->bits;
-        BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2),
-                             MLIB_S16_MIN, 0, 3, 2);
-        break;
-      }
-
-#endif /* LUT_SHORT_COLORS_3CHANNELS <= 256 */
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        COLOR_CUBE_S16_3_SEARCH(mlib_u8, 0, 3);
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-        FIND_NEAREST_S16_3(0, 3);
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_3(mlib_s16, mlib_u8, 3)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4(const mlib_s16 *src,
-                                                 mlib_u8        *dst,
-                                                 mlib_s32       length,
-                                                 const void     *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-
-  switch (s->method) {
-#if LUT_SHORT_COLORS_3CHANNELS <= 256
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        mlib_s32 bits = s->bits;
-        BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2),
-                             MLIB_S16_MIN, 1, 4, 2);
-        break;
-      }
-
-#endif /* LUT_SHORT_COLORS_3CHANNELS <= 256 */
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        COLOR_CUBE_S16_3_SEARCH(mlib_u8, 1, 4);
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-        FIND_NEAREST_S16_3(1, 4);
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      src++;
-      DIMENSIONS_SEARCH_3(mlib_s16, mlib_u8, 4)
-      break;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageColorTrue2IndexLine_S16_U8_4(const mlib_s16 *src,
-                                            mlib_u8        *dst,
-                                            mlib_s32       length,
-                                            const void     *state)
-{
-  mlib_colormap *s = (mlib_colormap *)state;
-
-  switch (s->method) {
-#if LUT_SHORT_COLORS_4CHANNELS <= 256
-    case LUT_BINARY_TREE_SEARCH:
-      {
-        mlib_s32 bits = s->bits;
-        BINARY_TREE_SEARCH_4(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2),
-                             MLIB_S16_MIN, 2);
-        break;
-      }
-
-#endif /* LUT_SHORT_COLORS_4CHANNELS <= 256 */
-    case LUT_COLOR_CUBE_SEARCH:
-      {
-        COLOR_CUBE_S16_4_SEARCH(mlib_u8);
-        break;
-      }
-
-    case LUT_STUPID_SEARCH:
-      {
-        FIND_NEAREST_S16_4;
-        break;
-      }
-
-    case LUT_COLOR_DIMENSIONS:
-      DIMENSIONS_SEARCH_4(mlib_s16, mlib_u8)
-      break;
-  }
-}
-
-/***************************************************************/
-
-#ifndef VIS
-
-void mlib_c_ImageThresh1_U81_1B(void     *psrc,
-                                void     *pdst,
-                                mlib_s32 src_stride,
-                                mlib_s32 dst_stride,
-                                mlib_s32 width,
-                                mlib_s32 height,
-                                void     *thresh,
-                                void     *ghigh,
-                                void     *glow,
-                                mlib_s32 dbit_off);
-
-/***************************************************************/
-
-void mlib_ImageColorTrue2IndexLine_U8_BIT_1(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      bit_offset,
-                                            mlib_s32      length,
-                                            const void    *state)
-{
-  mlib_u8  *lut = ((mlib_colormap *)state)->table;
-  mlib_s32 thresh[1];
-  mlib_s32 ghigh[1];
-  mlib_s32 glow[1];
-
-  thresh[0] = lut[2];
-
-  glow[0]  = lut[0] - lut[1];
-  ghigh[0] = lut[1] - lut[0];
-
-  mlib_c_ImageThresh1_U81_1B((void*)src, dst, 0, 0, length, 1,
-                             thresh, ghigh, glow, bit_offset);
-}
-
-#else
-
-/***************************************************************/
-
-void mlib_v_ImageThresh1B_U8_1(const mlib_u8  *src,
-                               mlib_s32       slb,
-                               mlib_u8        *dst,
-                               mlib_s32       dlb,
-                               mlib_s32       xsize,
-                               mlib_s32       ysize,
-                               mlib_s32       dbit_off,
-                               const mlib_s32 *th,
-                               mlib_s32       hc,
-                               mlib_s32       lc);
-
-/***************************************************************/
-
-void mlib_ImageColorTrue2IndexLine_U8_BIT_1(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      bit_offset,
-                                            mlib_s32      length,
-                                            const void    *state)
-{
-  mlib_u8  *lut = ((mlib_colormap *)state)->table;
-  mlib_s32 thresh[4];
-  mlib_s32 ghigh[1];
-  mlib_s32 glow[1];
-
-  thresh[0] = thresh[1] = thresh[2] = thresh[3] = lut[2];
-
-  glow[0]  = (lut[1] < lut[0]) ? 0xFF : 0;
-  ghigh[0] = (lut[1] < lut[0]) ? 0 : 0xFF;
-
-  mlib_v_ImageThresh1B_U8_1((void*)src, 0, dst, 0, length, 1,
-                            bit_offset, thresh, ghigh[0], glow[0]);
-}
-
-/***************************************************************/
-
-#endif
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColormap.h	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,221 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-#ifndef __MLIB_IMAGECOLORMAP_H
-#define __MLIB_IMAGECOLORMAP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-typedef struct {
-  void **lut;
-  mlib_s32 channels;
-  mlib_type intype;
-  mlib_s32 offset;
-  void *table;
-  mlib_s32 bits;
-  mlib_s32 method;
-  mlib_s32 lutlength;
-  mlib_s32 indexsize;
-  mlib_type outtype;
-  void *normal_table;
-  mlib_d64 *double_lut;
-} mlib_colormap;
-
-/***************************************************************/
-#define LUT_COLOR_CUBE_SEARCH  0
-#define LUT_BINARY_TREE_SEARCH 1
-#define LUT_STUPID_SEARCH      2
-#define LUT_COLOR_DIMENSIONS   3
-
-/***************************************************************/
-
-/* Bit set in the tag denotes that the corresponding quadrant is a
-   palette index, not node. If the bit is clear, this means that that
-   is a pointer to the down level node. If the bit is clear and the
-   corresponding quadrant is NULL, then there is no way down there and
-   this quadrant is clear. */
-
-struct lut_node_3 {
-  mlib_u8 tag;
-  union {
-    struct lut_node_3 *quadrants[8];
-    long index[8];
-  } contents;
-};
-
-struct lut_node_4 {
-  mlib_u16 tag;
-  union {
-    struct lut_node_4 *quadrants[16];
-    long index[16];
-  } contents;
-};
-
-/***************************************************************/
-
-#define mlib_ImageGetLutData(colormap)                          \
-  ((void **)((( mlib_colormap *)( colormap))->lut))
-
-/***************************************************************/
-#define mlib_ImageGetLutNormalTable(colormap)                   \
-  ((void *)((( mlib_colormap *)( colormap))->normal_table))
-
-/***************************************************************/
-#define mlib_ImageGetLutInversTable(colormap)                   \
-  ((void *)((( mlib_colormap *)( colormap))->table))
-
-/***************************************************************/
-#define mlib_ImageGetLutChannels(colormap)                      \
-  ((mlib_s32)((( mlib_colormap *)( colormap))->channels))
-
-/***************************************************************/
-#define mlib_ImageGetLutType(colormap)                          \
-  ((mlib_type)((( mlib_colormap *)( colormap))->intype))
-
-/***************************************************************/
-#define mlib_ImageGetIndexSize(colormap)                        \
-  ((mlib_s32)((( mlib_colormap *)( colormap))->indexsize))
-
-/***************************************************************/
-#define mlib_ImageGetOutType(colormap)                          \
-  ((mlib_type)((( mlib_colormap *)( colormap))->outtype))
-
-/***************************************************************/
-#define mlib_ImageGetLutOffset(colormap)                        \
-  ((mlib_s32)((( mlib_colormap *)( colormap))->offset))
-
-/***************************************************************/
-#define mlib_ImageGetBits(colormap)                             \
-  ((mlib_s32)((( mlib_colormap *)( colormap))->bits))
-
-/***************************************************************/
-#define mlib_ImageGetMethod(colormap)                           \
-  ((mlib_s32)((( mlib_colormap *)( colormap))->method))
-
-/***************************************************************/
-#define mlib_ImageGetLutDoubleData(colormap)                    \
-  ((mlib_d64 *)((( mlib_colormap *)( colormap))->double_lut))
-
-/***************************************************************/
-#define FIND_DISTANCE_3( x1, x2, y1, y2, z1, z2, SHIFT )        \
- (( ( ( ( x1 ) - ( x2 ) ) * ( ( x1 ) - ( x2 ) ) ) >> SHIFT ) +  \
-  ( ( ( ( y1 ) - ( y2 ) ) * ( ( y1 ) - ( y2 ) ) ) >> SHIFT ) +  \
-  ( ( ( ( z1 ) - ( z2 ) ) * ( ( z1 ) - ( z2 ) ) ) >> SHIFT ) )
-
-/***************************************************************/
-#define FIND_DISTANCE_4( x1, x2, y1, y2, z1, z2, w1, w2, SHIFT ) \
-  (( ( ( ( x1 ) - ( x2 ) ) * ( ( x1 ) - ( x2 ) ) ) >> SHIFT ) +  \
-   ( ( ( ( y1 ) - ( y2 ) ) * ( ( y1 ) - ( y2 ) ) ) >> SHIFT ) +  \
-   ( ( ( ( z1 ) - ( z2 ) ) * ( ( z1 ) - ( z2 ) ) ) >> SHIFT ) +  \
-   ( ( ( ( w1 ) - ( w2 ) ) * ( ( w1 ) - ( w2 ) ) ) >> SHIFT ) )
-
-/***************************************************************/
-
-void mlib_ImageColorTrue2IndexLine_U8_BIT_1(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      bit_offset,
-                                            mlib_s32      length,
-                                            const void    *state);
-
-
-void mlib_ImageColorTrue2IndexLine_U8_U8_3(const mlib_u8 *src,
-                                           mlib_u8       *dst,
-                                           mlib_s32      length,
-                                           const void    *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4(const mlib_u8 *src,
-                                                mlib_u8       *dst,
-                                                mlib_s32      length,
-                                                const void    *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_U8_U8_4(const mlib_u8 *src,
-                                           mlib_u8       *dst,
-                                           mlib_s32      length,
-                                           const void    *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_U8_S16_3(const mlib_u8 *src,
-                                            mlib_s16      *dst,
-                                            mlib_s32      length,
-                                            const void    *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4(const mlib_u8 *src,
-                                                 mlib_s16      *dst,
-                                                 mlib_s32      length,
-                                                 const void    *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_U8_S16_4(const mlib_u8 *src,
-                                            mlib_s16      *dst,
-                                            mlib_s32      length,
-                                            const void    *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_S16_S16_3(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       length,
-                                             const void     *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4(const mlib_s16 *src,
-                                                  mlib_s16       *dst,
-                                                  mlib_s32       length,
-                                                  const void     *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_S16_S16_4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       length,
-                                             const void     *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_S16_U8_3(const mlib_s16 *src,
-                                            mlib_u8        *dst,
-                                            mlib_s32       length,
-                                            const void     *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4(const mlib_s16 *src,
-                                                 mlib_u8        *dst,
-                                                 mlib_s32       length,
-                                                 const void     *colormap);
-
-
-void mlib_ImageColorTrue2IndexLine_S16_U8_4(const mlib_s16 *src,
-                                            mlib_u8        *dst,
-                                            mlib_s32       length,
-                                            const void     *colormap);
-
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* __MLIB_IMAGECOLORMAP_H */
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv.h	Fri May 13 11:31:05 2016 +0300
@@ -41,466 +41,6 @@
 }
 #endif /* FREE_AND_RETURN_STATUS */
 
-void mlib_ImageXor80_aa(mlib_u8  *dl,
-                        mlib_s32 wid,
-                        mlib_s32 hgt,
-                        mlib_s32 str);
-
-void mlib_ImageXor80(mlib_u8  *dl,
-                     mlib_s32 wid,
-                     mlib_s32 hgt,
-                     mlib_s32 str,
-                     mlib_s32 nchan,
-                     mlib_s32 cmask);
-
-mlib_status mlib_conv2x2ext_d64(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv2x2ext_f32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv2x2ext_s16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv2x2ext_s32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv2x2ext_u16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv2x2ext_u8(mlib_image       *dst,
-                               const mlib_image *src,
-                               mlib_s32         dx_l,
-                               mlib_s32         dx_r,
-                               mlib_s32         dy_t,
-                               mlib_s32         dy_b,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv2x2nw_d64(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv2x2nw_f32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv2x2nw_s16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv2x2nw_s32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv2x2nw_u16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv2x2nw_u8(mlib_image       *dst,
-                              const mlib_image *src,
-                              const mlib_s32   *kern,
-                              mlib_s32         scale,
-                              mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_bit(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_d64(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_f32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_s16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_s32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_u16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv3x3ext_u8(mlib_image       *dst,
-                               const mlib_image *src,
-                               mlib_s32         dx_l,
-                               mlib_s32         dx_r,
-                               mlib_s32         dy_t,
-                               mlib_s32         dy_b,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_bit(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_d64(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_f32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_s16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_s32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_u16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3nw_u8(mlib_image       *dst,
-                              const mlib_image *src,
-                              const mlib_s32   *kern,
-                              mlib_s32         scale,
-                              mlib_s32         cmask);
-
-mlib_status mlib_conv4x4ext_d64(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv4x4ext_f32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv4x4ext_s16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv4x4ext_s32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv4x4ext_u16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv4x4ext_u8(mlib_image       *dst,
-                               const mlib_image *src,
-                               mlib_s32         dx_l,
-                               mlib_s32         dx_r,
-                               mlib_s32         dy_t,
-                               mlib_s32         dy_b,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv4x4nw_d64(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv4x4nw_f32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv4x4nw_s16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv4x4nw_s32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv4x4nw_u16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv4x4nw_u8(mlib_image       *dst,
-                              const mlib_image *src,
-                              const mlib_s32   *kern,
-                              mlib_s32         scale,
-                              mlib_s32         cmask);
-
-mlib_status mlib_conv5x5ext_d64(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv5x5ext_f32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_d64   *kern,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv5x5ext_s16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv5x5ext_s32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv5x5ext_u16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv5x5ext_u8(mlib_image       *dst,
-                               const mlib_image *src,
-                               mlib_s32         dx_l,
-                               mlib_s32         dx_r,
-                               mlib_s32         dy_t,
-                               mlib_s32         dy_b,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv5x5nw_d64(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv5x5nw_f32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_d64   *kern,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv5x5nw_s16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv5x5nw_s32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv5x5nw_u16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv5x5nw_u8(mlib_image       *dst,
-                              const mlib_image *src,
-                              const mlib_s32   *kern,
-                              mlib_s32         scale,
-                              mlib_s32         cmask);
-
-mlib_status mlib_conv7x7ext_s16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv7x7ext_s32(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv7x7ext_u16(mlib_image       *dst,
-                                const mlib_image *src,
-                                mlib_s32         dx_l,
-                                mlib_s32         dx_r,
-                                mlib_s32         dy_t,
-                                mlib_s32         dy_b,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_conv7x7ext_u8(mlib_image       *dst,
-                               const mlib_image *src,
-                               mlib_s32         dx_l,
-                               mlib_s32         dx_r,
-                               mlib_s32         dy_t,
-                               mlib_s32         dy_b,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv7x7nw_s16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv7x7nw_s32(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv7x7nw_u16(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv7x7nw_u8(mlib_image       *dst,
-                              const mlib_image *src,
-                              const mlib_s32   *kern,
-                              mlib_s32         scale,
-                              mlib_s32         cmask);
-
 mlib_status mlib_convMxNext_s32(mlib_image       *dst,
                                 const mlib_image *src,
                                 const mlib_s32   *kernel,
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv2x2_f.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1118 +0,0 @@
-/*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-/*
- * FUNCTION
- *      Internal functions for mlib_ImageConv2x2 on U8/S16/U16 types
- *      and MLIB_EDGE_DST_NO_WRITE mask.
- */
-
-#include "mlib_image.h"
-#include "mlib_ImageConv.h"
-#include "mlib_c_ImageConv.h"
-
-/***************************************************************/
-#ifdef i386 /* do not copy by mlib_d64 data type for x86 */
-
-typedef struct {
-  mlib_s32 int0, int1;
-} two_int;
-
-#define TYPE_64BIT two_int
-
-#else /* i386 */
-
-#define TYPE_64BIT mlib_d64
-
-#endif /* i386 ( do not copy by mlib_d64 data type for x86 ) */
-
-/***************************************************************/
-#define LOAD_KERNEL_INTO_DOUBLE()                                        \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3]
-
-/***************************************************************/
-#define GET_SRC_DST_PARAMETERS(type)                            \
-  hgt = mlib_ImageGetHeight(src);                               \
-  wid = mlib_ImageGetWidth(src);                                \
-  nchannel = mlib_ImageGetChannels(src);                        \
-  sll = mlib_ImageGetStride(src) / sizeof(type);                \
-  dll = mlib_ImageGetStride(dst) / sizeof(type);                \
-  adr_src = (type *)mlib_ImageGetData(src);                     \
-  adr_dst = (type *)mlib_ImageGetData(dst)
-
-/***************************************************************/
-#ifndef MLIB_USE_FTOI_CLAMPING
-
-#define CLAMP_S32(x)                                            \
-  (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN :                       \
-  (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))
-
-#else
-
-#define CLAMP_S32(x) ((mlib_s32)(x))
-
-#endif /* MLIB_USE_FTOI_CLAMPING */
-
-/***************************************************************/
-#if defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG)
-
-/* NB: Explicit cast to DTYPE is necessary to avoid warning from Microsoft VC compiler.
-      And we need to explicitly define cast behavior if source exceeds destination range.
-      (it is undefined according to C99 spec). We use mask here because this macro is typically
-      used to extract bit regions. */
-
-#define STORE2(res0, res1)                                      \
-  dp[0    ] = (DTYPE) ((res1) & DTYPE_MASK);                      \
-  dp[chan1] = (DTYPE) ((res0) & DTYPE_MASK)
-
-#else
-
-#define STORE2(res0, res1)                                      \
-  dp[0    ] = (DTYPE) ((res0) & DTYPE_MASK);                      \
-  dp[chan1] = (DTYPE) ((res1) & DTYPE_MASK)
-
-#endif /* defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG) */
-
-/***************************************************************/
-#ifdef _NO_LONGLONG
-
-#define LOAD_BUFF(buff)                                         \
-  buff[i    ] = sp[0];                                          \
-  buff[i + 1] = sp[chan1]
-
-#else /* _NO_LONGLONG */
-
-#ifdef _LITTLE_ENDIAN
-
-#define LOAD_BUFF(buff)                                         \
-  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | ((mlib_s64)sp[0] & 0xffffffff)
-
-#else /* _LITTLE_ENDIAN */
-
-#define LOAD_BUFF(buff)                                         \
-  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | ((mlib_s64)sp[chan1] & 0xffffffff)
-
-#endif /* _LITTLE_ENDIAN */
-
-#endif /* _NO_LONGLONG */
-
-/***************************************************************/
-typedef union {
-  TYPE_64BIT d64;
-  struct {
-    mlib_s32 i0, i1;
-  } i32s;
-} d64_2x32;
-
-/***************************************************************/
-#define D_KER     1
-
-#define BUFF_LINE 256
-
-/***************************************************************/
-#define XOR_80(x) x ^= 0x80
-
-void mlib_ImageXor80_aa(mlib_u8  *dl,
-                        mlib_s32 wid,
-                        mlib_s32 hgt,
-                        mlib_s32 str)
-{
-  mlib_u8  *dp, *dend;
-#ifdef _NO_LONGLONG
-  mlib_u32 cadd = 0x80808080;
-#else /* _NO_LONGLONG */
-  mlib_u64 cadd = MLIB_U64_CONST(0x8080808080808080);
-#endif /* _NO_LONGLONG */
-  mlib_s32 j;
-
-  if (wid == str) {
-    wid *= hgt;
-    hgt = 1;
-  }
-
-  for (j = 0; j < hgt; j++) {
-    dend = dl + wid;
-
-    for (dp = dl; ((mlib_addr)dp & 7) && (dp < dend); dp++) XOR_80(dp[0]);
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (; dp <= (dend - 8); dp += 8) {
-#ifdef _NO_LONGLONG
-      *((mlib_s32*)dp) ^= cadd;
-      *((mlib_s32*)dp+1) ^= cadd;
-#else /* _NO_LONGLONG */
-      *((mlib_u64*)dp) ^= cadd;
-#endif /* _NO_LONGLONG */
-    }
-
-    for (; (dp < dend); dp++) XOR_80(dp[0]);
-
-    dl += str;
-  }
-}
-
-/***************************************************************/
-void mlib_ImageXor80(mlib_u8  *dl,
-                     mlib_s32 wid,
-                     mlib_s32 hgt,
-                     mlib_s32 str,
-                     mlib_s32 nchan,
-                     mlib_s32 cmask)
-{
-  mlib_s32 i, j, c;
-
-  for (j = 0; j < hgt; j++) {
-    for (c = 0; c < nchan; c++) {
-      if (cmask & (1 << (nchan - 1 - c))) {
-        mlib_u8 *dp = dl + c;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-        for (i = 0; i < wid; i++) XOR_80(dp[i*nchan]);
-      }
-    }
-
-    dl += str;
-  }
-}
-
-/***************************************************************/
-#define DTYPE mlib_s16
-#define DTYPE_MASK 0xffff
-
-mlib_status mlib_c_conv2x2nw_s16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scalef_expon,
-                                 mlib_s32         cmask)
-{
-  mlib_d64 buff_arr[2*BUFF_LINE];
-  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
-  DTYPE    *adr_src, *sl, *sp, *sl1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
-  mlib_d64 p00, p01, p02,
-           p10, p11, p12;
-  mlib_s32 wid, hgt, sll, dll, wid1;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c;
-  LOAD_KERNEL_INTO_DOUBLE();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  wid1 = (wid + 1) &~ 1;
-
-  if (wid1 > BUFF_LINE) {
-    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffo = pbuff;
-  buff0 = buffo + wid1;
-  buff1 = buff0 + wid1;
-  buff2 = buff1 + wid1;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= D_KER;
-  hgt -= D_KER;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + D_KER; i++) {
-      buff0[i - 1] = (mlib_s32)sl[i*chan1];
-      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
-    }
-
-    sl += (D_KER + 1)*sll;
-
-    for (j = 0; j < hgt; j++) {
-      sp = sl;
-      dp = dl;
-
-      buff2[-1] = (mlib_s32)sp[0];
-      sp += chan1;
-
-      p02 = buff0[-1];
-      p12 = buff1[-1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-        d64_2x32 sd0, sd1, dd;
-
-        p00 = p02; p10 = p12;
-
-        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
-        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
-        p01 = (mlib_d64)sd0.i32s.i0;
-        p02 = (mlib_d64)sd0.i32s.i1;
-        p11 = (mlib_d64)sd1.i32s.i0;
-        p12 = (mlib_d64)sd1.i32s.i1;
-
-        LOAD_BUFF(buff2);
-
-        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
-        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);
-        *(TYPE_64BIT*)(buffo + i) = dd.d64;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-        STORE2(o64_1 >> 16, o64_2 >> 16);
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-        STORE2(o64 >> 48, o64 >> 16);
-
-#endif /* _NO_LONGLONG */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i - 1]; p10 = buff1[i - 1];
-        p01 = buff0[i];     p11 = buff1[i];
-
-        buff2[i] = (mlib_s32)sp[0];
-
-        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
-        dp[0] = buffo[i] >> 16;
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-mlib_status mlib_c_conv2x2ext_s16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scalef_expon,
-                                  mlib_s32         cmask)
-{
-  mlib_d64 buff_arr[2*BUFF_LINE];
-  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
-  DTYPE    *adr_src, *sl, *sp, *sl1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
-  mlib_d64 p00, p01, p02,
-           p10, p11, p12;
-  mlib_s32 wid, hgt, sll, dll, wid1;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c, swid;
-  LOAD_KERNEL_INTO_DOUBLE();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + D_KER;
-
-  wid1 = (swid + 1) &~ 1;
-
-  if (wid1 > BUFF_LINE) {
-    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffo = pbuff;
-  buff0 = buffo + wid1;
-  buff1 = buff0 + wid1;
-  buff2 = buff1 + wid1;
-
-  swid -= dx_r;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((hgt - dy_b) > 0) sl1 = sl + sll;
-    else sl1 = sl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i - 1] = (mlib_s32)sl[i*chan1];
-      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
-    }
-
-    if (dx_r != 0) {
-      buff0[swid - 1] = buff0[swid - 2];
-      buff1[swid - 1] = buff1[swid - 2];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl1 + sll;
-    else sl = sl1;
-
-    for (j = 0; j < hgt; j++) {
-      sp = sl;
-      dp = dl;
-
-      buff2[-1] = (mlib_s32)sp[0];
-      sp += chan1;
-
-      p02 = buff0[-1];
-      p12 = buff1[-1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-        d64_2x32 sd0, sd1, dd;
-
-        p00 = p02; p10 = p12;
-
-        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
-        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
-        p01 = (mlib_d64)sd0.i32s.i0;
-        p02 = (mlib_d64)sd0.i32s.i1;
-        p11 = (mlib_d64)sd1.i32s.i0;
-        p12 = (mlib_d64)sd1.i32s.i1;
-
-        LOAD_BUFF(buff2);
-
-        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
-        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);
-        *(TYPE_64BIT*)(buffo + i) = dd.d64;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-        STORE2(o64_1 >> 16, o64_2 >> 16);
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-        STORE2(o64 >> 48, o64 >> 16);
-
-#endif /* _NO_LONGLONG */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i - 1]; p10 = buff1[i - 1];
-        p01 = buff0[i];     p11 = buff1[i];
-
-        buff2[i] = (mlib_s32)sp[0];
-
-        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
-        dp[0] = buffo[i] >> 16;
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  DTYPE
-#define DTYPE mlib_u16
-
-mlib_status mlib_c_conv2x2nw_u16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scalef_expon,
-                                 mlib_s32         cmask)
-{
-  mlib_d64 buff_arr[2*BUFF_LINE];
-  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
-  DTYPE    *adr_src, *sl, *sp, *sl1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
-  mlib_d64 p00, p01, p02,
-           p10, p11, p12;
-  mlib_s32 wid, hgt, sll, dll, wid1;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c;
-  mlib_d64 doff = 0x7FFF8000;
-  LOAD_KERNEL_INTO_DOUBLE();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  wid1 = (wid + 1) &~ 1;
-
-  if (wid1 > BUFF_LINE) {
-    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffo = pbuff;
-  buff0 = buffo + wid1;
-  buff1 = buff0 + wid1;
-  buff2 = buff1 + wid1;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= D_KER;
-  hgt -= D_KER;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + D_KER; i++) {
-      buff0[i - 1] = (mlib_s32)sl[i*chan1];
-      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
-    }
-
-    sl += (D_KER + 1)*sll;
-
-    for (j = 0; j < hgt; j++) {
-      sp = sl;
-      dp = dl;
-
-      buff2[-1] = (mlib_s32)sp[0];
-      sp += chan1;
-
-      p02 = buff0[-1];
-      p12 = buff1[-1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-        d64_2x32 sd0, sd1, dd;
-
-        p00 = p02; p10 = p12;
-
-        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
-        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
-        p01 = (mlib_d64)sd0.i32s.i0;
-        p02 = (mlib_d64)sd0.i32s.i1;
-        p11 = (mlib_d64)sd1.i32s.i0;
-        p12 = (mlib_d64)sd1.i32s.i1;
-
-        LOAD_BUFF(buff2);
-
-        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
-        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff);
-        *(TYPE_64BIT*)(buffo + i) = dd.d64;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-        o64_1 = o64_1 ^ 0x80000000U;
-        o64_2 = o64_2 ^ 0x80000000U;
-        STORE2(o64_1 >> 16, o64_2 >> 16);
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-        o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000);
-        STORE2(o64 >> 48, o64 >> 16);
-
-#endif /* _NO_LONGLONG */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i - 1]; p10 = buff1[i - 1];
-        p01 = buff0[i];     p11 = buff1[i];
-
-        buff2[i] = (mlib_s32)sp[0];
-
-        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
-        dp[0] = (buffo[i] >> 16) ^ 0x8000;
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-mlib_status mlib_c_conv2x2ext_u16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scalef_expon,
-                                  mlib_s32         cmask)
-{
-  mlib_d64 buff_arr[2*BUFF_LINE];
-  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
-  DTYPE    *adr_src, *sl, *sp, *sl1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
-  mlib_d64 p00, p01, p02,
-           p10, p11, p12;
-  mlib_s32 wid, hgt, sll, dll, wid1;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c, swid;
-  mlib_d64 doff = 0x7FFF8000;
-  LOAD_KERNEL_INTO_DOUBLE();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + D_KER;
-
-  wid1 = (swid + 1) &~ 1;
-
-  if (wid1 > BUFF_LINE) {
-    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffo = pbuff;
-  buff0 = buffo + wid1;
-  buff1 = buff0 + wid1;
-  buff2 = buff1 + wid1;
-
-  swid -= dx_r;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((hgt - dy_b) > 0) sl1 = sl + sll;
-    else sl1 = sl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i - 1] = (mlib_s32)sl[i*chan1];
-      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
-    }
-
-    if (dx_r != 0) {
-      buff0[swid - 1] = buff0[swid - 2];
-      buff1[swid - 1] = buff1[swid - 2];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl1 + sll;
-    else sl = sl1;
-
-    for (j = 0; j < hgt; j++) {
-      sp = sl;
-      dp = dl;
-
-      buff2[-1] = (mlib_s32)sp[0];
-      sp += chan1;
-
-      p02 = buff0[-1];
-      p12 = buff1[-1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-        d64_2x32 sd0, sd1, dd;
-
-        p00 = p02; p10 = p12;
-
-        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
-        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
-        p01 = (mlib_d64)sd0.i32s.i0;
-        p02 = (mlib_d64)sd0.i32s.i1;
-        p11 = (mlib_d64)sd1.i32s.i0;
-        p12 = (mlib_d64)sd1.i32s.i1;
-
-        LOAD_BUFF(buff2);
-
-        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
-        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff);
-        *(TYPE_64BIT*)(buffo + i) = dd.d64;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-        o64_1 = o64_1 ^ 0x80000000U;
-        o64_2 = o64_2 ^ 0x80000000U;
-        STORE2(o64_1 >> 16, o64_2 >> 16);
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-        o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000);
-        STORE2(o64 >> 48, o64 >> 16);
-
-#endif /* _NO_LONGLONG */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i - 1]; p10 = buff1[i - 1];
-        p01 = buff0[i];     p11 = buff1[i];
-
-        buff2[i] = (mlib_s32)sp[0];
-
-        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
-        dp[0] = (buffo[i] >> 16) ^ 0x8000;
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  DTYPE
-#define DTYPE mlib_u8
-
-mlib_status mlib_c_conv2x2nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scalef_expon,
-                                mlib_s32         cmask)
-{
-  mlib_d64 buff_arr[2*BUFF_LINE];
-  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
-  DTYPE    *adr_src, *sl, *sp, *sl1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_d64 k0, k1, k2, k3, scalef = (1 << 24);
-  mlib_d64 p00, p01, p02,
-           p10, p11, p12;
-  mlib_s32 wid, hgt, sll, dll, wid1;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c;
-  LOAD_KERNEL_INTO_DOUBLE();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  wid1 = (wid + 1) &~ 1;
-
-  if (wid1 > BUFF_LINE) {
-    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffo = pbuff;
-  buff0 = buffo + wid1;
-  buff1 = buff0 + wid1;
-  buff2 = buff1 + wid1;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= D_KER;
-  hgt -= D_KER;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + D_KER; i++) {
-      buff0[i - 1] = (mlib_s32)sl[i*chan1];
-      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
-    }
-
-    sl += (D_KER + 1)*sll;
-
-    for (j = 0; j < hgt; j++) {
-      sp = sl;
-      dp = dl;
-
-      buff2[-1] = (mlib_s32)sp[0];
-      sp += chan1;
-
-      p02 = buff0[-1];
-      p12 = buff1[-1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-        d64_2x32 sd0, sd1, dd;
-
-        p00 = p02; p10 = p12;
-
-        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
-        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
-        p01 = (mlib_d64)sd0.i32s.i0;
-        p02 = (mlib_d64)sd0.i32s.i1;
-        p11 = (mlib_d64)sd1.i32s.i0;
-        p12 = (mlib_d64)sd1.i32s.i1;
-
-        LOAD_BUFF(buff2);
-
-        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
-        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31));
-        *(TYPE_64BIT*)(buffo + i) = dd.d64;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-        STORE2(o64 >> 56, o64 >> 24);
-
-#endif /* _NO_LONGLONG */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i - 1]; p10 = buff1[i - 1];
-        p01 = buff0[i];     p11 = buff1[i];
-
-        buff2[i] = (mlib_s32)sp[0];
-
-        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
-        dp[0] = (buffo[i] >> 24);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-mlib_status mlib_c_conv2x2ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scalef_expon,
-                                 mlib_s32         cmask)
-{
-  mlib_d64 buff_arr[4*BUFF_LINE];
-  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
-  DTYPE    *adr_src, *sl, *sp, *sl1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_d64 k0, k1, k2, k3, scalef = (1 << 24);
-  mlib_d64 p00, p01, p02,
-           p10, p11, p12;
-  mlib_s32 wid, hgt, sll, dll, wid1;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c, swid;
-  LOAD_KERNEL_INTO_DOUBLE();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + D_KER;
-
-  wid1 = (swid + 1) &~ 1;
-
-  if (wid1 > BUFF_LINE) {
-    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffo = pbuff;
-  buff0 = buffo + wid1;
-  buff1 = buff0 + wid1;
-  buff2 = buff1 + wid1;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  swid -= dx_r;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((hgt - dy_b) > 0) sl1 = sl + sll;
-    else sl1 = sl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i - 1] = (mlib_s32)sl[i*chan1];
-      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
-    }
-
-    if (dx_r != 0) {
-      buff0[swid - 1] = buff0[swid - 2];
-      buff1[swid - 1] = buff1[swid - 2];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl1 + sll;
-    else sl = sl1;
-
-    for (j = 0; j < hgt; j++) {
-      sp = sl;
-      dp = dl;
-
-      buff2[-1] = (mlib_s32)sp[0];
-      sp += chan1;
-
-      p02 = buff0[-1];
-      p12 = buff1[-1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-        d64_2x32 sd0, sd1, dd;
-
-        p00 = p02; p10 = p12;
-
-        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
-        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
-        p01 = (mlib_d64)sd0.i32s.i0;
-        p02 = (mlib_d64)sd0.i32s.i1;
-        p11 = (mlib_d64)sd1.i32s.i0;
-        p12 = (mlib_d64)sd1.i32s.i1;
-
-        LOAD_BUFF(buff2);
-
-        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
-        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31));
-        *(TYPE_64BIT*)(buffo + i) = dd.d64;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-        STORE2(o64 >> 56, o64 >> 24);
-
-#endif /* _NO_LONGLONG */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i - 1]; p10 = buff1[i - 1];
-        p01 = buff0[i];     p11 = buff1[i];
-
-        buff2[i] = (mlib_s32)sp[0];
-
-        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
-        dp[0] = (buffo[i] >> 24);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c	Fri May 13 11:31:05 2016 +0300
@@ -80,9 +80,6 @@
 #endif /* IMG_TYPE == 1 */
 
 /***************************************************************/
-#define KSIZE1 (KSIZE - 1)
-
-/***************************************************************/
 #define PARAM                                                   \
   mlib_image       *dst,                                        \
   const mlib_image *src,                                        \
@@ -163,9 +160,6 @@
 #endif /* _NO_LONGLONG */
 
 /***************************************************************/
-#define MLIB_D2_24 16777216.0f
-
-/***************************************************************/
 typedef union {
   mlib_d64 d64;
   struct {
@@ -175,52 +169,6 @@
 } d64_2x32;
 
 /***************************************************************/
-#define BUFF_LINE 256
-
-/***************************************************************/
-#define DEF_VARS(type)                                          \
-  type     *adr_src, *sl, *sp, *sl1;                            \
-  type     *adr_dst, *dl, *dp;                                  \
-  FTYPE    *pbuff = buff;                                       \
-  mlib_s32 *buffi, *buffo;                                      \
-  mlib_s32 wid, hgt, sll, dll;                                  \
-  mlib_s32 nchannel, chan1, chan2;                              \
-  mlib_s32 i, j, c, swid
-
-/***************************************************************/
-#define LOAD_KERNEL3()                                                   \
-  FTYPE    scalef = DSCALE;                                              \
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8;                           \
-  FTYPE    p00, p01, p02, p03,                                           \
-           p10, p11, p12, p13,                                           \
-           p20, p21, p22, p23;                                           \
-                                                                         \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5]; \
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8]
-
-/***************************************************************/
-#define LOAD_KERNEL(SIZE)                                       \
-  FTYPE    scalef = DSCALE;                                     \
-                                                                \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon);                                \
-                                                                \
-  for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
-
-/***************************************************************/
 #define GET_SRC_DST_PARAMETERS(type)                            \
   hgt = mlib_ImageGetHeight(src);                               \
   wid = mlib_ImageGetWidth(src);                                \
@@ -278,1334 +226,6 @@
 #endif /* __sparc */
 
 /***************************************************************/
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)
-{
-  FTYPE    buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2;
-#ifndef __sparc
-  mlib_s32 d0, d1;
-#endif /* __sparc */
-  LOAD_KERNEL3();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buffo = (mlib_s32*)(buff3 + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((hgt - dy_b) > 0) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl2 + sll;
-    else sl = sl2;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef __sparc
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-        d64_2x32 dd;
-
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff3[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff3[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-#ifndef __sparc
-
-        d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-#else /* __sparc */
-
-        dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-        *(FTYPE   *)(buffo + i) = dd.d64;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
-#else
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-#endif /* IMG_TYPE != 1 */
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
-#else
-        STORE2(o64 >> 56, o64 >> 24);
-#endif /* IMG_TYPE != 1 */
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i + dx_l] = (FTYPE)buffi[i];
-
-#ifndef __sparc
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        dp[0] = FROM_S32(d0);
-
-#else  /* __sparc */
-
-        buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                       p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-#if IMG_TYPE != 1
-        dp[0] = FROM_S32(buffo[i]);
-#else
-        dp[0] = buffo[i] >> 24;
-#endif /* IMG_TYPE != 1 */
-#endif /* __sparc */
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i + dx_l] = (FTYPE)buffi[i];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff3[i] = buff3[dx_l];
-      for (i = 0; i < dx_r; i++) buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-#ifdef __sparc
-#if IMG_TYPE == 1
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-#endif /* IMG_TYPE == 1 */
-#endif /* __sparc */
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(3x3)
-{
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2, *sp_1, *sp_2;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, delta_chan;
-  mlib_s32 i, j, c;
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_s32 p02, p03,
-           p12, p13,
-           p22, p23;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  /* keep kernel in regs */
-  k0 = kern[0] >> shift1;  k1 = kern[1] >> shift1;  k2 = kern[2] >> shift1;
-  k3 = kern[3] >> shift1;  k4 = kern[4] >> shift1;  k5 = kern[5] >> shift1;
-  k6 = kern[6] >> shift1;  k7 = kern[7] >> shift1;  k8 = kern[8] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-  delta_chan = 0;
-
-  if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan = chan1;
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sp_1 = sl;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_2 = sl;
-
-    if ((hgt - dy_b) > 0) sl += sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 s0, s1;
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sp_1;
-      sp_1 = sp_2;
-      sp_2 = sl;
-
-      sp1 = sp_1;
-      sp2 = sp_2;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[delta_chan];
-      p13 = sp1[delta_chan];
-      p23 = sp2[delta_chan];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += (chan1 + delta_chan);
-      sp1 += (chan1 + delta_chan);
-      sp2 += (chan1 + delta_chan);
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
-                p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      p02 = p03; p12 = p13; p22 = p23;
-
-      for (; i < wid - dx_r; i++) {
-        p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0];
-        pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0);
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        p02 = p03; p12 = p13; p22 = p23;
-        sp0 += chan1;
-        sp1 += chan1;
-        sp2 += chan1;
-        dp += chan1;
-      }
-
-      sp0 -= chan1;
-      sp1 -= chan1;
-      sp2 -= chan1;
-
-      for (; i < wid; i++) {
-        p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0];
-        pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0);
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        p02 = p03; p12 = p13; p22 = p23;
-        dp += chan1;
-      }
-
-      if (j < hgt - dy_b - 1) sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7;
-  FTYPE    p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2, *sl3;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buff4 = buff3 + swid;
-  buffd = buff4 + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((hgt - dy_b) > 0) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-      buff3[i] = (FTYPE)sl3[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-      buff3[i + dx_l] = (FTYPE)sl3[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-      buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl3 + sll;
-    else sl = sl3;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff4[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff4[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i + dx_l] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-                       p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-                       p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-                       p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buff4[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff4[i] = buff4[dx_l];
-      for (i = 0; i < dx_r; i++) buff4[swid + dx_l + i] = buff4[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  FTYPE    p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2, *sl3, *sl4;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buff4 = buff3 + swid;
-  buff5 = buff4 + swid;
-  buffd = buff5 + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    if ((hgt - dy_b) > 0) sl4 = sl3 + sll;
-    else sl4 = sl3;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-      buff3[i] = (FTYPE)sl3[0];
-      buff4[i] = (FTYPE)sl4[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-      buff3[i + dx_l] = (FTYPE)sl3[i*chan1];
-      buff4[i + dx_l] = (FTYPE)sl4[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-      buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-      buff4[swid + dx_l + i] = buff4[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl4 + sll;
-    else sl = sl4;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        LOAD_BUFF(buffi);
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff5[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff5[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];     p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i + dx_l] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-                       p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-                       p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-                       p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-                       p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buff5[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff5[i] = buff5[dx_l];
-      for (i = 0; i < dx_r; i++) buff5[swid + dx_l + i] = buff5[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(5x5)
-{
-  mlib_s32 buff[BUFF_LINE];
-  mlib_s32 *buffd;
-  mlib_s32 k[KSIZE*KSIZE];
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_s32 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15;
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2, *sp3, *sp4;
-  DTYPE    *sp_1, *sp_2, *sp_3, *sp_4;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 *pbuff = buff;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, chan4;
-  mlib_s32 delta_chan1, delta_chan2, delta_chan3;
-  mlib_s32 i, j, c;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffd = pbuff;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan1 = chan1;
-  else delta_chan1 = 0;
-
-  if ((2 > dx_l) && (2 < wid + KSIZE1 - dx_r)) delta_chan2 = delta_chan1 + chan1;
-  else delta_chan2 = delta_chan1;
-
-  if ((3 > dx_l) && (3 < wid + KSIZE1 - dx_r)) delta_chan3 = delta_chan2 + chan1;
-  else delta_chan3 = delta_chan2;
-
-  chan4 = chan1 + delta_chan3;
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sp_1 = sl;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_2 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_3 = sl;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_4 = sl;
-
-    if ((hgt - dy_b) > 0) sl += sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sp_1;
-      sp_1 = sp_2;
-      sp_2 = sp_3;
-      sp_3 = sp_4;
-      sp_4 = sl;
-
-      sp1 = sp_1;
-      sp2 = sp_2;
-      sp3 = sp_3;
-      sp4 = sp_4;
-
-      /*
-       *  First loop
-       */
-
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      p02 = sp0[0];           p12 = sp1[0];
-      p03 = sp0[delta_chan1]; p13 = sp1[delta_chan1];
-      p04 = sp0[delta_chan2]; p14 = sp1[delta_chan2];
-      p05 = sp0[delta_chan3]; p15 = sp1[delta_chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-      p11 = p12; p12 = p13; p13 = p14; p14 = p15;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-
-        sp0 += chan1;
-        sp1 += chan1;
-      }
-
-      sp0 -= chan1;
-      sp1 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = sp2[0];           p12 = sp3[0];
-      p03 = sp2[delta_chan1]; p13 = sp3[delta_chan1];
-      p04 = sp2[delta_chan2]; p14 = sp3[delta_chan2];
-      p05 = sp2[delta_chan3]; p15 = sp3[delta_chan3];
-
-      sp2 += chan4;
-      sp3 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp2[0];     p14 = sp3[0];
-        p05 = sp2[chan1]; p15 = sp3[chan1];
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp2 += chan2;
-        sp3 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-      p11 = p12; p12 = p13; p13 = p14; p14 = p15;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp2[0];     p14 = sp3[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-
-        sp2 += chan1;
-        sp3 += chan1;
-      }
-
-      sp2 -= chan1;
-      sp3 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp2[0];     p14 = sp3[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = sp4[0];
-      p03 = sp4[delta_chan1];
-      p04 = sp4[delta_chan2];
-      p05 = sp4[delta_chan3];
-
-      sp4 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp4[0]; p05 = sp4[chan1];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
-                p04 * k3 + p05 * k4) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        dp  += chan2;
-        sp4 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p01 = p02; p02 = p03; p03 = p04;
-
-        p04 = sp4[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0);
-
-        dp  += chan1;
-        sp4 += chan1;
-      }
-
-      sp4 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p01 = p02; p02 = p03; p03 = p04;
-
-        p04 = sp4[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0);
-
-        dp  += chan1;
-      }
-
-      /* next line */
-
-      if (j < hgt - dy_b - 1) sl += sll;
-      dl += dll;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#if IMG_TYPE == 1
-
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 l, m, buff_ind;
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6;
-  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
-  DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
-  DEF_VARS(DTYPE);
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*swid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    if ((4 > dy_t) && (4 < hgt + KSIZE1 - dy_b)) sl4 = sl3 + sll;
-    else sl4 = sl3;
-
-    if ((5 > dy_t) && (5 < hgt + KSIZE1 - dy_b)) sl5 = sl4 + sll;
-    else sl5 = sl4;
-
-    if ((hgt - dy_b) > 0) sl6 = sl5 + sll;
-    else sl6 = sl5;
-
-    for (i = 0; i < dx_l; i++) {
-      buffs[0][i] = (FTYPE)sl[0];
-      buffs[1][i] = (FTYPE)sl1[0];
-      buffs[2][i] = (FTYPE)sl2[0];
-      buffs[3][i] = (FTYPE)sl3[0];
-      buffs[4][i] = (FTYPE)sl4[0];
-      buffs[5][i] = (FTYPE)sl5[0];
-      buffs[6][i] = (FTYPE)sl6[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buffs[0][i + dx_l] = (FTYPE)sl[i*chan1];
-      buffs[1][i + dx_l] = (FTYPE)sl1[i*chan1];
-      buffs[2][i + dx_l] = (FTYPE)sl2[i*chan1];
-      buffs[3][i + dx_l] = (FTYPE)sl3[i*chan1];
-      buffs[4][i + dx_l] = (FTYPE)sl4[i*chan1];
-      buffs[5][i + dx_l] = (FTYPE)sl5[i*chan1];
-      buffs[6][i + dx_l] = (FTYPE)sl6[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buffs[0][swid + dx_l + i] = buffs[0][swid + dx_l - 1];
-      buffs[1][swid + dx_l + i] = buffs[1][swid + dx_l - 1];
-      buffs[2][swid + dx_l + i] = buffs[2][swid + dx_l - 1];
-      buffs[3][swid + dx_l + i] = buffs[3][swid + dx_l - 1];
-      buffs[4][swid + dx_l + i] = buffs[4][swid + dx_l - 1];
-      buffs[5][swid + dx_l + i] = buffs[5][swid + dx_l - 1];
-      buffs[6][swid + dx_l + i] = buffs[6][swid + dx_l - 1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    if ((hgt - dy_b) > 1) sl = sl6 + sll;
-    else sl = sl6;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    **buffc = buffs + buff_ind;
-      FTYPE    *buffn = buffc[KSIZE];
-      FTYPE    *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        FTYPE    *buff = buffc[l];
-        d64_2x32 dd;
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            LOAD_BUFF(buffi);
-
-            dd.d64 = *(FTYPE   *)(buffi + i);
-            buffn[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-            buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-            d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
-            d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
-
-            dp[0    ] = FROM_S32(d0);
-            dp[chan1] = FROM_S32(d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        FTYPE    *pk = k, s = 0;
-        mlib_s32 d0;
-
-        for (l = 0; l < KSIZE; l++) {
-          FTYPE    *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        d0 = D2I(s);
-        dp[0] = FROM_S32(d0);
-
-        buffn[i + dx_l] = (FTYPE)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buffn[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l];
-      for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* IMG_TYPE == 1 */
-
-/***************************************************************/
 #define MAX_KER   7
 #define MAX_N    15
 #define BUFF_SIZE   1600
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c	Fri May 13 11:31:05 2016 +0300
@@ -144,9 +144,6 @@
 } d64_2x32;
 
 /***************************************************************/
-#define BUFF_LINE 256
-
-/***************************************************************/
 #define DEF_VARS(type)                                          \
   type     *adr_src, *sl, *sp = NULL;                           \
   type     *adr_dst, *dl, *dp = NULL;                           \
@@ -156,39 +153,6 @@
   mlib_s32 i, j, c
 
 /***************************************************************/
-#define LOAD_KERNEL3()                                                   \
-  FTYPE    scalef = DSCALE;                                              \
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8;                           \
-  FTYPE    p00, p01, p02, p03,                                           \
-           p10, p11, p12, p13,                                           \
-           p20, p21, p22, p23;                                           \
-                                                                         \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5]; \
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8]
-
-/***************************************************************/
-#define LOAD_KERNEL(SIZE)                                       \
-  FTYPE    scalef = DSCALE;                                     \
-                                                                \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon);                                \
-                                                                \
-  for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
-
-/***************************************************************/
 #define GET_SRC_DST_PARAMETERS(type)                            \
   hgt = mlib_ImageGetHeight(src);                               \
   wid = mlib_ImageGetWidth(src);                                \
@@ -246,1162 +210,6 @@
 #endif /* __sparc */
 
 /***************************************************************/
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2;
-#ifndef __sparc
-  mlib_s32 d0, d1;
-#endif /* __sparc */
-  LOAD_KERNEL3();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buffo = (mlib_s32*)(buff3 + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef __sparc
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-        d64_2x32 dd;
-
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff3[i    ] = (FTYPE)dd.i32s.i0;
-        buff3[i + 1] = (FTYPE)dd.i32s.i1;
-
-#ifndef __sparc
-        d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-#else /* __sparc */
-
-        dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-        *(FTYPE   *)(buffo + i) = dd.d64;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
-#else
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-#endif /* IMG_TYPE != 1 */
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
-#else
-        STORE2(o64 >> 56, o64 >> 24);
-#endif /* IMG_TYPE != 1 */
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i] = (FTYPE)buffi[i];
-
-#ifndef __sparc
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        dp[0] = FROM_S32(d0);
-
-#else  /* __sparc */
-
-        buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                       p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-#if IMG_TYPE != 1
-        dp[0] = FROM_S32(buffo[i]);
-#else
-        dp[0] = buffo[i] >> 24;
-#endif /* IMG_TYPE != 1 */
-#endif /* __sparc */
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buffi[wid] = (mlib_s32)sp[0];
-      buff3[wid] = (FTYPE)buffi[wid];
-      buffi[wid + 1] = (mlib_s32)sp[chan1];
-      buff3[wid + 1] = (FTYPE)buffi[wid + 1];
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-#ifdef __sparc
-#if IMG_TYPE == 1
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-#endif /* IMG_TYPE == 1 */
-#endif /* __sparc */
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(3x3)(mlib_image       *dst,
-                             const mlib_image *src,
-                             const mlib_s32   *kern,
-                             mlib_s32         scalef_expon,
-                             mlib_s32         cmask)
-{
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c;
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_s32 p02, p03,
-           p12, p13,
-           p22, p23;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  /* keep kernel in regs */
-  k0 = kern[0] >> shift1;  k1 = kern[1] >> shift1;  k2 = kern[2] >> shift1;
-  k3 = kern[3] >> shift1;  k4 = kern[4] >> shift1;  k5 = kern[5] >> shift1;
-  k6 = kern[6] >> shift1;  k7 = kern[7] >> shift1;  k8 = kern[8] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 s0, s1;
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      sp2 = sp1 + sll;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[chan1];
-      p13 = sp1[chan1];
-      p23 = sp2[chan1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += chan2;
-      sp1 += chan2;
-      sp2 += chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
-                p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0);
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7;
-  FTYPE    p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2, *sl3;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buffd = buff4 + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-      buff3[i] = (FTYPE)sl3[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff4[i    ] = (FTYPE)dd.i32s.i0;
-        buff4[i + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-                       p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-                       p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-                       p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff4[wid    ] = (FTYPE)sp[0];
-      buff4[wid + 1] = (FTYPE)sp[chan1];
-      buff4[wid + 2] = (FTYPE)sp[chan2];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  FTYPE    p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2, *sl3, *sl4;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buff5 = buff4 + wid;
-  buffd = buff5 + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-      buff3[i] = (FTYPE)sl3[i*chan1];
-      buff4[i] = (FTYPE)sl4[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        LOAD_BUFF(buffi);
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-      p14 = buff3[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff5[i    ] = (FTYPE)dd.i32s.i0;
-        buff5[i + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];     p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-                       p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-                       p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-                       p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-                       p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff5[wid    ] = (FTYPE)sp[0];
-      buff5[wid + 1] = (FTYPE)sp[chan1];
-      buff5[wid + 2] = (FTYPE)sp[chan2];
-      buff5[wid + 3] = (FTYPE)sp[chan2 + chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(5x5)(mlib_image       *dst,
-                             const mlib_image *src,
-                             const mlib_s32   *kern,
-                             mlib_s32         scalef_expon,
-                             mlib_s32         cmask)
-{
-  mlib_s32 buff[BUFF_LINE];
-  mlib_s32 *buffd;
-  mlib_s32 k[KSIZE*KSIZE];
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_s32 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15;
-  DTYPE    *adr_src, *sl, *sp0, *sp1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 *pbuff = buff;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, chan3, chan4;
-  mlib_s32 i, j, c;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffd = pbuff;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-  chan3 = chan2 + chan1;
-  chan4 = chan3 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 pix0, pix1;
-      /*
-       *  First loop
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      dp = dl;
-      sp0 = sl + 4*sll;
-
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = sp0[0];
-      p03 = sp0[chan1];
-      p04 = sp0[chan2];
-      p05 = sp0[chan3];
-
-      sp0 += chan2 + chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0]; p05 = sp0[chan1];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
-                p04 * k3 + p05 * k4) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        dp  += chan2;
-        sp0 += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0);
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#if IMG_TYPE == 1
-
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 l, m, buff_ind;
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6;
-  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
-  DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-    sl5 = sl4 + sll;
-    sl6 = sl5 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buffs[0][i] = (FTYPE)sl[i*chan1];
-      buffs[1][i] = (FTYPE)sl1[i*chan1];
-      buffs[2][i] = (FTYPE)sl2[i*chan1];
-      buffs[3][i] = (FTYPE)sl3[i*chan1];
-      buffs[4][i] = (FTYPE)sl4[i*chan1];
-      buffs[5][i] = (FTYPE)sl5[i*chan1];
-      buffs[6][i] = (FTYPE)sl6[i*chan1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    **buffc = buffs + buff_ind;
-      FTYPE    *buffn = buffc[KSIZE];
-      FTYPE    *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        FTYPE    *buff = buffc[l];
-        d64_2x32 dd;
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            LOAD_BUFF(buffi);
-
-            dd.d64 = *(FTYPE   *)(buffi + i);
-            buffn[i    ] = (FTYPE)dd.i32s.i0;
-            buffn[i + 1] = (FTYPE)dd.i32s.i1;
-
-            d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
-            d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
-
-            dp[0    ] = FROM_S32(d0);
-            dp[chan1] = FROM_S32(d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        FTYPE    *pk = k, s = 0;
-        mlib_s32 d0;
-
-        for (l = 0; l < KSIZE; l++) {
-          FTYPE    *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        d0 = D2I(s);
-        dp[0] = FROM_S32(d0);
-
-        buffn[i] = (FTYPE)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* IMG_TYPE == 1 */
-
-/***************************************************************/
 #define MAX_KER   7
 #define MAX_N    15
 
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_32nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_32nw.c	Fri May 13 11:31:05 2016 +0300
@@ -35,8 +35,6 @@
 #include "mlib_ImageConv.h"
 
 /***************************************************************/
-#define BUFF_LINE  256
-
 #define CACHE_SIZE (64*1024)
 
 /***************************************************************/
@@ -83,837 +81,6 @@
   mlib_s32 i, j, c
 
 /***************************************************************/
-#define CALC_SCALE()                                            \
-  scalef = 1.0;                                                 \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon)
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 2
-
-mlib_status CONV_FUNC(2x2)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  mlib_d64 buff[(KSIZE + 1)*BUFF_LINE];
-  mlib_d64 k0, k1, k2, k3;
-  mlib_d64 p00, p01, p02, p03,
-           p10, p11, p12, p13;
-  mlib_d64 d2;
-  DEF_VARS(mlib_s32);
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  /* keep kernel in regs */
-  CALC_SCALE();
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];
-  k2 = scalef * kern[2];  k3 = scalef * kern[3];
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (mlib_d64)sl[i*chan1];
-      buff1[i] = (mlib_d64)sl1[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      p03 = buff0[0];
-      p13 = buff1[0];
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 3); i += 3) {
-
-        p00 = p03; p10 = p13;
-
-        p01 = buff0[i + 1]; p11 = buff1[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-
-        buff2[i    ] = (mlib_d64)sp[0];
-        buff2[i + 1] = (mlib_d64)sp[chan1];
-        buff2[i + 2] = (mlib_d64)sp[chan2];
-
-        d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-        d1 = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
-        d2 = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
-
-        CLAMP_S32(dp[0    ], d0);
-        CLAMP_S32(dp[chan1], d1);
-        CLAMP_S32(dp[chan2], d2);
-
-        sp += chan3;
-        dp += chan3;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1];
-
-        buff2[i] = (mlib_d64)sp[0];
-
-        d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-        CLAMP_S32(dp[0], d0);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff2[wid] = (mlib_d64)sp[0];
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 3
-
-mlib_status CONV_FUNC(3x3)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  mlib_d64 buff[(KSIZE + 1)*BUFF_LINE], *buff3;
-  mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_d64 p00, p01, p02, p03,
-           p10, p11, p12, p13,
-           p20, p21, p22, p23;
-  mlib_s32 *sl2;
-  DEF_VARS(mlib_s32);
-  mlib_s32 chan2 = chan1 + chan1;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  CALC_SCALE();
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2];
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5];
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8];
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (mlib_d64)sl[i*chan1];
-      buff1[i] = (mlib_d64)sl1[i*chan1];
-      buff2[i] = (mlib_d64)sl2[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_d64 s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      sp = sl;
-      dp = dl;
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        buff3[i    ] = (mlib_d64)sp[0];
-        buff3[i + 1] = (mlib_d64)sp[chan1];
-
-        d0 = s0 + p02 * k2 + p12 * k5 + p22 * k8;
-        d1 = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8;
-
-        CLAMP_S32(dp[0    ], d0);
-        CLAMP_S32(dp[chan1], d1);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buff3[i] = (mlib_d64)sp[0];
-
-        d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-              p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        CLAMP_S32(dp[0], d0);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff3[wid    ] = (mlib_d64)sp[0];
-      buff3[wid + 1] = (mlib_d64)sp[chan1];
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5;
-  mlib_d64 k[KSIZE*KSIZE];
-  mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7;
-  mlib_d64 p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  mlib_s32 *sl2, *sl3;
-  DEF_VARS(mlib_s32);
-  mlib_s32 chan2 = chan1 + chan1;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buff5 = buff4 + wid;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  CALC_SCALE();
-  for (j = 0; j < 16; j++) k[j] = scalef * kern[j];
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (mlib_d64)sl[i*chan1];
-      buff1[i] = (mlib_d64)sl1[i*chan1];
-      buff2[i] = (mlib_d64)sl2[i*chan1];
-      buff3[i] = (mlib_d64)sl3[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        buff4[i] = (mlib_d64)sp[0];
-        buff4[i + 1] = (mlib_d64)sp[chan1];
-
-        buff5[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buff5[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-              p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buff5[i]);
-        d1 = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-              p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buff5[i + 1]);
-
-        CLAMP_S32(dp[0    ], d0);
-        CLAMP_S32(dp[chan1], d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i] = (mlib_d64)sp[0];
-
-        d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-              p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-              p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-              p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        CLAMP_S32(dp[0], d0);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff4[wid    ] = (mlib_d64)sp[0];
-      buff4[wid + 1] = (mlib_d64)sp[chan1];
-      buff4[wid + 2] = (mlib_d64)sp[chan2];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5, *buff6;
-  mlib_d64 k[KSIZE*KSIZE];
-  mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_d64 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  mlib_s32 *sl2, *sl3, *sl4;
-  DEF_VARS(mlib_s32);
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buff5 = buff4 + wid;
-  buff6 = buff5 + wid;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  CALC_SCALE();
-  for (j = 0; j < 25; j++) k[j] = scalef * kern[j];
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (mlib_d64)sl[i*chan1];
-      buff1[i] = (mlib_d64)sl1[i*chan1];
-      buff2[i] = (mlib_d64)sl2[i*chan1];
-      buff3[i] = (mlib_d64)sl3[i*chan1];
-      buff4[i] = (mlib_d64)sl4[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buff6[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buff6[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        buff6[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buff6[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        buff5[i    ] = (mlib_d64)sp[0];
-        buff5[i + 1] = (mlib_d64)sp[chan1];
-
-        d0 = p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buff6[i];
-        d1 = p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buff6[i + 1];
-
-        CLAMP_S32(dp[0    ], d0);
-        CLAMP_S32(dp[chan1], d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];        p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i] = (mlib_d64)sp[0];
-
-        d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-              p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-              p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-              p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-              p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        CLAMP_S32(dp[0], d0);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff5[wid    ] = (mlib_d64)sp[0];
-      buff5[wid + 1] = (mlib_d64)sp[chan1];
-      buff5[wid + 2] = (mlib_d64)sp[chan2];
-      buff5[wid + 3] = (mlib_d64)sp[chan3];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  mlib_d64 k[KSIZE*KSIZE];
-  mlib_d64 k0, k1, k2, k3, k4, k5, k6;
-  mlib_d64 p0, p1, p2, p3, p4, p5, p6, p7;
-  mlib_d64 d0, d1;
-  mlib_s32 l, m, buff_ind, *sl2, *sl3, *sl4, *sl5, *sl6;
-  mlib_d64 scalef;
-  DEF_VARS_MxN(mlib_s32);
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 *sl1;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + wid;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  CALC_SCALE();
-  for (j = 0; j < 49; j++) k[j] = scalef * kern[j];
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-    sl5 = sl4 + sll;
-    sl6 = sl5 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buffs[0][i] = (mlib_d64)sl[i*chan1];
-      buffs[1][i] = (mlib_d64)sl1[i*chan1];
-      buffs[2][i] = (mlib_d64)sl2[i*chan1];
-      buffs[3][i] = (mlib_d64)sl3[i*chan1];
-      buffs[4][i] = (mlib_d64)sl4[i*chan1];
-      buffs[5][i] = (mlib_d64)sl5[i*chan1];
-      buffs[6][i] = (mlib_d64)sl6[i*chan1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_d64 **buffc = buffs + buff_ind;
-      mlib_d64 *buffn = buffc[KSIZE];
-      mlib_d64 *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        mlib_d64 *buff = buffc[l];
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffn[i    ] = (mlib_d64)sp[0];
-            buffn[i + 1] = (mlib_d64)sp[chan1];
-
-            d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ];
-            d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1];
-
-            CLAMP_S32(dp[0    ], d0);
-            CLAMP_S32(dp[chan1], d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        mlib_d64 *pk = k, s = 0;
-
-        for (l = 0; l < KSIZE; l++) {
-          mlib_d64 *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        CLAMP_S32(dp[0], s);
-
-        buffn[i] = (mlib_d64)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
 #define FTYPE  mlib_d64
 #define DTYPE  mlib_s32
 
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8ext.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8ext.c	Fri May 13 11:31:05 2016 +0300
@@ -80,9 +80,6 @@
 #endif /* IMG_TYPE == 1 */
 
 /***************************************************************/
-#define KSIZE1 (KSIZE - 1)
-
-/***************************************************************/
 #define PARAM                                                   \
   mlib_image       *dst,                                        \
   const mlib_image *src,                                        \
@@ -126,21 +123,6 @@
 #define D2I(x) CLAMP_S32((x) SAT_OFF)
 
 /***************************************************************/
-#ifdef _LITTLE_ENDIAN
-
-#define STORE2(res0, res1)                                      \
-  dp[0    ] = res1;                                             \
-  dp[chan1] = res0
-
-#else
-
-#define STORE2(res0, res1)                                      \
-  dp[0    ] = res0;                                             \
-  dp[chan1] = res1
-
-#endif /* _LITTLE_ENDIAN */
-
-/***************************************************************/
 #ifdef _NO_LONGLONG
 
 #define LOAD_BUFF(buff)                                         \
@@ -163,9 +145,6 @@
 #endif /* _NO_LONGLONG */
 
 /***************************************************************/
-#define MLIB_D2_24 16777216.0f
-
-/***************************************************************/
 typedef union {
   mlib_d64 d64;
   struct {
@@ -175,52 +154,6 @@
 } d64_2x32;
 
 /***************************************************************/
-#define BUFF_LINE 256
-
-/***************************************************************/
-#define DEF_VARS(type)                                          \
-  type     *adr_src, *sl, *sp, *sl1;                            \
-  type     *adr_dst, *dl, *dp;                                  \
-  FTYPE    *pbuff = buff;                                       \
-  mlib_s32 *buffi, *buffo;                                      \
-  mlib_s32 wid, hgt, sll, dll;                                  \
-  mlib_s32 nchannel, chan1, chan2;                              \
-  mlib_s32 i, j, c, swid
-
-/***************************************************************/
-#define LOAD_KERNEL3()                                                   \
-  FTYPE    scalef = DSCALE;                                              \
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8;                           \
-  FTYPE    p00, p01, p02, p03,                                           \
-           p10, p11, p12, p13,                                           \
-           p20, p21, p22, p23;                                           \
-                                                                         \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5]; \
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8]
-
-/***************************************************************/
-#define LOAD_KERNEL(SIZE)                                       \
-  FTYPE    scalef = DSCALE;                                     \
-                                                                \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon);                                \
-                                                                \
-  for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
-
-/***************************************************************/
 #define GET_SRC_DST_PARAMETERS(type)                            \
   hgt = mlib_ImageGetHeight(src);                               \
   wid = mlib_ImageGetWidth(src);                                \
@@ -278,1334 +211,6 @@
 #endif /* __sparc */
 
 /***************************************************************/
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)
-{
-  FTYPE    buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2;
-#ifndef __sparc
-  mlib_s32 d0, d1;
-#endif /* __sparc */
-  LOAD_KERNEL3();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buffo = (mlib_s32*)(buff3 + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((hgt - dy_b) > 0) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl2 + sll;
-    else sl = sl2;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef __sparc
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-        d64_2x32 dd;
-
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff3[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff3[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-#ifndef __sparc
-
-        d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-#else /* __sparc */
-
-        dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-        *(FTYPE   *)(buffo + i) = dd.d64;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
-#else
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-#endif /* IMG_TYPE != 1 */
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
-#else
-        STORE2(o64 >> 56, o64 >> 24);
-#endif /* IMG_TYPE != 1 */
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i + dx_l] = (FTYPE)buffi[i];
-
-#ifndef __sparc
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        dp[0] = FROM_S32(d0);
-
-#else  /* __sparc */
-
-        buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                       p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-#if IMG_TYPE != 1
-        dp[0] = FROM_S32(buffo[i]);
-#else
-        dp[0] = buffo[i] >> 24;
-#endif /* IMG_TYPE != 1 */
-#endif /* __sparc */
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i + dx_l] = (FTYPE)buffi[i];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff3[i] = buff3[dx_l];
-      for (i = 0; i < dx_r; i++) buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-#ifdef __sparc
-#if IMG_TYPE == 1
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-#endif /* IMG_TYPE == 1 */
-#endif /* __sparc */
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(3x3)
-{
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2, *sp_1, *sp_2;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, delta_chan;
-  mlib_s32 i, j, c;
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_s32 p02, p03,
-           p12, p13,
-           p22, p23;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  /* keep kernel in regs */
-  k0 = kern[0] >> shift1;  k1 = kern[1] >> shift1;  k2 = kern[2] >> shift1;
-  k3 = kern[3] >> shift1;  k4 = kern[4] >> shift1;  k5 = kern[5] >> shift1;
-  k6 = kern[6] >> shift1;  k7 = kern[7] >> shift1;  k8 = kern[8] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-  delta_chan = 0;
-
-  if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan = chan1;
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sp_1 = sl;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_2 = sl;
-
-    if ((hgt - dy_b) > 0) sl += sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 s0, s1;
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sp_1;
-      sp_1 = sp_2;
-      sp_2 = sl;
-
-      sp1 = sp_1;
-      sp2 = sp_2;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[delta_chan];
-      p13 = sp1[delta_chan];
-      p23 = sp2[delta_chan];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += (chan1 + delta_chan);
-      sp1 += (chan1 + delta_chan);
-      sp2 += (chan1 + delta_chan);
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
-                p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0)
-        CLAMP_STORE(dp[chan1], pix1)
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      p02 = p03; p12 = p13; p22 = p23;
-
-      for (; i < wid - dx_r; i++) {
-        p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0];
-        pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0)
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        p02 = p03; p12 = p13; p22 = p23;
-        sp0 += chan1;
-        sp1 += chan1;
-        sp2 += chan1;
-        dp += chan1;
-      }
-
-      sp0 -= chan1;
-      sp1 -= chan1;
-      sp2 -= chan1;
-
-      for (; i < wid; i++) {
-        p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0];
-        pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0)
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        p02 = p03; p12 = p13; p22 = p23;
-        dp += chan1;
-      }
-
-      if (j < hgt - dy_b - 1) sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7;
-  FTYPE    p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2, *sl3;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buff4 = buff3 + swid;
-  buffd = buff4 + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((hgt - dy_b) > 0) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-      buff3[i] = (FTYPE)sl3[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-      buff3[i + dx_l] = (FTYPE)sl3[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-      buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl3 + sll;
-    else sl = sl3;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff4[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff4[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i + dx_l] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-                       p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-                       p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-                       p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buff4[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff4[i] = buff4[dx_l];
-      for (i = 0; i < dx_r; i++) buff4[swid + dx_l + i] = buff4[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  FTYPE    p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2, *sl3, *sl4;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buff4 = buff3 + swid;
-  buff5 = buff4 + swid;
-  buffd = buff5 + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    if ((hgt - dy_b) > 0) sl4 = sl3 + sll;
-    else sl4 = sl3;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-      buff3[i] = (FTYPE)sl3[0];
-      buff4[i] = (FTYPE)sl4[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-      buff3[i + dx_l] = (FTYPE)sl3[i*chan1];
-      buff4[i + dx_l] = (FTYPE)sl4[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-      buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-      buff4[swid + dx_l + i] = buff4[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl4 + sll;
-    else sl = sl4;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        LOAD_BUFF(buffi);
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff5[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff5[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];     p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i + dx_l] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-                       p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-                       p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-                       p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-                       p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buff5[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff5[i] = buff5[dx_l];
-      for (i = 0; i < dx_r; i++) buff5[swid + dx_l + i] = buff5[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(5x5)
-{
-  mlib_s32 buff[BUFF_LINE];
-  mlib_s32 *buffd;
-  mlib_s32 k[KSIZE*KSIZE];
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_s32 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15;
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2, *sp3, *sp4;
-  DTYPE    *sp_1, *sp_2, *sp_3, *sp_4;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 *pbuff = buff;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, chan4;
-  mlib_s32 delta_chan1, delta_chan2, delta_chan3;
-  mlib_s32 i, j, c;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffd = pbuff;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan1 = chan1;
-  else delta_chan1 = 0;
-
-  if ((2 > dx_l) && (2 < wid + KSIZE1 - dx_r)) delta_chan2 = delta_chan1 + chan1;
-  else delta_chan2 = delta_chan1;
-
-  if ((3 > dx_l) && (3 < wid + KSIZE1 - dx_r)) delta_chan3 = delta_chan2 + chan1;
-  else delta_chan3 = delta_chan2;
-
-  chan4 = chan1 + delta_chan3;
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sp_1 = sl;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_2 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_3 = sl;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_4 = sl;
-
-    if ((hgt - dy_b) > 0) sl += sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sp_1;
-      sp_1 = sp_2;
-      sp_2 = sp_3;
-      sp_3 = sp_4;
-      sp_4 = sl;
-
-      sp1 = sp_1;
-      sp2 = sp_2;
-      sp3 = sp_3;
-      sp4 = sp_4;
-
-      /*
-       *  First loop
-       */
-
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      p02 = sp0[0];           p12 = sp1[0];
-      p03 = sp0[delta_chan1]; p13 = sp1[delta_chan1];
-      p04 = sp0[delta_chan2]; p14 = sp1[delta_chan2];
-      p05 = sp0[delta_chan3]; p15 = sp1[delta_chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-      p11 = p12; p12 = p13; p13 = p14; p14 = p15;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-
-        sp0 += chan1;
-        sp1 += chan1;
-      }
-
-      sp0 -= chan1;
-      sp1 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = sp2[0];           p12 = sp3[0];
-      p03 = sp2[delta_chan1]; p13 = sp3[delta_chan1];
-      p04 = sp2[delta_chan2]; p14 = sp3[delta_chan2];
-      p05 = sp2[delta_chan3]; p15 = sp3[delta_chan3];
-
-      sp2 += chan4;
-      sp3 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp2[0];     p14 = sp3[0];
-        p05 = sp2[chan1]; p15 = sp3[chan1];
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp2 += chan2;
-        sp3 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-      p11 = p12; p12 = p13; p13 = p14; p14 = p15;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp2[0];     p14 = sp3[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-
-        sp2 += chan1;
-        sp3 += chan1;
-      }
-
-      sp2 -= chan1;
-      sp3 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp2[0];     p14 = sp3[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = sp4[0];
-      p03 = sp4[delta_chan1];
-      p04 = sp4[delta_chan2];
-      p05 = sp4[delta_chan3];
-
-      sp4 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp4[0]; p05 = sp4[chan1];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
-                p04 * k3 + p05 * k4) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0)
-        CLAMP_STORE(dp[chan1], pix1)
-
-        dp  += chan2;
-        sp4 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p01 = p02; p02 = p03; p03 = p04;
-
-        p04 = sp4[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0)
-
-        dp  += chan1;
-        sp4 += chan1;
-      }
-
-      sp4 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p01 = p02; p02 = p03; p03 = p04;
-
-        p04 = sp4[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0)
-
-        dp  += chan1;
-      }
-
-      /* next line */
-
-      if (j < hgt - dy_b - 1) sl += sll;
-      dl += dll;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#if IMG_TYPE == 1
-
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 l, m, buff_ind;
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6;
-  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
-  DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
-  DEF_VARS(DTYPE);
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*swid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    if ((4 > dy_t) && (4 < hgt + KSIZE1 - dy_b)) sl4 = sl3 + sll;
-    else sl4 = sl3;
-
-    if ((5 > dy_t) && (5 < hgt + KSIZE1 - dy_b)) sl5 = sl4 + sll;
-    else sl5 = sl4;
-
-    if ((hgt - dy_b) > 0) sl6 = sl5 + sll;
-    else sl6 = sl5;
-
-    for (i = 0; i < dx_l; i++) {
-      buffs[0][i] = (FTYPE)sl[0];
-      buffs[1][i] = (FTYPE)sl1[0];
-      buffs[2][i] = (FTYPE)sl2[0];
-      buffs[3][i] = (FTYPE)sl3[0];
-      buffs[4][i] = (FTYPE)sl4[0];
-      buffs[5][i] = (FTYPE)sl5[0];
-      buffs[6][i] = (FTYPE)sl6[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buffs[0][i + dx_l] = (FTYPE)sl[i*chan1];
-      buffs[1][i + dx_l] = (FTYPE)sl1[i*chan1];
-      buffs[2][i + dx_l] = (FTYPE)sl2[i*chan1];
-      buffs[3][i + dx_l] = (FTYPE)sl3[i*chan1];
-      buffs[4][i + dx_l] = (FTYPE)sl4[i*chan1];
-      buffs[5][i + dx_l] = (FTYPE)sl5[i*chan1];
-      buffs[6][i + dx_l] = (FTYPE)sl6[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buffs[0][swid + dx_l + i] = buffs[0][swid + dx_l - 1];
-      buffs[1][swid + dx_l + i] = buffs[1][swid + dx_l - 1];
-      buffs[2][swid + dx_l + i] = buffs[2][swid + dx_l - 1];
-      buffs[3][swid + dx_l + i] = buffs[3][swid + dx_l - 1];
-      buffs[4][swid + dx_l + i] = buffs[4][swid + dx_l - 1];
-      buffs[5][swid + dx_l + i] = buffs[5][swid + dx_l - 1];
-      buffs[6][swid + dx_l + i] = buffs[6][swid + dx_l - 1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    if ((hgt - dy_b) > 1) sl = sl6 + sll;
-    else sl = sl6;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    **buffc = buffs + buff_ind;
-      FTYPE    *buffn = buffc[KSIZE];
-      FTYPE    *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        FTYPE    *buff = buffc[l];
-        d64_2x32 dd;
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            LOAD_BUFF(buffi);
-
-            dd.d64 = *(FTYPE   *)(buffi + i);
-            buffn[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-            buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-            d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
-            d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
-
-            dp[0    ] = FROM_S32(d0);
-            dp[chan1] = FROM_S32(d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        FTYPE    *pk = k, s = 0;
-        mlib_s32 d0;
-
-        for (l = 0; l < KSIZE; l++) {
-          FTYPE    *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        d0 = D2I(s);
-        dp[0] = FROM_S32(d0);
-
-        buffn[i + dx_l] = (FTYPE)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buffn[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l];
-      for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* IMG_TYPE == 1 */
-
-/***************************************************************/
 #define MAX_KER   7
 #define MAX_N    15
 #define BUFF_SIZE   1600
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c	Fri May 13 11:31:05 2016 +0300
@@ -145,9 +145,6 @@
 } d64_2x32;
 
 /***************************************************************/
-#define BUFF_LINE 256
-
-/***************************************************************/
 #define DEF_VARS(type)                                          \
   type     *adr_src, *sl, *sp = NULL;                           \
   type     *adr_dst, *dl, *dp = NULL;                           \
@@ -157,39 +154,6 @@
   mlib_s32 i, j, c
 
 /***************************************************************/
-#define LOAD_KERNEL3()                                                   \
-  FTYPE    scalef = DSCALE;                                              \
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8;                           \
-  FTYPE    p00, p01, p02, p03,                                           \
-           p10, p11, p12, p13,                                           \
-           p20, p21, p22, p23;                                           \
-                                                                         \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5]; \
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8]
-
-/***************************************************************/
-#define LOAD_KERNEL(SIZE)                                       \
-  FTYPE    scalef = DSCALE;                                     \
-                                                                \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon);                                \
-                                                                \
-  for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
-
-/***************************************************************/
 #define GET_SRC_DST_PARAMETERS(type)                            \
   hgt = mlib_ImageGetHeight(src);                               \
   wid = mlib_ImageGetWidth(src);                                \
@@ -247,1162 +211,6 @@
 #endif /* __sparc */
 
 /***************************************************************/
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2;
-#ifndef __sparc
-  mlib_s32 d0, d1;
-#endif /* __sparc */
-  LOAD_KERNEL3();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buffo = (mlib_s32*)(buff3 + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef __sparc
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-        d64_2x32 dd;
-
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff3[i    ] = (FTYPE)dd.i32s.i0;
-        buff3[i + 1] = (FTYPE)dd.i32s.i1;
-
-#ifndef __sparc
-        d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-#else /* __sparc */
-
-        dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-        *(FTYPE   *)(buffo + i) = dd.d64;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
-#else
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-#endif /* IMG_TYPE != 1 */
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
-#else
-        STORE2(o64 >> 56, o64 >> 24);
-#endif /* IMG_TYPE != 1 */
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i] = (FTYPE)buffi[i];
-
-#ifndef __sparc
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        dp[0] = FROM_S32(d0);
-
-#else  /* __sparc */
-
-        buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                       p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-#if IMG_TYPE != 1
-        dp[0] = FROM_S32(buffo[i]);
-#else
-        dp[0] = buffo[i] >> 24;
-#endif /* IMG_TYPE != 1 */
-#endif /* __sparc */
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buffi[wid] = (mlib_s32)sp[0];
-      buff3[wid] = (FTYPE)buffi[wid];
-      buffi[wid + 1] = (mlib_s32)sp[chan1];
-      buff3[wid + 1] = (FTYPE)buffi[wid + 1];
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-#ifdef __sparc
-#if IMG_TYPE == 1
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-#endif /* IMG_TYPE == 1 */
-#endif /* __sparc */
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(3x3)(mlib_image       *dst,
-                             const mlib_image *src,
-                             const mlib_s32   *kern,
-                             mlib_s32         scalef_expon,
-                             mlib_s32         cmask)
-{
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c;
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_s32 p02, p03,
-           p12, p13,
-           p22, p23;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  /* keep kernel in regs */
-  k0 = kern[0] >> shift1;  k1 = kern[1] >> shift1;  k2 = kern[2] >> shift1;
-  k3 = kern[3] >> shift1;  k4 = kern[4] >> shift1;  k5 = kern[5] >> shift1;
-  k6 = kern[6] >> shift1;  k7 = kern[7] >> shift1;  k8 = kern[8] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 s0, s1;
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      sp2 = sp1 + sll;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[chan1];
-      p13 = sp1[chan1];
-      p23 = sp2[chan1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += chan2;
-      sp1 += chan2;
-      sp2 += chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
-                p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0)
-        CLAMP_STORE(dp[chan1], pix1)
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0)
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7;
-  FTYPE    p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2, *sl3;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buffd = buff4 + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-      buff3[i] = (FTYPE)sl3[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff4[i    ] = (FTYPE)dd.i32s.i0;
-        buff4[i + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-                       p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-                       p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-                       p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff4[wid    ] = (FTYPE)sp[0];
-      buff4[wid + 1] = (FTYPE)sp[chan1];
-      buff4[wid + 2] = (FTYPE)sp[chan2];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  FTYPE    p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2, *sl3, *sl4;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buff5 = buff4 + wid;
-  buffd = buff5 + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-      buff3[i] = (FTYPE)sl3[i*chan1];
-      buff4[i] = (FTYPE)sl4[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        LOAD_BUFF(buffi);
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-      p14 = buff3[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff5[i    ] = (FTYPE)dd.i32s.i0;
-        buff5[i + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];     p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-                       p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-                       p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-                       p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-                       p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff5[wid    ] = (FTYPE)sp[0];
-      buff5[wid + 1] = (FTYPE)sp[chan1];
-      buff5[wid + 2] = (FTYPE)sp[chan2];
-      buff5[wid + 3] = (FTYPE)sp[chan2 + chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(5x5)(mlib_image       *dst,
-                             const mlib_image *src,
-                             const mlib_s32   *kern,
-                             mlib_s32         scalef_expon,
-                             mlib_s32         cmask)
-{
-  mlib_s32 buff[BUFF_LINE];
-  mlib_s32 *buffd;
-  mlib_s32 k[KSIZE*KSIZE];
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_s32 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15;
-  DTYPE    *adr_src, *sl, *sp0, *sp1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 *pbuff = buff;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, chan3, chan4;
-  mlib_s32 i, j, c;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffd = pbuff;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-  chan3 = chan2 + chan1;
-  chan4 = chan3 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 pix0, pix1;
-      /*
-       *  First loop
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      dp = dl;
-      sp0 = sl + 4*sll;
-
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = sp0[0];
-      p03 = sp0[chan1];
-      p04 = sp0[chan2];
-      p05 = sp0[chan3];
-
-      sp0 += chan2 + chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0]; p05 = sp0[chan1];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
-                p04 * k3 + p05 * k4) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0)
-        CLAMP_STORE(dp[chan1], pix1)
-
-        dp  += chan2;
-        sp0 += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0)
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#if IMG_TYPE == 1
-
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 l, m, buff_ind;
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6;
-  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
-  DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-    sl5 = sl4 + sll;
-    sl6 = sl5 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buffs[0][i] = (FTYPE)sl[i*chan1];
-      buffs[1][i] = (FTYPE)sl1[i*chan1];
-      buffs[2][i] = (FTYPE)sl2[i*chan1];
-      buffs[3][i] = (FTYPE)sl3[i*chan1];
-      buffs[4][i] = (FTYPE)sl4[i*chan1];
-      buffs[5][i] = (FTYPE)sl5[i*chan1];
-      buffs[6][i] = (FTYPE)sl6[i*chan1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    **buffc = buffs + buff_ind;
-      FTYPE    *buffn = buffc[KSIZE];
-      FTYPE    *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        FTYPE    *buff = buffc[l];
-        d64_2x32 dd;
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            LOAD_BUFF(buffi);
-
-            dd.d64 = *(FTYPE   *)(buffi + i);
-            buffn[i    ] = (FTYPE)dd.i32s.i0;
-            buffn[i + 1] = (FTYPE)dd.i32s.i1;
-
-            d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
-            d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
-
-            dp[0    ] = FROM_S32(d0);
-            dp[chan1] = FROM_S32(d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        FTYPE    *pk = k, s = 0;
-        mlib_s32 d0;
-
-        for (l = 0; l < KSIZE; l++) {
-          FTYPE    *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        d0 = D2I(s);
-        dp[0] = FROM_S32(d0);
-
-        buffn[i] = (FTYPE)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* IMG_TYPE == 1 */
-
-/***************************************************************/
 #define MAX_KER   7
 #define MAX_N    15
 
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_D64nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_D64nw.c	Fri May 13 11:31:05 2016 +0300
@@ -75,495 +75,6 @@
   mlib_s32 i = 0, j, c
 
 /***************************************************************/
-#undef  KSIZE
-#define KSIZE 2
-
-mlib_status CONV_FUNC(2x2)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *kern,
-                           mlib_s32         cmask)
-{
-  DEF_VARS(DTYPE);
-  DTYPE    *sp0, *sp1;
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-  mlib_s32 chan4 = chan3 + chan1;
-  DTYPE k0, k1, k2, k3;
-  DTYPE p00, p01, p02, p03, p04,
-        p10, p11, p12, p13, p14;
-
-  /* keep kernel in regs */
-  k0 = (DTYPE)kern[0];  k1 = (DTYPE)kern[1];
-  k2 = (DTYPE)kern[2];  k3 = (DTYPE)kern[3];
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    dl = adr_dst + c;
-    sl = adr_src + c;
-
-    for (j = 0; j < hgt; j++) {
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-
-      p04 = sp0[0];
-      p14 = sp1[0];
-
-      sp0 += chan1;
-      sp1 += chan1;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 4); i += 4) {
-        p00 = p04; p10 = p14;
-
-        p01 = sp0[0];     p11 = sp1[0];
-        p02 = sp0[chan1]; p12 = sp1[chan1];
-        p03 = sp0[chan2]; p13 = sp1[chan2];
-        p04 = sp0[chan3]; p14 = sp1[chan3];
-
-        dp[0    ] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-        dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
-        dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
-        dp[chan3] = p03 * k0 + p04 * k1 + p13 * k2 + p14 * k3;
-
-        dp  += chan4;
-        sp0 += chan4;
-        sp1 += chan4;
-      }
-
-      if (i < wid) {
-        p00 = p04;    p10 = p14;
-        p01 = sp0[0]; p11 = sp1[0];
-        dp[0] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-
-        if ((i + 1) < wid) {
-          p02 = sp0[chan1]; p12 = sp1[chan1];
-          dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
-
-          if ((i + 2) < wid) {
-            p03 = sp0[chan2]; p13 = sp1[chan2];
-            dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
-          }
-        }
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 3
-
-mlib_status CONV_FUNC(3x3)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *kern,
-                           mlib_s32         cmask)
-{
-  DEF_VARS(DTYPE);
-  mlib_s32 chan2 = chan1 + chan1;
-  DTYPE    *sp0, *sp1;
-  DTYPE *sp2;
-  DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  DTYPE p02, p03, p12, p13, p22, p23;
-
-  /* keep kernel in regs */
-  k0 = (DTYPE)kern[0];  k1 = (DTYPE)kern[1];  k2 = (DTYPE)kern[2];
-  k3 = (DTYPE)kern[3];  k4 = (DTYPE)kern[4];  k5 = (DTYPE)kern[5];
-  k6 = (DTYPE)kern[6];  k7 = (DTYPE)kern[7];  k8 = (DTYPE)kern[8];
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      DTYPE s0, s1;
-
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      sp2 = sp1 + sll;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[chan1];
-      p13 = sp1[chan1];
-      p23 = sp2[chan1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += chan2;
-      sp1 += chan2;
-      sp2 += chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        dp[0    ] = s0 + p02 * k2 + p12 * k5 + p22 * k8;
-        dp[chan1] = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
-        dp[0] = s0 + p02 * k2 + p12 * k5 + p22 * k8;
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *k,
-                           mlib_s32         cmask)
-{
-  DTYPE k0, k1, k2, k3, k4, k5, k6, k7;
-  DTYPE p00, p01, p02, p03, p04,
-        p10, p11, p12, p13, p14;
-  DEF_VARS(DTYPE);
-  DTYPE    *sp0, *sp1;
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      /*
-       *  First loop on two first lines of kernel
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3];
-      k4 = (DTYPE)k[4]; k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-
-      sp0 += chan3;
-      sp1 += chan3;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        p03 = sp0[0];     p13 = sp1[0];
-        p04 = sp0[chan1]; p14 = sp1[chan1];
-
-        dp[0    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                     p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                     p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = sp0[0]; p13 = sp1[0];
-
-        dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[ 8]; k1 = (DTYPE)k[ 9]; k2 = (DTYPE)k[10]; k3 = (DTYPE)k[11];
-      k4 = (DTYPE)k[12]; k5 = (DTYPE)k[13]; k6 = (DTYPE)k[14]; k7 = (DTYPE)k[15];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-
-      sp0 += chan3;
-      sp1 += chan3;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        p03 = sp0[0];     p13 = sp1[0];
-        p04 = sp0[chan1]; p14 = sp1[chan1];
-
-        dp[0    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                      p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                      p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = sp0[0]; p13 = sp1[0];
-
-        dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                  p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *k,
-                           mlib_s32         cmask)
-{
-  DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  DTYPE p00, p01, p02, p03, p04, p05,
-        p10, p11, p12, p13, p14, p15;
-  DEF_VARS(DTYPE);
-  DTYPE    *sp0, *sp1;
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-  mlib_s32 chan4 = chan3 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      /*
-       *  First loop
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; k4 = (DTYPE)k[4];
-      k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; k8 = (DTYPE)k[8]; k9 = (DTYPE)k[9];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        dp[    0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                     p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[10]; k1 = (DTYPE)k[11]; k2 = (DTYPE)k[12]; k3 = (DTYPE)k[13]; k4 = (DTYPE)k[14];
-      k5 = (DTYPE)k[15]; k6 = (DTYPE)k[16]; k7 = (DTYPE)k[17]; k8 = (DTYPE)k[18]; k9 = (DTYPE)k[19];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        dp[    0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                      p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                      p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                  p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      dp = dl;
-      sp0 = sl + 4*sll;
-
-      k0 = (DTYPE)k[20]; k1 = (DTYPE)k[21]; k2 = (DTYPE)k[22]; k3 = (DTYPE)k[23]; k4 = (DTYPE)k[24];
-
-      p02 = sp0[0];
-      p03 = sp0[chan1];
-      p04 = sp0[chan2];
-      p05 = sp0[chan3];
-
-      sp0 += chan2 + chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0]; p05 = sp0[chan1];
-
-        dp[0    ] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4;
-        dp[chan1] += p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4;
-
-        dp  += chan2;
-        sp0 += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0];
-
-        dp[0] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4;
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
 #define BUFF_SIZE  1600
 
 #define CACHE_SIZE (64*1024)
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c	Fri May 13 11:31:05 2016 +0300
@@ -75,495 +75,6 @@
   mlib_s32 i, j, c
 
 /***************************************************************/
-#undef  KSIZE
-#define KSIZE 2
-
-mlib_status CONV_FUNC(2x2)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *kern,
-                           mlib_s32         cmask)
-{
-  DEF_VARS(DTYPE);
-  DTYPE    *sp0, *sp1;
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-  mlib_s32 chan4 = chan3 + chan1;
-  DTYPE k0, k1, k2, k3;
-  DTYPE p00, p01, p02, p03, p04,
-        p10, p11, p12, p13, p14;
-
-  /* keep kernel in regs */
-  k0 = (DTYPE)kern[0];  k1 = (DTYPE)kern[1];
-  k2 = (DTYPE)kern[2];  k3 = (DTYPE)kern[3];
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    dl = adr_dst + c;
-    sl = adr_src + c;
-
-    for (j = 0; j < hgt; j++) {
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-
-      p04 = sp0[0];
-      p14 = sp1[0];
-
-      sp0 += chan1;
-      sp1 += chan1;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 4); i += 4) {
-        p00 = p04; p10 = p14;
-
-        p01 = sp0[0];     p11 = sp1[0];
-        p02 = sp0[chan1]; p12 = sp1[chan1];
-        p03 = sp0[chan2]; p13 = sp1[chan2];
-        p04 = sp0[chan3]; p14 = sp1[chan3];
-
-        dp[0    ] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-        dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
-        dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
-        dp[chan3] = p03 * k0 + p04 * k1 + p13 * k2 + p14 * k3;
-
-        dp  += chan4;
-        sp0 += chan4;
-        sp1 += chan4;
-      }
-
-      if (i < wid) {
-        p00 = p04;    p10 = p14;
-        p01 = sp0[0]; p11 = sp1[0];
-        dp[0] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-
-        if ((i + 1) < wid) {
-          p02 = sp0[chan1]; p12 = sp1[chan1];
-          dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
-
-          if ((i + 2) < wid) {
-            p03 = sp0[chan2]; p13 = sp1[chan2];
-            dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
-          }
-        }
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 3
-
-mlib_status CONV_FUNC(3x3)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *kern,
-                           mlib_s32         cmask)
-{
-  DEF_VARS(DTYPE);
-  mlib_s32 chan2 = chan1 + chan1;
-  DTYPE    *sp0, *sp1;
-  DTYPE *sp2;
-  DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  DTYPE p02, p03, p12, p13, p22, p23;
-
-  /* keep kernel in regs */
-  k0 = (DTYPE)kern[0];  k1 = (DTYPE)kern[1];  k2 = (DTYPE)kern[2];
-  k3 = (DTYPE)kern[3];  k4 = (DTYPE)kern[4];  k5 = (DTYPE)kern[5];
-  k6 = (DTYPE)kern[6];  k7 = (DTYPE)kern[7];  k8 = (DTYPE)kern[8];
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      DTYPE s0, s1;
-
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      sp2 = sp1 + sll;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[chan1];
-      p13 = sp1[chan1];
-      p23 = sp2[chan1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += chan2;
-      sp1 += chan2;
-      sp2 += chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        dp[0    ] = s0 + p02 * k2 + p12 * k5 + p22 * k8;
-        dp[chan1] = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
-        dp[0] = s0 + p02 * k2 + p12 * k5 + p22 * k8;
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *k,
-                           mlib_s32         cmask)
-{
-  DTYPE k0, k1, k2, k3, k4, k5, k6, k7;
-  DTYPE p00, p01, p02, p03, p04,
-        p10, p11, p12, p13, p14;
-  DEF_VARS(DTYPE);
-  DTYPE    *sp0, *sp1;
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      /*
-       *  First loop on two first lines of kernel
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3];
-      k4 = (DTYPE)k[4]; k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-
-      sp0 += chan3;
-      sp1 += chan3;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        p03 = sp0[0];     p13 = sp1[0];
-        p04 = sp0[chan1]; p14 = sp1[chan1];
-
-        dp[0    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                     p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                     p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = sp0[0]; p13 = sp1[0];
-
-        dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[ 8]; k1 = (DTYPE)k[ 9]; k2 = (DTYPE)k[10]; k3 = (DTYPE)k[11];
-      k4 = (DTYPE)k[12]; k5 = (DTYPE)k[13]; k6 = (DTYPE)k[14]; k7 = (DTYPE)k[15];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-
-      sp0 += chan3;
-      sp1 += chan3;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        p03 = sp0[0];     p13 = sp1[0];
-        p04 = sp0[chan1]; p14 = sp1[chan1];
-
-        dp[0    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                      p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                      p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = sp0[0]; p13 = sp1[0];
-
-        dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                  p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_d64   *k,
-                           mlib_s32         cmask)
-{
-  DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  DTYPE p00, p01, p02, p03, p04, p05,
-        p10, p11, p12, p13, p14, p15;
-  DEF_VARS(DTYPE);
-  DTYPE    *sp0, *sp1;
-  mlib_s32 chan2 = chan1 + chan1;
-  mlib_s32 chan3 = chan1 + chan2;
-  mlib_s32 chan4 = chan3 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      /*
-       *  First loop
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; k4 = (DTYPE)k[4];
-      k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; k8 = (DTYPE)k[8]; k9 = (DTYPE)k[9];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        dp[    0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                     p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = (DTYPE)k[10]; k1 = (DTYPE)k[11]; k2 = (DTYPE)k[12]; k3 = (DTYPE)k[13]; k4 = (DTYPE)k[14];
-      k5 = (DTYPE)k[15]; k6 = (DTYPE)k[16]; k7 = (DTYPE)k[17]; k8 = (DTYPE)k[18]; k9 = (DTYPE)k[19];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        dp[    0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                      p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                      p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                  p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      dp = dl;
-      sp0 = sl + 4*sll;
-
-      k0 = (DTYPE)k[20]; k1 = (DTYPE)k[21]; k2 = (DTYPE)k[22]; k3 = (DTYPE)k[23]; k4 = (DTYPE)k[24];
-
-      p02 = sp0[0];
-      p03 = sp0[chan1];
-      p04 = sp0[chan2];
-      p05 = sp0[chan3];
-
-      sp0 += chan2 + chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0]; p05 = sp0[chan1];
-
-        dp[0    ] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4;
-        dp[chan1] += p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4;
-
-        dp  += chan2;
-        sp0 += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0];
-
-        dp[0] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4;
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
 #define BUFF_SIZE  1600
 
 #define CACHE_SIZE (64*1024)
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16ext.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16ext.c	Fri May 13 11:31:05 2016 +0300
@@ -80,9 +80,6 @@
 #endif /* IMG_TYPE == 1 */
 
 /***************************************************************/
-#define KSIZE1 (KSIZE - 1)
-
-/***************************************************************/
 #define PARAM                                                   \
   mlib_image       *dst,                                        \
   const mlib_image *src,                                        \
@@ -163,9 +160,6 @@
 #endif /* _NO_LONGLONG */
 
 /***************************************************************/
-#define MLIB_D2_24 16777216.0f
-
-/***************************************************************/
 typedef union {
   mlib_d64 d64;
   struct {
@@ -175,9 +169,6 @@
 } d64_2x32;
 
 /***************************************************************/
-#define BUFF_LINE 256
-
-/***************************************************************/
 #define DEF_VARS(type)                                          \
   type     *adr_src, *sl, *sp, *sl1;                            \
   type     *adr_dst, *dl, *dp;                                  \
@@ -188,39 +179,6 @@
   mlib_s32 i, j, c, swid
 
 /***************************************************************/
-#define LOAD_KERNEL3()                                                   \
-  FTYPE    scalef = DSCALE;                                              \
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8;                           \
-  FTYPE    p00, p01, p02, p03,                                           \
-           p10, p11, p12, p13,                                           \
-           p20, p21, p22, p23;                                           \
-                                                                         \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5]; \
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8]
-
-/***************************************************************/
-#define LOAD_KERNEL(SIZE)                                       \
-  FTYPE    scalef = DSCALE;                                     \
-                                                                \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon);                                \
-                                                                \
-  for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
-
-/***************************************************************/
 #define GET_SRC_DST_PARAMETERS(type)                            \
   hgt = mlib_ImageGetHeight(src);                               \
   wid = mlib_ImageGetWidth(src);                                \
@@ -278,1334 +236,6 @@
 #endif /* __sparc */
 
 /***************************************************************/
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)
-{
-  FTYPE    buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2;
-#ifndef __sparc
-  mlib_s32 d0, d1;
-#endif /* __sparc */
-  LOAD_KERNEL3();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buffo = (mlib_s32*)(buff3 + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((hgt - dy_b) > 0) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl2 + sll;
-    else sl = sl2;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef __sparc
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-        d64_2x32 dd;
-
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff3[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff3[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-#ifndef __sparc
-
-        d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-#else /* __sparc */
-
-        dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-        *(FTYPE   *)(buffo + i) = dd.d64;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
-#else
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-#endif /* IMG_TYPE != 1 */
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
-#else
-        STORE2(o64 >> 56, o64 >> 24);
-#endif /* IMG_TYPE != 1 */
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i + dx_l] = (FTYPE)buffi[i];
-
-#ifndef __sparc
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        dp[0] = FROM_S32(d0);
-
-#else  /* __sparc */
-
-        buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                       p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-#if IMG_TYPE != 1
-        dp[0] = FROM_S32(buffo[i]);
-#else
-        dp[0] = buffo[i] >> 24;
-#endif /* IMG_TYPE != 1 */
-#endif /* __sparc */
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i + dx_l] = (FTYPE)buffi[i];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff3[i] = buff3[dx_l];
-      for (i = 0; i < dx_r; i++) buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-#ifdef __sparc
-#if IMG_TYPE == 1
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-#endif /* IMG_TYPE == 1 */
-#endif /* __sparc */
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(3x3)
-{
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2, *sp_1, *sp_2;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, delta_chan;
-  mlib_s32 i, j, c;
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_s32 p02, p03,
-           p12, p13,
-           p22, p23;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  /* keep kernel in regs */
-  k0 = kern[0] >> shift1;  k1 = kern[1] >> shift1;  k2 = kern[2] >> shift1;
-  k3 = kern[3] >> shift1;  k4 = kern[4] >> shift1;  k5 = kern[5] >> shift1;
-  k6 = kern[6] >> shift1;  k7 = kern[7] >> shift1;  k8 = kern[8] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-  delta_chan = 0;
-
-  if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan = chan1;
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sp_1 = sl;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_2 = sl;
-
-    if ((hgt - dy_b) > 0) sl += sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 s0, s1;
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sp_1;
-      sp_1 = sp_2;
-      sp_2 = sl;
-
-      sp1 = sp_1;
-      sp2 = sp_2;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[delta_chan];
-      p13 = sp1[delta_chan];
-      p23 = sp2[delta_chan];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += (chan1 + delta_chan);
-      sp1 += (chan1 + delta_chan);
-      sp2 += (chan1 + delta_chan);
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
-                p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      p02 = p03; p12 = p13; p22 = p23;
-
-      for (; i < wid - dx_r; i++) {
-        p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0];
-        pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0);
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        p02 = p03; p12 = p13; p22 = p23;
-        sp0 += chan1;
-        sp1 += chan1;
-        sp2 += chan1;
-        dp += chan1;
-      }
-
-      sp0 -= chan1;
-      sp1 -= chan1;
-      sp2 -= chan1;
-
-      for (; i < wid; i++) {
-        p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0];
-        pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0);
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        p02 = p03; p12 = p13; p22 = p23;
-        dp += chan1;
-      }
-
-      if (j < hgt - dy_b - 1) sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7;
-  FTYPE    p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2, *sl3;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buff4 = buff3 + swid;
-  buffd = buff4 + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((hgt - dy_b) > 0) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-      buff3[i] = (FTYPE)sl3[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-      buff3[i + dx_l] = (FTYPE)sl3[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-      buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl3 + sll;
-    else sl = sl3;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff4[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff4[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i + dx_l] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-                       p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-                       p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-                       p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buff4[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff4[i] = buff4[dx_l];
-      for (i = 0; i < dx_r; i++) buff4[swid + dx_l + i] = buff4[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  FTYPE    p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  DEF_VARS(DTYPE);
-  DTYPE *sl2, *sl3, *sl4;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (swid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*swid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + swid;
-  buff2 = buff1 + swid;
-  buff3 = buff2 + swid;
-  buff4 = buff3 + swid;
-  buff5 = buff4 + swid;
-  buffd = buff5 + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    if ((hgt - dy_b) > 0) sl4 = sl3 + sll;
-    else sl4 = sl3;
-
-    for (i = 0; i < dx_l; i++) {
-      buff0[i] = (FTYPE)sl[0];
-      buff1[i] = (FTYPE)sl1[0];
-      buff2[i] = (FTYPE)sl2[0];
-      buff3[i] = (FTYPE)sl3[0];
-      buff4[i] = (FTYPE)sl4[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buff0[i + dx_l] = (FTYPE)sl[i*chan1];
-      buff1[i + dx_l] = (FTYPE)sl1[i*chan1];
-      buff2[i + dx_l] = (FTYPE)sl2[i*chan1];
-      buff3[i + dx_l] = (FTYPE)sl3[i*chan1];
-      buff4[i + dx_l] = (FTYPE)sl4[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buff0[swid + dx_l + i] = buff0[swid + dx_l - 1];
-      buff1[swid + dx_l + i] = buff1[swid + dx_l - 1];
-      buff2[swid + dx_l + i] = buff2[swid + dx_l - 1];
-      buff3[swid + dx_l + i] = buff3[swid + dx_l - 1];
-      buff4[swid + dx_l + i] = buff4[swid + dx_l - 1];
-    }
-
-    if ((hgt - dy_b) > 1) sl = sl4 + sll;
-    else sl = sl4;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        LOAD_BUFF(buffi);
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff5[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-        buff5[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];     p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i + dx_l] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-                       p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-                       p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-                       p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-                       p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buff5[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buff5[i] = buff5[dx_l];
-      for (i = 0; i < dx_r; i++) buff5[swid + dx_l + i] = buff5[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(5x5)
-{
-  mlib_s32 buff[BUFF_LINE];
-  mlib_s32 *buffd;
-  mlib_s32 k[KSIZE*KSIZE];
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_s32 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15;
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2, *sp3, *sp4;
-  DTYPE    *sp_1, *sp_2, *sp_3, *sp_4;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 *pbuff = buff;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, chan4;
-  mlib_s32 delta_chan1, delta_chan2, delta_chan3;
-  mlib_s32 i, j, c;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffd = pbuff;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan1 = chan1;
-  else delta_chan1 = 0;
-
-  if ((2 > dx_l) && (2 < wid + KSIZE1 - dx_r)) delta_chan2 = delta_chan1 + chan1;
-  else delta_chan2 = delta_chan1;
-
-  if ((3 > dx_l) && (3 < wid + KSIZE1 - dx_r)) delta_chan3 = delta_chan2 + chan1;
-  else delta_chan3 = delta_chan2;
-
-  chan4 = chan1 + delta_chan3;
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sp_1 = sl;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_2 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_3 = sl;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl += sll;
-    sp_4 = sl;
-
-    if ((hgt - dy_b) > 0) sl += sll;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sp_1;
-      sp_1 = sp_2;
-      sp_2 = sp_3;
-      sp_3 = sp_4;
-      sp_4 = sl;
-
-      sp1 = sp_1;
-      sp2 = sp_2;
-      sp3 = sp_3;
-      sp4 = sp_4;
-
-      /*
-       *  First loop
-       */
-
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      p02 = sp0[0];           p12 = sp1[0];
-      p03 = sp0[delta_chan1]; p13 = sp1[delta_chan1];
-      p04 = sp0[delta_chan2]; p14 = sp1[delta_chan2];
-      p05 = sp0[delta_chan3]; p15 = sp1[delta_chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-      p11 = p12; p12 = p13; p13 = p14; p14 = p15;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-
-        sp0 += chan1;
-        sp1 += chan1;
-      }
-
-      sp0 -= chan1;
-      sp1 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = sp2[0];           p12 = sp3[0];
-      p03 = sp2[delta_chan1]; p13 = sp3[delta_chan1];
-      p04 = sp2[delta_chan2]; p14 = sp3[delta_chan2];
-      p05 = sp2[delta_chan3]; p15 = sp3[delta_chan3];
-
-      sp2 += chan4;
-      sp3 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp2[0];     p14 = sp3[0];
-        p05 = sp2[chan1]; p15 = sp3[chan1];
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp2 += chan2;
-        sp3 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-      p11 = p12; p12 = p13; p13 = p14; p14 = p15;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp2[0];     p14 = sp3[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-
-        sp2 += chan1;
-        sp3 += chan1;
-      }
-
-      sp2 -= chan1;
-      sp3 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p10 = p11;
-        p01 = p02; p11 = p12;
-        p02 = p03; p12 = p13;
-        p03 = p04; p13 = p14;
-
-        p04 = sp2[0];     p14 = sp3[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = sp4[0];
-      p03 = sp4[delta_chan1];
-      p04 = sp4[delta_chan2];
-      p05 = sp4[delta_chan3];
-
-      sp4 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - dx_r - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp4[0]; p05 = sp4[chan1];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
-                p04 * k3 + p05 * k4) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        dp  += chan2;
-        sp4 += chan2;
-      }
-
-      p01 = p02; p02 = p03; p03 = p04; p04 = p05;
-
-      for (; i < wid - dx_r; i++) {
-        p00 = p01; p01 = p02; p02 = p03; p03 = p04;
-
-        p04 = sp4[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0);
-
-        dp  += chan1;
-        sp4 += chan1;
-      }
-
-      sp4 -= chan1;
-
-      for (; i < wid; i++) {
-        p00 = p01; p01 = p02; p02 = p03; p03 = p04;
-
-        p04 = sp4[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0);
-
-        dp  += chan1;
-      }
-
-      /* next line */
-
-      if (j < hgt - dy_b - 1) sl += sll;
-      dl += dll;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#if IMG_TYPE == 1
-
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 l, m, buff_ind;
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6;
-  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
-  DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
-  DEF_VARS(DTYPE);
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  swid = wid + KSIZE1;
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE   )*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*swid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + swid;
-  buffo = (mlib_s32*)(buffd + swid);
-  buffi = buffo + (swid &~ 1);
-
-  swid -= (dx_l + dx_r);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll;
-    else sl1 = sl;
-
-    if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll;
-    else sl2 = sl1;
-
-    if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll;
-    else sl3 = sl2;
-
-    if ((4 > dy_t) && (4 < hgt + KSIZE1 - dy_b)) sl4 = sl3 + sll;
-    else sl4 = sl3;
-
-    if ((5 > dy_t) && (5 < hgt + KSIZE1 - dy_b)) sl5 = sl4 + sll;
-    else sl5 = sl4;
-
-    if ((hgt - dy_b) > 0) sl6 = sl5 + sll;
-    else sl6 = sl5;
-
-    for (i = 0; i < dx_l; i++) {
-      buffs[0][i] = (FTYPE)sl[0];
-      buffs[1][i] = (FTYPE)sl1[0];
-      buffs[2][i] = (FTYPE)sl2[0];
-      buffs[3][i] = (FTYPE)sl3[0];
-      buffs[4][i] = (FTYPE)sl4[0];
-      buffs[5][i] = (FTYPE)sl5[0];
-      buffs[6][i] = (FTYPE)sl6[0];
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < swid; i++) {
-      buffs[0][i + dx_l] = (FTYPE)sl[i*chan1];
-      buffs[1][i + dx_l] = (FTYPE)sl1[i*chan1];
-      buffs[2][i + dx_l] = (FTYPE)sl2[i*chan1];
-      buffs[3][i + dx_l] = (FTYPE)sl3[i*chan1];
-      buffs[4][i + dx_l] = (FTYPE)sl4[i*chan1];
-      buffs[5][i + dx_l] = (FTYPE)sl5[i*chan1];
-      buffs[6][i + dx_l] = (FTYPE)sl6[i*chan1];
-    }
-
-    for (i = 0; i < dx_r; i++) {
-      buffs[0][swid + dx_l + i] = buffs[0][swid + dx_l - 1];
-      buffs[1][swid + dx_l + i] = buffs[1][swid + dx_l - 1];
-      buffs[2][swid + dx_l + i] = buffs[2][swid + dx_l - 1];
-      buffs[3][swid + dx_l + i] = buffs[3][swid + dx_l - 1];
-      buffs[4][swid + dx_l + i] = buffs[4][swid + dx_l - 1];
-      buffs[5][swid + dx_l + i] = buffs[5][swid + dx_l - 1];
-      buffs[6][swid + dx_l + i] = buffs[6][swid + dx_l - 1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    if ((hgt - dy_b) > 1) sl = sl6 + sll;
-    else sl = sl6;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    **buffc = buffs + buff_ind;
-      FTYPE    *buffn = buffc[KSIZE];
-      FTYPE    *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        FTYPE    *buff = buffc[l];
-        d64_2x32 dd;
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            LOAD_BUFF(buffi);
-
-            dd.d64 = *(FTYPE   *)(buffi + i);
-            buffn[i + dx_l    ] = (FTYPE)dd.i32s.i0;
-            buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1;
-
-            d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
-            d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
-
-            dp[0    ] = FROM_S32(d0);
-            dp[chan1] = FROM_S32(d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        FTYPE    *pk = k, s = 0;
-        mlib_s32 d0;
-
-        for (l = 0; l < KSIZE; l++) {
-          FTYPE    *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        d0 = D2I(s);
-        dp[0] = FROM_S32(d0);
-
-        buffn[i + dx_l] = (FTYPE)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (; i < swid; i++) {
-        buffn[i + dx_l] = (FTYPE)sp[0];
-        sp += chan1;
-      }
-
-      for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l];
-      for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1];
-
-      /* next line */
-
-      if (j < hgt - dy_b - 2) sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* IMG_TYPE == 1 */
-
-/***************************************************************/
 #define MAX_KER   7
 #define MAX_N    15
 #define BUFF_SIZE   1600
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16nw.c	Fri May 13 11:31:05 2016 +0300
@@ -144,9 +144,6 @@
 } d64_2x32;
 
 /***************************************************************/
-#define BUFF_LINE 256
-
-/***************************************************************/
 #define DEF_VARS(type)                                          \
   type     *adr_src, *sl, *sp = NULL;                           \
   type     *adr_dst, *dl, *dp = NULL;                           \
@@ -156,39 +153,6 @@
   mlib_s32 i, j, c
 
 /***************************************************************/
-#define LOAD_KERNEL3()                                                   \
-  FTYPE    scalef = DSCALE;                                              \
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8;                           \
-  FTYPE    p00, p01, p02, p03,                                           \
-           p10, p11, p12, p13,                                           \
-           p20, p21, p22, p23;                                           \
-                                                                         \
-  while (scalef_expon > 30) {                                            \
-    scalef /= (1 << 30);                                                 \
-    scalef_expon -= 30;                                                  \
-  }                                                                      \
-                                                                         \
-  scalef /= (1 << scalef_expon);                                         \
-                                                                         \
-  /* keep kernel in regs */                                              \
-  k0 = scalef * kern[0];  k1 = scalef * kern[1];  k2 = scalef * kern[2]; \
-  k3 = scalef * kern[3];  k4 = scalef * kern[4];  k5 = scalef * kern[5]; \
-  k6 = scalef * kern[6];  k7 = scalef * kern[7];  k8 = scalef * kern[8]
-
-/***************************************************************/
-#define LOAD_KERNEL(SIZE)                                       \
-  FTYPE    scalef = DSCALE;                                     \
-                                                                \
-  while (scalef_expon > 30) {                                   \
-    scalef /= (1 << 30);                                        \
-    scalef_expon -= 30;                                         \
-  }                                                             \
-                                                                \
-  scalef /= (1 << scalef_expon);                                \
-                                                                \
-  for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
-
-/***************************************************************/
 #define GET_SRC_DST_PARAMETERS(type)                            \
   hgt = mlib_ImageGetHeight(src);                               \
   wid = mlib_ImageGetWidth(src);                                \
@@ -246,1162 +210,6 @@
 #endif /* __sparc */
 
 /***************************************************************/
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2;
-#ifndef __sparc
-  mlib_s32 d0, d1;
-#endif /* __sparc */
-  LOAD_KERNEL3();
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buffo = (mlib_s32*)(buff3 + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    s0, s1;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p22 = buff2[0];
-
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p23 = buff2[1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp = sl;
-      dp = dl;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-#ifdef __sparc
-#ifdef _NO_LONGLONG
-        mlib_s32 o64_1, o64_2;
-#else /* _NO_LONGLONG */
-        mlib_s64 o64;
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-        d64_2x32 dd;
-
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff3[i    ] = (FTYPE)dd.i32s.i0;
-        buff3[i + 1] = (FTYPE)dd.i32s.i1;
-
-#ifndef __sparc
-        d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-#else /* __sparc */
-
-        dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
-        dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
-        *(FTYPE   *)(buffo + i) = dd.d64;
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-#ifdef _NO_LONGLONG
-
-        o64_1 = buffo[i];
-        o64_2 = buffo[i+1];
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
-#else
-        STORE2(o64_1 >> 24, o64_2 >> 24);
-#endif /* IMG_TYPE != 1 */
-
-#else /* _NO_LONGLONG */
-
-        o64 = *(mlib_s64*)(buffo + i);
-#if IMG_TYPE != 1
-        STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
-#else
-        STORE2(o64 >> 56, o64 >> 24);
-#endif /* IMG_TYPE != 1 */
-#endif /* _NO_LONGLONG */
-#endif /* __sparc */
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
-
-        buffi[i] = (mlib_s32)sp[0];
-        buff3[i] = (FTYPE)buffi[i];
-
-#ifndef __sparc
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-
-        dp[0] = FROM_S32(d0);
-
-#else  /* __sparc */
-
-        buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
-                       p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
-#if IMG_TYPE != 1
-        dp[0] = FROM_S32(buffo[i]);
-#else
-        dp[0] = buffo[i] >> 24;
-#endif /* IMG_TYPE != 1 */
-#endif /* __sparc */
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buffi[wid] = (mlib_s32)sp[0];
-      buff3[wid] = (FTYPE)buffi[wid];
-      buffi[wid + 1] = (mlib_s32)sp[chan1];
-      buff3[wid + 1] = (FTYPE)buffi[wid + 1];
-
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buffT;
-    }
-  }
-
-#ifdef __sparc
-#if IMG_TYPE == 1
-  {
-    mlib_s32 amask = (1 << nchannel) - 1;
-
-    if ((cmask & amask) != amask) {
-      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
-    } else {
-      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
-    }
-  }
-
-#endif /* IMG_TYPE == 1 */
-#endif /* __sparc */
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(3x3)(mlib_image       *dst,
-                             const mlib_image *src,
-                             const mlib_s32   *kern,
-                             mlib_s32         scalef_expon,
-                             mlib_s32         cmask)
-{
-  DTYPE    *adr_src, *sl, *sp0, *sp1, *sp2;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2;
-  mlib_s32 i, j, c;
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
-  mlib_s32 p02, p03,
-           p12, p13,
-           p22, p23;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  /* keep kernel in regs */
-  k0 = kern[0] >> shift1;  k1 = kern[1] >> shift1;  k2 = kern[2] >> shift1;
-  k3 = kern[3] >> shift1;  k4 = kern[4] >> shift1;  k5 = kern[5] >> shift1;
-  k6 = kern[6] >> shift1;  k7 = kern[7] >> shift1;  k8 = kern[8] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 s0, s1;
-      mlib_s32 pix0, pix1;
-
-      dp  = dl;
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      sp2 = sp1 + sll;
-
-      p02 = sp0[0];
-      p12 = sp1[0];
-      p22 = sp2[0];
-
-      p03 = sp0[chan1];
-      p13 = sp1[chan1];
-      p23 = sp2[chan1];
-
-      s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-      s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-      sp0 += chan2;
-      sp1 += chan2;
-      sp2 += chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p02 = sp0[0];     p12 = sp1[0];     p22 = sp2[0];
-        p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
-                p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
-        s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
-        sp0 += chan2;
-        sp1 += chan2;
-        sp2 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
-        pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
-        CLAMP_STORE(dp[0], pix0);
-      }
-
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 4
-
-mlib_status CONV_FUNC(4x4)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7;
-  FTYPE    p00, p01, p02, p03, p04,
-           p10, p11, p12, p13, p14,
-           p20, p21, p22, p23,
-           p30, p31, p32, p33;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2, *sl3;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buffd = buff4 + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-      buff3[i] = (FTYPE)sl3[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop on two first lines of kernel
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
-      k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff1[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-
-        LOAD_BUFF(buffi);
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff4[i    ] = (FTYPE)dd.i32s.i0;
-        buff4[i + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                        p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                        p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop on two last lines of kernel
-       */
-      k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
-      k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
-                 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
-                 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-
-        buff4[i] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
-                       p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
-                       p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
-                       p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff4[wid    ] = (FTYPE)sp[0];
-      buff4[wid + 1] = (FTYPE)sp[chan1];
-      buff4[wid + 2] = (FTYPE)sp[chan2];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE 5
-
-mlib_status CONV_FUNC(5x5)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE];
-  FTYPE    *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  FTYPE    p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15,
-           p20, p21, p22, p23, p24,
-           p30, p31, p32, p33, p34,
-           p40, p41, p42, p43, p44;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  DTYPE *sl2, *sl3, *sl4;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buff0 = pbuff;
-  buff1 = buff0 + wid;
-  buff2 = buff1 + wid;
-  buff3 = buff2 + wid;
-  buff4 = buff3 + wid;
-  buff5 = buff4 + wid;
-  buffd = buff5 + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buff0[i] = (FTYPE)sl[i*chan1];
-      buff1[i] = (FTYPE)sl1[i*chan1];
-      buff2[i] = (FTYPE)sl2[i*chan1];
-      buff3[i] = (FTYPE)sl3[i*chan1];
-      buff4[i] = (FTYPE)sl4[i*chan1];
-    }
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      d64_2x32 dd;
-
-      /*
-       *  First loop
-       */
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff0[0];
-      p12 = buff1[0];
-      p03 = buff0[1];
-      p13 = buff1[1];
-      p04 = buff0[2];
-      p14 = buff1[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-
-        LOAD_BUFF(buffi);
-
-        p03 = buff0[i + 3]; p13 = buff1[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4];
-        p05 = buff0[i + 5]; p15 = buff1[i + 5];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  Second loop
-       */
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff2[0];
-      p12 = buff3[0];
-      p03 = buff2[1];
-      p13 = buff3[1];
-      p04 = buff2[2];
-      p14 = buff3[2];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-
-        p02 = buff2[i + 2]; p12 = buff3[i + 2];
-        p03 = buff2[i + 3]; p13 = buff3[i + 3];
-        p04 = buff2[i + 4]; p14 = buff3[i + 4];
-        p05 = buff2[i + 5]; p15 = buff3[i + 5];
-
-        dd.d64 = *(FTYPE   *)(buffi + i);
-        buff5[i    ] = (FTYPE)dd.i32s.i0;
-        buff5[i + 1] = (FTYPE)dd.i32s.i1;
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /*
-       *  3 loop
-       */
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      sp = sl;
-      dp = dl;
-
-      p02 = buff4[0];
-      p03 = buff4[1];
-      p04 = buff4[2];
-      p05 = buff4[3];
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = buff4[i + 4]; p05 = buff4[i + 5];
-
-        d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
-        d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
-
-        dp[0    ] = FROM_S32(d0);
-        dp[chan1] = FROM_S32(d1);
-
-        sp += chan2;
-        dp += chan2;
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        p00 = buff0[i];     p10 = buff1[i];     p20 = buff2[i];     p30 = buff3[i];
-        p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
-        p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
-        p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
-        p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
-
-        p40 = buff4[i];     p41 = buff4[i + 1]; p42 = buff4[i + 2];
-        p43 = buff4[i + 3]; p44 = buff4[i + 4];
-
-        buff5[i] = (FTYPE)sp[0];
-
-        buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
-                       p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
-                       p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
-                       p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
-                       p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
-
-        dp[0] = FROM_S32(buffo[i]);
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      buff5[wid    ] = (FTYPE)sp[0];
-      buff5[wid + 1] = (FTYPE)sp[chan1];
-      buff5[wid + 2] = (FTYPE)sp[chan2];
-      buff5[wid + 3] = (FTYPE)sp[chan2 + chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buffT = buff0;
-      buff0 = buff1;
-      buff1 = buff2;
-      buff2 = buff3;
-      buff3 = buff4;
-      buff4 = buff5;
-      buff5 = buffT;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#ifndef __sparc /* for x86, using integer multiplies is faster */
-
-mlib_status CONV_FUNC_I(5x5)(mlib_image       *dst,
-                             const mlib_image *src,
-                             const mlib_s32   *kern,
-                             mlib_s32         scalef_expon,
-                             mlib_s32         cmask)
-{
-  mlib_s32 buff[BUFF_LINE];
-  mlib_s32 *buffd;
-  mlib_s32 k[KSIZE*KSIZE];
-  mlib_s32 shift1, shift2;
-  mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-  mlib_s32 p00, p01, p02, p03, p04, p05,
-           p10, p11, p12, p13, p14, p15;
-  DTYPE    *adr_src, *sl, *sp0, *sp1;
-  DTYPE    *adr_dst, *dl, *dp;
-  mlib_s32 *pbuff = buff;
-  mlib_s32 wid, hgt, sll, dll;
-  mlib_s32 nchannel, chan1, chan2, chan3, chan4;
-  mlib_s32 i, j, c;
-
-#if IMG_TYPE != 1
-  shift1 = 16;
-#else
-  shift1 = 8;
-#endif /* IMG_TYPE != 1 */
-
-  shift2 = scalef_expon - shift1;
-
-  for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
-
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  buffd = pbuff;
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-  chan3 = chan2 + chan1;
-  chan4 = chan3 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < chan1; c++) {
-    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    for (j = 0; j < hgt; j++) {
-      mlib_s32 pix0, pix1;
-      /*
-       *  First loop
-       */
-      sp0 = sl;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
-      k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                        p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                        p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                    p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  Second loop
-       */
-      sp0 = sl + 2*sll;
-      sp1 = sp0 + sll;
-      dp = dl;
-
-      k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
-      k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
-
-      p02 = sp0[0];     p12 = sp1[0];
-      p03 = sp0[chan1]; p13 = sp1[chan1];
-      p04 = sp0[chan2]; p14 = sp1[chan2];
-      p05 = sp0[chan3]; p15 = sp1[chan3];
-
-      sp0 += chan4;
-      sp1 += chan4;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-        p05 = sp0[chan1]; p15 = sp1[chan1];
-
-        buffd[i    ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                         p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-        buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
-                         p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
-        sp0 += chan2;
-        sp1 += chan2;
-        dp += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p10 = p12;
-        p01 = p03; p11 = p13;
-        p02 = p04; p12 = p14;
-        p03 = p05; p13 = p15;
-
-        p04 = sp0[0];     p14 = sp1[0];
-
-        buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
-                     p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
-      }
-
-      /*
-       *  3 loop
-       */
-      dp = dl;
-      sp0 = sl + 4*sll;
-
-      k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
-
-      p02 = sp0[0];
-      p03 = sp0[chan1];
-      p04 = sp0[chan2];
-      p05 = sp0[chan3];
-
-      sp0 += chan2 + chan2;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-      for (i = 0; i <= (wid - 2); i += 2) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0]; p05 = sp0[chan1];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
-                p04 * k3 + p05 * k4) >> shift2;
-
-        CLAMP_STORE(dp[0],     pix0);
-        CLAMP_STORE(dp[chan1], pix1);
-
-        dp  += chan2;
-        sp0 += chan2;
-      }
-
-      if (wid & 1) {
-        p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
-        p04 = sp0[0];
-
-        pix0 = (buffd[i    ] + p00 * k0 + p01 * k1 + p02 * k2 +
-                p03 * k3 + p04 * k4) >> shift2;
-        CLAMP_STORE(dp[0],     pix0);
-      }
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* __sparc ( for x86, using integer multiplies is faster ) */
-
-/***************************************************************/
-#if IMG_TYPE == 1
-
-#undef  KSIZE
-#define KSIZE 7
-
-mlib_status CONV_FUNC(7x7)(mlib_image       *dst,
-                           const mlib_image *src,
-                           const mlib_s32   *kern,
-                           mlib_s32         scalef_expon,
-                           mlib_s32         cmask)
-{
-  FTYPE    buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
-  FTYPE    k[KSIZE*KSIZE];
-  mlib_s32 l, m, buff_ind;
-  mlib_s32 d0, d1;
-  FTYPE    k0, k1, k2, k3, k4, k5, k6;
-  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
-  DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
-  DEF_VARS(DTYPE);
-  DTYPE *sl1;
-  mlib_s32 chan2;
-  mlib_s32 *buffo, *buffi;
-  LOAD_KERNEL(KSIZE*KSIZE);
-  GET_SRC_DST_PARAMETERS(DTYPE);
-
-  if (wid > BUFF_LINE) {
-    pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid;
-  for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
-  buffd = buffs[KSIZE] + wid;
-  buffo = (mlib_s32*)(buffd + wid);
-  buffi = buffo + (wid &~ 1);
-
-  chan1 = nchannel;
-  chan2 = chan1 + chan1;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-
-  adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
-  for (c = 0; c < nchannel; c++) {
-    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
-
-    sl = adr_src + c;
-    dl = adr_dst + c;
-
-    sl1 = sl  + sll;
-    sl2 = sl1 + sll;
-    sl3 = sl2 + sll;
-    sl4 = sl3 + sll;
-    sl5 = sl4 + sll;
-    sl6 = sl5 + sll;
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid + (KSIZE - 1); i++) {
-      buffs[0][i] = (FTYPE)sl[i*chan1];
-      buffs[1][i] = (FTYPE)sl1[i*chan1];
-      buffs[2][i] = (FTYPE)sl2[i*chan1];
-      buffs[3][i] = (FTYPE)sl3[i*chan1];
-      buffs[4][i] = (FTYPE)sl4[i*chan1];
-      buffs[5][i] = (FTYPE)sl5[i*chan1];
-      buffs[6][i] = (FTYPE)sl6[i*chan1];
-    }
-
-    buff_ind = 0;
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (i = 0; i < wid; i++) buffd[i] = 0.0;
-
-    sl += KSIZE*sll;
-
-    for (j = 0; j < hgt; j++) {
-      FTYPE    **buffc = buffs + buff_ind;
-      FTYPE    *buffn = buffc[KSIZE];
-      FTYPE    *pk = k;
-
-      for (l = 0; l < KSIZE; l++) {
-        FTYPE    *buff = buffc[l];
-        d64_2x32 dd;
-
-        sp = sl;
-        dp = dl;
-
-        p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
-        p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
-
-        k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
-        k4 = *pk++; k5 = *pk++; k6 = *pk++;
-
-        if (l < (KSIZE - 1)) {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
-            buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
-          }
-
-        } else {
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-          for (i = 0; i <= (wid - 2); i += 2) {
-            p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
-
-            p6 = buff[i + 6]; p7 = buff[i + 7];
-
-            LOAD_BUFF(buffi);
-
-            dd.d64 = *(FTYPE   *)(buffi + i);
-            buffn[i    ] = (FTYPE)dd.i32s.i0;
-            buffn[i + 1] = (FTYPE)dd.i32s.i1;
-
-            d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
-            d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
-
-            dp[0    ] = FROM_S32(d0);
-            dp[chan1] = FROM_S32(d1);
-
-            buffd[i    ] = 0.0;
-            buffd[i + 1] = 0.0;
-
-            sp += chan2;
-            dp += chan2;
-          }
-        }
-      }
-
-      /* last pixels */
-      for (; i < wid; i++) {
-        FTYPE    *pk = k, s = 0;
-        mlib_s32 d0;
-
-        for (l = 0; l < KSIZE; l++) {
-          FTYPE    *buff = buffc[l] + i;
-
-          for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
-        }
-
-        d0 = D2I(s);
-        dp[0] = FROM_S32(d0);
-
-        buffn[i] = (FTYPE)sp[0];
-
-        sp += chan1;
-        dp += chan1;
-      }
-
-      for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1];
-
-      /* next line */
-      sl += sll;
-      dl += dll;
-
-      buff_ind++;
-
-      if (buff_ind >= KSIZE + 1) buff_ind = 0;
-    }
-  }
-
-  if (pbuff != buff) mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-#endif /* IMG_TYPE == 1 */
-
-/***************************************************************/
 #define MAX_KER   7
 #define MAX_N    15
 
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BC.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,413 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-#include "mlib_image.h"
-#include "mlib_ImageAffine.h"
-#include "mlib_ImageColormap.h"
-
-/***************************************************************/
-#define MLIB_LIMIT  512
-#define MLIB_SHIFT   16
-#define MLIB_PREC    (1 << MLIB_SHIFT)
-#define MLIB_MASK    (MLIB_PREC - 1)
-
-/***************************************************************/
-#define DTYPE  MLIB_TYPE
-
-/***************************************************************/
-#define DECLAREVAR_IND()                                        \
-  DECLAREVAR0();                                                \
-  mlib_s32  *warp_tbl   = param -> warp_tbl;                    \
-  mlib_s32  xSrc, ySrc;                                         \
-  mlib_s32  srcYStride = param -> srcYStride;                   \
-  mlib_s32  max_xsize  = param -> max_xsize;                    \
-  mlib_filter filter = param -> filter;                         \
-  MLIB_TYPE *sp, *dl;                                           \
-  mlib_d64  xf0, xf1, xf2, xf3;                                 \
-  mlib_d64  yf0, yf1, yf2, yf3;                                 \
-  mlib_d64  c0, c1, c2, c3, val0;                               \
-  mlib_s32  filterpos;                                          \
-  mlib_f32  *fptr;                                              \
-  mlib_d64  s0, s1, s2, s3;                                     \
-  mlib_s32  i, size
-
-/***************************************************************/
-#define GET_FILTERS_KOEF()                                         \
-  filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;                   \
-  fptr = (mlib_f32 *) ((mlib_u8 *)mlib_filters_table + filterpos); \
-                                                                   \
-  xf0 = fptr[0];                                                   \
-  xf1 = fptr[1];                                                   \
-  xf2 = fptr[2];                                                   \
-  xf3 = fptr[3];                                                   \
-                                                                   \
-  filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;                   \
-  fptr = (mlib_f32 *) ((mlib_u8 *)mlib_filters_table + filterpos); \
-                                                                   \
-  yf0 = fptr[0];                                                   \
-  yf1 = fptr[1];                                                   \
-  yf2 = fptr[2];                                                   \
-  yf3 = fptr[3]
-
-/***************************************************************/
-#define GET_POINTER()                                           \
-  xSrc = (X >> MLIB_SHIFT)-1;                                   \
-  ySrc = (Y >> MLIB_SHIFT)-1;                                   \
-  sp = ((MLIB_TYPE **)lineAddr)[ySrc] + xSrc
-
-/***************************************************************/
-#define LOAD_FIRST_ROW(nchan, chan)                             \
-  s0 = *(lut + sp[0]*nchan + chan);                             \
-  s1 = *(lut + sp[1]*nchan + chan);                             \
-  s2 = *(lut + sp[2]*nchan + chan);                             \
-  s3 = *(lut + sp[3]*nchan + chan)
-
-/***************************************************************/
-#define COUNT_NEXT_ROW(dst, nchan, chan)                        \
-  sp = (MLIB_TYPE*)((mlib_addr)sp + srcYStride);                \
-  dst = ((*(lut + sp[0]*nchan + chan))*xf0 +                    \
-         (*(lut + sp[1]*nchan + chan))*xf1 +                    \
-         (*(lut + sp[2]*nchan + chan))*xf2 +                    \
-         (*(lut + sp[3]*nchan + chan))*xf3)
-
-/***************************************************************/
-#ifdef MLIB_USE_FTOI_CLAMPING
-
-/***********/
-#define STORE_SAT_VALUE_U8(ind)                                 \
-  dp[ind] = ((mlib_s32)(val0 - (mlib_d64)0x7F800000) >> 24) ^ 0x80
-
-/***********/
-#define STORE_SAT_VALUE_S16(ind)                                \
-  dp[ind] = ((mlib_s32)(val0)) >> 16
-
-#else
-
-/***********/
-#define STORE_SAT_VALUE_U8(ind)                                 \
-  val0 -= (mlib_d64)0x7F800000;                                 \
-  if (val0 >= MLIB_S32_MAX)                                     \
-    dp[ind] = MLIB_U8_MAX;                                      \
-  else if (val0 <= MLIB_S32_MIN)                                \
-    dp[ind] = MLIB_U8_MIN;                                      \
-  else                                                          \
-    dp[ind] = ((mlib_s32)val0 >> 24) ^ 0x80
-
-/***********/
-#define STORE_SAT_VALUE_S16(ind)                                \
-  if (val0 >= MLIB_S32_MAX)                                     \
-    dp[ind] = MLIB_S16_MAX;                                     \
-  else if (val0 <= MLIB_S32_MIN)                                \
-    dp[ind] = MLIB_S16_MIN;                                     \
-  else                                                          \
-    dp[ind] = (mlib_s32)val0 >> 16
-
-#endif /* MLIB_USE_FTOI_CLAMPING */
-
-/***************************************************************/
-#define MAKE_BC_3CH(lut_format)                                 \
-  X += dX;                                                      \
-  Y += dY;                                                      \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 3, 0);                                     \
-  COUNT_NEXT_ROW(c2, 3, 0);                                     \
-  COUNT_NEXT_ROW(c3, 3, 0);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(3, 1);                                         \
-  STORE_SAT_VALUE_##lut_format(0);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 3, 1);                                     \
-  COUNT_NEXT_ROW(c2, 3, 1);                                     \
-  COUNT_NEXT_ROW(c3, 3, 1);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(3, 2);                                         \
-  STORE_SAT_VALUE_##lut_format(1);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 3, 2);                                     \
-  COUNT_NEXT_ROW(c2, 3, 2);                                     \
-  COUNT_NEXT_ROW(c3, 3, 2);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  GET_FILTERS_KOEF();                                           \
-  GET_POINTER();                                                \
-  LOAD_FIRST_ROW(3, 0);                                         \
-  STORE_SAT_VALUE_##lut_format(2);
-
-/***************************************************************/
-#define MAKE_LAST_PIXEL_BC_3CH(lut_format)                      \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 3, 0);                                     \
-  COUNT_NEXT_ROW(c2, 3, 0);                                     \
-  COUNT_NEXT_ROW(c3, 3, 0);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(3, 1);                                         \
-  STORE_SAT_VALUE_##lut_format(0);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 3, 1);                                     \
-  COUNT_NEXT_ROW(c2, 3, 1);                                     \
-  COUNT_NEXT_ROW(c3, 3, 1);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(3, 2);                                         \
-  STORE_SAT_VALUE_##lut_format(1);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 3, 2);                                     \
-  COUNT_NEXT_ROW(c2, 3, 2);                                     \
-  COUNT_NEXT_ROW(c3, 3, 2);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  STORE_SAT_VALUE_##lut_format(2);
-
-/***************************************************************/
-#define MAKE_BC_4CH(lut_format)                                 \
-  X += dX;                                                      \
-  Y += dY;                                                      \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 0);                                     \
-  COUNT_NEXT_ROW(c2, 4, 0);                                     \
-  COUNT_NEXT_ROW(c3, 4, 0);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(4, 1);                                         \
-  STORE_SAT_VALUE_##lut_format(0);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 1);                                     \
-  COUNT_NEXT_ROW(c2, 4, 1);                                     \
-  COUNT_NEXT_ROW(c3, 4, 1);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(4, 2);                                         \
-  STORE_SAT_VALUE_##lut_format(1);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 2);                                     \
-  COUNT_NEXT_ROW(c2, 4, 2);                                     \
-  COUNT_NEXT_ROW(c3, 4, 2);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(4, 3);                                         \
-  STORE_SAT_VALUE_##lut_format(2);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 3);                                     \
-  COUNT_NEXT_ROW(c2, 4, 3);                                     \
-  COUNT_NEXT_ROW(c3, 4, 3);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  GET_FILTERS_KOEF();                                           \
-  GET_POINTER();                                                \
-  LOAD_FIRST_ROW(4, 0);                                         \
-  STORE_SAT_VALUE_##lut_format(3);
-
-/***************************************************************/
-#define MAKE_LAST_PIXEL_BC_4CH(lut_format)                      \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 0);                                     \
-  COUNT_NEXT_ROW(c2, 4, 0);                                     \
-  COUNT_NEXT_ROW(c3, 4, 0);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(4, 1);                                         \
-  STORE_SAT_VALUE_##lut_format(0);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 1);                                     \
-  COUNT_NEXT_ROW(c2, 4, 1);                                     \
-  COUNT_NEXT_ROW(c3, 4, 1);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(4, 2);                                         \
-  STORE_SAT_VALUE_##lut_format(1);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 2);                                     \
-  COUNT_NEXT_ROW(c2, 4, 2);                                     \
-  COUNT_NEXT_ROW(c3, 4, 2);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride);              \
-  LOAD_FIRST_ROW(4, 3);                                         \
-  STORE_SAT_VALUE_##lut_format(2);                              \
-  c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3);                     \
-  COUNT_NEXT_ROW(c1, 4, 3);                                     \
-  COUNT_NEXT_ROW(c2, 4, 3);                                     \
-  COUNT_NEXT_ROW(c3, 4, 3);                                     \
-  val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3);                   \
-  STORE_SAT_VALUE_##lut_format(3);
-
-/***************************************************************/
-#define FILTER_U8  ((filter == MLIB_BICUBIC) ? mlib_filters_u8f_bc  : mlib_filters_u8f_bc2)
-#define FILTER_S16 ((filter == MLIB_BICUBIC) ? mlib_filters_s16f_bc : mlib_filters_s16f_bc2)
-
-/***************************************************************/
-#define mlib_U8  mlib_u8
-#define mlib_S16 mlib_s16
-
-/***************************************************************/
-#define FUNC_AFFINEINDEX_BC_0(ITYPE, LTYPE, NCHAN)                                                  \
-  mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BC(mlib_affine_param *param,    \
-                                                                       const void        *colormap) \
-  {                                                                                                 \
-    DECLAREVAR_IND();                                                                               \
-    mlib_##LTYPE buff_lcl[NCHAN * MLIB_LIMIT], *pbuff = buff_lcl, *dp;                              \
-    mlib_d64  *lut = ((mlib_d64*)mlib_ImageGetLutDoubleData(colormap) -                             \
-                      NCHAN * mlib_ImageGetLutOffset(colormap));                                    \
-    const mlib_f32 *mlib_filters_table = FILTER_##LTYPE;                                            \
-                                                                                                    \
-    if (max_xsize > MLIB_LIMIT) {                                                                   \
-      pbuff = mlib_malloc(NCHAN * sizeof(mlib_##LTYPE) * max_xsize);                                \
-      if (pbuff == NULL) return MLIB_FAILURE;                                                       \
-    }                                                                                               \
-                                                                                                    \
-    for (j = yStart; j <= yFinish; j++) {                                                           \
-                                                                                                    \
-      NEW_LINE(1);                                                                                  \
-      dp = pbuff;                                                                                   \
-                                                                                                    \
-      GET_FILTERS_KOEF();                                                                           \
-      GET_POINTER();                                                                                \
-      LOAD_FIRST_ROW(NCHAN, 0);
-
-    /* pragma pipeloop(0) must be here */
-
-/***************************************************************/
-#define FUNC_AFFINEINDEX_BC_1(ITYPE, LTYPE, NCHAN)                         \
-                                                                           \
-      for (i = 0; i < (xRight - xLeft); i++, dp += NCHAN) {                \
-        MAKE_BC_##NCHAN##CH(LTYPE);                                        \
-      }                                                                    \
-                                                                           \
-      MAKE_LAST_PIXEL_BC_##NCHAN##CH(LTYPE);                               \
-                                                                           \
-      mlib_ImageColorTrue2IndexLine_##LTYPE##_##ITYPE##_##NCHAN            \
-                                (pbuff, dl, xRight - xLeft + 1, colormap); \
-    }                                                                      \
-                                                                           \
-    if (pbuff != buff_lcl) mlib_free(pbuff);                               \
-                                                                           \
-    return MLIB_SUCCESS;                                                   \
-  }
-
-/***************************************************************/
-#undef MLIB_TYPE
-#define MLIB_TYPE mlib_u8
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 4
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 8) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(U8, U8, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(U8, U8, 3)
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 3
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 9) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(U8, S16, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(U8, S16, 3)
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 4
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 8) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(U8, U8, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(U8, U8, 4)
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 3
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 9) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(U8, S16, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(U8, S16, 4)
-
-/***************************************************************/
-#undef  MLIB_TYPE
-#define MLIB_TYPE mlib_s16
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 4
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 8) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(S16, U8, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(S16, U8, 3)
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 3
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 9) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(S16, S16, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(S16, S16, 3)
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 4
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 8) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(S16, U8, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(S16, U8, 4)
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT 3
-#undef  FILTER_MASK
-#define FILTER_MASK  (((1 << 9) - 1) << 4)
-
-FUNC_AFFINEINDEX_BC_0(S16, S16, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BC_1(S16, S16, 4)
-
-/***************************************************************/
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BL.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,280 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-#include "mlib_image.h"
-#include "mlib_ImageAffine.h"
-#include "mlib_ImageColormap.h"
-
-/***************************************************************/
-#define MLIB_LIMIT  512
-
-/***************************************************************/
-#define DTYPE  MLIB_TYPE
-
-/***************************************************************/
-#define DECLAREVAR_IND()                                        \
-  DECLAREVAR0();                                                \
-  mlib_s32  *warp_tbl   = param -> warp_tbl;                    \
-  mlib_s32  xSrc, ySrc;                                         \
-  mlib_s32  srcYStride = param -> srcYStride;                   \
-  mlib_s32  max_xsize  = param -> max_xsize;                    \
-  MLIB_TYPE *sp0, *sp1;                                         \
-  MLIB_TYPE *dl;                                                \
-  mlib_d64  scale = 1.0 / 65536.0;                              \
-  mlib_s32  i, size
-
-/***************************************************************/
-#define DECLARE_INTERNAL_VAR_3CH()                              \
-  mlib_d64  fdx, fdy;                                           \
-  mlib_d64  a00_0, a01_0, a10_0, a11_0;                         \
-  mlib_d64  a00_1, a01_1, a10_1, a11_1;                         \
-  mlib_d64  a00_2, a01_2, a10_2, a11_2;                         \
-  mlib_d64  pix0_0, pix1_0, res0;                               \
-  mlib_d64  pix0_1, pix1_1, res1;                               \
-  mlib_d64  pix0_2, pix1_2, res2
-
-/***************************************************************/
-#define DECLARE_INTERNAL_VAR_4CH()                              \
-  mlib_d64  fdx, fdy;                                           \
-  mlib_d64  a00_0, a01_0, a10_0, a11_0;                         \
-  mlib_d64  a00_1, a01_1, a10_1, a11_1;                         \
-  mlib_d64  a00_2, a01_2, a10_2, a11_2;                         \
-  mlib_d64  a00_3, a01_3, a10_3, a11_3;                         \
-  mlib_d64  pix0_0, pix1_0, res0;                               \
-  mlib_d64  pix0_1, pix1_1, res1;                               \
-  mlib_d64  pix0_2, pix1_2, res2;                               \
-  mlib_d64  pix0_3, pix1_3, res3
-
-/***************************************************************/
-#define GET_PIXELS_POINTERS()                                   \
-  fdx = (X & MLIB_MASK) * scale;                                \
-  fdy = (Y & MLIB_MASK) * scale;                                \
-  ySrc = MLIB_POINTER_SHIFT(Y);  Y += dY;                       \
-  xSrc = X >> MLIB_SHIFT;  X += dX;                             \
-  sp0 = MLIB_POINTER_GET(lineAddr, ySrc) + xSrc;                \
-  sp1 = (MLIB_TYPE *)((mlib_u8 *)sp0 + srcYStride)
-
-/***************************************************************/
-#define GET_COLOR_POINTERS(ind)                                 \
-  pcolor00 = (lut + sp0[0]*ind);                                \
-  pcolor10 = (lut + sp1[0]*ind);                                \
-  pcolor01 = (lut + sp0[1]*ind);                                \
-  pcolor11 = (lut + sp1[1]*ind)
-
-/***************************************************************/
-#define COUNT_BL_U8(ind)                                        \
-  pix0_##ind = a00_##ind + fdy * (a10_##ind - a00_##ind);       \
-  pix1_##ind = a01_##ind + fdy * (a11_##ind - a01_##ind);       \
-  res##ind = pix0_##ind + fdx * (pix1_##ind - pix0_##ind) + 0.5
-
-/***************************************************************/
-#define COUNT_BL_U8_3CH()                                       \
-  COUNT_BL_U8(0);                                               \
-  COUNT_BL_U8(1);                                               \
-  COUNT_BL_U8(2);
-
-/***************************************************************/
-#define COUNT_BL_U8_4CH()                                       \
-  COUNT_BL_U8_3CH();                                            \
-  COUNT_BL_U8(3);
-
-/***************************************************************/
-#define COUNT_BL_S16(ind)                                       \
-  pix0_##ind = a00_##ind + fdy * (a10_##ind - a00_##ind);       \
-  pix1_##ind = a01_##ind + fdy * (a11_##ind - a01_##ind);       \
-  res##ind = pix0_##ind + fdx * (pix1_##ind - pix0_##ind)
-
-/***************************************************************/
-#define COUNT_BL_S16_3CH()                                      \
-  COUNT_BL_S16(0);                                              \
-  COUNT_BL_S16(1);                                              \
-  COUNT_BL_S16(2);
-
-/***************************************************************/
-#define COUNT_BL_S16_4CH()                                      \
-  COUNT_BL_S16_3CH();                                           \
-  COUNT_BL_S16(3);
-
-/***************************************************************/
-#define LOAD(ind)                                               \
-  a00_##ind = pcolor00[ind];                                    \
-  a01_##ind = pcolor01[ind];                                    \
-  a10_##ind = pcolor10[ind];                                    \
-  a11_##ind = pcolor11[ind]
-
-/***************************************************************/
-#define LOAD_3CH()                                              \
-  LOAD(0);                                                      \
-  LOAD(1);                                                      \
-  LOAD(2);
-
-/***************************************************************/
-#define LOAD_4CH()                                              \
-  LOAD_3CH();                                                   \
-  LOAD(3);
-
-/***************************************************************/
-#define STORE_INTO_INTERM_BUF_3CH(LTYPE)                        \
-  dp[0] = (mlib_##LTYPE)res0;                                   \
-  dp[1] = (mlib_##LTYPE)res1;                                   \
-  dp[2] = (mlib_##LTYPE)res2
-
-/***************************************************************/
-#define STORE_INTO_INTERM_BUF_4CH(LTYPE)                        \
-  dp[0] = (mlib_##LTYPE)res0;                                   \
-  dp[1] = (mlib_##LTYPE)res1;                                   \
-  dp[2] = (mlib_##LTYPE)res2;                                   \
-  dp[3] = (mlib_##LTYPE)res3
-
-/***************************************************************/
-#undef  MLIB_TYPE
-#define MLIB_TYPE mlib_u8
-
-/***************************************************************/
-#define mlib_U8  mlib_u8
-#define mlib_S16 mlib_s16
-
-/***************************************************************/
-#define FUNC_AFFINEINDEX_BL_0(ITYPE, LTYPE, NCHAN)                                               \
-  mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BL(mlib_affine_param *param, \
-                                                                       const void        *colormap) \
-  {                                                                                              \
-    DECLAREVAR_IND();                                                                            \
-    mlib_##LTYPE  *dp, buff_lcl[NCHAN*MLIB_LIMIT], *pbuff = buff_lcl;                            \
-    mlib_d64 *pcolor00, *pcolor10, *pcolor01, *pcolor11;                                         \
-    mlib_d64 *lut = mlib_ImageGetLutDoubleData(colormap);                                        \
-                                                                                                 \
-    lut -= NCHAN*mlib_ImageGetLutOffset(colormap);                                               \
-                                                                                                 \
-    if (max_xsize > MLIB_LIMIT) {                                                                \
-      pbuff = mlib_malloc(NCHAN * sizeof(mlib_##LTYPE) * max_xsize);                             \
-      if (pbuff == NULL) return MLIB_FAILURE;                                                    \
-    }                                                                                            \
-                                                                                                 \
-    for (j = yStart; j <= yFinish; j++) {                                                        \
-      DECLARE_INTERNAL_VAR_##NCHAN##CH();                                                        \
-                                                                                                 \
-      NEW_LINE(1);                                                                               \
-      dp = pbuff;                                                                                \
-                                                                                                 \
-      GET_PIXELS_POINTERS();                                                                     \
-      GET_COLOR_POINTERS(NCHAN);                                                                 \
-      LOAD_##NCHAN##CH();
-
-    /* pragma pipeloop(0) must be here */
-
-/***************************************************************/
-#define FUNC_AFFINEINDEX_BL_1(ITYPE, LTYPE, NCHAN)                   \
-      for (i = 0; i < (xRight - xLeft); i++, dp += NCHAN) {          \
-        COUNT_BL_##LTYPE##_##NCHAN##CH();                            \
-                                                                     \
-        GET_PIXELS_POINTERS();                                       \
-        GET_COLOR_POINTERS(NCHAN);                                   \
-        LOAD_##NCHAN##CH();                                          \
-                                                                     \
-        STORE_INTO_INTERM_BUF_##NCHAN##CH(LTYPE);                    \
-      }                                                              \
-                                                                     \
-      COUNT_BL_##LTYPE##_##NCHAN##CH();                              \
-      STORE_INTO_INTERM_BUF_##NCHAN##CH(LTYPE);                      \
-                                                                     \
-      mlib_ImageColorTrue2IndexLine_##LTYPE##_##ITYPE##_##NCHAN      \
-                          (pbuff, dl, xRight - xLeft + 1, colormap); \
-    }                                                                \
-                                                                     \
-    if (pbuff != buff_lcl) mlib_free(pbuff);                         \
-                                                                     \
-    return MLIB_SUCCESS;                                             \
-  }
-
-/***************************************************************/
-#undef  MLIB_TYPE
-#define MLIB_TYPE mlib_u8
-
-FUNC_AFFINEINDEX_BL_0(U8, U8, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(U8, U8, 3)
-
-FUNC_AFFINEINDEX_BL_0(U8, S16, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(U8, S16, 3)
-
-FUNC_AFFINEINDEX_BL_0(U8, U8, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(U8, U8, 4)
-
-FUNC_AFFINEINDEX_BL_0(U8, S16, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(U8, S16, 4)
-
-/***************************************************************/
-#undef  MLIB_TYPE
-#define MLIB_TYPE mlib_s16
-
-FUNC_AFFINEINDEX_BL_0(S16, U8, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(S16, U8, 3)
-
-FUNC_AFFINEINDEX_BL_0(S16, S16, 3)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(S16, S16, 3)
-
-FUNC_AFFINEINDEX_BL_0(S16, U8, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(S16, U8, 4)
-
-FUNC_AFFINEINDEX_BL_0(S16, S16, 4)
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-FUNC_AFFINEINDEX_BL_1(S16, S16, 4)
-
-/***************************************************************/
-const type_affine_i_fun mlib_AffineFunArr_bl_i[] = {
-  mlib_ImageAffineIndex_U8_U8_3CH_BL,
-  mlib_ImageAffineIndex_U8_U8_4CH_BL,
-  mlib_ImageAffineIndex_S16_U8_3CH_BL,
-  mlib_ImageAffineIndex_S16_U8_4CH_BL,
-  mlib_ImageAffineIndex_U8_S16_3CH_BL,
-  mlib_ImageAffineIndex_U8_S16_4CH_BL,
-  mlib_ImageAffineIndex_S16_S16_3CH_BL,
-  mlib_ImageAffineIndex_S16_S16_4CH_BL
-};
-/***************************************************************/
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,263 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include <mlib_image.h>
-#include "mlib_c_ImageBlendTable.h"
-
-const mlib_f32 mlib_c_blend_u8[] = {
-    0.0f,   1.0f,   2.0f,   3.0f,   4.0f,   5.0f,   6.0f,   7.0f,
-    8.0f,   9.0f,  10.0f,  11.0f,  12.0f,  13.0f,  14.0f,  15.0f,
-   16.0f,  17.0f,  18.0f,  19.0f,  20.0f,  21.0f,  22.0f,  23.0f,
-   24.0f,  25.0f,  26.0f,  27.0f,  28.0f,  29.0f,  30.0f,  31.0f,
-   32.0f,  33.0f,  34.0f,  35.0f,  36.0f,  37.0f,  38.0f,  39.0f,
-   40.0f,  41.0f,  42.0f,  43.0f,  44.0f,  45.0f,  46.0f,  47.0f,
-   48.0f,  49.0f,  50.0f,  51.0f,  52.0f,  53.0f,  54.0f,  55.0f,
-   56.0f,  57.0f,  58.0f,  59.0f,  60.0f,  61.0f,  62.0f,  63.0f,
-   64.0f,  65.0f,  66.0f,  67.0f,  68.0f,  69.0f,  70.0f,  71.0f,
-   72.0f,  73.0f,  74.0f,  75.0f,  76.0f,  77.0f,  78.0f,  79.0f,
-   80.0f,  81.0f,  82.0f,  83.0f,  84.0f,  85.0f,  86.0f,  87.0f,
-   88.0f,  89.0f,  90.0f,  91.0f,  92.0f,  93.0f,  94.0f,  95.0f,
-   96.0f,  97.0f,  98.0f,  99.0f, 100.0f, 101.0f, 102.0f, 103.0f,
-  104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f,
-  112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f,
-  120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f,
-  128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f,
-  136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f,
-  144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f,
-  152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f,
-  160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f,
-  168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f,
-  176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f,
-  184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f,
-  192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f,
-  200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f,
-  208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f,
-  216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f,
-  224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f,
-  232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f,
-  240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f,
-  248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f,
-  256.0f
-};
-
-const mlib_f32 mlib_c_blend_Q8[] = {
-  0.00000000f, 0.00390625f, 0.00781250f, 0.01171875f,
-  0.01562500f, 0.01953125f, 0.02343750f, 0.02734375f,
-  0.03125000f, 0.03515625f, 0.03906250f, 0.04296875f,
-  0.04687500f, 0.05078125f, 0.05468750f, 0.05859375f,
-  0.06250000f, 0.06640625f, 0.07031250f, 0.07421875f,
-  0.07812500f, 0.08203125f, 0.08593750f, 0.08984375f,
-  0.09375000f, 0.09765625f, 0.10156250f, 0.10546875f,
-  0.10937500f, 0.11328125f, 0.11718750f, 0.12109375f,
-  0.12500000f, 0.12890625f, 0.13281250f, 0.13671875f,
-  0.14062500f, 0.14453125f, 0.14843750f, 0.15234375f,
-  0.15625000f, 0.16015625f, 0.16406250f, 0.16796875f,
-  0.17187500f, 0.17578125f, 0.17968750f, 0.18359375f,
-  0.18750000f, 0.19140625f, 0.19531250f, 0.19921875f,
-  0.20312500f, 0.20703125f, 0.21093750f, 0.21484375f,
-  0.21875000f, 0.22265625f, 0.22656250f, 0.23046875f,
-  0.23437500f, 0.23828125f, 0.24218750f, 0.24609375f,
-  0.25000000f, 0.25390625f, 0.25781250f, 0.26171875f,
-  0.26562500f, 0.26953125f, 0.27343750f, 0.27734375f,
-  0.28125000f, 0.28515625f, 0.28906250f, 0.29296875f,
-  0.29687500f, 0.30078125f, 0.30468750f, 0.30859375f,
-  0.31250000f, 0.31640625f, 0.32031250f, 0.32421875f,
-  0.32812500f, 0.33203125f, 0.33593750f, 0.33984375f,
-  0.34375000f, 0.34765625f, 0.35156250f, 0.35546875f,
-  0.35937500f, 0.36328125f, 0.36718750f, 0.37109375f,
-  0.37500000f, 0.37890625f, 0.38281250f, 0.38671875f,
-  0.39062500f, 0.39453125f, 0.39843750f, 0.40234375f,
-  0.40625000f, 0.41015625f, 0.41406250f, 0.41796875f,
-  0.42187500f, 0.42578125f, 0.42968750f, 0.43359375f,
-  0.43750000f, 0.44140625f, 0.44531250f, 0.44921875f,
-  0.45312500f, 0.45703125f, 0.46093750f, 0.46484375f,
-  0.46875000f, 0.47265625f, 0.47656250f, 0.48046875f,
-  0.48437500f, 0.48828125f, 0.49218750f, 0.49609375f,
-  0.50000000f, 0.50390625f, 0.50781250f, 0.51171875f,
-  0.51562500f, 0.51953125f, 0.52343750f, 0.52734375f,
-  0.53125000f, 0.53515625f, 0.53906250f, 0.54296875f,
-  0.54687500f, 0.55078125f, 0.55468750f, 0.55859375f,
-  0.56250000f, 0.56640625f, 0.57031250f, 0.57421875f,
-  0.57812500f, 0.58203125f, 0.58593750f, 0.58984375f,
-  0.59375000f, 0.59765625f, 0.60156250f, 0.60546875f,
-  0.60937500f, 0.61328125f, 0.61718750f, 0.62109375f,
-  0.62500000f, 0.62890625f, 0.63281250f, 0.63671875f,
-  0.64062500f, 0.64453125f, 0.64843750f, 0.65234375f,
-  0.65625000f, 0.66015625f, 0.66406250f, 0.66796875f,
-  0.67187500f, 0.67578125f, 0.67968750f, 0.68359375f,
-  0.68750000f, 0.69140625f, 0.69531250f, 0.69921875f,
-  0.70312500f, 0.70703125f, 0.71093750f, 0.71484375f,
-  0.71875000f, 0.72265625f, 0.72656250f, 0.73046875f,
-  0.73437500f, 0.73828125f, 0.74218750f, 0.74609375f,
-  0.75000000f, 0.75390625f, 0.75781250f, 0.76171875f,
-  0.76562500f, 0.76953125f, 0.77343750f, 0.77734375f,
-  0.78125000f, 0.78515625f, 0.78906250f, 0.79296875f,
-  0.79687500f, 0.80078125f, 0.80468750f, 0.80859375f,
-  0.81250000f, 0.81640625f, 0.82031250f, 0.82421875f,
-  0.82812500f, 0.83203125f, 0.83593750f, 0.83984375f,
-  0.84375000f, 0.84765625f, 0.85156250f, 0.85546875f,
-  0.85937500f, 0.86328125f, 0.86718750f, 0.87109375f,
-  0.87500000f, 0.87890625f, 0.88281250f, 0.88671875f,
-  0.89062500f, 0.89453125f, 0.89843750f, 0.90234375f,
-  0.90625000f, 0.91015625f, 0.91406250f, 0.91796875f,
-  0.92187500f, 0.92578125f, 0.92968750f, 0.93359375f,
-  0.93750000f, 0.94140625f, 0.94531250f, 0.94921875f,
-  0.95312500f, 0.95703125f, 0.96093750f, 0.96484375f,
-  0.96875000f, 0.97265625f, 0.97656250f, 0.98046875f,
-  0.98437500f, 0.98828125f, 0.99218750f, 0.99609375f,
-  1.00000000f, 1.00390625f, 1.00781250f, 1.01171875f,
-  1.01562500f, 1.01953125f, 1.02343750f, 1.02734375f,
-  1.03125000f, 1.03515625f, 1.03906250f, 1.04296875f,
-  1.04687500f, 1.05078125f, 1.05468750f, 1.05859375f,
-  1.06250000f, 1.06640625f, 1.07031250f, 1.07421875f,
-  1.07812500f, 1.08203125f, 1.08593750f, 1.08984375f,
-  1.09375000f, 1.09765625f, 1.10156250f, 1.10546875f,
-  1.10937500f, 1.11328125f, 1.11718750f, 1.12109375f,
-  1.12500000f, 1.12890625f, 1.13281250f, 1.13671875f,
-  1.14062500f, 1.14453125f, 1.14843750f, 1.15234375f,
-  1.15625000f, 1.16015625f, 1.16406250f, 1.16796875f,
-  1.17187500f, 1.17578125f, 1.17968750f, 1.18359375f,
-  1.18750000f, 1.19140625f, 1.19531250f, 1.19921875f,
-  1.20312500f, 1.20703125f, 1.21093750f, 1.21484375f,
-  1.21875000f, 1.22265625f, 1.22656250f, 1.23046875f,
-  1.23437500f, 1.23828125f, 1.24218750f, 1.24609375f,
-  1.25000000f, 1.25390625f, 1.25781250f, 1.26171875f,
-  1.26562500f, 1.26953125f, 1.27343750f, 1.27734375f,
-  1.28125000f, 1.28515625f, 1.28906250f, 1.29296875f,
-  1.29687500f, 1.30078125f, 1.30468750f, 1.30859375f,
-  1.31250000f, 1.31640625f, 1.32031250f, 1.32421875f,
-  1.32812500f, 1.33203125f, 1.33593750f, 1.33984375f,
-  1.34375000f, 1.34765625f, 1.35156250f, 1.35546875f,
-  1.35937500f, 1.36328125f, 1.36718750f, 1.37109375f,
-  1.37500000f, 1.37890625f, 1.38281250f, 1.38671875f,
-  1.39062500f, 1.39453125f, 1.39843750f, 1.40234375f,
-  1.40625000f, 1.41015625f, 1.41406250f, 1.41796875f,
-  1.42187500f, 1.42578125f, 1.42968750f, 1.43359375f,
-  1.43750000f, 1.44140625f, 1.44531250f, 1.44921875f,
-  1.45312500f, 1.45703125f, 1.46093750f, 1.46484375f,
-  1.46875000f, 1.47265625f, 1.47656250f, 1.48046875f,
-  1.48437500f, 1.48828125f, 1.49218750f, 1.49609375f,
-  1.50000000f, 1.50390625f, 1.50781250f, 1.51171875f,
-  1.51562500f, 1.51953125f, 1.52343750f, 1.52734375f,
-  1.53125000f, 1.53515625f, 1.53906250f, 1.54296875f,
-  1.54687500f, 1.55078125f, 1.55468750f, 1.55859375f,
-  1.56250000f, 1.56640625f, 1.57031250f, 1.57421875f,
-  1.57812500f, 1.58203125f, 1.58593750f, 1.58984375f,
-  1.59375000f, 1.59765625f, 1.60156250f, 1.60546875f,
-  1.60937500f, 1.61328125f, 1.61718750f, 1.62109375f,
-  1.62500000f, 1.62890625f, 1.63281250f, 1.63671875f,
-  1.64062500f, 1.64453125f, 1.64843750f, 1.65234375f,
-  1.65625000f, 1.66015625f, 1.66406250f, 1.66796875f,
-  1.67187500f, 1.67578125f, 1.67968750f, 1.68359375f,
-  1.68750000f, 1.69140625f, 1.69531250f, 1.69921875f,
-  1.70312500f, 1.70703125f, 1.71093750f, 1.71484375f,
-  1.71875000f, 1.72265625f, 1.72656250f, 1.73046875f,
-  1.73437500f, 1.73828125f, 1.74218750f, 1.74609375f,
-  1.75000000f, 1.75390625f, 1.75781250f, 1.76171875f,
-  1.76562500f, 1.76953125f, 1.77343750f, 1.77734375f,
-  1.78125000f, 1.78515625f, 1.78906250f, 1.79296875f,
-  1.79687500f, 1.80078125f, 1.80468750f, 1.80859375f,
-  1.81250000f, 1.81640625f, 1.82031250f, 1.82421875f,
-  1.82812500f, 1.83203125f, 1.83593750f, 1.83984375f,
-  1.84375000f, 1.84765625f, 1.85156250f, 1.85546875f,
-  1.85937500f, 1.86328125f, 1.86718750f, 1.87109375f,
-  1.87500000f, 1.87890625f, 1.88281250f, 1.88671875f,
-  1.89062500f, 1.89453125f, 1.89843750f, 1.90234375f,
-  1.90625000f, 1.91015625f, 1.91406250f, 1.91796875f,
-  1.92187500f, 1.92578125f, 1.92968750f, 1.93359375f,
-  1.93750000f, 1.94140625f, 1.94531250f, 1.94921875f,
-  1.95312500f, 1.95703125f, 1.96093750f, 1.96484375f,
-  1.96875000f, 1.97265625f, 1.97656250f, 1.98046875f,
-  1.98437500f, 1.98828125f, 1.99218750f, 1.99609375f,
-  2.00000000f
-};
-
-const mlib_f32 mlib_c_blend_u8_sat[] = {
-           0.0f,    8388608.0f,   16777216.0f,   25165824.0f,
-    33554432.0f,   41943040.0f,   50331648.0f,   58720256.0f,
-    67108864.0f,   75497472.0f,   83886080.0f,   92274688.0f,
-   100663296.0f,  109051904.0f,  117440512.0f,  125829120.0f,
-   134217728.0f,  142606336.0f,  150994944.0f,  159383552.0f,
-   167772160.0f,  176160768.0f,  184549376.0f,  192937984.0f,
-   201326592.0f,  209715200.0f,  218103808.0f,  226492416.0f,
-   234881024.0f,  243269632.0f,  251658240.0f,  260046848.0f,
-   268435456.0f,  276824064.0f,  285212672.0f,  293601280.0f,
-   301989888.0f,  310378496.0f,  318767104.0f,  327155712.0f,
-   335544320.0f,  343932928.0f,  352321536.0f,  360710144.0f,
-   369098752.0f,  377487360.0f,  385875968.0f,  394264576.0f,
-   402653184.0f,  411041792.0f,  419430400.0f,  427819008.0f,
-   436207616.0f,  444596224.0f,  452984832.0f,  461373440.0f,
-   469762048.0f,  478150656.0f,  486539264.0f,  494927872.0f,
-   503316480.0f,  511705088.0f,  520093696.0f,  528482304.0f,
-   536870912.0f,  545259520.0f,  553648128.0f,  562036736.0f,
-   570425344.0f,  578813952.0f,  587202560.0f,  595591168.0f,
-   603979776.0f,  612368384.0f,  620756992.0f,  629145600.0f,
-   637534208.0f,  645922816.0f,  654311424.0f,  662700032.0f,
-   671088640.0f,  679477248.0f,  687865856.0f,  696254464.0f,
-   704643072.0f,  713031680.0f,  721420288.0f,  729808896.0f,
-   738197504.0f,  746586112.0f,  754974720.0f,  763363328.0f,
-   771751936.0f,  780140544.0f,  788529152.0f,  796917760.0f,
-   805306368.0f,  813694976.0f,  822083584.0f,  830472192.0f,
-   838860800.0f,  847249408.0f,  855638016.0f,  864026624.0f,
-   872415232.0f,  880803840.0f,  889192448.0f,  897581056.0f,
-   905969664.0f,  914358272.0f,  922746880.0f,  931135488.0f,
-   939524096.0f,  947912704.0f,  956301312.0f,  964689920.0f,
-   973078528.0f,  981467136.0f,  989855744.0f,  998244352.0f,
-  1006632960.0f, 1015021568.0f, 1023410176.0f, 1031798784.0f,
-  1040187392.0f, 1048576000.0f, 1056964608.0f, 1065353216.0f,
-  1073741824.0f, 1082130432.0f, 1090519040.0f, 1098907648.0f,
-  1107296256.0f, 1115684864.0f, 1124073472.0f, 1132462080.0f,
-  1140850688.0f, 1149239296.0f, 1157627904.0f, 1166016512.0f,
-  1174405120.0f, 1182793728.0f, 1191182336.0f, 1199570944.0f,
-  1207959552.0f, 1216348160.0f, 1224736768.0f, 1233125376.0f,
-  1241513984.0f, 1249902592.0f, 1258291200.0f, 1266679808.0f,
-  1275068416.0f, 1283457024.0f, 1291845632.0f, 1300234240.0f,
-  1308622848.0f, 1317011456.0f, 1325400064.0f, 1333788672.0f,
-  1342177280.0f, 1350565888.0f, 1358954496.0f, 1367343104.0f,
-  1375731712.0f, 1384120320.0f, 1392508928.0f, 1400897536.0f,
-  1409286144.0f, 1417674752.0f, 1426063360.0f, 1434451968.0f,
-  1442840576.0f, 1451229184.0f, 1459617792.0f, 1468006400.0f,
-  1476395008.0f, 1484783616.0f, 1493172224.0f, 1501560832.0f,
-  1509949440.0f, 1518338048.0f, 1526726656.0f, 1535115264.0f,
-  1543503872.0f, 1551892480.0f, 1560281088.0f, 1568669696.0f,
-  1577058304.0f, 1585446912.0f, 1593835520.0f, 1602224128.0f,
-  1610612736.0f, 1619001344.0f, 1627389952.0f, 1635778560.0f,
-  1644167168.0f, 1652555776.0f, 1660944384.0f, 1669332992.0f,
-  1677721600.0f, 1686110208.0f, 1694498816.0f, 1702887424.0f,
-  1711276032.0f, 1719664640.0f, 1728053248.0f, 1736441856.0f,
-  1744830464.0f, 1753219072.0f, 1761607680.0f, 1769996288.0f,
-  1778384896.0f, 1786773504.0f, 1795162112.0f, 1803550720.0f,
-  1811939328.0f, 1820327936.0f, 1828716544.0f, 1837105152.0f,
-  1845493760.0f, 1853882368.0f, 1862270976.0f, 1870659584.0f,
-  1879048192.0f, 1887436800.0f, 1895825408.0f, 1904214016.0f,
-  1912602624.0f, 1920991232.0f, 1929379840.0f, 1937768448.0f,
-  1946157056.0f, 1954545664.0f, 1962934272.0f, 1971322880.0f,
-  1979711488.0f, 1988100096.0f, 1996488704.0f, 2004877312.0f,
-  2013265920.0f, 2021654528.0f, 2030043136.0f, 2038431744.0f,
-  2046820352.0f, 2055208960.0f, 2063597568.0f, 2071986176.0f,
-  2080374784.0f, 2088763392.0f, 2097152000.0f, 2105540608.0f,
-  2113929216.0f, 2122317824.0f, 2130706432.0f, 2139095040.0f,
-  2147483648.0f
-};
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.h	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- *    These tables are used by C versions of the
- *    mlib_ImageBlend_... functions.
- */
-
-#ifndef MLIB_C_IMAGE_BLEND_TABLE_H
-#define MLIB_C_IMAGE_BLEND_TABLE_H
-
-#include "mlib_image.h"
-
-extern const mlib_f32 mlib_c_blend_u8[];
-extern const mlib_f32 mlib_U82F32[];
-extern const mlib_f32 mlib_c_blend_Q8[];
-extern const mlib_f32 mlib_c_blend_u8_sat[];
-
-#endif /* MLIB_C_IMAGEF_BLEND_TABLE_H */
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv.h	Fri May 13 11:31:05 2016 +0300
@@ -41,118 +41,6 @@
 }
 #endif /* FREE_AND_RETURN_STATUS */
 
-mlib_status mlib_c_conv2x2ext_s16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scale,
-                                  mlib_s32         cmask);
-
-mlib_status mlib_c_conv2x2ext_u16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scale,
-                                  mlib_s32         cmask);
-
-mlib_status mlib_c_conv2x2ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv2x2nw_s16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv2x2nw_u16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv2x2nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_c_conv3x3ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv3x3nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_c_conv4x4ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv4x4nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_c_conv5x5ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv5x5nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_c_conv7x7ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_c_conv7x7nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
 mlib_status mlib_c_convMxNnw_u8(mlib_image       *dst,
                                 const mlib_image *src,
                                 const mlib_s32   *kernel,
@@ -177,102 +65,6 @@
 
 #if ! defined ( __sparc ) /* for x86, using integer multiplies is faster */
 
-mlib_status mlib_i_conv3x3ext_s16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scale,
-                                  mlib_s32         cmask);
-
-mlib_status mlib_i_conv3x3ext_u16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scale,
-                                  mlib_s32         cmask);
-
-mlib_status mlib_i_conv3x3ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_i_conv3x3nw_s16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_i_conv3x3nw_u16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_i_conv3x3nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
-mlib_status mlib_i_conv5x5ext_s16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scale,
-                                  mlib_s32         cmask);
-
-mlib_status mlib_i_conv5x5ext_u16(mlib_image       *dst,
-                                  const mlib_image *src,
-                                  mlib_s32         dx_l,
-                                  mlib_s32         dx_r,
-                                  mlib_s32         dy_t,
-                                  mlib_s32         dy_b,
-                                  const mlib_s32   *kern,
-                                  mlib_s32         scale,
-                                  mlib_s32         cmask);
-
-mlib_status mlib_i_conv5x5ext_u8(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 mlib_s32         dx_l,
-                                 mlib_s32         dx_r,
-                                 mlib_s32         dy_t,
-                                 mlib_s32         dy_b,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_i_conv5x5nw_s16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_i_conv5x5nw_u16(mlib_image       *dst,
-                                 const mlib_image *src,
-                                 const mlib_s32   *kern,
-                                 mlib_s32         scale,
-                                 mlib_s32         cmask);
-
-mlib_status mlib_i_conv5x5nw_u8(mlib_image       *dst,
-                                const mlib_image *src,
-                                const mlib_s32   *kern,
-                                mlib_s32         scale,
-                                mlib_s32         cmask);
-
 mlib_status mlib_i_convMxNnw_s16(mlib_image       *dst,
                                  const mlib_image *src,
                                  const mlib_s32   *kernel,
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv_f.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv_f.c	Fri May 13 11:31:05 2016 +0300
@@ -29,34 +29,6 @@
 #include "mlib_c_ImageConv.h"
 
 /***************************************************************/
-#define MLIB_PARAMS_CONV_NW                                     \
-  mlib_image       *dst,                                        \
-  const mlib_image *src,                                        \
-  const mlib_s32   *kern,                                       \
-  mlib_s32         scale,                                       \
-  mlib_s32         cmask
-
-/***************************************************************/
-#define MLIB_CALL_PARAMS_CONV_NW                                \
-  dst, src, kern, scale, cmask
-
-/***************************************************************/
-#define MLIB_PARAMS_CONV_EXT                                    \
-  mlib_image       *dst,                                        \
-  const mlib_image *src,                                        \
-  mlib_s32         dx_l,                                        \
-  mlib_s32         dx_r,                                        \
-  mlib_s32         dy_t,                                        \
-  mlib_s32         dy_b,                                        \
-  const mlib_s32   *kern,                                       \
-  mlib_s32         scale,                                       \
-  mlib_s32         cmask
-
-/***************************************************************/
-#define MLIB_CALL_PARAMS_CONV_EXT                               \
-  dst, src, dx_l, dx_r, dy_t, dy_b, kern, scale, cmask
-
-/***************************************************************/
 #define MLIB_PARAMS_CONV_MN_NW                                  \
   mlib_image *dst,                                              \
   const mlib_image *src,                                        \
@@ -92,52 +64,6 @@
 
 
 /***************************************************************/
-mlib_status mlib_conv2x2nw_u8(MLIB_PARAMS_CONV_NW)
-{
-  return mlib_c_conv2x2nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-}
-
-/***************************************************************/
-mlib_status mlib_conv3x3nw_u8(MLIB_PARAMS_CONV_NW)
-{
-#ifdef __sparc
-  return mlib_c_conv3x3nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-#else
-
-  if (mlib_ImageConvVersion(3, 3, scale, MLIB_BYTE) == 0)
-    return mlib_c_conv3x3nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-  else
-    return mlib_i_conv3x3nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-#endif /* __sparc */
-}
-
-/***************************************************************/
-mlib_status mlib_conv4x4nw_u8(MLIB_PARAMS_CONV_NW)
-{
-  return mlib_c_conv4x4nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-}
-
-/***************************************************************/
-mlib_status mlib_conv5x5nw_u8(MLIB_PARAMS_CONV_NW)
-{
-#ifdef __sparc
-  return mlib_c_conv5x5nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-#else
-
-  if (mlib_ImageConvVersion(5, 5, scale, MLIB_BYTE) == 0)
-    return mlib_c_conv5x5nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-  else
-    return mlib_i_conv5x5nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-#endif /* __sparc */
-}
-
-/***************************************************************/
-mlib_status mlib_conv7x7nw_u8(MLIB_PARAMS_CONV_NW)
-{
-  return mlib_c_conv7x7nw_u8(MLIB_CALL_PARAMS_CONV_NW);
-}
-
-/***************************************************************/
 mlib_status mlib_convMxNnw_u8(MLIB_PARAMS_CONV_MN_NW)
 {
 #ifdef __sparc
@@ -152,52 +78,6 @@
 }
 
 /***************************************************************/
-mlib_status mlib_conv2x2ext_u8(MLIB_PARAMS_CONV_EXT)
-{
-  return mlib_c_conv2x2ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-}
-
-/***************************************************************/
-mlib_status mlib_conv3x3ext_u8(MLIB_PARAMS_CONV_EXT)
-{
-#ifdef __sparc
-  return mlib_c_conv3x3ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-#else
-
-  if (mlib_ImageConvVersion(3, 3, scale, MLIB_BYTE) == 0)
-    return mlib_c_conv3x3ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-  else
-    return mlib_i_conv3x3ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-#endif /* __sparc */
-}
-
-/***************************************************************/
-mlib_status mlib_conv4x4ext_u8(MLIB_PARAMS_CONV_EXT)
-{
-  return mlib_c_conv4x4ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-}
-
-/***************************************************************/
-mlib_status mlib_conv5x5ext_u8(MLIB_PARAMS_CONV_EXT)
-{
-#ifdef __sparc
-  return mlib_c_conv5x5ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-#else
-
-  if (mlib_ImageConvVersion(5, 5, scale, MLIB_BYTE) == 0)
-    return mlib_c_conv5x5ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-  else
-    return mlib_i_conv5x5ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-#endif /* __sparc */
-}
-
-/***************************************************************/
-mlib_status mlib_conv7x7ext_u8(MLIB_PARAMS_CONV_EXT)
-{
-  return mlib_c_conv7x7ext_u8(MLIB_CALL_PARAMS_CONV_EXT);
-}
-
-/***************************************************************/
 mlib_status mlib_convMxNext_u8(MLIB_PARAMS_CONV_MN_EXT)
 {
 #ifdef __sparc
@@ -212,27 +92,3 @@
 }
 
 /***************************************************************/
-mlib_status mlib_conv2x2nw_s16(MLIB_PARAMS_CONV_NW)
-{
-  return mlib_c_conv2x2nw_s16(MLIB_CALL_PARAMS_CONV_NW);
-}
-
-/***************************************************************/
-mlib_status mlib_conv2x2nw_u16(MLIB_PARAMS_CONV_NW)
-{
-  return mlib_c_conv2x2nw_u16(MLIB_CALL_PARAMS_CONV_NW);
-}
-
-/***************************************************************/
-mlib_status mlib_conv2x2ext_s16(MLIB_PARAMS_CONV_EXT)
-{
-  return mlib_c_conv2x2ext_s16(MLIB_CALL_PARAMS_CONV_EXT);
-}
-
-/***************************************************************/
-mlib_status mlib_conv2x2ext_u16(MLIB_PARAMS_CONV_EXT)
-{
-  return mlib_c_conv2x2ext_u16(MLIB_CALL_PARAMS_CONV_EXT);
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1.h	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-#ifndef __MLIB_C_IMAGETHRESH1_H
-#define __MLIB_C_IMAGETHRESH1_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/***************************************************************/
-#define PARAMS                                                  \
-  void     *psrc,                                               \
-  void     *pdst,                                               \
-  mlib_s32 src_stride,                                          \
-  mlib_s32 dst_stride,                                          \
-  mlib_s32 width,                                               \
-  mlib_s32 height,                                              \
-  void     *__thresh,                                           \
-  void     *__ghigh,                                            \
-  void     *__glow
-
-void mlib_c_ImageThresh1_D641(PARAMS);
-void mlib_c_ImageThresh1_D642(PARAMS);
-void mlib_c_ImageThresh1_D643(PARAMS);
-void mlib_c_ImageThresh1_D644(PARAMS);
-void mlib_c_ImageThresh1_D641_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_D642_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_D643_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_D644_1B(PARAMS, mlib_s32 dbit_off);
-
-void mlib_c_ImageThresh1_F321(PARAMS);
-void mlib_c_ImageThresh1_F322(PARAMS);
-void mlib_c_ImageThresh1_F323(PARAMS);
-void mlib_c_ImageThresh1_F324(PARAMS);
-void mlib_c_ImageThresh1_F321_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_F322_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_F323_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_F324_1B(PARAMS, mlib_s32 dbit_off);
-
-void mlib_c_ImageThresh1_S321(PARAMS);
-void mlib_c_ImageThresh1_S322(PARAMS);
-void mlib_c_ImageThresh1_S323(PARAMS);
-void mlib_c_ImageThresh1_S324(PARAMS);
-void mlib_c_ImageThresh1_S321_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_S322_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_S323_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_S324_1B(PARAMS, mlib_s32 dbit_off);
-
-void mlib_c_ImageThresh1_S161(PARAMS);
-void mlib_c_ImageThresh1_S162(PARAMS);
-void mlib_c_ImageThresh1_S163(PARAMS);
-void mlib_c_ImageThresh1_S164(PARAMS);
-void mlib_c_ImageThresh1_S161_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_S162_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_S163_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_S164_1B(PARAMS, mlib_s32 dbit_off);
-
-void mlib_c_ImageThresh1_U161(PARAMS);
-void mlib_c_ImageThresh1_U162(PARAMS);
-void mlib_c_ImageThresh1_U163(PARAMS);
-void mlib_c_ImageThresh1_U164(PARAMS);
-void mlib_c_ImageThresh1_U161_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_U162_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_U163_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_U164_1B(PARAMS, mlib_s32 dbit_off);
-
-void mlib_c_ImageThresh1_U81(PARAMS);
-void mlib_c_ImageThresh1_U82(PARAMS);
-void mlib_c_ImageThresh1_U83(PARAMS);
-void mlib_c_ImageThresh1_U84(PARAMS);
-void mlib_c_ImageThresh1_U81_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_U82_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_U83_1B(PARAMS, mlib_s32 dbit_off);
-void mlib_c_ImageThresh1_U84_1B(PARAMS, mlib_s32 dbit_off);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* __MLIB_C_IMAGETHRESH1_H */
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1_U8.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,848 +0,0 @@
-/*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-/*
- * FUNCTION
- *      mlib_ImageThresh1 - thresholding
- *
- * SYNOPSIS
- *      mlib_status mlib_ImageThresh1(mlib_image       *dst,
- *                                    const mlib_image *src,
- *                                    const mlib_s32   *thresh,
- *                                    const mlib_s32   *ghigh,
- *                                    const mlib_s32   *glow);
- *
- * ARGUMENT
- *      dst     pointer to output image
- *      src     pointer to input image
- *      thresh  array of thresholds
- *      ghigh   array of values above thresholds
- *      glow    array of values below thresholds
- *
- * RESTRICTION
- *      The images must have the same size, and the same number
- *      of channels.
- *      The images can have 1, 2, 3, or 4 channels.
- *      The images can be in MLIB_BYTE, MLIB_SHORT or MLIB_INT data type.
- *      The type of the output image can be MLIB_BIT, or the same as the
- *      type of the input image.
- *
- * DESCRIPTION
- *      If the pixel band value is above the threshold for that channel,
- *      set the destination to the ghigh value for that channel.
- *      Otherwise, set the destination to the glow value for that channel.
- *
- *                      +- glow[c]   src[x][y][c] <= thresh[c]
- *      dst[x][y][c]  = |
- *                      +- ghigh[c]  src[x][y][c] >  thresh[c]
- */
-
-#include "mlib_image.h"
-#include "mlib_ImageCheck.h"
-#include "mlib_c_ImageThresh1.h"
-
-/***************************************************************/
-#define STYPE           mlib_u8
-#define TTYPE           mlib_s32
-#define T_SHIFT         31
-
-/***************************************************************/
-#define DO_THRESH(s0, th, gl, gh)                               \
-  (((gh) & (((th) - (TTYPE)(s0)) >> T_SHIFT)) |                 \
-   ((gl) &~ (((th) - (TTYPE)(s0)) >> T_SHIFT)))
-
-/***************************************************************/
-#define THRESH1_CMP_SHIFT(s0, th, sh)                           \
-  ((((th) - (s0)) >> T_SHIFT) & (1 << (sh)))
-
-/***************************************************************/
-#define STRIP(pd, ps, w, h, ch, th, gh, gl) {                   \
-    STYPE s0;                                                   \
-    for ( i = 0; i < h; i++ ) {                                 \
-      for (j = 0; j < w; j ++)  {                               \
-        for (k = 0; k < ch; k++) {                              \
-          s0 = ((STYPE*)ps)[i*src_stride + j*ch + k];           \
-          ((STYPE*)pd)[i*dst_stride + j*ch + k] =               \
-                (s0 <= th[k]) ? gl[k]: gh[k];                   \
-        }                                                       \
-      }                                                         \
-    }                                                           \
-  }
-
-/***************************************************************/
-#define INIT_THRESH0(n)                                         \
-  thresh0 = thresh[n];                                          \
-  ghigh0  = ghigh[n];                                           \
-  glow0   = glow[n]
-
-/***************************************************************/
-#define INIT_THRESH1(n)                                         \
-  thresh1 = thresh[n];                                          \
-  ghigh1  = ghigh[n];                                           \
-  glow1   = glow[n]
-
-/***************************************************************/
-#define INIT_THRESH2(n)                                         \
-  thresh2 = thresh[n];                                          \
-  ghigh2  = ghigh[n];                                           \
-  glow2   = glow[n]
-
-/***************************************************************/
-#define INIT_THRESH3(n)                                         \
-  thresh3 = thresh[n];                                          \
-  ghigh3  = ghigh[n];                                           \
-  glow3   = glow[n]
-
-/***************************************************************/
-#define THRESH0(s0) DO_THRESH(s0, thresh0, glow0, ghigh0)
-#define THRESH1(s0) DO_THRESH(s0, thresh1, glow1, ghigh1)
-#define THRESH2(s0) DO_THRESH(s0, thresh2, glow2, ghigh2)
-#define THRESH3(s0) DO_THRESH(s0, thresh3, glow3, ghigh3)
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U81(PARAMS)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  STYPE *pdst_row = pdst;
-  TTYPE thresh0;
-  TTYPE ghigh0;
-  TTYPE glow0;
-  mlib_s32 i, j, k;
-
-  if (width < 16) {
-    STRIP(pdst, psrc, width, height, 1, thresh, ghigh, glow);
-    return;
-  }
-
-  INIT_THRESH0(0);
-
-  for (i = 0; i < height; i++) {
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (j = 0; j <= (width - 8); j += 8) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH0(psrc_row[j + 1]);
-      pdst_row[j + 2] = THRESH0(psrc_row[j + 2]);
-      pdst_row[j + 3] = THRESH0(psrc_row[j + 3]);
-      pdst_row[j + 4] = THRESH0(psrc_row[j + 4]);
-      pdst_row[j + 5] = THRESH0(psrc_row[j + 5]);
-      pdst_row[j + 6] = THRESH0(psrc_row[j + 6]);
-      pdst_row[j + 7] = THRESH0(psrc_row[j + 7]);
-    }
-
-    for (; j < width; j++) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U82(PARAMS)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  STYPE *pdst_row = pdst;
-  TTYPE thresh0, thresh1;
-  TTYPE ghigh0, ghigh1;
-  TTYPE glow0, glow1;
-  mlib_s32 i, j, k;
-
-  if (width < 16) {
-    STRIP(pdst, psrc, width, height, 2, thresh, ghigh, glow);
-    return;
-  }
-
-  INIT_THRESH0(0);
-  INIT_THRESH1(1);
-  width <<= 1;
-
-  for (i = 0; i < height; i++) {
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (j = 0; j <= (width - 8); j += 8) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH1(psrc_row[j + 1]);
-      pdst_row[j + 2] = THRESH0(psrc_row[j + 2]);
-      pdst_row[j + 3] = THRESH1(psrc_row[j + 3]);
-      pdst_row[j + 4] = THRESH0(psrc_row[j + 4]);
-      pdst_row[j + 5] = THRESH1(psrc_row[j + 5]);
-      pdst_row[j + 6] = THRESH0(psrc_row[j + 6]);
-      pdst_row[j + 7] = THRESH1(psrc_row[j + 7]);
-    }
-
-    for (; j < width; j += 2) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH1(psrc_row[j + 1]);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U83(PARAMS)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  STYPE *pdst_row = pdst;
-  TTYPE thresh0, thresh1, thresh2;
-  TTYPE ghigh0, ghigh1, ghigh2;
-  TTYPE glow0, glow1, glow2;
-  mlib_s32 i, j, k;
-
-  if (width < 16) {
-    STRIP(pdst, psrc, width, height, 3, thresh, ghigh, glow);
-    return;
-  }
-
-  width = 3 * width;
-  INIT_THRESH0(0);
-  INIT_THRESH1(1);
-  INIT_THRESH2(2);
-
-  for (i = 0; i < height; i++) {
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (j = 0; j <= (width - 12); j += 12) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH1(psrc_row[j + 1]);
-      pdst_row[j + 2] = THRESH2(psrc_row[j + 2]);
-      pdst_row[j + 3] = THRESH0(psrc_row[j + 3]);
-      pdst_row[j + 4] = THRESH1(psrc_row[j + 4]);
-      pdst_row[j + 5] = THRESH2(psrc_row[j + 5]);
-      pdst_row[j + 6] = THRESH0(psrc_row[j + 6]);
-      pdst_row[j + 7] = THRESH1(psrc_row[j + 7]);
-      pdst_row[j + 8] = THRESH2(psrc_row[j + 8]);
-      pdst_row[j + 9] = THRESH0(psrc_row[j + 9]);
-      pdst_row[j + 10] = THRESH1(psrc_row[j + 10]);
-      pdst_row[j + 11] = THRESH2(psrc_row[j + 11]);
-    }
-
-    for (; j < width; j += 3) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH1(psrc_row[j + 1]);
-      pdst_row[j + 2] = THRESH2(psrc_row[j + 2]);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U84(PARAMS)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  STYPE *pdst_row = pdst;
-  TTYPE thresh0, thresh1, thresh2, thresh3;
-  TTYPE ghigh0, ghigh1, ghigh2, ghigh3;
-  TTYPE glow0, glow1, glow2, glow3;
-  mlib_s32 i, j, k;
-
-  if (width < 16) {
-    STRIP(pdst, psrc, width, height, 4, thresh, ghigh, glow);
-    return;
-  }
-
-  INIT_THRESH0(0);
-  INIT_THRESH1(1);
-  INIT_THRESH2(2);
-  INIT_THRESH3(3);
-
-  width *= 4;
-
-  for (i = 0; i < height; i++) {
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (j = 0; j <= (width - 8); j += 8) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH1(psrc_row[j + 1]);
-      pdst_row[j + 2] = THRESH2(psrc_row[j + 2]);
-      pdst_row[j + 3] = THRESH3(psrc_row[j + 3]);
-      pdst_row[j + 4] = THRESH0(psrc_row[j + 4]);
-      pdst_row[j + 5] = THRESH1(psrc_row[j + 5]);
-      pdst_row[j + 6] = THRESH2(psrc_row[j + 6]);
-      pdst_row[j + 7] = THRESH3(psrc_row[j + 7]);
-    }
-
-    if (j < width) {
-      pdst_row[j] = THRESH0(psrc_row[j]);
-      pdst_row[j + 1] = THRESH1(psrc_row[j + 1]);
-      pdst_row[j + 2] = THRESH2(psrc_row[j + 2]);
-      pdst_row[j + 3] = THRESH3(psrc_row[j + 3]);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U81_1B(PARAMS,
-                                mlib_s32 dbit_off)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  mlib_u8 *pdst_row = pdst;
-  TTYPE thresh0 = thresh[0];
-  mlib_s32 mhigh, mlow, emask, dst0;
-  mlib_s32 i, j, jbit, l;
-
-  mhigh = (ghigh[0] > 0) ? 0xff : 0;
-  mlow = (glow[0] > 0) ? 0xff : 0;
-
-  for (i = 0; i < height; i++) {
-    j = 0;
-    jbit = 0;
-
-    if (dbit_off) {
-      mlib_s32 nume = 8 - dbit_off;
-
-      if (nume > width)
-        nume = width;
-      dst0 = 0;
-      emask = 0;
-
-      for (; j < nume; j++) {
-        emask |= (1 << (7 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-      }
-
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask);
-      jbit++;
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (; j <= (width - 16); j += 16) {
-      dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh0, 6) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh0, 4) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh0, 2) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh0, 0);
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      *(pdst_row + jbit) = (mlib_u8) dst0;
-      jbit++;
-      dst0 = THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh0, 7) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh0, 6) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh0, 5) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh0, 4) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh0, 2) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh0, 1) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh0, 0);
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      *(pdst_row + jbit) = (mlib_u8) dst0;
-      jbit++;
-    }
-
-    if (width - j >= 8) {
-      dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh0, 6) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh0, 4) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh0, 2) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh0, 0);
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      *(pdst_row + jbit) = (mlib_u8) dst0;
-      jbit++;
-      j += 8;
-    }
-
-    if (j < width) {
-      dst0 = 0;
-      l = 7;
-      for (; j < width; j++) {
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l);
-        l--;
-      }
-
-      emask = (0xFF << (l + 1));
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[jbit] = (dst0 & emask) | (pdst_row[jbit] & ~emask);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U82_1B(PARAMS,
-                                mlib_s32 dbit_off)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  mlib_u8 *pdst_row = pdst;
-  TTYPE thresh0 = thresh[0], thresh1 = thresh[1];
-  mlib_s32 mhigh0, mlow0, mhigh, mlow, emask, dst0;
-  mlib_s32 i, j, jbit, l;
-
-  mhigh0 = (ghigh[0] > 0) ? 0xaaa : 0;
-  mhigh0 |= (ghigh[1] > 0) ? 0x555 : 0;
-  mlow0 = (glow[0] > 0) ? 0xaaa : 0;
-  mlow0 |= (glow[1] > 0) ? 0x555 : 0;
-
-  width *= 2;
-
-  for (i = 0; i < height; i++) {
-    thresh0 = thresh[0];
-    thresh1 = thresh[1];
-
-    j = 0;
-    jbit = 0;
-    mhigh = mhigh0 >> (dbit_off & 1);
-    mlow = mlow0 >> (dbit_off & 1);
-
-    if (dbit_off) {
-      mlib_s32 nume = 8 - dbit_off;
-
-      if (nume > width)
-        nume = width;
-      dst0 = 0;
-      emask = 0;
-
-      for (; j <= (nume - 2); j += 2) {
-        emask |= (3 << (6 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6 - (dbit_off + j));
-      }
-
-      if (j < nume) {
-        emask |= (1 << (7 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-        /* swap threshes */
-        thresh0 = thresh[1];
-        thresh1 = thresh[0];
-        j++;
-      }
-
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask);
-      jbit++;
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (; j <= (width - 16); j += 16) {
-      dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh1, 4) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh1, 0);
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      *(pdst_row + jbit) = (mlib_u8) dst0;
-      jbit++;
-      dst0 = THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh0, 7) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh1, 6) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh0, 5) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh1, 4) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh1, 2) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh0, 1) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh1, 0);
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      *(pdst_row + jbit) = (mlib_u8) dst0;
-      jbit++;
-    }
-
-    if (width - j >= 8) {
-      dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh1, 4) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) |
-        THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh1, 0);
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      *(pdst_row + jbit) = (mlib_u8) dst0;
-      jbit++;
-      j += 8;
-    }
-
-    if (j < width) {
-      dst0 = 0;
-      l = 7;
-      for (; j <= (width - 2); j += 2) {
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l);
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, l - 1);
-        l -= 2;
-      }
-
-      if (j < width) {
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l);
-        l--;
-      }
-
-      emask = (0xFF << (l + 1));
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[jbit] = (dst0 & emask) | (pdst_row[jbit] & ~emask);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U83_1B(PARAMS,
-                                mlib_s32 dbit_off)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  mlib_u8 *pdst_row = pdst;
-  TTYPE thresh0, thresh1, thresh2, threshT;
-  mlib_s32 mhigh = 0, mlow = 0;
-  mlib_s32 mhigh0, mlow0, mhigh1, mlow1, mhigh2, mlow2, emask, dst0, dst1;
-  mlib_s32 i, j, jbit, k, l;
-
-  if (ghigh[0] > 0)
-    mhigh = 0x492492;
-
-  if (ghigh[1] > 0)
-    mhigh |= 0x249249;
-
-  if (ghigh[2] > 0)
-    mhigh |= 0x924924;
-
-  if (glow[0] > 0)
-    mlow = 0x492492;
-
-  if (glow[1] > 0)
-    mlow |= 0x249249;
-
-  if (glow[2] > 0)
-    mlow |= 0x924924;
-
-  width = 3 * width;
-
-  for (i = 0; i < height; i++) {
-    thresh0 = thresh[0];
-    thresh1 = thresh[1];
-    thresh2 = thresh[2];
-
-    j = 0;
-    jbit = 0;
-    mhigh0 = mhigh >> (dbit_off & 7);
-    mlow0 = mlow >> (dbit_off & 7);
-    mhigh1 = mhigh0 >> 1;
-    mlow1 = mlow0 >> 1;
-    mhigh2 = mhigh0 >> 2;
-    mlow2 = mlow0 >> 2;
-
-    if (dbit_off) {
-      mlib_s32 nume = 8 - dbit_off;
-
-      if (nume > width)
-        nume = width;
-      dst0 = 0;
-      emask = 0;
-
-      for (; j <= (nume - 3); j += 3) {
-        emask |= (7 << (5 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6 - (dbit_off + j));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5 - (dbit_off + j));
-      }
-
-      for (; j < nume; j++) {
-        emask |= (1 << (7 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-        /* swap threshes */
-        threshT = thresh0;
-        thresh0 = thresh1;
-        thresh1 = thresh2;
-        thresh2 = threshT;
-      }
-
-      dst0 = (mhigh0 & dst0) | (mlow0 & ~dst0);
-      pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask);
-      jbit++;
-
-      mhigh0 = mhigh >> (9 - nume);
-      mlow0 = mlow >> (9 - nume);
-      mhigh1 = mhigh0 >> 1;
-      mlow1 = mlow0 >> 1;
-      mhigh2 = mhigh0 >> 2;
-      mlow2 = mlow0 >> 2;
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (; j <= (width - 24); j += 24) {
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh0, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh1, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh2, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh1, 0));
-      dst0 = (mhigh0 & dst0) | (mlow0 & ~dst0);
-      *(pdst_row + jbit) = dst0;
-      jbit++;
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh2, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh0, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh1, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh2, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh1, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh2, 1) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh0, 0));
-      dst0 = (mhigh1 & dst0) | (mlow1 & ~dst0);
-      *(pdst_row + jbit) = dst0;
-      jbit++;
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j + 16], thresh1, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 17], thresh2, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 18], thresh0, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 19], thresh1, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 20], thresh2, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 21], thresh0, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 22], thresh1, 1) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 23], thresh2, 0));
-      dst0 = (mhigh2 & dst0) | (mlow2 & ~dst0);
-      *(pdst_row + jbit) = dst0;
-      jbit++;
-    }
-
-    if (j < width) {
-      k = width - j;
-      dst0 = 0;
-      l = 31;
-      for (; j < width; j += 3) {
-        dst0 |= (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l) |
-                 THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, l - 1) |
-                 THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, l - 2));
-        l -= 3;
-      }
-
-      l = (k + 7) >> 3;
-      k = (l << 3) - k;
-      emask = (0xFF << k);
-
-      if (l == 3) {
-        dst1 = dst0 >> 24;
-        dst1 = (mhigh0 & dst1) | (mlow0 & ~dst1);
-        pdst_row[jbit] = dst1;
-        dst1 = (dst0 >> 16);
-        dst1 = (mhigh1 & dst1) | (mlow1 & ~dst1);
-        pdst_row[jbit + 1] = dst1;
-        dst1 = (dst0 >> 8);
-        dst1 = (mhigh2 & dst1) | (mlow2 & ~dst1);
-        pdst_row[jbit + 2] = (dst1 & emask) | (pdst_row[jbit + 2] & ~emask);
-      }
-      else if (l == 2) {
-        dst1 = dst0 >> 24;
-        dst1 = (mhigh0 & dst1) | (mlow0 & ~dst1);
-        pdst_row[jbit] = dst1;
-        dst1 = (dst0 >> 16);
-        dst1 = (mhigh1 & dst1) | (mlow1 & ~dst1);
-        pdst_row[jbit + 1] = (dst1 & emask) | (pdst_row[jbit + 1] & ~emask);
-      }
-      else {
-        dst1 = dst0 >> 24;
-        dst1 = (mhigh0 & dst1) | (mlow0 & ~dst1);
-        pdst_row[jbit] = (dst1 & emask) | (pdst_row[jbit] & ~emask);
-      }
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
-void mlib_c_ImageThresh1_U84_1B(PARAMS,
-                                mlib_s32 dbit_off)
-{
-  mlib_s32 *thresh = (void *)__thresh;
-  mlib_s32 *ghigh = (void *)__ghigh;
-  mlib_s32 *glow = (void *)__glow;
-  STYPE *psrc_row = psrc;
-  mlib_u8 *pdst_row = pdst;
-  TTYPE thresh0, thresh1, thresh2, thresh3, threshT;
-  mlib_s32 mhigh0, mlow0, mhigh, mlow, emask, dst0;
-  mlib_s32 i, j, jbit;
-
-  mhigh0 = (ghigh[0] > 0) ? 0x8888 : 0;
-  mhigh0 |= (ghigh[1] > 0) ? 0x4444 : 0;
-  mhigh0 |= (ghigh[2] > 0) ? 0x2222 : 0;
-  mhigh0 |= (ghigh[3] > 0) ? 0x1111 : 0;
-
-  mlow0 = (glow[0] > 0) ? 0x8888 : 0;
-  mlow0 |= (glow[1] > 0) ? 0x4444 : 0;
-  mlow0 |= (glow[2] > 0) ? 0x2222 : 0;
-  mlow0 |= (glow[3] > 0) ? 0x1111 : 0;
-
-  width *= 4;
-
-  for (i = 0; i < height; i++) {
-    thresh0 = thresh[0];
-    thresh1 = thresh[1];
-    thresh2 = thresh[2];
-    thresh3 = thresh[3];
-
-    j = 0;
-    jbit = 0;
-    mhigh = mhigh0 >> dbit_off;
-    mlow = mlow0 >> dbit_off;
-
-    if (dbit_off) {
-      mlib_s32 nume = 8 - dbit_off;
-
-      if (nume > width)
-        nume = width;
-      dst0 = 0;
-      emask = 0;
-
-      for (; j <= (nume - 4); j += 4) {
-        emask |= (0xf << (4 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6 - (dbit_off + j));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5 - (dbit_off + j));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4 - (dbit_off + j));
-      }
-
-      for (; j < nume; j++) {
-        emask |= (1 << (7 - (dbit_off + j)));
-        dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j));
-        /* swap threshes */
-        threshT = thresh0;
-        thresh0 = thresh1;
-        thresh1 = thresh2;
-        thresh2 = thresh3;
-        thresh3 = threshT;
-      }
-
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask);
-      jbit++;
-    }
-
-#ifdef __SUNPRO_C
-#pragma pipeloop(0)
-#endif /* __SUNPRO_C */
-    for (; j <= (width - 16); j += 16) {
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh2, 1) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh3, 0));
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[jbit] = dst0;
-      jbit++;
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh0, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh1, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh2, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh3, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh1, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh2, 1) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh3, 0));
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[jbit] = dst0;
-      jbit++;
-    }
-
-    if (j <= width - 8) {
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh2, 1) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh3, 0));
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[jbit] = dst0;
-      jbit++;
-      j += 8;
-    }
-
-    if (j < width) {
-      dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) |
-              THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh2, 1));
-
-      emask = (0xFF << (8 - (width - j)));
-      dst0 = (mhigh & dst0) | (mlow & ~dst0);
-      pdst_row[jbit] = (dst0 & emask) | (pdst_row[jbit] & ~emask);
-    }
-
-    psrc_row += src_stride;
-    pdst_row += dst_stride;
-  }
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_image.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_image.h	Fri May 13 11:31:05 2016 +0300
@@ -35,7 +35,6 @@
 #include <mlib_sys.h>
 #include <mlib_image_types.h>
 #include <mlib_image_proto.h>
-#include <mlib_image_blend_proto.h>
 #include <mlib_image_get.h>
 
 #endif  /* MLIB_IMAGE_H */
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_image_blend_proto.h	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1448 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-#ifndef __ORIG_MLIB_IMAGE_BLEND_PROTO_H
-#define __ORIG_MLIB_IMAGE_BLEND_PROTO_H
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_image_types.h>
-#if defined ( __MEDIALIB_OLD_NAMES_ADDED )
-#include <../include/mlib_image_blend_proto.h>
-#endif /* defined ( __MEDIALIB_OLD_NAMES_ADDED ) */
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#if defined ( _MSC_VER )
-#if ! defined ( __MEDIALIB_OLD_NAMES )
-#define __MEDIALIB_OLD_NAMES
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-#endif /* defined ( _MSC_VER ) */
-
-/***********************************************************************
-
-    NOTE: f = min(ALPHAsrc2, 1 - ALPHAsrc1)
-          f = min(ALPHAscr2, 1 - ALPHAsrc1dst) for In-place function
-          ALPHA = (ALPHA, ALPHA, ALPHA, ALPHA)
-
-************************************************************************/
-
-/* dst = 0 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_ZERO mlib_ImageBlend_ZERO_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_ZERO(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = 0 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_ZERO_Inp mlib_ImageBlend_ZERO_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_ZERO_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_ONE mlib_ImageBlend_ZERO_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_ONE(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_ONE_Inp mlib_ImageBlend_ZERO_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_ONE_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src2 * src1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_DC mlib_ImageBlend_ZERO_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_DC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src2 * src1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_DC_Inp mlib_ImageBlend_ZERO_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_DC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src2 * (1 - src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_OMDC mlib_ImageBlend_ZERO_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_OMDC(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src2 * (1 - src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_OMDC_Inp mlib_ImageBlend_ZERO_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_OMDC_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_SA mlib_ImageBlend_ZERO_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_SA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_SA_Inp mlib_ImageBlend_ZERO_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_SA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_OMSA mlib_ImageBlend_ZERO_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_OMSA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_OMSA_Inp mlib_ImageBlend_ZERO_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_OMSA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src2 * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_DA mlib_ImageBlend_ZERO_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_DA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src2 * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_DA_Inp mlib_ImageBlend_ZERO_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_DA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src2 * (1 - ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_OMDA mlib_ImageBlend_ZERO_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_OMDA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src2 * (1 - ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_OMDA_Inp mlib_ImageBlend_ZERO_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_OMDA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_SAS mlib_ImageBlend_ZERO_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_SAS(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ZERO_SAS_Inp mlib_ImageBlend_ZERO_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ZERO_SAS_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_ZERO mlib_ImageBlend_ONE_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_ZERO(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_ZERO_Inp mlib_ImageBlend_ONE_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_ZERO_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_ONE mlib_ImageBlend_ONE_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_ONE(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_ONE_Inp mlib_ImageBlend_ONE_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_ONE_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * (1 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_DC mlib_ImageBlend_ONE_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_DC(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_DC_Inp mlib_ImageBlend_ONE_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_DC_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src2 + src1 * (1 - src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_OMDC mlib_ImageBlend_ONE_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_OMDC(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src2 + src1dst * (1 - src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_OMDC_Inp mlib_ImageBlend_ONE_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_OMDC_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 + src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_SA mlib_ImageBlend_ONE_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_SA(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_SA_Inp mlib_ImageBlend_ONE_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_SA_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 + src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_OMSA mlib_ImageBlend_ONE_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_OMSA(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_OMSA_Inp mlib_ImageBlend_ONE_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_OMSA_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 + src2 * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_DA mlib_ImageBlend_ONE_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_DA(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_DA_Inp mlib_ImageBlend_ONE_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_DA_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 + src2 * (1 - ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_OMDA mlib_ImageBlend_ONE_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_OMDA(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * (1 - ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_OMDA_Inp mlib_ImageBlend_ONE_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_OMDA_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_SAS mlib_ImageBlend_ONE_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_SAS(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_ONE_SAS_Inp mlib_ImageBlend_ONE_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_ONE_SAS_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_ZERO mlib_ImageBlend_SC_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_ZERO(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_ZERO_Inp mlib_ImageBlend_SC_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_ZERO_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = (src1 + 1) * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_ONE mlib_ImageBlend_SC_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_ONE(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = (src1dst + 1) * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_ONE_Inp mlib_ImageBlend_SC_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_ONE_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = 2 * src1 * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_DC mlib_ImageBlend_SC_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_DC(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = 2 * src1dst * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_DC_Inp mlib_ImageBlend_SC_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_DC_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_OMDC mlib_ImageBlend_SC_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_OMDC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_OMDC_Inp mlib_ImageBlend_SC_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_OMDC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src2 * (src1 + ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_SA mlib_ImageBlend_SC_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_SA(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = src2 * (src1dst + ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_SA_Inp mlib_ImageBlend_SC_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_SA_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src2 * (1 - ALPHAsrc2 + src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_OMSA mlib_ImageBlend_SC_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_OMSA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src2 * (1 - ALPHAsrc2 + src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_OMSA_Inp mlib_ImageBlend_SC_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_OMSA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src2 * (src1 + ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_DA mlib_ImageBlend_SC_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_DA(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = src2 * (src1dst + ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_DA_Inp mlib_ImageBlend_SC_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_DA_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src2 * (1 - ALPHAsrc1 + src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_OMDA mlib_ImageBlend_SC_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_OMDA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src2 * (1 - ALPHAsrc1dst + src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_OMDA_Inp mlib_ImageBlend_SC_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_OMDA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src2 * ((f, f, f, 1) + src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_SAS mlib_ImageBlend_SC_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_SAS(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src2 * ((f, f, f, 1) + src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SC_SAS_Inp mlib_ImageBlend_SC_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SC_SAS_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 * (1 - src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_ZERO mlib_ImageBlend_OMSC_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_ZERO(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_ZERO_Inp mlib_ImageBlend_OMSC_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_ZERO_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 + src2 * (1 - src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_ONE mlib_ImageBlend_OMSC_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_ONE(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * (1 - src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_ONE_Inp mlib_ImageBlend_OMSC_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_ONE_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_DC mlib_ImageBlend_OMSC_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_DC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_DC_Inp mlib_ImageBlend_OMSC_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_DC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 + src2 - 2 * src1 * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_OMDC mlib_ImageBlend_OMSC_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_OMDC(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 - 2 * src1dst * src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_OMDC_Inp mlib_ImageBlend_OMSC_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_OMDC_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 + src2 * (ALPHAsrc2 - src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_SA mlib_ImageBlend_OMSC_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_SA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * (ALPHAsrc2 - src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_SA_Inp mlib_ImageBlend_OMSC_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_SA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 + src2 - src2 * (src1 + ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_OMSA mlib_ImageBlend_OMSC_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_OMSA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 - src2 * (src1dst + ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_OMSA_Inp mlib_ImageBlend_OMSC_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_OMSA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 + src2 * (ALPHAsrc1 - src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_DA mlib_ImageBlend_OMSC_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_DA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 * (ALPHAsrc1dst - src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_DA_Inp mlib_ImageBlend_OMSC_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_DA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 + src2 - src2 * (src1 + ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_OMDA mlib_ImageBlend_OMSC_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_OMDA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst + src2 - src2 * (src1dst + ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_OMDA_Inp mlib_ImageBlend_OMSC_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_OMDA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 +  src2 * ((f, f, f, 1) - src1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_SAS mlib_ImageBlend_OMSC_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_SAS(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst +  src2 * ((f, f, f, 1) - src1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSC_SAS_Inp mlib_ImageBlend_OMSC_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSC_SAS_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_ZERO mlib_ImageBlend_SA_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_ZERO(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_ZERO_Inp mlib_ImageBlend_SA_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_ZERO_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc2 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_ONE mlib_ImageBlend_SA_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_ONE(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc2 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_ONE_Inp mlib_ImageBlend_SA_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_ONE_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 * (ALPHAsrc2 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_DC mlib_ImageBlend_SA_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_DC(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = src1dst * (ALPHAsrc2 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_DC_Inp mlib_ImageBlend_SA_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_DC_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src1 * (ALPHAsrc2 - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_OMDC mlib_ImageBlend_SA_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_OMDC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * (ALPHAsrc2 - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_OMDC_Inp mlib_ImageBlend_SA_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_OMDC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = (src1 + src2) * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_SA mlib_ImageBlend_SA_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_SA(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = (src1dst + src2) * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_SA_Inp mlib_ImageBlend_SA_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_SA_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = (src1 - src2) * ALPHAsrc2 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_OMSA mlib_ImageBlend_SA_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_OMSA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = (src1dst - src2) * ALPHAsrc2 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_OMSA_Inp mlib_ImageBlend_SA_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_OMSA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc2 + src2 * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_DA mlib_ImageBlend_SA_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_DA(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc2 + src2 * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_DA_Inp mlib_ImageBlend_SA_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_DA_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc2 + src2 * (1 - ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_OMDA mlib_ImageBlend_SA_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_OMDA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc2 + src2 * (1 - ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_OMDA_Inp mlib_ImageBlend_SA_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_OMDA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc2 + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_SAS mlib_ImageBlend_SA_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_SAS(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc2 + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_SA_SAS_Inp mlib_ImageBlend_SA_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_SA_SAS_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_ZERO mlib_ImageBlend_OMSA_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_ZERO(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_ZERO_Inp mlib_ImageBlend_OMSA_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_ZERO_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_ONE mlib_ImageBlend_OMSA_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_ONE(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_ONE_Inp mlib_ImageBlend_OMSA_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_ONE_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_DC mlib_ImageBlend_OMSA_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_DC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_DC_Inp mlib_ImageBlend_OMSA_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_DC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2 - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_OMDC mlib_ImageBlend_OMSA_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_OMDC(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2 - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_OMDC_Inp mlib_ImageBlend_OMSA_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_OMDC_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 + (src2 - src1) * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_SA mlib_ImageBlend_OMSA_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_SA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst + (src2 - src1dst) * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_SA_Inp mlib_ImageBlend_OMSA_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_SA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = (src1 + src2) * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_OMSA mlib_ImageBlend_OMSA_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_OMSA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = (src1dst + src2) * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_OMSA_Inp mlib_ImageBlend_OMSA_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_OMSA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2) + src2 * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_DA mlib_ImageBlend_OMSA_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_DA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_DA_Inp mlib_ImageBlend_OMSA_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_DA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2) + src2 * (1 - ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_OMDA mlib_ImageBlend_OMSA_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_OMDA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 * (1 - ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_OMDA_Inp mlib_ImageBlend_OMSA_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_OMDA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc2) + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_SAS mlib_ImageBlend_OMSA_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_SAS(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMSA_SAS_Inp mlib_ImageBlend_OMSA_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMSA_SAS_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_ZERO mlib_ImageBlend_DA_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_ZERO(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_ZERO_Inp mlib_ImageBlend_DA_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_ZERO_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc1 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_ONE mlib_ImageBlend_DA_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_ONE(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc1dst + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_ONE_Inp mlib_ImageBlend_DA_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_ONE_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 * (ALPHAsrc1 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_DC mlib_ImageBlend_DA_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_DC(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = src1dst * (ALPHAsrc1dst + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_DC_Inp mlib_ImageBlend_DA_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_DC_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src1 * (ALPHAsrc1 - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_OMDC mlib_ImageBlend_DA_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_OMDC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * (ALPHAsrc1dst - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_OMDC_Inp mlib_ImageBlend_DA_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_OMDC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc1 + src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_SA mlib_ImageBlend_DA_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_SA(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc1dst + src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_SA_Inp mlib_ImageBlend_DA_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_SA_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc1 + src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_OMSA mlib_ImageBlend_DA_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_OMSA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc1dst + src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_OMSA_Inp mlib_ImageBlend_DA_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_OMSA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = (src1 + src2) * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_DA mlib_ImageBlend_DA_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_DA(mlib_image *dst,
-                                     const mlib_image *src1,
-                                     const mlib_image *src2,
-                                     mlib_s32 cmask);
-
-/* src1dst = (src1dst + src2) * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_DA_Inp mlib_ImageBlend_DA_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_DA_Inp(mlib_image *src1dst,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* dst = (src1 - src2) * ALPHAsrc1 + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_OMDA mlib_ImageBlend_DA_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_OMDA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = (src1dst - src2) * ALPHAsrc1dst + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_OMDA_Inp mlib_ImageBlend_DA_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_OMDA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * ALPHAsrc1 + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_SAS mlib_ImageBlend_DA_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_SAS(mlib_image *dst,
-                                      const mlib_image *src1,
-                                      const mlib_image *src2,
-                                      mlib_s32 cmask);
-
-/* src1dst = src1dst * ALPHAsrc1dst + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_DA_SAS_Inp mlib_ImageBlend_DA_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_DA_SAS_Inp(mlib_image *src1dst,
-                                          const mlib_image *src2,
-                                          mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_ZERO mlib_ImageBlend_OMDA_ZERO
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_ZERO(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_ZERO_Inp mlib_ImageBlend_OMDA_ZERO_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_ZERO_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_ONE mlib_ImageBlend_OMDA_ONE
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_ONE(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_ONE_Inp mlib_ImageBlend_OMDA_ONE_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_ONE_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1 + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_DC mlib_ImageBlend_OMDA_DC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_DC(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst + src2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_DC_Inp mlib_ImageBlend_OMDA_DC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_DC_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1 - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_OMDC mlib_ImageBlend_OMDA_OMDC
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_OMDC(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst - src2) + src2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_OMDC_Inp mlib_ImageBlend_OMDA_OMDC_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_OMDC_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1) + src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_SA mlib_ImageBlend_OMDA_SA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_SA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 * ALPHAsrc2 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_SA_Inp mlib_ImageBlend_OMDA_SA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_SA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1) + src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_OMSA mlib_ImageBlend_OMDA_OMSA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_OMSA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 * (1 - ALPHAsrc2) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_OMSA_Inp mlib_ImageBlend_OMDA_OMSA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_OMSA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 + (src2 - src1) * ALPHAsrc1 */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_DA mlib_ImageBlend_OMDA_DA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_DA(mlib_image *dst,
-                                       const mlib_image *src1,
-                                       const mlib_image *src2,
-                                       mlib_s32 cmask);
-
-/* src1dst = src1dst + (src2 - src1dst) * ALPHAsrc1dst */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_DA_Inp mlib_ImageBlend_OMDA_DA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_DA_Inp(mlib_image *src1dst,
-                                           const mlib_image *src2,
-                                           mlib_s32 cmask);
-
-/* dst = (src1 + src2) * (1 - ALPHAsrc1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_OMDA mlib_ImageBlend_OMDA_OMDA
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_OMDA(mlib_image *dst,
-                                         const mlib_image *src1,
-                                         const mlib_image *src2,
-                                         mlib_s32 cmask);
-
-/* src1dst = (src1dst + src2) * (1 - ALPHAsrc1dst) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_OMDA_Inp mlib_ImageBlend_OMDA_OMDA_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_OMDA_Inp(mlib_image *src1dst,
-                                             const mlib_image *src2,
-                                             mlib_s32 cmask);
-
-/* dst = src1 * (1 - ALPHAsrc1) + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_SAS mlib_ImageBlend_OMDA_SAS
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_SAS(mlib_image *dst,
-                                        const mlib_image *src1,
-                                        const mlib_image *src2,
-                                        mlib_s32 cmask);
-
-/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 * (f, f, f, 1) */
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageBlend_OMDA_SAS_Inp mlib_ImageBlend_OMDA_SAS_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageBlend_OMDA_SAS_Inp(mlib_image *src1dst,
-                                            const mlib_image *src2,
-                                            mlib_s32 cmask);
-
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageComposite mlib_ImageComposite
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageComposite(mlib_image *dst,
-                                   const mlib_image *src1,
-                                   const mlib_image *src2,
-                                   mlib_blend bsrc1,
-                                   mlib_blend bsrc2,
-                                   mlib_s32 cmask);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageComposite_Inp mlib_ImageComposite_Inp
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageComposite_Inp(mlib_image *src1dst,
-                                       const mlib_image *src2,
-                                       mlib_blend bsrc1,
-                                       mlib_blend bsrc2,
-                                       mlib_s32 cmask);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* __ORIG_MLIB_IMAGE_BLEND_PROTO_H */
--- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_image_proto.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_image_proto.h	Fri May 13 11:31:05 2016 +0300
@@ -1430,17 +1430,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageAffineIndex mlib_ImageAffineIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageAffineIndex(mlib_image *dst,
-                                     const mlib_image *src,
-                                     const mlib_d64 *mtx,
-                                     mlib_filter filter,
-                                     mlib_edge edge,
-                                     const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageAffineTable mlib_ImageAffineTable
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageAffineTable(mlib_image *dst,
@@ -1471,17 +1460,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageAffineTransformIndex mlib_ImageAffineTransformIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageAffineTransformIndex(mlib_image *dst,
-                                              const mlib_image *src,
-                                              const mlib_d64 *mtx,
-                                              mlib_filter filter,
-                                              mlib_edge edge,
-                                              const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageAffineTransform_Fp mlib_ImageAffineTransform_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageAffineTransform_Fp(mlib_image *dst,
@@ -1825,19 +1803,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageRotateIndex mlib_ImageRotateIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageRotateIndex(mlib_image *dst,
-                                     const mlib_image *src,
-                                     mlib_d64 angle,
-                                     mlib_d64 xcenter,
-                                     mlib_d64 ycenter,
-                                     mlib_filter filter,
-                                     mlib_edge edge,
-                                     const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageRotate_Fp mlib_ImageRotate_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageRotate_Fp(mlib_image *dst,
@@ -1887,16 +1852,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageZoomIn2XIndex mlib_ImageZoomIn2XIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageZoomIn2XIndex(mlib_image *dst,
-                                       const mlib_image *src,
-                                       mlib_filter filter,
-                                       mlib_edge edge,
-                                       const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageZoomIn2X_Fp mlib_ImageZoomIn2X_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageZoomIn2X_Fp(mlib_image *dst,
@@ -1915,16 +1870,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageZoomOut2XIndex mlib_ImageZoomOut2XIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageZoomOut2XIndex(mlib_image *dst,
-                                        const mlib_image *src,
-                                        mlib_filter filter,
-                                        mlib_edge edge,
-                                        const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageZoomOut2X_Fp mlib_ImageZoomOut2X_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageZoomOut2X_Fp(mlib_image *dst,
@@ -2012,18 +1957,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageZoomIndex mlib_ImageZoomIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageZoomIndex(mlib_image *dst,
-                                   const mlib_image *src,
-                                   mlib_d64 zoomx,
-                                   mlib_d64 zoomy,
-                                   mlib_filter filter,
-                                   mlib_edge edge,
-                                   const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageZoom_Fp mlib_ImageZoom_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageZoom_Fp(mlib_image *dst,
@@ -2312,98 +2245,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorTrue2IndexInit mlib_ImageColorTrue2IndexInit
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorTrue2IndexInit(void **colormap,
-                                             mlib_s32 bits,
-                                             mlib_type intype,
-                                             mlib_type outtype,
-                                             mlib_s32 channels,
-                                             mlib_s32 entries,
-                                             mlib_s32 offset,
-                                             const void **table);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorTrue2Index mlib_ImageColorTrue2Index
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorTrue2Index(mlib_image *dst,
-                                         const mlib_image *src,
-                                         const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorTrue2IndexFree mlib_ImageColorTrue2IndexFree
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-void  __mlib_ImageColorTrue2IndexFree(void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorDitherInit mlib_ImageColorDitherInit
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorDitherInit(void **colormap,
-                                         const mlib_s32 *dimensions,
-                                         mlib_type intype,
-                                         mlib_type outtype,
-                                         mlib_s32 channels,
-                                         mlib_s32 entries,
-                                         mlib_s32 offset,
-                                         void **lut);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorDitherFree mlib_ImageColorDitherFree
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-void  __mlib_ImageColorDitherFree(void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorErrorDiffusion3x3 mlib_ImageColorErrorDiffusion3x3
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorErrorDiffusion3x3(mlib_image *dst,
-                                                const mlib_image *src,
-                                                const mlib_s32 *kernel,
-                                                mlib_s32 scale,
-                                                const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorErrorDiffusionMxN mlib_ImageColorErrorDiffusionMxN
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorErrorDiffusionMxN(mlib_image *dst,
-                                                const mlib_image *src,
-                                                const mlib_s32 *kernel,
-                                                mlib_s32 m,
-                                                mlib_s32 n,
-                                                mlib_s32 dm,
-                                                mlib_s32 dn,
-                                                mlib_s32 scale,
-                                                const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorOrderedDither8x8 mlib_ImageColorOrderedDither8x8
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorOrderedDither8x8(mlib_image *dst,
-                                               const mlib_image *src,
-                                               const mlib_s32 *dmask,
-                                               mlib_s32 scale,
-                                               const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageColorOrderedDitherMxN mlib_ImageColorOrderedDitherMxN
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageColorOrderedDitherMxN(mlib_image *dst,
-                                               const mlib_image *src,
-                                               const mlib_s32 **dmask,
-                                               mlib_s32 m,
-                                               mlib_s32 n,
-                                               mlib_s32 scale,
-                                               const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageColorReplace mlib_ImageColorReplace
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageColorReplace(mlib_image *dst,
@@ -2694,17 +2535,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageConv2x2Index mlib_ImageConv2x2Index
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageConv2x2Index(mlib_image *dst,
-                                      const mlib_image *src,
-                                      const mlib_s32 *kernel,
-                                      mlib_s32 scale,
-                                      mlib_edge edge,
-                                      const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageConv2x2_Fp mlib_ImageConv2x2_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageConv2x2_Fp(mlib_image *dst,
@@ -2726,17 +2556,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageConv3x3Index mlib_ImageConv3x3Index
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageConv3x3Index(mlib_image *dst,
-                                      const mlib_image *src,
-                                      const mlib_s32 *kernel,
-                                      mlib_s32 scale,
-                                      mlib_edge edge,
-                                      const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageConv3x3_Fp mlib_ImageConv3x3_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageConv3x3_Fp(mlib_image *dst,
@@ -2758,17 +2577,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageConv4x4Index mlib_ImageConv4x4Index
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageConv4x4Index(mlib_image *dst,
-                                      const mlib_image *src,
-                                      const mlib_s32 *kernel,
-                                      mlib_s32 scale,
-                                      mlib_edge edge,
-                                      const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageConv4x4_Fp mlib_ImageConv4x4_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageConv4x4_Fp(mlib_image *dst,
@@ -2790,17 +2598,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageConv5x5Index mlib_ImageConv5x5Index
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageConv5x5Index(mlib_image *dst,
-                                      const mlib_image *src,
-                                      const mlib_s32 *kernel,
-                                      mlib_s32 scale,
-                                      mlib_edge edge,
-                                      const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageConv5x5_Fp mlib_ImageConv5x5_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageConv5x5_Fp(mlib_image *dst,
@@ -2822,17 +2619,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageConv7x7Index mlib_ImageConv7x7Index
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageConv7x7Index(mlib_image *dst,
-                                      const mlib_image *src,
-                                      const mlib_s32 *kernel,
-                                      mlib_s32 scale,
-                                      mlib_edge edge,
-                                      const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageConv7x7_Fp mlib_ImageConv7x7_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageConv7x7_Fp(mlib_image *dst,
@@ -2871,21 +2657,6 @@
 
 
 #if defined ( __MEDIALIB_OLD_NAMES )
-#define __mlib_ImageConvMxNIndex mlib_ImageConvMxNIndex
-#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
-mlib_status  __mlib_ImageConvMxNIndex(mlib_image *dst,
-                                      const mlib_image *src,
-                                      const mlib_s32 *kernel,
-                                      mlib_s32 m,
-                                      mlib_s32 n,
-                                      mlib_s32 dm,
-                                      mlib_s32 dn,
-                                      mlib_s32 scale,
-                                      mlib_edge edge,
-                                      const void *colormap);
-
-
-#if defined ( __MEDIALIB_OLD_NAMES )
 #define __mlib_ImageConvMxN_Fp mlib_ImageConvMxN_Fp
 #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */
 mlib_status  __mlib_ImageConvMxN_Fp(mlib_image *dst,
--- a/jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-/*
- * FUNCTION
- *      mlib_ImageCopy   - Direct copy from one image to another.
- *
- * SYNOPSIS
- *      mlib_status mlib_ImageCopy(mlib_image *dst,
- *                                 mlib_image *src);
- *
- * ARGUMENT
- *      dst     pointer to output or destination image
- *      src     pointer to input or source image
- *
- * RESTRICTION
- *      src and dst must have the same size, type and number of channels.
- *      They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE,
- *      MLIB_SHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type.
- *
- * DESCRIPTION
- *      Direct copy from one image to another.
- */
-
-#include <stdlib.h>
-#include "mlib_image.h"
-#include "mlib_ImageCheck.h"
-
-/***************************************************************/
-
-extern void mlib_v_ImageCopy_blk(mlib_u8 *sa, mlib_u8 *da, mlib_s32 size);
-extern void mlib_v_ImageCopy_a1(mlib_d64 *sp, mlib_d64 *dp, mlib_s32 size);
-extern void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, mlib_s32 size);
-extern void mlib_ImageCopy_bit_al(mlib_u8 *sa, mlib_u8 *da,
-                                  mlib_s32 size, mlib_s32 offset);
-extern void mlib_ImageCopy_bit_na(mlib_u8 *sa, mlib_u8 *da, mlib_s32 size,
-                                  mlib_s32 s_offset, mlib_s32 d_offset);
-
-/***************************************************************/
-
-#ifdef MLIB_TEST
-
-mlib_status mlib_v_ImageCopy(mlib_image *dst, mlib_image *src)
-
-#else
-
-mlib_status mlib_ImageCopy(mlib_image *dst, const mlib_image *src)
-
-#endif
-{
-  mlib_u8  *sa;         /* start point in source */
-  mlib_u8  *da;         /* start points in destination */
-  mlib_s32 width;       /* width in bytes of src and dst */
-  mlib_s32 height;      /* height in lines of src and dst */
-  mlib_s32 s_offset;    /* bit offset of src */
-  mlib_s32 d_offset;    /* bit offset of dst */
-  mlib_s32 stride;      /* stride in bytes in src*/
-  mlib_s32 dstride;     /* stride in bytes in dst */
-  mlib_s32 j;           /* indices for x, y */
-  mlib_s32 size;
-
-  MLIB_IMAGE_CHECK(src);
-  MLIB_IMAGE_CHECK(dst);
-  MLIB_IMAGE_TYPE_EQUAL(src, dst);
-  MLIB_IMAGE_CHAN_EQUAL(src, dst);
-  MLIB_IMAGE_SIZE_EQUAL(src, dst);
-
-  width  = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst);
-  height = mlib_ImageGetHeight(dst);
-  sa = (mlib_u8 *)mlib_ImageGetData(src);
-  da = (mlib_u8 *)mlib_ImageGetData(dst);
-
-  switch (mlib_ImageGetType(dst)) {
-    case MLIB_BIT:
-
-      if (!mlib_ImageIsNotOneDvector(src) &&
-          !mlib_ImageIsNotOneDvector(dst)) {
-          size = height * (width  >> 3);
-          if ((size & 0x3f) == 0 &&
-              !mlib_ImageIsNotAligned64(src) &&
-              !mlib_ImageIsNotAligned64(dst)) {
-
-              mlib_v_ImageCopy_blk(sa, da, size);
-              return MLIB_SUCCESS;
-          }
-          if (((size & 7) == 0) && !mlib_ImageIsNotAligned8(src) &&
-              !mlib_ImageIsNotAligned8(dst)) {
-
-              size >>= 3;                                /* in octlet */
-              mlib_v_ImageCopy_a1((mlib_d64 *)sa, (mlib_d64 *)da, size);
-          }
-          else {
-
-            mlib_ImageCopy_na(sa, da, size);
-          }
-        }
-      else {
-        stride = mlib_ImageGetStride(src);                /* in byte */
-        dstride = mlib_ImageGetStride(dst);               /* in byte */
-        s_offset = mlib_ImageGetBitOffset(src);           /* in bits */
-        d_offset = mlib_ImageGetBitOffset(dst);           /* in bits */
-
-        if (s_offset == d_offset) {
-          for (j = 0; j < height; j++) {
-            mlib_ImageCopy_bit_al(sa, da, width, s_offset);
-            sa += stride;
-            da += dstride;
-          }
-        } else {
-          for (j = 0; j < height; j++) {
-            mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset);
-            sa += stride;
-            da += dstride;
-          }
-        }
-      }
-      return MLIB_SUCCESS;
-    case MLIB_BYTE:
-      break;
-    case MLIB_SHORT:
-      width *= 2;
-      break;
-    case MLIB_INT:
-    case MLIB_FLOAT:
-      width *= 4;
-      break;
-    case MLIB_DOUBLE:
-      width *= 8;
-      break;
-    default:
-      return MLIB_FAILURE;
-  }
-
-  if (!mlib_ImageIsNotOneDvector(src) &&
-      !mlib_ImageIsNotOneDvector(dst)) {
-      size = height * width;
-      if ((size & 0x3f) == 0 &&
-          !mlib_ImageIsNotAligned64(src) &&
-          !mlib_ImageIsNotAligned64(dst)) {
-
-          mlib_v_ImageCopy_blk(sa, da, size);
-          return MLIB_SUCCESS;
-      }
-      if (((size & 7) == 0) && !mlib_ImageIsNotAligned8(src) &&
-          !mlib_ImageIsNotAligned8(dst)) {
-
-          size >>= 3;                                /* in octlet */
-          mlib_v_ImageCopy_a1((mlib_d64 *)sa, (mlib_d64 *)da, size);
-      }
-      else {
-
-        mlib_ImageCopy_na(sa, da, size);
-      }
-    }
-  else {
-    stride = mlib_ImageGetStride(src);                /* in byte */
-    dstride = mlib_ImageGetStride(dst);                /* in byte */
-
-    /* row loop */
-    for (j = 0; j < height; j++) {
-      mlib_ImageCopy_na(sa, da, width);
-      sa += stride;
-      da += dstride;
-    }
-  }
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy_blk.s	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,262 +0,0 @@
-!
-!
-! 
-! Copyright 2000 Sun Microsystems, Inc.  All Rights Reserved.
-! DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-!
-! This code is free software; you can redistribute it and/or modify it
-! under the terms of the GNU General Public License version 2 only, as
-! published by the Free Software Foundation.  Oracle designates this
-! particular file as subject to the "Classpath" exception as provided
-! by Oracle in the LICENSE file that accompanied this code.
-!
-! This code is distributed in the hope that it will be useful, but WITHOUT
-! ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-! FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-! version 2 for more details (a copy is included in the LICENSE file that
-! accompanied this code).
-!
-! You should have received a copy of the GNU General Public License version
-! 2 along with this work; if not, write to the Free Software Foundation,
-! Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-! Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-! or visit www.oracle.com if you need additional information or have any
-! questions.
-!
-
-
-! FUNCTION
-!      mlib_v_ImageCopy_blk   - Copy an image into another 
-!				(with Block Load/Store)
-!
-! SYNOPSIS
-!      void mlib_v_ImageCopy_blk(void *src,
-!                                void *dst, 
-!                                int size);
-!
-! ARGUMENT
-!      src     source image data
-!      dst     destination image data
-!      size    image size in bytes
-!
-! NOTES
-!      src and dst must point to 64-byte aligned addresses
-!      size must be multiple of 64
-!
-! DESCRIPTION
-!      dst = src
-!
-
-#include "vis_asi.h"
-
-! Minimum size of stack frame according to SPARC ABI
-#define MINFRAME        96
-
-! ENTRY provides the standard procedure entry code
-#define ENTRY(x) \
-	.align  4; \
-	.global x; \
-x:
-
-! SET_SIZE trails a function and sets the size for the ELF symbol table
-#define SET_SIZE(x) \
-	.size   x, (.-x)
-
-! SPARC have four integer register groups. i-registers %i0 to %i7
-! hold input data. o-registers %o0 to %o7 hold output data. l-registers
-! %l0 to %l7 hold local data. g-registers %g0 to %g7 hold global data.
-! Note that %g0 is alway zero, write to it has no program-visible effect.
-
-! When calling an assembly function, the first 6 arguments are stored
-! in i-registers from %i0 to %i5. The rest arguments are stored in stack.
-! Note that %i6 is reserved for stack pointer and %i7 for return address.
-
-! Only the first 32 f-registers can be used as 32-bit registers.
-! The last 32 f-registers can only be used as 16 64-bit registers.
-
-#define src     %i0
-#define dst     %i1
-#define sz      %i2
-
-!frame pointer  %i6
-!return addr    %i7
-
-!stack pointer  %o6
-!call link      %o7
-
-#define sa      %l0
-#define da      %l1
-#define se      %l2
-#define ns      %l3
-
-#define O0      %f16
-#define O1      %f18
-#define O2      %f20
-#define O3      %f22
-#define O4      %f24
-#define O5      %f26
-#define O6      %f28
-#define O7      %f30
-
-#define A0      %f32
-#define A1      %f34
-#define A2      %f36
-#define A3      %f38
-#define A4      %f40
-#define A5      %f42
-#define A6      %f44
-#define A7      %f46
-
-#define B0      %f48
-#define B1      %f50
-#define B2      %f52
-#define B3      %f54
-#define B4      %f56
-#define B5      %f58
-#define B6      %f60
-#define B7      %f62
-
-#define USE_BLD
-#define USE_BST
-
-#define MEMBAR_BEFORE_BLD        membar  #StoreLoad
-#define MEMBAR_AFTER_BLD         membar  #StoreLoad
-
-#ifdef USE_BLD
-#define BLD_A0                                  \
-        ldda    [sa]ASI_BLK_P,A0;               \
-        cmp     sa,se;                          \
-        blu,pt  %icc,1f;                        \
-        inc     64,sa;                          \
-        dec     64,sa;                          \
-1:
-#else
-#define BLD_A0                                  \
-        ldd     [sa +  0],A0;                   \
-        ldd     [sa +  8],A1;                   \
-        ldd     [sa + 16],A2;                   \
-        ldd     [sa + 24],A3;                   \
-        ldd     [sa + 32],A4;                   \
-        ldd     [sa + 40],A5;                   \
-        ldd     [sa + 48],A6;                   \
-        ldd     [sa + 56],A7;                   \
-        cmp     sa,se;                          \
-        blu,pt  %icc,1f;                        \
-        inc     64,sa;                          \
-        dec     64,sa;                          \
-1:
-#endif
-
-#ifdef USE_BLD
-#define BLD_B0                                  \
-        ldda    [sa]ASI_BLK_P,B0;               \
-        cmp     sa,se;                          \
-        blu,pt  %icc,1f;                        \
-        inc     64,sa;                          \
-        dec     64,sa;                          \
-1:
-#else
-#define BLD_B0                                  \
-        ldd     [sa +  0],B0;                   \
-        ldd     [sa +  8],B1;                   \
-        ldd     [sa + 16],B2;                   \
-        ldd     [sa + 24],B3;                   \
-        ldd     [sa + 32],B4;                   \
-        ldd     [sa + 40],B5;                   \
-        ldd     [sa + 48],B6;                   \
-        ldd     [sa + 56],B7;                   \
-        cmp     sa,se;                          \
-        blu,pt  %icc,1f;                        \
-        inc     64,sa;                          \
-        dec     64,sa;                          \
-1:
-#endif
-
-#ifdef USE_BST
-#define BST                                     \
-        stda    O0,[da]ASI_BLK_P;               \
-        inc     64,da;                          \
-        deccc   ns;                             \
-        ble,pn  %icc,mlib_v_ImageCopy_end;	\
-        nop
-#else
-#define BST                                     \
-        std     O0,[da +  0];                   \
-        std     O1,[da +  8];                   \
-        std     O2,[da + 16];                   \
-        std     O3,[da + 24];                   \
-        std     O4,[da + 32];                   \
-        std     O5,[da + 40];                   \
-        std     O6,[da + 48];                   \
-        std     O7,[da + 56];                   \
-        inc     64,da;                          \
-        deccc   ns;                             \
-        ble,pn  %icc,mlib_v_ImageCopy_end;	\
-        nop
-#endif
-
-#define COPY_A0					\
-        fmovd A0, O0;                           \
-        fmovd A1, O1;                           \
-        fmovd A2, O2;                           \
-        fmovd A3, O3;                           \
-        fmovd A4, O4;                           \
-        fmovd A5, O5;                           \
-        fmovd A6, O6;                           \
-        fmovd A7, O7;
-
-#define COPY_B0					\
-        fmovd B0, O0;                           \
-        fmovd B1, O1;                           \
-        fmovd B2, O2;                           \
-        fmovd B3, O3;                           \
-        fmovd B4, O4;                           \
-        fmovd B5, O5;                           \
-        fmovd B6, O6;                           \
-        fmovd B7, O7;
-
-        .section        ".text",#alloc,#execinstr
-
-        ENTRY(mlib_v_ImageCopy_blk)	! function name
-
-        save    %sp,-MINFRAME,%sp	! reserve space for stack
-                                        ! and adjust register window
-! do some error checking
-        tst     sz                      ! size > 0
-        ble,pn  %icc,mlib_v_ImageCopy_ret
-
-! calculate loop count
-        sra     sz,6,ns                 ! 64 bytes per loop
-
-        add     src,sz,se               ! end address of source
-        mov     src,sa
-        mov     dst,da
-                                        ! issue memory barrier instruction
-        MEMBAR_BEFORE_BLD               ! to ensure all previous memory load
-                                        ! and store has completed
-
-        BLD_A0
-        BLD_B0                          ! issue the 2nd block load instruction
-                                        ! to synchronize with returning data
-mlib_v_ImageCopy_bgn:
-
-        COPY_A0				! process data returned by BLD_A0
-        BLD_A0                          ! block load and sync data from BLD_B0
-        BST                             ! block store data from BLD_A0
-
-        COPY_B0				! process data returned by BLD_B0
-        BLD_B0                          ! block load and sync data from BLD_A0
-        BST                             ! block store data from BLD_B0
-
-        bg,pt   %icc,mlib_v_ImageCopy_bgn
-
-mlib_v_ImageCopy_end:
-                                        ! issue memory barrier instruction
-        MEMBAR_AFTER_BLD                ! to ensure all previous memory load
-                                        ! and store has completed.
-mlib_v_ImageCopy_ret:
-        ret                             ! return
-        restore                         ! restore register window
-
-        SET_SIZE(mlib_v_ImageCopy_blk)
--- a/jdk/src/java.desktop/unix/native/libawt/awt/medialib/vis_asi.h	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-#ifndef VIS_ASI_H
-#define VIS_ASI_H
-
-/* evolved from asm_asi.h in VSDK 1.0 */
-
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
-/* ASI definitions for VIS */
-
-#define         ASI_N                                   0x04
-#define         ASI_NL                                  0x0C
-#define         ASI_AIUP                                0x10
-#define         ASI_AIUS                                0x11
-#define         ASI_AIUPL                               0x18
-#define         ASI_AIUSL                               0x19
-#define         ASI_PHYS_USE_EC_L                       0x1C
-#define         ASI_PHYS_BYPASS_EC_WITH_EBIT_L          0x1D
-#define         ASI_DC_DATA                             0x46
-#define         ASI_DC_TAG                              0x47
-#define         ASI_UPA_CONTROL                         0x4A
-#define         ASI_MONDO_SEND_CTRL                     0x48
-#define         ASI_MONDO_RECEIVE_CTRL                  0x49
-#define         ASI_AFSR                                0x4C
-#define         ASI_AFAR                                0x4D
-#define         ASI_EC_TAG_DATA                         0x4E
-#define         ASI_ICACHE_DATA                         0x66
-#define         ASI_IC_INSTR                            0x66
-#define         ASI_IC_TAG                              0x67
-#define         ASI_IC_PRE_DECODE                       0x6E
-#define         ASI_IC_NEXT_FIELD                       0x6F
-#define         ASI_BLK_AIUP                            0x70
-#define         ASI_BLK_AIUS                            0x71
-#define         ASI_EC                                  0x76
-#define         ASI_BLK_AIUPL                           0x78
-#define         ASI_BLK_AIUSL                           0x79
-#define         ASI_P                                   0x80
-#define         ASI_S                                   0x81
-#define         ASI_PNF                                 0x82
-#define         ASI_SNF                                 0x83
-#define         ASI_PL                                  0x88
-#define         ASI_SL                                  0x89
-#define         ASI_PNFL                                0x8A
-#define         ASI_SNFL                                0x8B
-#define         ASI_PST8_P                              0xC0
-#define         ASI_PST8_S                              0xC1
-#define         ASI_PST16_P                             0xC2
-#define         ASI_PST16_S                             0xC3
-#define         ASI_PST32_P                             0xC4
-#define         ASI_PST32_S                             0xC5
-#define         ASI_PST8_PL                             0xC8
-#define         ASI_PST8_SL                             0xC9
-#define         ASI_PST16_PL                            0xCA
-#define         ASI_PST16_SL                            0xCB
-#define         ASI_PST32_PL                            0xCC
-#define         ASI_PST32_SL                            0xCD
-#define         ASI_FL8_P                               0xD0
-#define         ASI_FL8_S                               0xD1
-#define         ASI_FL16_P                              0xD2
-#define         ASI_FL16_S                              0xD3
-#define         ASI_FL8_PL                              0xD8
-#define         ASI_FL8_SL                              0xD9
-#define         ASI_FL16_PL                             0xDA
-#define         ASI_FL16_SL                             0xDB
-#define         ASI_COMMIT_P                            0xE0
-#define         ASI_COMMIT_S                            0xE1
-#define         ASI_BLK_P                               0xF0
-#define         ASI_BLK_S                               0xF1
-#define         ASI_BLK_PL                              0xF8
-#define         ASI_BLK_SL                              0xF9
-
-#define         ASI_NUCLEUS                             0x04
-#define         ASI_NUCLEUS_LITTLE                      0x0C
-#define         ASI_AS_IF_USER_PRIMARY                  0x10
-#define         ASI_AS_IF_USER_SECONDARY                0x11
-#define         ASI_PHYS_USE_EC                         0x14
-#define         ASI_PHYS_BYPASS_EC_WITH_EBIT            0x15
-#define         ASI_AS_IF_USER_PRIMARY_LITTLE           0x18
-#define         ASI_AS_IF_USER_SECONDARY_LITTLE         0x19
-#define         ASI_PHYS_USE_EC_LITTLE                  0x1C
-#define         ASI_PHYS_BYPASS_EC_WITH_EBIT_LITTLE     0x1D
-#define         ASI_LSU_CONTROL_REG                     0x45
-#define         ASI_DCACHE_DATA                         0x46
-#define         ASI_DCACHE_TAG                          0x47
-#define         ASI_INTR_DISPATCH_STATUS                0x48
-#define         ASI_INTR_RECEIVE                        0x49
-#define         ASI_UPA_CONFIG_REG                      0x4A
-#define         ASI_ESTATE_ERROR_EN_REG                 0x4B
-#define         ASI_ASYNC_FAULT_STATUS                  0x4C
-#define         ASI_ASYNC_FAULT_ADDR                    0x4D
-#define         ASI_ECACHE_TAG_DATA                     0x4E
-#define         ASI_OBSERVABILITY_REG                   0x4F
-#define         ASI_IMMU                                0x50
-#define         ASI_IMU_TSB_BASE                        0x50
-#define         ASI_IMU_TAG_ACCESS                      0x50
-#define         ASI_IMU_SFSR                            0x50
-#define         ASI_IMU_TAG_TARGET                      0x50
-#define         ASI_IMU_TSB_POINTER_8K                  0x51
-#define         ASI_IMU_TSB_POINTER_64K                 0x52
-#define         ASI_IMU_DATAIN                          0x54
-#define         ASI_IMMU_DATA_IN                        0x54
-#define         ASI_IMU_DATA_ACCESS                     0x55
-#define         ASI_IMU_TAG_READ                        0x56
-#define         ASI_IMU_DEMAP                           0x57
-#define         ASI_DMMU                                0x58
-#define         ASI_PRIMARY_CONTEXT                     0x58
-#define         ASI_SECONDARY_CONTEXT                   0x58
-#define         ASI_DMU_TSB_BASE                        0x58
-#define         ASI_DMU_TAG_ACCESS                      0x58
-#define         ASI_DMU_TAG_TARGET                      0x58
-#define         ASI_DMU_SFSR                            0x58
-#define         ASI_DMU_SFAR                            0x58
-#define         ASI_DMU_VA_WATCHPOINT                   0x58
-#define         ASI_DMU_PA_WATCHPOINT                   0x58
-#define         ASI_DMU_TSB_POINTER_8K                  0x59
-#define         ASI_DMU_TSB_POINTER_64K                 0x5A
-#define         ASI_DMU_TSB_POINTER_DIRECT              0x5B
-#define         ASI_DMU_DATAIN                          0x5C
-#define         ASI_DMMU_DATA_IN                        0x5C
-#define         ASI_DMU_DATA_ACCESS                     0x5D
-#define         ASI_DMU_TAG_READ                        0x5E
-#define         ASI_DMU_DEMAP                           0x5F
-#define         ASI_ICACHE_INSTR                        0x66
-#define         ASI_ICACHE_TAG                          0x67
-#define         ASI_ICACHE_PRE_DECODE                   0x6E
-#define         ASI_ICACHE_NEXT_FIELD                   0x6F
-#define         ASI_BLOCK_AS_IF_USER_PRIMARY            0x70
-#define         ASI_BLOCK_AS_IF_USER_SECONDARY          0x71
-#define         ASI_EXT                                 0x76
-#define         ASI_ECACHE                              0x76
-#define         ASI_ECACHE_DATA                         0x76
-#define         ASI_ECACHE_TAG                          0x76
-#define         ASI_SDB_INTR                            0x77
-#define         ASI_SDBH_ERR_REG                        0x77
-#define         ASI_SDBL_ERR_REG                        0x77
-#define         ASI_SDBH_CONTROL_REG                    0x77
-#define         ASI_SDBL_CONTROL_REG                    0x77
-#define         ASI_INTR_DISPATCH                       0x77
-#define         ASI_INTR_DATA0                          0x77
-#define         ASI_INTR_DATA1                          0x77
-#define         ASI_INTR_DATA2                          0x77
-#define         ASI_BLOCK_AS_IF_USER_PRIMARY_LITTLE     0x78
-#define         ASI_BLOCK_AS_IF_USER_SECONDARY_LITTLE   0x79
-#define         ASI_PRIMARY                             0x80
-#define         ASI_SECONDARY                           0x81
-#define         ASI_PRIMARY_NO_FAULT                    0x82
-#define         ASI_SECONDARY_NO_FAULT                  0x83
-#define         ASI_PRIMARY_LITTLE                      0x88
-#define         ASI_SECONDARY_LITTLE                    0x89
-#define         ASI_PRIMARY_NO_FAULT_LITTLE             0x8A
-#define         ASI_SECONDARY_NO_FAULT_LITTLE           0x8B
-#define         ASI_PST8_PRIMARY                        0xC0
-#define         ASI_PST8_SECONDARY                      0xC1
-#define         ASI_PST16_PRIMARY                       0xC2
-#define         ASI_PST16_SECONDARY                     0xC3
-#define         ASI_PST32_PRIMARY                       0xC4
-#define         ASI_PST32_SECONDARY                     0xC5
-#define         ASI_PST8_PRIMARY_LITTLE                 0xC8
-#define         ASI_PST8_SECONDARY_LITTLE               0xC9
-#define         ASI_PST16_PRIMARY_LITTLE                0xCA
-#define         ASI_PST16_SECONDARY_LITTLE              0xCB
-#define         ASI_PST32_PRIMARY_LITTLE                0xCC
-#define         ASI_PST32_SECONDARY_LITTLE              0xCD
-#define         ASI_FL8_PRIMARY                         0xD0
-#define         ASI_FL8_SECONDARY                       0xD1
-#define         ASI_FL16_PRIMARY                        0xD2
-#define         ASI_FL16_SECONDARY                      0xD3
-#define         ASI_FL8_PRIMARY_LITTLE                  0xD8
-#define         ASI_FL8_SECONDARY_LITTLE                0xD9
-#define         ASI_FL16_PRIMARY_LITTLE                 0xDA
-#define         ASI_FL16_SECONDARY_LITTLE               0xDB
-#define         ASI_COMMIT_PRIMARY                      0xE0
-#define         ASI_COMMIT_SECONDARY                    0xE1
-#define         ASI_BLOCK_PRIMARY                       0xF0
-#define         ASI_BLOCK_SECONDARY                     0xF1
-#define         ASI_BLOCK_PRIMARY_LITTLE                0xF8
-#define         ASI_BLOCK_SECONDARY_LITTLE              0xF9
-
-#ifdef  __cplusplus
-}
-#endif
-
-#endif  /* VIS_ASI_H */
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffineIndex_BC.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1443 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-#include "vis_proto.h"
-#include "mlib_image.h"
-#include "mlib_ImageColormap.h"
-#include "mlib_ImageAffine.h"
-#include "mlib_v_ImageFilters.h"
-
-/***************************************************************/
-#define MLIB_LIMIT   512
-#define MLIB_SHIFT    16
-
-/***************************************************************/
-#undef  DECLAREVAR
-#define DECLAREVAR()                                            \
-  DECLAREVAR0();                                                \
-  mlib_s32  *warp_tbl   = param -> warp_tbl;                    \
-  mlib_s32  xSrc, ySrc;                                         \
-  mlib_s32  srcYStride = param -> srcYStride;                   \
-  mlib_s32  filter     = param -> filter;                       \
-  mlib_s32  max_xsize  = param -> max_xsize;                    \
-  MLIB_TYPE *srcIndexPtr;                                       \
-  MLIB_TYPE *dstIndexPtr;                                       \
-  mlib_d64  *dstPixelPtr;                                       \
-  mlib_s32  i
-
-/***************************************************************/
-#define DECLAREVAR_U8()                                         \
-  mlib_s32  filterposx, filterposy;                             \
-  mlib_d64  sum0, sum1, sum2, sum3;                             \
-  mlib_f32  hi_row00, hi_row10, hi_row20, hi_row30;             \
-  mlib_f32  hi_row01, hi_row11, hi_row21, hi_row31;             \
-  mlib_f32  lo_row00, lo_row10, lo_row20, lo_row30;             \
-  mlib_f32  lo_row01, lo_row11, lo_row21, lo_row31;             \
-  mlib_d64  xFilter0, xFilter1, xFilter2, xFilter3, yFilter;    \
-  mlib_d64  v00, v10, v20, v30;                                 \
-  mlib_d64  v01, v11, v21, v31;                                 \
-  mlib_d64  v02, v12, v22, v32;                                 \
-  mlib_d64  v03, v13, v23, v33;                                 \
-  mlib_d64  d0, d1, d2, d3;                                     \
-  mlib_d64  d00, d10, d20, d30;                                 \
-  mlib_d64  d01, d11, d21, d31;                                 \
-  mlib_s32  cols;                                               \
-  mlib_d64  res, *xPtr
-
-/***************************************************************/
-#define DECLAREVAR_S16()                                        \
-  mlib_s32  filterposx, filterposy;                             \
-  mlib_d64  sum0, sum1, sum2, sum3;                             \
-  mlib_d64  row00, row10, row20, row30;                         \
-  mlib_d64  row01, row11, row21, row31;                         \
-  mlib_d64  row02, row12, row22, row32;                         \
-  mlib_d64  row03, row13, row23, row33;                         \
-  mlib_d64  xFilter0, xFilter1, xFilter2, xFilter3;             \
-  mlib_d64  yFilter0, yFilter1, yFilter2, yFilter3;             \
-  mlib_d64  v00, v01, v02, v03, v10, v11, v12, v13;             \
-  mlib_d64  v20, v21, v22, v23, v30, v31, v32, v33;             \
-  mlib_d64  u00, u01, u10, u11, u20, u21, u30, u31;             \
-  mlib_d64  d0, d1, d2, d3;                                     \
-  mlib_d64  *yPtr, *xPtr;                                       \
-  mlib_s32  cols;                                               \
-  mlib_d64  res;                                                \
-  mlib_f32  f_x01000100 = vis_to_float(0x01000100)
-
-/***************************************************************/
-#undef  CLIP
-#define CLIP()                                                  \
-  dstData += dstYStride;                                        \
-  xLeft = leftEdges[j];                                         \
-  xRight = rightEdges[j];                                       \
-  X = xStarts[j];                                               \
-  Y = yStarts[j];                                               \
-  PREPARE_DELTAS                                                \
-  if (xLeft > xRight)                                           \
-    continue;                                                   \
-  dstIndexPtr = (MLIB_TYPE *)dstData + xLeft;                   \
-  dstPixelPtr = dstRowPtr
-
-/***************************************************************/
-#define FADD_4BC_U8()                                           \
-  d0 = vis_fpadd16(d00, d10);                                   \
-  d1 = vis_fpadd16(d20, d30);                                   \
-  d0 = vis_fpadd16(d0, d1);                                     \
-  d2 = vis_fpadd16(d01, d11);                                   \
-  d3 = vis_fpadd16(d21, d31);                                   \
-  d2 = vis_fpadd16(d2, d3);                                     \
-  res = vis_fpack16_pair(d0, d2)
-
-/***************************************************************/
-#define LOAD_BC_U8_4CH_1PIXEL(mlib_filters_u8, mlib_filters_u8_4)      \
-  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK;                      \
-  yFilter = *((mlib_d64 *) ((mlib_u8 *)mlib_filters_u8 + filterposy)); \
-  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK;                      \
-  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx));    \
-  xFilter0 = xPtr[0];                                                  \
-  xFilter1 = xPtr[1];                                                  \
-  xFilter2 = xPtr[2];                                                  \
-  xFilter3 = xPtr[3];                                                  \
-  X += dX;                                                             \
-  Y += dY;                                                             \
-  hi_row00 = flut[srcIndexPtr[0]];                                     \
-  lo_row00 = flut[srcIndexPtr[1]];                                     \
-  hi_row01 = flut[srcIndexPtr[2]];                                     \
-  lo_row01 = flut[srcIndexPtr[3]];                                     \
-  srcIndexPtr += srcYStride;                                           \
-  hi_row10 = flut[srcIndexPtr[0]];                                     \
-  lo_row10 = flut[srcIndexPtr[1]];                                     \
-  hi_row11 = flut[srcIndexPtr[2]];                                     \
-  lo_row11 = flut[srcIndexPtr[3]];                                     \
-  srcIndexPtr += srcYStride;                                           \
-  hi_row20 = flut[srcIndexPtr[0]];                                     \
-  lo_row20 = flut[srcIndexPtr[1]];                                     \
-  hi_row21 = flut[srcIndexPtr[2]];                                     \
-  lo_row21 = flut[srcIndexPtr[3]];                                     \
-  srcIndexPtr += srcYStride;                                           \
-  hi_row30 = flut[srcIndexPtr[0]];                                     \
-  lo_row30 = flut[srcIndexPtr[1]];                                     \
-  hi_row31 = flut[srcIndexPtr[2]];                                     \
-  lo_row31 = flut[srcIndexPtr[3]]
-
-/***************************************************************/
-#define NEXT_PIXEL_4BC()                                        \
-  xSrc = (X >> MLIB_SHIFT)-1;                                   \
-  ySrc = (Y >> MLIB_SHIFT)-1;                                   \
-  srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc
-
-/***************************************************************/
-#define RESULT_4BC_U8_1PIXEL(ind)                               \
-  v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter));         \
-  v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter));         \
-  v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter));         \
-  v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter));         \
-  v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter));         \
-  v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter));         \
-  sum0 = vis_fpadd16(v00, v10);                                 \
-  v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter));         \
-  sum1 = vis_fpadd16(v01, v11);                                 \
-  v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter));         \
-  sum2 = vis_fpadd16(v02, v12);                                 \
-  v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter));         \
-  sum3 = vis_fpadd16(v03, v13);                                 \
-  v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter));         \
-  sum0 = vis_fpadd16(sum0, v20);                                \
-  v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter));         \
-  sum1 = vis_fpadd16(sum1, v21);                                \
-  v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter));         \
-  sum2 = vis_fpadd16(sum2, v22);                                \
-  v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter));         \
-  sum3 = vis_fpadd16(sum3, v23);                                \
-  v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter));         \
-  sum0 = vis_fpadd16(sum0, v30);                                \
-  v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter));         \
-  sum1 = vis_fpadd16(sum1, v31);                                \
-  v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter));         \
-  sum2 = vis_fpadd16(sum2, v32);                                \
-  v00 = vis_fmul8sux16(sum0, xFilter0);                         \
-  sum3 = vis_fpadd16(sum3, v33);                                \
-  v01 = vis_fmul8ulx16(sum0, xFilter0);                         \
-  v10 = vis_fmul8sux16(sum1, xFilter1);                         \
-  d0##ind = vis_fpadd16(v00, v01);                              \
-  v11 = vis_fmul8ulx16(sum1, xFilter1);                         \
-  v20 = vis_fmul8sux16(sum2, xFilter2);                         \
-  d1##ind = vis_fpadd16(v10, v11);                              \
-  v21 = vis_fmul8ulx16(sum2, xFilter2);                         \
-  v30 = vis_fmul8sux16(sum3, xFilter3);                         \
-  d2##ind = vis_fpadd16(v20, v21);                              \
-  v31 = vis_fmul8ulx16(sum3, xFilter3);                         \
-  d3##ind = vis_fpadd16(v30, v31)
-
-/***************************************************************/
-#define BC_U8_4CH(ind, mlib_filters_u8, mlib_filters_u8_4)            \
-  v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter));               \
-  v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter));               \
-  v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter));               \
-  v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter));               \
-  hi_row00 = flut[srcIndexPtr[0]];                                    \
-  filterposy = (Y >> FILTER_SHIFT);                                   \
-  v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter));               \
-  lo_row00 = flut[srcIndexPtr[1]];                                    \
-  v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter));               \
-  sum0 = vis_fpadd16(v00, v10);                                       \
-  hi_row01 = flut[srcIndexPtr[2]];                                    \
-  v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter));               \
-  lo_row01 = flut[srcIndexPtr[3]];                                    \
-  filterposx = (X >> FILTER_SHIFT);                                   \
-  v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter));               \
-  srcIndexPtr += srcYStride;                                          \
-  hi_row10 = flut[srcIndexPtr[0]];                                    \
-  v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter));               \
-  sum1 = vis_fpadd16(v01, v11);                                       \
-  lo_row10 = flut[srcIndexPtr[1]];                                    \
-  X += dX;                                                            \
-  hi_row11 = flut[srcIndexPtr[2]];                                    \
-  v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter));               \
-  sum2 = vis_fpadd16(v02, v12);                                       \
-  lo_row11 = flut[srcIndexPtr[3]];                                    \
-  v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter));               \
-  srcIndexPtr += srcYStride;                                          \
-  hi_row20 = flut[srcIndexPtr[0]];                                    \
-  v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter));               \
-  sum3 = vis_fpadd16(v03, v13);                                       \
-  Y += dY;                                                            \
-  xSrc = (X >> MLIB_SHIFT)-1;                                         \
-  v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter));               \
-  sum0 = vis_fpadd16(sum0, v20);                                      \
-  lo_row20 = flut[srcIndexPtr[1]];                                    \
-  ySrc = (Y >> MLIB_SHIFT)-1;                                         \
-  hi_row21 = flut[srcIndexPtr[2]];                                    \
-  v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter));               \
-  sum1 = vis_fpadd16(sum1, v21);                                      \
-  filterposy &= FILTER_MASK;                                          \
-  lo_row21 = flut[srcIndexPtr[3]];                                    \
-  v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter));               \
-  srcIndexPtr += srcYStride;                                          \
-  filterposx &= FILTER_MASK;                                          \
-  v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter));               \
-  sum2 = vis_fpadd16(sum2, v22);                                      \
-  hi_row30 = flut[srcIndexPtr[0]];                                    \
-  sum3 = vis_fpadd16(sum3, v23);                                      \
-  sum0 = vis_fpadd16(sum0, v30);                                      \
-  lo_row30 = flut[srcIndexPtr[1]];                                    \
-  sum1 = vis_fpadd16(sum1, v31);                                      \
-  v00 = vis_fmul8sux16(sum0, xFilter0);                               \
-  hi_row31 = flut[srcIndexPtr[2]];                                    \
-  sum2 = vis_fpadd16(sum2, v32);                                      \
-  v01 = vis_fmul8ulx16(sum0, xFilter0);                               \
-  sum3 = vis_fpadd16(sum3, v33);                                      \
-  lo_row31 = flut[srcIndexPtr[3]];                                    \
-  v10 = vis_fmul8sux16(sum1, xFilter1);                               \
-  d0##ind = vis_fpadd16(v00, v01);                                    \
-  yFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_u8 + filterposy)); \
-  v11 = vis_fmul8ulx16(sum1, xFilter1);                               \
-  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx));   \
-  xFilter0 = xPtr[0];                                                 \
-  v20 = vis_fmul8sux16(sum2, xFilter2);                               \
-  d1##ind = vis_fpadd16(v10, v11);                                    \
-  xFilter1 = xPtr[1];                                                 \
-  v21 = vis_fmul8ulx16(sum2, xFilter2);                               \
-  xFilter2 = xPtr[2];                                                 \
-  v30 = vis_fmul8sux16(sum3, xFilter3);                               \
-  d2##ind = vis_fpadd16(v20, v21);                                    \
-  xFilter3 = xPtr[3];                                                 \
-  v31 = vis_fmul8ulx16(sum3, xFilter3);                               \
-  srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc;                   \
-  d3##ind = vis_fpadd16(v30, v31)
-
-/***************************************************************/
-#define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4)                      \
-  row00 = flut[srcIndexPtr[0]];                                         \
-  row01 = flut[srcIndexPtr[1]];                                         \
-  row02 = flut[srcIndexPtr[2]];                                         \
-  row03 = flut[srcIndexPtr[3]];                                         \
-  srcIndexPtr += srcYStride;                                            \
-  row10 = flut[srcIndexPtr[0]];                                         \
-  row11 = flut[srcIndexPtr[1]];                                         \
-  row12 = flut[srcIndexPtr[2]];                                         \
-  row13 = flut[srcIndexPtr[3]];                                         \
-  srcIndexPtr += srcYStride;                                            \
-  row20 = flut[srcIndexPtr[0]];                                         \
-  row21 = flut[srcIndexPtr[1]];                                         \
-  row22 = flut[srcIndexPtr[2]];                                         \
-  row23 = flut[srcIndexPtr[3]];                                         \
-  srcIndexPtr += srcYStride;                                            \
-  row30 = flut[srcIndexPtr[0]];                                         \
-  row31 = flut[srcIndexPtr[1]];                                         \
-  row32 = flut[srcIndexPtr[2]];                                         \
-  row33 = flut[srcIndexPtr[3]];                                         \
-  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK;                       \
-  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
-  yFilter0 = yPtr[0];                                                   \
-  yFilter1 = yPtr[1];                                                   \
-  yFilter2 = yPtr[2];                                                   \
-  yFilter3 = yPtr[3];                                                   \
-  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK;                       \
-  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4));  \
-  xFilter0 = xPtr[0];                                                   \
-  xFilter1 = xPtr[1];                                                   \
-  xFilter2 = xPtr[2];                                                   \
-  xFilter3 = xPtr[3];                                                   \
-  X += dX;                                                              \
-  Y += dY
-
-/***************************************************************/
-#define RESULT_4BC_S16_1PIXEL()                                 \
-  u00 = vis_fmul8sux16(row00, yFilter0);                        \
-  u01 = vis_fmul8ulx16(row00, yFilter0);                        \
-  u10 = vis_fmul8sux16(row01, yFilter0);                        \
-  u11 = vis_fmul8ulx16(row01, yFilter0);                        \
-  v00 = vis_fpadd16(u00, u01);                                  \
-  u20 = vis_fmul8sux16(row02, yFilter0);                        \
-  v01 = vis_fpadd16(u10, u11);                                  \
-  u21 = vis_fmul8ulx16(row02, yFilter0);                        \
-  u30 = vis_fmul8sux16(row03, yFilter0);                        \
-  u31 = vis_fmul8ulx16(row03, yFilter0);                        \
-  v02 = vis_fpadd16(u20, u21);                                  \
-  u00 = vis_fmul8sux16(row10, yFilter1);                        \
-  u01 = vis_fmul8ulx16(row10, yFilter1);                        \
-  v03 = vis_fpadd16(u30, u31);                                  \
-  u10 = vis_fmul8sux16(row11, yFilter1);                        \
-  u11 = vis_fmul8ulx16(row11, yFilter1);                        \
-  v10 = vis_fpadd16(u00, u01);                                  \
-  u20 = vis_fmul8sux16(row12, yFilter1);                        \
-  v11 = vis_fpadd16(u10, u11);                                  \
-  u21 = vis_fmul8ulx16(row12, yFilter1);                        \
-  u30 = vis_fmul8sux16(row13, yFilter1);                        \
-  u31 = vis_fmul8ulx16(row13, yFilter1);                        \
-  u00 = vis_fmul8sux16(row20, yFilter2);                        \
-  v12 = vis_fpadd16(u20, u21);                                  \
-  u01 = vis_fmul8ulx16(row20, yFilter2);                        \
-  v13 = vis_fpadd16(u30, u31);                                  \
-  u10 = vis_fmul8sux16(row21, yFilter2);                        \
-  u11 = vis_fmul8ulx16(row21, yFilter2);                        \
-  v20 = vis_fpadd16(u00, u01);                                  \
-  u20 = vis_fmul8sux16(row22, yFilter2);                        \
-  sum0 = vis_fpadd16(v00, v10);                                 \
-  u21 = vis_fmul8ulx16(row22, yFilter2);                        \
-  u30 = vis_fmul8sux16(row23, yFilter2);                        \
-  u31 = vis_fmul8ulx16(row23, yFilter2);                        \
-  u00 = vis_fmul8sux16(row30, yFilter3);                        \
-  u01 = vis_fmul8ulx16(row30, yFilter3);                        \
-  v21 = vis_fpadd16(u10, u11);                                  \
-  sum1 = vis_fpadd16(v01, v11);                                 \
-  u10 = vis_fmul8sux16(row31, yFilter3);                        \
-  sum2 = vis_fpadd16(v02, v12);                                 \
-  sum3 = vis_fpadd16(v03, v13);                                 \
-  v22 = vis_fpadd16(u20, u21);                                  \
-  u11 = vis_fmul8ulx16(row31, yFilter3);                        \
-  sum0 = vis_fpadd16(sum0, v20);                                \
-  u20 = vis_fmul8sux16(row32, yFilter3);                        \
-  u21 = vis_fmul8ulx16(row32, yFilter3);                        \
-  v23 = vis_fpadd16(u30, u31);                                  \
-  v30 = vis_fpadd16(u00, u01);                                  \
-  sum1 = vis_fpadd16(sum1, v21);                                \
-  u30 = vis_fmul8sux16(row33, yFilter3);                        \
-  u31 = vis_fmul8ulx16(row33, yFilter3);                        \
-  v31 = vis_fpadd16(u10, u11);                                  \
-  sum2 = vis_fpadd16(sum2, v22);                                \
-  sum3 = vis_fpadd16(sum3, v23);                                \
-  v32 = vis_fpadd16(u20, u21);                                  \
-  sum0 = vis_fpadd16(sum0, v30);                                \
-  v33 = vis_fpadd16(u30, u31);                                  \
-  v00 = vis_fmul8sux16(sum0, xFilter0);                         \
-  sum1 = vis_fpadd16(sum1, v31);                                \
-  sum2 = vis_fpadd16(sum2, v32);                                \
-  v01 = vis_fmul8ulx16(sum0, xFilter0);                         \
-  v10 = vis_fmul8sux16(sum1, xFilter1);                         \
-  sum3 = vis_fpadd16(sum3, v33);                                \
-  v11 = vis_fmul8ulx16(sum1, xFilter1);                         \
-  d0 = vis_fpadd16(v00, v01);                                   \
-  v20 = vis_fmul8sux16(sum2, xFilter2);                         \
-  v21 = vis_fmul8ulx16(sum2, xFilter2);                         \
-  d1 = vis_fpadd16(v10, v11);                                   \
-  v30 = vis_fmul8sux16(sum3, xFilter3);                         \
-  v31 = vis_fmul8ulx16(sum3, xFilter3);                         \
-  d2 = vis_fpadd16(v20, v21);                                   \
-  d3 = vis_fpadd16(v30, v31);                                   \
-  d0 = vis_fpadd16(d0, d1);                                     \
-  d2 = vis_fpadd16(d2, d3);                                     \
-  d0 = vis_fpadd16(d0, d2);                                     \
-  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0));           \
-  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0));           \
-  res = vis_fpackfix_pair(d2, d3)
-
-/***************************************************************/
-#define BC_S16_4CH(mlib_filters_s16_4)                                  \
-  u00 = vis_fmul8sux16(row00, yFilter0);                                \
-  u01 = vis_fmul8ulx16(row00, yFilter0);                                \
-  u10 = vis_fmul8sux16(row01, yFilter0);                                \
-  u11 = vis_fmul8ulx16(row01, yFilter0);                                \
-  v00 = vis_fpadd16(u00, u01);                                          \
-  u20 = vis_fmul8sux16(row02, yFilter0);                                \
-  v01 = vis_fpadd16(u10, u11);                                          \
-  u21 = vis_fmul8ulx16(row02, yFilter0);                                \
-  u30 = vis_fmul8sux16(row03, yFilter0);                                \
-  u31 = vis_fmul8ulx16(row03, yFilter0);                                \
-  v02 = vis_fpadd16(u20, u21);                                          \
-  row00 = flut[srcIndexPtr[0]];                                         \
-  u00 = vis_fmul8sux16(row10, yFilter1);                                \
-  u01 = vis_fmul8ulx16(row10, yFilter1);                                \
-  filterposy = (Y >> FILTER_SHIFT);                                     \
-  v03 = vis_fpadd16(u30, u31);                                          \
-  row01 = flut[srcIndexPtr[1]];                                         \
-  u10 = vis_fmul8sux16(row11, yFilter1);                                \
-  u11 = vis_fmul8ulx16(row11, yFilter1);                                \
-  v10 = vis_fpadd16(u00, u01);                                          \
-  row02 = flut[srcIndexPtr[2]];                                         \
-  u20 = vis_fmul8sux16(row12, yFilter1);                                \
-  v11 = vis_fpadd16(u10, u11);                                          \
-  u21 = vis_fmul8ulx16(row12, yFilter1);                                \
-  u30 = vis_fmul8sux16(row13, yFilter1);                                \
-  row03 = flut[srcIndexPtr[3]];                                         \
-  u31 = vis_fmul8ulx16(row13, yFilter1);                                \
-  u00 = vis_fmul8sux16(row20, yFilter2);                                \
-  filterposx = (X >> FILTER_SHIFT);                                     \
-  srcIndexPtr += srcYStride;                                            \
-  v12 = vis_fpadd16(u20, u21);                                          \
-  u01 = vis_fmul8ulx16(row20, yFilter2);                                \
-  v13 = vis_fpadd16(u30, u31);                                          \
-  row10 = flut[srcIndexPtr[0]];                                         \
-  u10 = vis_fmul8sux16(row21, yFilter2);                                \
-  X += dX;                                                              \
-  u11 = vis_fmul8ulx16(row21, yFilter2);                                \
-  v20 = vis_fpadd16(u00, u01);                                          \
-  row11 = flut[srcIndexPtr[1]];                                         \
-  u20 = vis_fmul8sux16(row22, yFilter2);                                \
-  sum0 = vis_fpadd16(v00, v10);                                         \
-  u21 = vis_fmul8ulx16(row22, yFilter2);                                \
-  row12 = flut[srcIndexPtr[2]];                                         \
-  u30 = vis_fmul8sux16(row23, yFilter2);                                \
-  u31 = vis_fmul8ulx16(row23, yFilter2);                                \
-  row13 = flut[srcIndexPtr[3]];                                         \
-  u00 = vis_fmul8sux16(row30, yFilter3);                                \
-  srcIndexPtr += srcYStride;                                            \
-  u01 = vis_fmul8ulx16(row30, yFilter3);                                \
-  v21 = vis_fpadd16(u10, u11);                                          \
-  Y += dY;                                                              \
-  xSrc = (X >> MLIB_SHIFT)-1;                                           \
-  sum1 = vis_fpadd16(v01, v11);                                         \
-  row20 = flut[srcIndexPtr[0]];                                         \
-  u10 = vis_fmul8sux16(row31, yFilter3);                                \
-  sum2 = vis_fpadd16(v02, v12);                                         \
-  sum3 = vis_fpadd16(v03, v13);                                         \
-  ySrc = (Y >> MLIB_SHIFT)-1;                                           \
-  row21 = flut[srcIndexPtr[1]];                                         \
-  v22 = vis_fpadd16(u20, u21);                                          \
-  u11 = vis_fmul8ulx16(row31, yFilter3);                                \
-  sum0 = vis_fpadd16(sum0, v20);                                        \
-  u20 = vis_fmul8sux16(row32, yFilter3);                                \
-  row22 = flut[srcIndexPtr[2]];                                         \
-  u21 = vis_fmul8ulx16(row32, yFilter3);                                \
-  v23 = vis_fpadd16(u30, u31);                                          \
-  v30 = vis_fpadd16(u00, u01);                                          \
-  filterposy &= FILTER_MASK;                                            \
-  sum1 = vis_fpadd16(sum1, v21);                                        \
-  u30 = vis_fmul8sux16(row33, yFilter3);                                \
-  row23 = flut[srcIndexPtr[3]];                                         \
-  u31 = vis_fmul8ulx16(row33, yFilter3);                                \
-  srcIndexPtr += srcYStride;                                            \
-  filterposx &= FILTER_MASK;                                            \
-  v31 = vis_fpadd16(u10, u11);                                          \
-  row30 = flut[srcIndexPtr[0]];                                         \
-  sum2 = vis_fpadd16(sum2, v22);                                        \
-  sum3 = vis_fpadd16(sum3, v23);                                        \
-  row31 = flut[srcIndexPtr[1]];                                         \
-  v32 = vis_fpadd16(u20, u21);                                          \
-  sum0 = vis_fpadd16(sum0, v30);                                        \
-  row32 = flut[srcIndexPtr[2]];                                         \
-  v33 = vis_fpadd16(u30, u31);                                          \
-  row33 = flut[srcIndexPtr[3]];                                         \
-  v00 = vis_fmul8sux16(sum0, xFilter0);                                 \
-  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
-  sum1 = vis_fpadd16(sum1, v31);                                        \
-  yFilter0 = yPtr[0];                                                   \
-  sum2 = vis_fpadd16(sum2, v32);                                        \
-  v01 = vis_fmul8ulx16(sum0, xFilter0);                                 \
-  yFilter1 = yPtr[1];                                                   \
-  v10 = vis_fmul8sux16(sum1, xFilter1);                                 \
-  sum3 = vis_fpadd16(sum3, v33);                                        \
-  yFilter2 = yPtr[2];                                                   \
-  v11 = vis_fmul8ulx16(sum1, xFilter1);                                 \
-  d0 = vis_fpadd16(v00, v01);                                           \
-  yFilter3 = yPtr[3];                                                   \
-  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4));  \
-  v20 = vis_fmul8sux16(sum2, xFilter2);                                 \
-  xFilter0 = xPtr[0];                                                   \
-  v21 = vis_fmul8ulx16(sum2, xFilter2);                                 \
-  d1 = vis_fpadd16(v10, v11);                                           \
-  xFilter1 = xPtr[1];                                                   \
-  v30 = vis_fmul8sux16(sum3, xFilter3);                                 \
-  v31 = vis_fmul8ulx16(sum3, xFilter3);                                 \
-  d2 = vis_fpadd16(v20, v21);                                           \
-  xFilter2 = xPtr[2];                                                   \
-  d3 = vis_fpadd16(v30, v31);                                           \
-  xFilter3 = xPtr[3];                                                   \
-  srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc
-
-/***************************************************************/
-#define FADD_4BC_S16()                                          \
-  d0 = vis_fpadd16(d0, d1);                                     \
-  d2 = vis_fpadd16(d2, d3);                                     \
-  d0 = vis_fpadd16(d0, d2);                                     \
-  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0));           \
-  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0));           \
-  res = vis_fpackfix_pair(d2, d3)
-
-/***************************************************************/
-#undef  MLIB_TYPE
-#define MLIB_TYPE mlib_u8
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  5
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 8) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_U8_U8_3CH_BC(mlib_affine_param *param,
-                                               const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_U8();
-  mlib_f32  *flut   = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64  dstRowData[MLIB_LIMIT/2];
-  mlib_d64  *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_u8   = mlib_filters_u8_bc;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
-  } else {
-    mlib_filters_table_u8   = mlib_filters_u8_bc2;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
-  }
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  vis_write_gsr(3 << 3);
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    cols = xRight - xLeft + 1;
-
-    i = 0;
-
-    if (i <= cols - 6) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-#pragma pipeloop(0)
-      for (; i <= cols-8; i += 2) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_U8();
-        BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-        BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 6;
-    }
-
-    if (i <= cols-4) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-    if (i <= cols-2) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 2;
-    }
-
-    if (i < cols) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d20, d30);
-      d0 = vis_fpadd16(d0, d1);
-      res = vis_fpack16_pair(d0, d0);
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4((mlib_u8 *)dstRowPtr,
-                                               dstIndexPtr,
-                                               xRight - xLeft + 1,
-                                               colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  4
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 9) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_U8_S16_3CH_BC(mlib_affine_param *param,
-                                                const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_S16();
-  mlib_d64 *flut   = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64 dstRowData[MLIB_LIMIT];
-  mlib_d64 *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_s16_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
-  } else {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
-  }
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    vis_write_gsr(10 << 3);
-
-    cols = xRight - xLeft + 1;
-    i = 0;
-
-    if (i <= cols - 4) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-      FADD_4BC_S16();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-
-#pragma pipeloop(0)
-
-      for (; i < cols-4; i++) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_S16();
-        BC_S16_4CH(mlib_filters_table_s16_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_S16();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-#pragma pipeloop(0)
-    for (; i < cols; i++) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((mlib_s16 *)dstRowPtr,
-                                                dstIndexPtr,
-                                                xRight - xLeft + 1,
-                                                colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  5
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 8) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_U8_U8_4CH_BC(mlib_affine_param *param,
-                                               const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_U8();
-  mlib_f32  *flut   = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64  dstRowData[MLIB_LIMIT/2];
-  mlib_d64  *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_u8   = mlib_filters_u8_bc;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
-  } else {
-    mlib_filters_table_u8   = mlib_filters_u8_bc2;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
-  }
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  vis_write_gsr(3 << 3);
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    cols = xRight - xLeft + 1;
-
-    i = 0;
-
-    if (i <= cols - 6) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-#pragma pipeloop(0)
-      for (; i <= cols-8; i += 2) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_U8();
-        BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-        BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 6;
-    }
-
-    if (i <= cols-4) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-    if (i <= cols-2) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 2;
-    }
-
-    if (i < cols) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d20, d30);
-      d0 = vis_fpadd16(d0, d1);
-      res = vis_fpack16_pair(d0, d0);
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_U8_4((mlib_u8 *)dstRowPtr,
-                                          dstIndexPtr,
-                                          xRight - xLeft + 1,
-                                          colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  4
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 9) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_U8_S16_4CH_BC(mlib_affine_param *param,
-                                                const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_S16();
-  mlib_d64 *flut   = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64 dstRowData[MLIB_LIMIT];
-  mlib_d64 *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_s16_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
-  } else {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
-  }
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    vis_write_gsr(10 << 3);
-
-    cols = xRight - xLeft + 1;
-    i = 0;
-
-    if (i <= cols - 4) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-      FADD_4BC_S16();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-
-#pragma pipeloop(0)
-
-      for (; i < cols-4; i++) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_S16();
-        BC_S16_4CH(mlib_filters_table_s16_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_S16();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-#pragma pipeloop(0)
-    for (; i < cols; i++) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_S16_U8_4((mlib_s16 *)dstRowPtr,
-                                           dstIndexPtr,
-                                           xRight - xLeft + 1,
-                                           colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  MLIB_TYPE
-#define MLIB_TYPE mlib_s16
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  5
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 8) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_S16_U8_3CH_BC(mlib_affine_param *param,
-                                                const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_U8();
-  mlib_f32  *flut   = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64  dstRowData[MLIB_LIMIT/2];
-  mlib_d64  *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_u8   = mlib_filters_u8_bc;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
-  } else {
-    mlib_filters_table_u8   = mlib_filters_u8_bc2;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
-  }
-
-  srcYStride >>= 1;
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  vis_write_gsr(3 << 3);
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    cols = xRight - xLeft + 1;
-
-    i = 0;
-
-    if (i <= cols - 6) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-#pragma pipeloop(0)
-      for (; i <= cols-8; i += 2) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_U8();
-        BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-        BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 6;
-    }
-
-    if (i <= cols-4) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-    if (i <= cols-2) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 2;
-    }
-
-    if (i < cols) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d20, d30);
-      d0 = vis_fpadd16(d0, d1);
-      res = vis_fpack16_pair(d0, d0);
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4((mlib_u8 *)dstRowPtr,
-                                                dstIndexPtr,
-                                                xRight - xLeft + 1,
-                                                colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  4
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 9) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_S16_S16_3CH_BC(mlib_affine_param *param,
-                                                 const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_S16();
-  mlib_d64 *flut   = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64 dstRowData[MLIB_LIMIT];
-  mlib_d64 *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_s16_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
-  } else {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
-  }
-
-  srcYStride >>= 1;
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    vis_write_gsr(10 << 3);
-
-    cols = xRight - xLeft + 1;
-    i = 0;
-
-    if (i <= cols - 4) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-      FADD_4BC_S16();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-
-#pragma pipeloop(0)
-
-      for (; i < cols-4; i++) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_S16();
-        BC_S16_4CH(mlib_filters_table_s16_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_S16();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-#pragma pipeloop(0)
-    for (; i < cols; i++) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((mlib_s16 *)dstRowPtr,
-                                                 dstIndexPtr,
-                                                 xRight - xLeft + 1,
-                                                 colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  5
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 8) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_S16_U8_4CH_BC(mlib_affine_param *param,
-                                                const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_U8();
-  mlib_f32  *flut   = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64  dstRowData[MLIB_LIMIT/2];
-  mlib_d64  *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_u8   = mlib_filters_u8_bc;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
-  } else {
-    mlib_filters_table_u8   = mlib_filters_u8_bc2;
-    mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
-  }
-
-  srcYStride >>= 1;
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  vis_write_gsr(3 << 3);
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    cols = xRight - xLeft + 1;
-
-    i = 0;
-
-    if (i <= cols - 6) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-#pragma pipeloop(0)
-      for (; i <= cols-8; i += 2) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_U8();
-        BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-        BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 6;
-    }
-
-    if (i <= cols-4) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
-      FADD_4BC_U8();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_U8_1PIXEL(0);
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-    if (i <= cols-2) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(1);
-      FADD_4BC_U8();
-
-      *dstPixelPtr++ = res;
-      i += 2;
-    }
-
-    if (i < cols) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
-      RESULT_4BC_U8_1PIXEL(0);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d20, d30);
-      d0 = vis_fpadd16(d0, d1);
-      res = vis_fpack16_pair(d0, d0);
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_S16_4((mlib_u8 *)dstRowPtr,
-                                           dstIndexPtr,
-                                           xRight - xLeft + 1,
-                                           colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  FILTER_SHIFT
-#define FILTER_SHIFT  4
-#undef  FILTER_MASK
-#define FILTER_MASK   (((1 << 9) - 1) << 3)
-
-/***************************************************************/
-mlib_status mlib_ImageAffineIndex_S16_S16_4CH_BC(mlib_affine_param *param,
-                                                 const void        *colormap)
-{
-  DECLAREVAR();
-  DECLAREVAR_S16();
-  mlib_d64 *flut   = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
-  mlib_ImageGetLutOffset(colormap);
-  mlib_d64 dstRowData[MLIB_LIMIT];
-  mlib_d64 *dstRowPtr = dstRowData;
-  const mlib_s16 *mlib_filters_table_s16_4;
-
-  if (filter == MLIB_BICUBIC) {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
-  } else {
-    mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
-  }
-
-  srcYStride >>= 1;
-
-  if (max_xsize > MLIB_LIMIT) {
-    dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
-
-    if (dstRowPtr == NULL) return MLIB_FAILURE;
-  }
-
-  for (j = yStart; j <= yFinish; j++) {
-
-    CLIP();
-
-    vis_write_gsr(10 << 3);
-
-    cols = xRight - xLeft + 1;
-    i = 0;
-
-    if (i <= cols - 4) {
-
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-
-      NEXT_PIXEL_4BC();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-      FADD_4BC_S16();
-
-      BC_S16_4CH(mlib_filters_table_s16_4);
-
-#pragma pipeloop(0)
-
-      for (; i < cols-4; i++) {
-        *dstPixelPtr++ = res;
-
-        FADD_4BC_S16();
-        BC_S16_4CH(mlib_filters_table_s16_4);
-      }
-
-      *dstPixelPtr++ = res;
-
-      FADD_4BC_S16();
-      *dstPixelPtr++ = res;
-
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-      i += 4;
-    }
-
-#pragma pipeloop(0)
-    for (; i < cols; i++) {
-      NEXT_PIXEL_4BC();
-      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
-      RESULT_4BC_S16_1PIXEL();
-      *dstPixelPtr++ = res;
-    }
-
-    mlib_ImageColorTrue2IndexLine_S16_S16_4((mlib_s16 *)dstRowPtr,
-                                            dstIndexPtr,
-                                            xRight - xLeft + 1,
-                                            colormap);
-  }
-
-  if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL.c	Fri May 13 11:31:05 2016 +0300
@@ -33,7 +33,6 @@
 
 #include "vis_proto.h"
 #include "mlib_image.h"
-#include "mlib_ImageColormap.h"
 #include "mlib_ImageCopy.h"
 #include "mlib_ImageAffine.h"
 #include "mlib_v_ImageFilters.h"
@@ -719,134 +718,3 @@
 }
 
 /***************************************************************/
-#define LUT(x)  plut[x]
-
-mlib_status FUN_NAME(u8_i)(mlib_affine_param *param,
-                           const void        *colormap)
-{
-  DECLAREVAR();
-  mlib_s32 nchan   = mlib_ImageGetLutChannels(colormap);
-  mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap);
-  mlib_f32 *plut = (mlib_f32*)mlib_ImageGetLutNormalTable(colormap) - lut_off;
-  mlib_s32 max_xsize = param -> max_xsize;
-  mlib_f32 buff[BUF_SIZE], *pbuff = buff;
-
-  if (max_xsize > BUF_SIZE) {
-    pbuff = mlib_malloc(max_xsize*sizeof(mlib_f32));
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  vis_write_gsr(3 << 3);
-
-  for (j = yStart; j <= yFinish; j++) {
-    mlib_f32 s0, s1, s2, s3;
-    DTYPE    *sp;
-
-    NEW_LINE(1);
-
-#pragma pipeloop(0)
-    for (i = 0; i < size; i++) {
-      GET_FILTER_XY();
-
-      sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
-      s0 = LUT(sp[0]);
-      s1 = LUT(sp[1]);
-      s2 = LUT(sp[srcYStride]);
-      s3 = LUT(sp[srcYStride + 1]);
-
-      PROCESS_4CH(s0, s1, s2, s3);
-
-      pbuff[i] = vis_fpack16(dd);
-      X += dX;
-      Y += dY;
-    }
-
-    if (nchan == 3) {
-      mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4((void*)pbuff, (void*)dl, size, colormap);
-    } else {
-      mlib_ImageColorTrue2IndexLine_U8_U8_4((void*)pbuff, (void*)dl, size, colormap);
-    }
-  }
-
-  if (pbuff != buff) {
-    mlib_free(pbuff);
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  DTYPE
-#define DTYPE mlib_s16
-
-mlib_status FUN_NAME(s16_i)(mlib_affine_param *param,
-                            const void        *colormap)
-{
-  DECLAREVAR();
-  mlib_s32 nchan   = mlib_ImageGetLutChannels(colormap);
-  mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap);
-  mlib_f32 *plut = (mlib_f32*)mlib_ImageGetLutNormalTable(colormap) - lut_off;
-  mlib_s32 max_xsize = param -> max_xsize;
-  mlib_f32 buff[BUF_SIZE], *pbuff = buff;
-
-  srcYStride /= sizeof(DTYPE);
-
-  if (max_xsize > BUF_SIZE) {
-    pbuff = mlib_malloc(max_xsize*sizeof(mlib_f32));
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  vis_write_gsr(3 << 3);
-
-  for (j = yStart; j <= yFinish; j++) {
-    mlib_f32 s0, s1, s2, s3;
-    DTYPE    *sp;
-
-    NEW_LINE(1);
-
-#pragma pipeloop(0)
-    for (i = 0; i < size; i++) {
-      GET_FILTER_XY();
-
-      sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
-      s0 = LUT(sp[0]);
-      s1 = LUT(sp[1]);
-      s2 = LUT(sp[srcYStride]);
-      s3 = LUT(sp[srcYStride + 1]);
-
-      PROCESS_4CH(s0, s1, s2, s3);
-
-      pbuff[i] = vis_fpack16(dd);
-      X += dX;
-      Y += dY;
-    }
-
-    if (nchan == 3) {
-      mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4((void*)pbuff, (void*)dl, size, colormap);
-    } else {
-      mlib_ImageColorTrue2IndexLine_U8_S16_4((void*)pbuff, (void*)dl, size, colormap);
-    }
-  }
-
-  if (pbuff != buff) {
-    mlib_free(pbuff);
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-const type_affine_i_fun mlib_AffineFunArr_bl_i[] = {
-  mlib_ImageAffine_u8_u8_i_bl,
-  mlib_ImageAffine_u8_u8_i_bl,
-  mlib_ImageAffine_u8_s16_i_bl,
-  mlib_ImageAffine_u8_s16_i_bl,
-  mlib_ImageAffine_s16_u8_i_bl,
-  mlib_ImageAffine_s16_u8_i_bl,
-  mlib_ImageAffine_s16_s16_i_bl,
-  mlib_ImageAffine_s16_s16_i_bl
-};
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_S16.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_S16.c	Fri May 13 11:31:05 2016 +0300
@@ -33,7 +33,6 @@
 
 #include "vis_proto.h"
 #include "mlib_image.h"
-#include "mlib_ImageColormap.h"
 #include "mlib_ImageCopy.h"
 #include "mlib_ImageAffine.h"
 #include "mlib_v_ImageFilters.h"
@@ -716,128 +715,3 @@
 }
 
 /***************************************************************/
-#define LUT(x)  plut[x]
-
-mlib_status FUN_NAME(s16_i)(mlib_affine_param *param,
-                            const void        *colormap)
-{
-  DECLAREVAR();
-  mlib_s32 nchan   = mlib_ImageGetLutChannels(colormap);
-  mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap);
-  mlib_d64 *plut = (mlib_d64*)mlib_ImageGetLutNormalTable(colormap) - lut_off;
-  mlib_s32 max_xsize = param -> max_xsize;
-  mlib_d64 buff[BUF_SIZE], *pbuff = buff;
-
-  srcYStride /= sizeof(DTYPE);
-
-  if (max_xsize > BUF_SIZE) {
-    pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
-  dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
-  dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
-  dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
-
-  for (j = yStart; j <= yFinish; j++) {
-    DTYPE *sp;
-
-    NEW_LINE(1);
-
-    deltax = DOUBLE_4U16(X, X, X, X);
-    deltay = DOUBLE_4U16(Y, Y, Y, Y);
-
-#pragma pipeloop(0)
-    for (i = 0; i < size; i++) {
-      sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
-      s0 = LUT(sp[0]);
-      s1 = LUT(sp[1]);
-      s2 = LUT(sp[srcYStride]);
-      s3 = LUT(sp[srcYStride + 1]);
-
-      BL_SUM();
-
-      pbuff[i] = dd;
-      X += dX;
-      Y += dY;
-    }
-
-    if (nchan == 3) {
-      mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((void*)pbuff, (void*)dl, size, colormap);
-    } else {
-      mlib_ImageColorTrue2IndexLine_S16_S16_4((void*)pbuff, (void*)dl, size, colormap);
-    }
-  }
-
-  if (pbuff != buff) {
-    mlib_free(pbuff);
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  DTYPE
-#define DTYPE mlib_u8
-
-mlib_status FUN_NAME(u8_i)(mlib_affine_param *param,
-                           const void        *colormap)
-{
-  DECLAREVAR();
-  mlib_s32 nchan   = mlib_ImageGetLutChannels(colormap);
-  mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap);
-  mlib_d64 *plut = (mlib_d64*)mlib_ImageGetLutNormalTable(colormap) - lut_off;
-  mlib_s32 max_xsize = param -> max_xsize;
-  mlib_d64 buff[BUF_SIZE], *pbuff = buff;
-
-  if (max_xsize > BUF_SIZE) {
-    pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
-
-    if (pbuff == NULL) return MLIB_FAILURE;
-  }
-
-  dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
-  dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
-  dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
-  dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
-
-  for (j = yStart; j <= yFinish; j++) {
-    DTYPE *sp;
-
-    NEW_LINE(1);
-
-    deltax = DOUBLE_4U16(X, X, X, X);
-    deltay = DOUBLE_4U16(Y, Y, Y, Y);
-
-#pragma pipeloop(0)
-    for (i = 0; i < size; i++) {
-      sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
-      s0 = LUT(sp[0]);
-      s1 = LUT(sp[1]);
-      s2 = LUT(sp[srcYStride]);
-      s3 = LUT(sp[srcYStride + 1]);
-
-      BL_SUM();
-
-      pbuff[i] = dd;
-      X += dX;
-      Y += dY;
-    }
-
-    if (nchan == 3) {
-      mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((void*)pbuff, (void*)dl, size, colormap);
-    } else {
-      mlib_ImageColorTrue2IndexLine_S16_U8_4((void*)pbuff, (void*)dl, size, colormap);
-    }
-  }
-
-  if (pbuff != buff) {
-    mlib_free(pbuff);
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_U16.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_U16.c	Fri May 13 11:31:05 2016 +0300
@@ -33,7 +33,6 @@
 
 #include "vis_proto.h"
 #include "mlib_image.h"
-#include "mlib_ImageColormap.h"
 #include "mlib_ImageCopy.h"
 #include "mlib_ImageAffine.h"
 #include "mlib_v_ImageFilters.h"
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,825 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-/*
- * FUNCTIONS
- *      mlib_ImageChannelExtract  - Copy the selected channels of the source
- *                                  image into the destination image
- *
- * SYNOPSIS
- *      mlib_status mlib_ImageChannelExtract(mlib_image *dst,
- *                                           mlib_image *src,
- *                                           mlib_s32   cmask);
- * ARGUMENT
- *    dst     Pointer to destination image.
- *    src     Pointer to source image.
- *    cmask   Source channel selection mask.
- *    The least significant bit (LSB) is corresponding to the
- *    last channel in the source image data.
- *    The bits with value 1 stand for the channels selected.
- *    If more than N channels are selected, the leftmost N
- *    channels are extracted, where N is the number of channels
- *    in the destination image.
- *
- * RESTRICTION
- *    The src and dst must have the same width, height and data type.
- *    The src and dst can have 1, 2, 3 or 4 channels.
- *    The src and dst can be either MLIB_BYTE, MLIB_SHORT,  MLIB_INT,
- *    MLIB_FLOAT or  MLIB_DOUBLE.
- *
- * DESCRIPTION
- *    Copy the selected channels of the source image into the
- *    destination image
- */
-
-#include <stdlib.h>
-#include "mlib_image.h"
-#include "mlib_ImageCheck.h"
-
-/***************************************************************/
-/* functions defined in mlib_ImageChannelExtract_1.c */
-
-void
-mlib_v_ImageChannelExtract_U8(mlib_u8  *src,   mlib_s32 slb,
-                              mlib_u8  *dst,   mlib_s32 dlb,
-                              mlib_s32 channels, mlib_s32 channeld,
-                              mlib_s32 width,   mlib_s32 height,
-                              mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16(mlib_u16 *src,    mlib_s32 slb,
-                               mlib_u16 *dst,    mlib_s32 dlb,
-                               mlib_s32 channels, mlib_s32 channeld,
-                               mlib_s32 width,    mlib_s32 height,
-                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S32(mlib_s32 *src,    mlib_s32 slb,
-                               mlib_s32 *dst,    mlib_s32 dlb,
-                               mlib_s32 channels, mlib_s32 channeld,
-                               mlib_s32 width,    mlib_s32 height,
-                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_D64(mlib_d64 *src,    mlib_s32 slb,
-                               mlib_d64 *dst,    mlib_s32 dlb,
-                               mlib_s32 channels, mlib_s32 channeld,
-                               mlib_s32 width,    mlib_s32 height,
-                               mlib_s32 cmask);
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_U8_2_1(mlib_u8  *sl,  mlib_s32 slb,
-                                       mlib_u8 *dl,  mlib_s32 dlb,
-                                       mlib_s32 width,   mlib_s32 height);
-
-void mlib_v_ImageChannelExtract_U8_3_2(mlib_u8  *sl,  mlib_s32 slb,
-                                       mlib_u8 *dl,  mlib_s32 dlb,
-                                       mlib_s32 width,   mlib_s32 height,
-                                       mlib_s32 count1);
-
-void mlib_v_ImageChannelExtract_U8_4_2(mlib_u8  *sl,  mlib_s32 slb,
-                                       mlib_u8  *dl,  mlib_s32 dlb,
-                                       mlib_s32 width,   mlib_s32 height,
-                                       mlib_s32 count1);
-
-void mlib_v_ImageChannelExtract_32_2_1(mlib_f32 *sl,  mlib_s32 slb,
-                                       mlib_f32 *dl,   mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height);
-
-void mlib_v_ImageChannelExtract_32_3_1(mlib_f32 *sl,  mlib_s32 slb,
-                                       mlib_f32 *dl,   mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height);
-
-void mlib_v_ImageChannelExtract_32_3_2(mlib_f32 *sp, mlib_s32 slb,
-                                       mlib_f32 *dp, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 deltac1);
-
-void mlib_v_ImageChannelExtract_32_4_1(mlib_f32 *sl,  mlib_s32 slb,
-                                       mlib_f32 *dl,   mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height);
-
-void mlib_v_ImageChannelExtract_32_4_2(mlib_f32 *sp, mlib_s32 slb,
-                                       mlib_f32 *dp, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 deltac1);
-
-void mlib_v_ImageChannelExtract_32_4_3(mlib_f32 *sl,  mlib_s32 slb,
-                                       mlib_f32 *dl,   mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32  mask_off);
-
-/***************************************************************/
-
-void
-mlib_v_ImageChannelExtract_U8_21_A8D1X8(mlib_u8  *src,
-                                        mlib_u8  *dst,
-                                        mlib_s32 dsize,
-                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_21_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                        mlib_u8  *dst,  mlib_s32 dlb,
-                                        mlib_s32 xsize, mlib_s32 ysize,
-                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_21_D1(mlib_u8  *src,
-                                    mlib_u8  *dst,
-                                    mlib_s32 dsize,
-                                    mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_21(mlib_u8  *src,  mlib_s32 slb,
-                                 mlib_u8  *dst,  mlib_s32 dlb,
-                                 mlib_s32 xsize, mlib_s32 ysize,
-                                 mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_31_A8D1X8(mlib_u8  *src,
-                                        mlib_u8  *dst,
-                                        mlib_s32 dsize,
-                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_31_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                        mlib_u8  *dst,  mlib_s32 dlb,
-                                        mlib_s32 xsize, mlib_s32 ysize,
-                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_31_D1(mlib_u8  *src,
-                                    mlib_u8  *dst,
-                                    mlib_s32 dsize,
-                                    mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_31(mlib_u8  *src,  mlib_s32 slb,
-                                 mlib_u8  *dst,  mlib_s32 dlb,
-                                 mlib_s32 xsize, mlib_s32 ysize,
-                                 mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_41_A8D1X8(mlib_u8  *src,
-                                        mlib_u8  *dst,
-                                        mlib_s32 dsize,
-                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_41_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                        mlib_u8  *dst,  mlib_s32 dlb,
-                                        mlib_s32 xsize, mlib_s32 ysize,
-                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_41_D1(mlib_u8  *src,
-                                    mlib_u8  *dst,
-                                    mlib_s32 dsize,
-                                    mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_U8_41(mlib_u8  *src,  mlib_s32 slb,
-                                 mlib_u8  *dst,  mlib_s32 dlb,
-                                 mlib_s32 xsize, mlib_s32 ysize,
-                                 mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_11_A8D1X4(mlib_s16 *src, mlib_s16 *dst,
-                                         mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_S16_21_A8D1X4(mlib_s16 *src,
-                                         mlib_s16 *dst,
-                                         mlib_s32 dsize,
-                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_21_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                         mlib_s16 *dst,  mlib_s32 dlb,
-                                         mlib_s32 xsize, mlib_s32 ysize,
-                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_21_D1(mlib_s16 *src,
-                                     mlib_s16 *dst,
-                                     mlib_s32 dsize,
-                                     mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_21(mlib_s16 *src,  mlib_s32 slb,
-                                  mlib_s16 *dst,  mlib_s32 dlb,
-                                  mlib_s32 xsize, mlib_s32 ysize,
-                                  mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_31_A8D1X4(mlib_s16 *src,
-                                         mlib_s16 *dst,
-                                         mlib_s32 dsize,
-                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_31_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                         mlib_s16 *dst,  mlib_s32 dlb,
-                                         mlib_s32 xsize, mlib_s32 ysize,
-                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_31_D1(mlib_s16 *src,
-                                     mlib_s16 *dst,
-                                     mlib_s32 dsize,
-                                     mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_31(mlib_s16 *src,  mlib_s32 slb,
-                                  mlib_s16 *dst,  mlib_s32 dlb,
-                                  mlib_s32 xsize, mlib_s32 ysize,
-                                  mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_41_A8D1X4(mlib_s16 *src,
-                                         mlib_s16 *dst,
-                                         mlib_s32 dsize,
-                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_41_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                         mlib_s16 *dst,  mlib_s32 dlb,
-                                         mlib_s32 xsize, mlib_s32 ysize,
-                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_41_D1(mlib_s16 *src,
-                                     mlib_s16 *dst,
-                                     mlib_s32 dsize,
-                                     mlib_s32 cmask);
-void
-mlib_v_ImageChannelExtract_S16_41(mlib_s16 *src,  mlib_s32 slb,
-                                  mlib_s16 *dst,  mlib_s32 dlb,
-                                  mlib_s32 xsize, mlib_s32 ysize,
-                                  mlib_s32 cmask);
-
-/***************************************************************/
-/* functions defined in mlib_ImageChannelExtract_43.c */
-
-void
-mlib_v_ImageChannelExtract_U8_43R_A8D1X8(mlib_u8  *src,
-                                         mlib_u8  *dst,
-                                         mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_U8_43R_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                         mlib_u8  *dst,  mlib_s32 dlb,
-                                         mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_U8_43R_D1(mlib_u8  *src,
-                                     mlib_u8  *dst,
-                                     mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_U8_43R(mlib_u8  *src,  mlib_s32 slb,
-                                  mlib_u8  *dst,  mlib_s32 dlb,
-                                  mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_S16_43R_A8D1X4(mlib_s16 *src,
-                                          mlib_s16 *dst,
-                                          mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_S16_43R_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                          mlib_s16 *dst,  mlib_s32 dlb,
-                                          mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_S16_43R_D1(mlib_s16 *src,
-                                      mlib_s16 *dst,
-                                      mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_S16_43R(mlib_s16 *src,  mlib_s32 slb,
-                                   mlib_s16 *dst,  mlib_s32 dlb,
-                                   mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_U8_43L_A8D1X8(mlib_u8  *src,
-                                         mlib_u8  *dst,
-                                         mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_U8_43L_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                         mlib_u8  *dst,  mlib_s32 dlb,
-                                         mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_U8_43L_D1(mlib_u8  *src,
-                                     mlib_u8  *dst,
-                                     mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_U8_43L(mlib_u8  *src,  mlib_s32 slb,
-                                  mlib_u8  *dst,  mlib_s32 dlb,
-                                  mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_S16_43L_A8D1X4(mlib_s16 *src,
-                                          mlib_s16 *dst,
-                                          mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_S16_43L_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                          mlib_s16 *dst,  mlib_s32 dlb,
-                                          mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelExtract_S16_43L_D1(mlib_s16 *src,
-                                      mlib_s16 *dst,
-                                      mlib_s32 dsize);
-void
-mlib_v_ImageChannelExtract_S16_43L(mlib_s16 *src,  mlib_s32 slb,
-                                   mlib_s16 *dst,  mlib_s32 dlb,
-                                   mlib_s32 xsize, mlib_s32 ysize);
-
-/***************************************************************/
-
-#ifdef MLIB_TEST
-mlib_status
-mlib_v_ImageChannelExtract(mlib_image *dst,
-                           mlib_image *src,
-                           mlib_s32   cmask)
-#else
-mlib_status
-mlib_ImageChannelExtract(mlib_image *dst,
-                         mlib_image *src,
-                         mlib_s32   cmask)
-#endif
-{
-  const mlib_s32  X8 = 0x7;
-  const mlib_s32  X4 = 0x3;
-  const mlib_s32  X2 = 0x1;
-  const mlib_s32  A8D1   = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_ONEDVECTOR;
-  const mlib_s32  A8D2X8 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH8X;
-  const mlib_s32  A8D2X4 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH4X;
-  const mlib_s32  A8D2X2 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH2X;
-  void      *sp;            /* pointer for pixel in src */
-  void      *dp;            /* pointer for pixel in dst */
-  mlib_s32  ncmask = 0;     /* normalized channel mask */
-  mlib_s32  channels;       /* number of channels for src */
-  mlib_s32  channeld;       /* number of channels for dst */
-  mlib_s32  width, height;  /* for src and dst */
-  mlib_s32  strides;        /* strides in bytes for src */
-  mlib_s32  strided;        /* strides in bytes for dst */
-  mlib_s32  flags;
-  mlib_s32  flagd;
-  mlib_s32  dsize;
-  int       delta0 = 0;     /* offset of first selected channel */
-  int       count1 = 0;     /* number of channels in first group */
-  int       i, bit1count = 0;
-
-  MLIB_IMAGE_CHECK(src);
-  MLIB_IMAGE_CHECK(dst);
-  MLIB_IMAGE_TYPE_EQUAL(src, dst);
-  MLIB_IMAGE_SIZE_EQUAL(src, dst);
-
-  channels = mlib_ImageGetChannels(src);
-  channeld = mlib_ImageGetChannels(dst);
-  width    = mlib_ImageGetWidth(src);
-  height   = mlib_ImageGetHeight(src);
-  strides  = mlib_ImageGetStride(src);
-  strided  = mlib_ImageGetStride(dst);
-  sp       = mlib_ImageGetData(src);
-  dp       = mlib_ImageGetData(dst);
-  flags    = mlib_ImageGetFlags(src);
-  flagd    = mlib_ImageGetFlags(dst);
-  dsize    = width * height;
-
-  /* normalize the cmask, and count the number of bit with value 1 */
-  for (i = (channels - 1); i >= 0; i--) {
-    if (((cmask & (1 << i)) != 0) && (bit1count < channeld)) {
-      ncmask += (1 << i);
-      bit1count++;
-    }
-  }
-
-  /* do not support the cases in which the number of selected channels is
-   * less than the nubmber of channels in the destination image */
-  if (bit1count < channeld) {
-    return MLIB_FAILURE;
-  }
-
-  if (channels == channeld) {
-#ifdef MLIB_TEST
-    mlib_v_ImageCopy(dst, src);
-#else
-    mlib_ImageCopy(dst, src);
-#endif
-    return MLIB_SUCCESS;
-  }
-
-  switch (mlib_ImageGetType(src)) {
-    case MLIB_BYTE:
-      if (channeld == 1) {
-        switch (channels) {
-          case 2:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X8)   == 0)) {
-              mlib_v_ImageChannelExtract_U8_21_A8D1X8((mlib_u8 *)sp,
-                                                      (mlib_u8 *)dp,
-                                                      dsize,
-                                                      ncmask);
-            }
-            else if (((flags & A8D2X8) == 0) &&
-                     ((flagd & A8D2X8) == 0)) {
-              mlib_v_ImageChannelExtract_U8_21_A8D2X8((mlib_u8 *)sp, strides,
-                                                      (mlib_u8 *)dp, strided,
-                                                      width, height,
-                                                      ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                     ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelExtract_U8_21_D1((mlib_u8 *)sp,
-                                                  (mlib_u8 *)dp,
-                                                  dsize,
-                                                  ncmask);
-            }
-            else {
-              mlib_v_ImageChannelExtract_U8_21((mlib_u8 *)sp, strides,
-                                               (mlib_u8 *)dp, strided,
-                                               width, height,
-                                               ncmask);
-            }
-            return MLIB_SUCCESS;
-
-          case 3:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X8)   == 0)) {
-              mlib_v_ImageChannelExtract_U8_31_A8D1X8((mlib_u8 *)sp,
-                                                      (mlib_u8 *)dp,
-                                                      dsize,
-                                                      ncmask);
-            }
-            else if (((flags & A8D2X8) == 0) &&
-                     ((flagd & A8D2X8) == 0)) {
-              mlib_v_ImageChannelExtract_U8_31_A8D2X8((mlib_u8 *)sp, strides,
-                                                      (mlib_u8 *)dp, strided,
-                                                      width, height,
-                                                      ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                     ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelExtract_U8_31_D1((mlib_u8 *)sp,
-                                                  (mlib_u8 *)dp,
-                                                  dsize,
-                                                  ncmask);
-            }
-            else {
-              mlib_v_ImageChannelExtract_U8_31((mlib_u8 *)sp, strides,
-                                               (mlib_u8 *)dp, strided,
-                                               width, height,
-                                               ncmask);
-            }
-            return MLIB_SUCCESS;
-
-          case 4:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X8)   == 0)) {
-              mlib_v_ImageChannelExtract_U8_41_A8D1X8((mlib_u8 *)sp,
-                                                      (mlib_u8 *)dp,
-                                                      dsize,
-                                                      ncmask);
-            }
-            else if (((flags & A8D2X8) == 0) &&
-                     ((flagd & A8D2X8) == 0)) {
-              mlib_v_ImageChannelExtract_U8_41_A8D2X8((mlib_u8 *)sp, strides,
-                                                      (mlib_u8 *)dp, strided,
-                                                      width, height,
-                                                      ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                     ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelExtract_U8_41_D1((mlib_u8 *)sp,
-                                                  (mlib_u8 *)dp,
-                                                  dsize,
-                                                  ncmask);
-            }
-            else {
-              mlib_v_ImageChannelExtract_U8_41((mlib_u8 *)sp, strides,
-                                               (mlib_u8 *)dp, strided,
-                                               width, height,
-                                               ncmask);
-            }
-            return MLIB_SUCCESS;
-
-          default:
-            return MLIB_FAILURE;
-        }
-      }
-      else if ((channels == 4) && (channeld == 3) && (ncmask == 7)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X8)   == 0)) {
-          mlib_v_ImageChannelExtract_U8_43R_A8D1X8((mlib_u8 *)sp,
-                                                   (mlib_u8 *)dp,
-                                                   dsize);
-        }
-        else if (((flags & A8D2X8) == 0) &&
-                 ((flagd & A8D2X8) == 0)) {
-          mlib_v_ImageChannelExtract_U8_43R_A8D2X8((mlib_u8 *)sp, strides,
-                                                   (mlib_u8 *)dp, strided,
-                                                   width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                 ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-          mlib_v_ImageChannelExtract_U8_43R_D1((mlib_u8 *)sp,
-                                               (mlib_u8 *)dp,
-                                               dsize);
-        }
-        else {
-          mlib_v_ImageChannelExtract_U8_43R((mlib_u8 *)sp, strides,
-                                            (mlib_u8 *)dp, strided,
-                                            width, height);
-        }
-        return MLIB_SUCCESS;
-      }
-      else if ((channels == 4) && (channeld == 3) && (ncmask == 14)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X8)   == 0)) {
-          mlib_v_ImageChannelExtract_U8_43L_A8D1X8((mlib_u8 *)sp,
-                                                   (mlib_u8 *)dp,
-                                                   dsize);
-        }
-        else if (((flags & A8D2X8) == 0) &&
-                 ((flagd & A8D2X8) == 0)) {
-          mlib_v_ImageChannelExtract_U8_43L_A8D2X8((mlib_u8 *)sp, strides,
-                                                   (mlib_u8 *)dp, strided,
-                                                   width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                 ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-          mlib_v_ImageChannelExtract_U8_43L_D1((mlib_u8 *)sp,
-                                               (mlib_u8 *)dp,
-                                               dsize);
-        }
-        else {
-          mlib_v_ImageChannelExtract_U8_43L((mlib_u8 *)sp, strides,
-                                            (mlib_u8 *)dp, strided,
-                                            width, height);
-        }
-        return MLIB_SUCCESS;
-      }
-      break;
-
-    case MLIB_SHORT:
-      if (channeld == 1) {
-        switch (channels) {
-          case 2:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X4)   == 0)) {
-              mlib_v_ImageChannelExtract_S16_21_A8D1X4((mlib_s16 *)sp,
-                                                       (mlib_s16 *)dp,
-                                                       dsize,
-                                                       ncmask);
-            }
-            else if (((flags & A8D2X4) == 0) &&
-                     ((flagd & A8D2X4) == 0)) {
-              mlib_v_ImageChannelExtract_S16_21_A8D2X4((mlib_s16 *)sp, strides,
-                                                       (mlib_s16 *)dp, strided,
-                                                       width, height,
-                                                       ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                     ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelExtract_S16_21_D1((mlib_s16 *)sp,
-                                                   (mlib_s16 *)dp,
-                                                   dsize,
-                                                   ncmask);
-            }
-            else {
-              mlib_v_ImageChannelExtract_S16_21((mlib_s16 *)sp, strides,
-                                                (mlib_s16 *)dp, strided,
-                                                width, height,
-                                                ncmask);
-            }
-            return MLIB_SUCCESS;
-
-          case 3:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X4)   == 0)) {
-              mlib_v_ImageChannelExtract_S16_31_A8D1X4((mlib_s16 *)sp,
-                                                       (mlib_s16 *)dp,
-                                                       dsize,
-                                                       ncmask);
-            }
-            else if (((flags & A8D2X4) == 0) &&
-                     ((flagd & A8D2X4) == 0)) {
-              mlib_v_ImageChannelExtract_S16_31_A8D2X4((mlib_s16 *)sp, strides,
-                                                       (mlib_s16 *)dp, strided,
-                                                       width, height,
-                                                       ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                     ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelExtract_S16_31_D1((mlib_s16 *)sp,
-                                                   (mlib_s16 *)dp,
-                                                   dsize,
-                                                   ncmask);
-            }
-            else {
-              mlib_v_ImageChannelExtract_S16_31((mlib_s16 *)sp, strides,
-                                                (mlib_s16 *)dp, strided,
-                                                width, height,
-                                                ncmask);
-            }
-            return MLIB_SUCCESS;
-
-          case 4:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X4)   == 0)) {
-              mlib_v_ImageChannelExtract_S16_41_A8D1X4((mlib_s16 *)sp,
-                                                       (mlib_s16 *)dp,
-                                                       dsize,
-                                                       ncmask);
-            }
-            else if (((flags & A8D2X4) == 0) &&
-                     ((flagd & A8D2X4) == 0)) {
-              mlib_v_ImageChannelExtract_S16_41_A8D2X4((mlib_s16 *)sp, strides,
-                                                       (mlib_s16 *)dp, strided,
-                                                       width, height,
-                                                       ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                     ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelExtract_S16_41_D1((mlib_s16 *)sp,
-                                                   (mlib_s16 *)dp,
-                                                   dsize,
-                                                   ncmask);
-            }
-            else {
-              mlib_v_ImageChannelExtract_S16_41((mlib_s16 *)sp, strides,
-                                                (mlib_s16 *)dp, strided,
-                                                width, height,
-                                                ncmask);
-            }
-            return MLIB_SUCCESS;
-          default:
-            return MLIB_FAILURE;
-        }
-      }
-      else if ((channels == 4) && (channeld == 3) && (ncmask == 7)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X4)   == 0)) {
-          mlib_v_ImageChannelExtract_S16_43R_A8D1X4((mlib_s16 *)sp,
-                                                    (mlib_s16 *)dp,
-                                                    dsize);
-        }
-        else if (((flags & A8D2X4) == 0) &&
-                 ((flagd & A8D2X4) == 0)) {
-          mlib_v_ImageChannelExtract_S16_43R_A8D2X4((mlib_s16 *)sp, strides,
-                                                    (mlib_s16 *)dp, strided,
-                                                    width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                 ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-          mlib_v_ImageChannelExtract_S16_43R_D1((mlib_s16 *)sp,
-                                                (mlib_s16 *)dp,
-                                                dsize);
-        }
-        else {
-          mlib_v_ImageChannelExtract_S16_43R((mlib_s16 *)sp, strides,
-                                             (mlib_s16 *)dp, strided,
-                                             width, height);
-        }
-        return MLIB_SUCCESS;
-      }
-      else if ((channels == 4) && (channeld == 3) && (ncmask == 14)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X4)   == 0)) {
-          mlib_v_ImageChannelExtract_S16_43L_A8D1X4((mlib_s16 *)sp,
-                                                    (mlib_s16 *)dp,
-                                                    dsize);
-        }
-        else if (((flags & A8D2X4) == 0) &&
-                 ((flagd & A8D2X4) == 0)) {
-          mlib_v_ImageChannelExtract_S16_43L_A8D2X4((mlib_s16 *)sp, strides,
-                                                    (mlib_s16 *)dp, strided,
-                                                    width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                 ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-          mlib_v_ImageChannelExtract_S16_43L_D1((mlib_s16 *)sp,
-                                                (mlib_s16 *)dp,
-                                                dsize);
-        }
-        else {
-          mlib_v_ImageChannelExtract_S16_43L((mlib_s16 *)sp, strides,
-                                             (mlib_s16 *)dp, strided,
-                                             width, height);
-        }
-        return MLIB_SUCCESS;
-      }
-      break;
-
-  }
-
-/***************************************************************/
-  /* From C version */
-
-  for (i = (channels - 1); i >= 0; i--) {
-    if (!(ncmask & (1 << i))) delta0++;
-    else break;
-  }
-  for (; i >= 0; i--) {
-    if (ncmask & (1 << i)) count1++;
-    else break;
-  }
-
-  switch (mlib_ImageGetType(src)) {
-    case MLIB_BYTE:
-      {
-        mlib_u8 *sl = (mlib_u8 *)sp + delta0;
-        mlib_u8 *dl = (mlib_u8 *)dp;
-
-        switch (channels*10 + channeld) {
-          case 32:
-            mlib_v_ImageChannelExtract_U8_3_2(sl, strides, dl, strided, width, height, count1);
-            return MLIB_SUCCESS;
-
-          case 42:
-            if (ncmask == 0xA || ncmask == 0x5) { /* mask 1010 or 0101 */
-              mlib_v_ImageChannelExtract_U8_2_1(sl, strides, dl, strided, 2*width, height);
-              return MLIB_SUCCESS;
-            }
-            mlib_v_ImageChannelExtract_U8_4_2(sl, strides, dl, strided, width, height, count1);
-            return MLIB_SUCCESS;
-
-          case 43:
-            mlib_v_ImageChannelExtract_U8((mlib_u8 *)sp, strides,
-                                          (mlib_u8 *)dp, strided,
-                                          channels, channeld,
-                                          width, height,
-                                          ncmask);
-            return MLIB_SUCCESS;
-
-          default: return MLIB_FAILURE;
-        }
-      }
-
-    case MLIB_SHORT:
-      mlib_v_ImageChannelExtract_S16((mlib_u16 *)sp, strides,
-                                     (mlib_u16 *)dp, strided,
-                                     channels,  channeld,
-                                     width, height,
-                                     ncmask);
-      break;
-
-    case MLIB_INT:
-    case MLIB_FLOAT:
-      {
-        mlib_f32 *sl = (mlib_f32 *)sp + delta0;
-        mlib_f32 *dl = (mlib_f32 *)dp;
-        strides /= 4;
-        strided /= 4;
-
-        switch (channels*10 + channeld) {
-          case 21:
-            mlib_v_ImageChannelExtract_32_2_1(sl, strides, dl, strided, width, height);
-            return MLIB_SUCCESS;
-
-          case 31:
-            mlib_v_ImageChannelExtract_32_3_1(sl, strides, dl, strided, width, height);
-            return MLIB_SUCCESS;
-
-          case 32:
-            mlib_v_ImageChannelExtract_32_3_2(sl, strides, dl, strided, width, height, count1);
-            return MLIB_SUCCESS;
-
-          case 41:
-            mlib_v_ImageChannelExtract_32_4_1(sl, strides, dl, strided, width, height);
-            return MLIB_SUCCESS;
-
-          case 42:
-            if (ncmask == 0xA || ncmask == 0x5) { /* mask 1010 or 0101 */
-              mlib_v_ImageChannelExtract_32_2_1(sl, strides, dl, strided, 2*width, height);
-            } else {
-              mlib_v_ImageChannelExtract_32_4_2(sl, strides, dl, strided, width, height, count1);
-            }
-            return MLIB_SUCCESS;
-
-          case 43:
-            mlib_v_ImageChannelExtract_32_4_3(sl, strides, dl, strided, width, height, count1);
-            return MLIB_SUCCESS;
-
-          default:
-            return MLIB_FAILURE;
-        }
-      }
-    case MLIB_DOUBLE:
-      mlib_v_ImageChannelExtract_D64((mlib_d64 *)sp, strides,
-                                     (mlib_d64 *)dp, strided,
-                                     channels,  channeld,
-                                     width, height,
-                                     ncmask);
-      break;
-
-    case MLIB_BIT:
-    default:
-      return MLIB_FAILURE;  /* MLIB_BIT is not supported here */
-  }
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.h	Fri May 13 11:31:05 2016 +0300
@@ -32,348 +32,29 @@
 extern "C" {
 #endif /* __cplusplus */
 
-void mlib_v_ImageChannelExtract_U8_21_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize,
-                                             mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_U8_21_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize,
-                                             mlib_s32      cmask);
-
 void mlib_v_ImageChannelExtract_U8_21_D1(const mlib_u8 *src,
                                          mlib_u8       *dst,
                                          mlib_s32      dsize,
                                          mlib_s32      cmask);
 
-void mlib_v_ImageChannelExtract_U8_21(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize,
-                                      mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_U8_31_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize,
-                                             mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_U8_31_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize,
-                                             mlib_s32      cmask);
-
 void mlib_v_ImageChannelExtract_U8_31_D1(const mlib_u8 *src,
                                          mlib_u8       *dst,
                                          mlib_s32      dsize,
                                          mlib_s32      cmask);
 
-void mlib_v_ImageChannelExtract_U8_31(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize,
-                                      mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_U8_41_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize,
-                                             mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_U8_41_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize,
-                                             mlib_s32      cmask);
-
 void mlib_v_ImageChannelExtract_U8_41_D1(const mlib_u8 *src,
                                          mlib_u8       *dst,
                                          mlib_s32      dsize,
                                          mlib_s32      cmask);
 
-void mlib_v_ImageChannelExtract_U8_41(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize,
-                                      mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_S16_21_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize,
-                                              mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_21_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize,
-                                              mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_21_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize,
-                                          mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_21(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize,
-                                       mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_31_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize,
-                                              mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_31_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize,
-                                              mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_31_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize,
-                                          mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_31(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize,
-                                       mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_41_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize,
-                                              mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_41_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize,
-                                              mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_41_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize,
-                                          mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_S16_41(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize,
-                                       mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dsize);
-
-void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src,
-                                              mlib_s32      slb,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dlb,
-                                              mlib_s32      xsize,
-                                              mlib_s32      ysize);
-
-void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src,
-                                          mlib_u8       *dst,
-                                          mlib_s32      dsize);
-
-void mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dst,
-                                       mlib_s32      dlb,
-                                       mlib_s32      xsize,
-                                       mlib_s32      ysize);
-
-void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dsize);
-
-void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src,
-                                               mlib_s32       slb,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dlb,
-                                               mlib_s32       xsize,
-                                               mlib_s32       ysize);
-
-void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src,
-                                           mlib_s16       *dst,
-                                           mlib_s32       dsize);
-
-void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src,
-                                        mlib_s32       slb,
-                                        mlib_s16       *dst,
-                                        mlib_s32       dlb,
-                                        mlib_s32       xsize,
-                                        mlib_s32       ysize);
-
-void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dsize);
-
-void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src,
-                                              mlib_s32      slb,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dlb,
-                                              mlib_s32      xsize,
-                                              mlib_s32      ysize);
-
 void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
                                           mlib_u8       *dst,
                                           mlib_s32      dsize);
 
-void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dst,
-                                       mlib_s32      dlb,
-                                       mlib_s32      xsize,
-                                       mlib_s32      ysize);
-
-void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dsize);
-
-void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src,
-                                               mlib_s32       slb,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dlb,
-                                               mlib_s32       xsize,
-                                               mlib_s32       ysize);
-
 void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
                                            mlib_s16       *dst,
                                            mlib_s32       dsize);
 
-void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src,
-                                        mlib_s32       slb,
-                                        mlib_s16       *dst,
-                                        mlib_s32       dlb,
-                                        mlib_s32       xsize,
-                                        mlib_s32       ysize);
-
-void mlib_v_ImageChannelExtract_U8_2_1(const mlib_u8 *sl,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dl,
-                                       mlib_s32      dlb,
-                                       mlib_s32      width,
-                                       mlib_s32      height);
-
-void mlib_v_ImageChannelExtract_U8_3_2(const mlib_u8 *sl,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dl,
-                                       mlib_s32      dlb,
-                                       mlib_s32      width,
-                                       mlib_s32      height,
-                                       mlib_s32      count1);
-
-void mlib_v_ImageChannelExtract_U8_4_2(const mlib_u8 *sl,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dl,
-                                       mlib_s32      dlb,
-                                       mlib_s32      width,
-                                       mlib_s32      height,
-                                       mlib_s32      count1);
-
-void mlib_v_ImageChannelExtract_32_2_1(const mlib_f32 *sp,
-                                       mlib_s32       slb,
-                                       mlib_f32       *dp,
-                                       mlib_s32       dlb,
-                                       mlib_s32       width,
-                                       mlib_s32       height);
-
-void mlib_v_ImageChannelExtract_32_3_1(const mlib_f32 *sl,
-                                       mlib_s32       slb,
-                                       mlib_f32       *dl,
-                                       mlib_s32       dlb,
-                                       mlib_s32       width,
-                                       mlib_s32       height);
-
-void mlib_v_ImageChannelExtract_32_3_2(const mlib_f32 *sl,
-                                       mlib_s32       slb,
-                                       mlib_f32       *dl,
-                                       mlib_s32       dlb,
-                                       mlib_s32       width,
-                                       mlib_s32       height,
-                                       mlib_s32       count1);
-
-void mlib_v_ImageChannelExtract_32_4_1(const mlib_f32 *sp,
-                                       mlib_s32       slb,
-                                       mlib_f32       *dp,
-                                       mlib_s32       dlb,
-                                       mlib_s32       width,
-                                       mlib_s32       height);
-
-void mlib_v_ImageChannelExtract_32_4_2(const mlib_f32 *sl,
-                                       mlib_s32       slb,
-                                       mlib_f32       *dl,
-                                       mlib_s32       dlb,
-                                       mlib_s32       width,
-                                       mlib_s32       height,
-                                       mlib_s32       count1);
-
-void mlib_v_ImageChannelExtract_32_4_3(const mlib_f32 *sl,
-                                       mlib_s32       slb,
-                                       mlib_f32       *dl,
-                                       mlib_s32       dlb,
-                                       mlib_s32       width,
-                                       mlib_s32       height,
-                                       mlib_s32       count1);
-
-void mlib_v_ImageChannelExtract_U8(const mlib_u8 *src,
-                                   mlib_s32      slb,
-                                   mlib_u8       *dst,
-                                   mlib_s32      dlb,
-                                   mlib_s32      channels,
-                                   mlib_s32      channeld,
-                                   mlib_s32      width,
-                                   mlib_s32      height,
-                                   mlib_s32      cmask);
-
-void mlib_v_ImageChannelExtract_S16(const mlib_u16 *src,
-                                    mlib_s32       slb,
-                                    mlib_u16       *dst,
-                                    mlib_s32       dlb,
-                                    mlib_s32       channels,
-                                    mlib_s32       channeld,
-                                    mlib_s32       width,
-                                    mlib_s32       height,
-                                    mlib_s32       cmask);
-
-void mlib_v_ImageChannelExtract_D64(const mlib_d64 *src,
-                                    mlib_s32       slb,
-                                    mlib_d64       *dst,
-                                    mlib_s32       dlb,
-                                    mlib_s32       channels,
-                                    mlib_s32       channeld,
-                                    mlib_s32       width,
-                                    mlib_s32       height,
-                                    mlib_s32       cmask);
-
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_1.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_1.c	Fri May 13 11:31:05 2016 +0300
@@ -29,30 +29,9 @@
  * FILENAME: mlib_ImageChannelExtract_1.c
  *
  * FUNCTIONS
- *      mlib_v_ImageChannelExtract_U8_21_A8D1X8
- *      mlib_v_ImageChannelExtract_U8_21_A8D2X8
  *      mlib_v_ImageChannelExtract_U8_21_D1
- *      mlib_v_ImageChannelExtract_U8_21
- *      mlib_v_ImageChannelExtract_U8_31_A8D1X8
- *      mlib_v_ImageChannelExtract_U8_31_A8D2X8
  *      mlib_v_ImageChannelExtract_U8_31_D1
- *      mlib_v_ImageChannelExtract_U8_31
- *      mlib_v_ImageChannelExtract_U8_41_A8D1X8
- *      mlib_v_ImageChannelExtract_U8_41_A8D2X8
  *      mlib_v_ImageChannelExtract_U8_41_D1
- *      mlib_v_ImageChannelExtract_U8_41
- *      mlib_v_ImageChannelExtract_S16_21_A8D1X4
- *      mlib_v_ImageChannelExtract_S16_21_A8D2X4
- *      mlib_v_ImageChannelExtract_S16_21_D1
- *      mlib_v_ImageChannelExtract_S16_21
- *      mlib_v_ImageChannelExtract_S16_31_A8D1X4
- *      mlib_v_ImageChannelExtract_S16_31_A8D2X4
- *      mlib_v_ImageChannelExtract_S16_31_D1
- *      mlib_v_ImageChannelExtract_S16_31
- *      mlib_v_ImageChannelExtract_S16_41_A8D1X4
- *      mlib_v_ImageChannelExtract_S16_41_A8D2X4
- *      mlib_v_ImageChannelExtract_S16_41_D1
- *      mlib_v_ImageChannelExtract_S16_41
  *
  * ARGUMENT
  *      src    pointer to source image data
@@ -95,100 +74,6 @@
 
 /***************************************************************/
 /* extract one channel from a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * xsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_U8_21_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize,
-                                             mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1;
-  mlib_d64 sda, sdb, sdc, sdd;
-  mlib_d64 dd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  if (cmask == 2) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      CHANNELEXTRACT_U8_21L(sd0, sd1, dd);
-      *dp++ = dd;
-    }
-  }
-  else {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      CHANNELEXTRACT_U8_21R(sd0, sd1, dd);
-      *dp++ = dd;
-    }
-  }
-}
-
-/***************************************************************/
-/* extract one channel from a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * xsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_U8_21_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize,
-                                             mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0, sd1;
-  mlib_d64 sda, sdb, sdc, sdd;
-  mlib_d64 dd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  if (cmask == 2) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        CHANNELEXTRACT_U8_21L(sd0, sd1, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        CHANNELEXTRACT_U8_21R(sd0, sd1, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
-/* extract one channel from a 2-channel image.
  */
 
 void mlib_v_ImageChannelExtract_U8_21_D1(const mlib_u8 *src,
@@ -415,32 +300,6 @@
 }
 
 /***************************************************************/
-/* extract one channel from a 2-channel image.
- */
-
-void mlib_v_ImageChannelExtract_U8_21(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize,
-                                      mlib_s32      cmask)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_U8_21_D1(sa, da, xsize, cmask);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
 #define CHANNELEXTRACT_U8_31L(sd0, sd1, sd2, dd)                \
   sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));        \
   sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));        \
@@ -468,119 +327,6 @@
   dd  = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde))
 
 /***************************************************************/
-void mlib_v_ImageChannelExtract_U8_31_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize,
-                                             mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1, sd2;
-  mlib_d64 sda, sdb, sdc, sdd, sde;
-  mlib_d64 dd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  if (cmask == 4) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      CHANNELEXTRACT_U8_31L(sd0, sd1, sd2, dd);
-      *dp++ = dd;
-    }
-  }
-  else if (cmask == 2) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      CHANNELEXTRACT_U8_31M(sd0, sd1, sd2, dd);
-      *dp++ = dd;
-    }
-  }
-  else {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      CHANNELEXTRACT_U8_31R(sd0, sd1, sd2, dd);
-      *dp++ = dd;
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_U8_31_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize,
-                                             mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0, sd1, sd2;
-  mlib_d64 sda, sdb, sdc, sdd, sde;
-  mlib_d64 dd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  if (cmask == 4) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_U8_31L(sd0, sd1, sd2, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (cmask == 2) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_U8_31M(sd0, sd1, sd2, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_U8_31R(sd0, sd1, sd2, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
 void mlib_v_ImageChannelExtract_U8_31_D1(const mlib_u8 *src,
                                          mlib_u8       *dst,
                                          mlib_s32      dsize,
@@ -932,29 +678,6 @@
 }
 
 /***************************************************************/
-void mlib_v_ImageChannelExtract_U8_31(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize,
-                                      mlib_s32      cmask)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_U8_31_D1(sa, da, xsize, cmask);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
 #define CHANNELEXTRACT_U8_41L(sd0, sd1, sd2, sd3, dd)           \
   sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2));        \
   sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2));        \
@@ -995,152 +718,6 @@
   dd  = vis_fpmerge(vis_read_lo(sde), vis_read_lo(sdf))
 
 /***************************************************************/
-void mlib_v_ImageChannelExtract_U8_41_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize,
-                                             mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1, sd2, sd3;
-  mlib_d64 sda, sdb, sdc, sdd, sde, sdf;
-  mlib_d64 dd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  if (cmask == 8) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_U8_41L(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-  else if (cmask == 4) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_U8_41ML(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-  else if (cmask == 2) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_U8_41MR(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-  else {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 8; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_U8_41R(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_U8_41_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize,
-                                             mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0, sd1, sd2, sd3;
-  mlib_d64 sda, sdb, sdc, sdd, sde, sdf;
-  mlib_d64 dd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  if (cmask == 8) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_U8_41L(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (cmask == 4) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_U8_41ML(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (cmask == 2) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_U8_41MR(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 8; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_U8_41R(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
 void mlib_v_ImageChannelExtract_U8_41_D1(const mlib_u8 *src,
                                          mlib_u8       *dst,
                                          mlib_s32      dsize,
@@ -1632,1560 +1209,3 @@
 }
 
 /***************************************************************/
-void mlib_v_ImageChannelExtract_U8_41(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize,
-                                      mlib_s32      cmask)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_U8_41_D1(sa, da, xsize, cmask);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_21L(sd0, sd1, dd)                    \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd1));        \
-  sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd1));        \
-  sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_21R(sd0, sd1, dd)                    \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd1));        \
-  sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd1));        \
-  sdc = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-/* extract one channel from a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * dsize is multiple of 4.
- */
-
-void mlib_v_ImageChannelExtract_S16_21_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize,
-                                              mlib_s32       cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  if (cmask == 2) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      CHANNELEXTRACT_S16_21L(sd0, sd1, dd);
-      *dp++ = dd;
-    }
-  }
-  else {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      CHANNELEXTRACT_S16_21R(sd0, sd1, dd);
-      *dp++ = dd;
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_21_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize,
-                                              mlib_s32       cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0, sd1;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  if (cmask == 2) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        CHANNELEXTRACT_S16_21L(sd0, sd1, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        CHANNELEXTRACT_S16_21R(sd0, sd1, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_21_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize,
-                                          mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *dend, *dend2;                             /* end points in dst */
-  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
-  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd0, dd1;
-  mlib_s32 soff;                                      /* offset of address in src */
-  mlib_s32 doff;                                      /* offset of address in dst */
-  mlib_s32 off;                                       /* offset of dst over src */
-  mlib_s32 emask;                                     /* edge mask */
-  mlib_s32 i, n;
-
-  sa = (void *)src;
-  da = dst;
-
-  /* prepare the source address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
-  doff = ((mlib_addr) da & 7);
-  dend = da + dsize - 1;
-  dend2 = dend - 3;
-
-  /* calculate the src's offset over dst */
-  if (cmask == 2) {
-    off = (soff / 4) * 2 - doff;
-  }
-  else {
-    off = ((soff + 3) / 4) * 2 - doff;
-  }
-
-  if (((cmask == 2) && (soff % 4 == 0)) || ((cmask == 1) && (soff % 4 != 0))) { /* extract even words */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_21L(sd0, sd1, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          CHANNELEXTRACT_S16_21L(sd0, sd1, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        CHANNELEXTRACT_S16_21L(sd0, sd1, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 16 bytes */
-        sd2 = *sp++;
-        sd3 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_21L(sd2, sd3, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 32 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_21L(sd0, sd1, dd0);
-        CHANNELEXTRACT_S16_21L(sd2, sd3, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd2 = *sp++;
-          sd3 = *sp++;
-          CHANNELEXTRACT_S16_21L(sd2, sd3, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_21L(sd2, sd3, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-  else {                                    /* extract odd words */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes, don't care the garbage at the start point */
-      sd0 = *sp++;
-      sd1 = *sp++;
-
-      /* extract and store 8 bytes */
-      CHANNELEXTRACT_S16_21R(sd0, sd1, dd0);
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          CHANNELEXTRACT_S16_21R(sd0, sd1, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        CHANNELEXTRACT_S16_21R(sd0, sd1, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 16 bytes */
-        sd2 = *sp++;
-        sd3 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_21R(sd2, sd3, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 32 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_21R(sd0, sd1, dd0);
-        CHANNELEXTRACT_S16_21R(sd2, sd3, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd2 = *sp++;
-          sd3 = *sp++;
-          CHANNELEXTRACT_S16_21R(sd2, sd3, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_21R(sd2, sd3, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_21(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize,
-                                       mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_S16_21_D1(sa, da, xsize, cmask);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd)               \
-  /* extract the left channel */                                \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));        \
-  sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));        \
-  sdc = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd)               \
-  /* extract the middle channel */                              \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));        \
-  sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));        \
-  sdc = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd)               \
-  /* extract the right channel */                               \
-  sda = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));        \
-  sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));        \
-  sdc = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_31_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize,
-                                              mlib_s32       cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1, sd2;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  if (cmask == 4) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd);
-      *dp++ = dd;
-    }
-  }
-  else if (cmask == 2) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd);
-      *dp++ = dd;
-    }
-  }
-  else {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd);
-      *dp++ = dd;
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_31_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize,
-                                              mlib_s32       cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0, sd1, sd2;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  if (cmask == 4) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (cmask == 2) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_31_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize,
-                                          mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *dend, *dend2;                             /* end points in dst */
-  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
-  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
-  mlib_d64 sd0, sd1, sd2;                             /* 8-byte source data */
-  mlib_d64 sd3, sd4, sd5;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd0, dd1;
-  mlib_s32 soff;                                      /* offset of address in src */
-  mlib_s32 doff;                                      /* offset of address in dst */
-  mlib_s32 off;                                       /* offset of src over dst */
-  mlib_s32 emask;                                     /* edge mask */
-  mlib_s32 i, n;
-
-  sa = (void *)src;
-  da = dst;
-
-  /* prepare the source address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
-  doff = ((mlib_addr) da & 7);
-  dend = da + dsize - 1;
-  dend2 = dend - 3;
-
-  /* calculate the src's offset over dst */
-  if (cmask == 4) {
-    off = (soff / 6) * 2 - doff;
-  }
-  else if (cmask == 2) {
-    off = ((soff + 2) / 6) * 2 - doff;
-  }
-  else {
-    off = ((soff + 4) / 6) * 2 - doff;
-  }
-
-  if (((cmask == 4) && (soff % 6 == 0)) ||
-      ((cmask == 2) && (soff % 6 == 4)) ||
-      ((cmask == 1) && (soff % 6 == 2))) { /* extract left channel */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 48 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0);
-        CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd3 = *sp++;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-  else if (((cmask == 4) && (soff % 6 == 2)) ||
-           ((cmask == 2) && (soff % 6 == 0)) ||
-           ((cmask == 1) && (soff % 6 == 4))) {
-    /* extract middle channel */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 48 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0);
-        CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd3 = *sp++;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-  else {                                    /* extract right channel */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 48 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0);
-        CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd3 = *sp++;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_31(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize,
-                                       mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_S16_31_D1(sa, da, xsize, cmask);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_41L(sd0, sd1,  sd2, sd3, dd)         \
-  /* extract the left channel */                                \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2));        \
-  sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_hi(sd3));        \
-  sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd)         \
-  /* extract the middle left channel */                         \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2));        \
-  sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_hi(sd3));        \
-  sdc = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd)         \
-  /* extract the middle right channel */                        \
-  sda = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2));        \
-  sdb = vis_fpmerge(vis_read_lo(sd1), vis_read_lo(sd3));        \
-  sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-#define CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd)          \
-  /* extract the right channel */                               \
-  sda = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2));        \
-  sdb = vis_fpmerge(vis_read_lo(sd1), vis_read_lo(sd3));        \
-  sdc = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sdb));        \
-  dd  = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc))
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_41_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize,
-                                              mlib_s32       cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1, sd2, sd3;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  if (cmask == 8) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-  else if (cmask == 4) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-  else if (cmask == 2) {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-  else {
-#pragma pipeloop(0)
-    for (i = 0; i < dsize / 4; i++) {
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-      CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd);
-      *dp++ = dd;
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_41_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize,
-                                              mlib_s32       cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0, sd1, sd2, sd3;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  if (cmask == 8) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (cmask == 4) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (cmask == 2) {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else {
-    for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < xsize / 4; i++) {
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd);
-        *dp++ = dd;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_41_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize,
-                                          mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *dend, *dend2;                             /* end points in dst */
-  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
-  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
-  mlib_d64 sd4, sd5, sd6, sd7;
-  mlib_d64 sda, sdb, sdc;
-  mlib_d64 dd0, dd1;
-  mlib_s32 soff;                                      /* offset of address in src */
-  mlib_s32 doff;                                      /* offset of address in dst */
-  mlib_s32 off;                                       /* offset of src over dst */
-  mlib_s32 emask;                                     /* edge mask */
-  mlib_s32 i, n;
-
-  sa = (void *)src;
-  da = dst;
-
-  /* prepare the source address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
-  doff = ((mlib_addr) da & 7);
-  dend = da + dsize - 1;
-  dend2 = dend - 3;
-
-  /* calculate the src's offset over dst */
-  if (cmask == 8) {
-    off = (soff / 8) * 2 - doff;
-  }
-  else if (cmask == 4) {
-    off = ((soff + 2) / 8) * 2 - doff;
-  }
-  else if (cmask == 2) {
-    off = ((soff + 4) / 8) * 2 - doff;
-  }
-  else {
-    off = ((soff + 6) / 8) * 2 - doff;
-  }
-
-  if (((cmask == 8) && (soff == 0)) ||
-      ((cmask == 4) && (soff == 6)) ||
-      ((cmask == 2) && (soff == 4)) ||
-      ((cmask == 1) && (soff == 2))) { /* extract left channel */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          sd3 = *sp++;
-          CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 48 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0);
-        CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          sd6 = *sp++;
-          sd7 = *sp++;
-          CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-        CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-  else if (((cmask == 8) && (soff == 2)) ||
-           ((cmask == 4) && (soff == 0)) ||
-           ((cmask == 2) && (soff == 6)) ||
-           ((cmask == 1) && (soff == 4))) { /* extract middle left channel */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          sd3 = *sp++;
-          CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 48 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0);
-        CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          sd6 = *sp++;
-          sd7 = *sp++;
-          CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-        CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-  else if (((cmask == 8) && (soff == 4)) ||
-           ((cmask == 4) && (soff == 2)) ||
-           ((cmask == 2) && (soff == 0)) ||
-           ((cmask == 1) && (soff == 6))) { /* extract middle right channel */
-
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          sd3 = *sp++;
-          CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-      else {
-        /* load 48 bytes */
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0);
-        CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          sd6 = *sp++;
-          sd7 = *sp++;
-          CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-        CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-  else {                                    /* extract right channel */
-    if (off == 0) {                         /* src and dst have same alignment */
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      /* load 16 bytes */
-      sd0 = *sp++;
-      sd1 = *sp++;
-      sd2 = *sp++;
-      sd3 = *sp++;
-
-      /* extract, including some garbage at the start point */
-      CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd0);
-
-      /* store 8 bytes result */
-      vis_pst_16(dd0, dp++, emask);
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          sd0 = *sp++;
-          sd1 = *sp++;
-          sd2 = *sp++;
-          sd3 = *sp++;
-          CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd0);
-          *dp++ = dd0;
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        sd0 = *sp++;
-        sd1 = *sp++;
-        sd2 = *sp++;
-        sd3 = *sp++;
-        CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd0);
-        vis_pst_16(dd0, dp++, emask);
-      }
-    }
-    else {
-      vis_alignaddr((void *)0, off);
-
-      /* generate edge mask for the start point */
-      emask = vis_edge16(da, dend);
-
-      if (off < 0) {
-        /* load 24 bytes */
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-
-        /* extract and store 8 bytes */
-        CHANNELEXTRACT_S16_41R(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask);
-      }
-
-      if ((mlib_addr) dp <= (mlib_addr) dend2) {
-        n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1;
-
-        /* 8-pixel column loop, emask not needed */
-#pragma pipeloop(0)
-        for (i = 0; i < n; i++) {
-          dd0 = dd1;
-          sd4 = *sp++;
-          sd5 = *sp++;
-          sd6 = *sp++;
-          sd7 = *sp++;
-          CHANNELEXTRACT_S16_41R(sd4, sd5, sd6, sd7, dd1);
-          *dp++ = vis_faligndata(dd0, dd1);
-        }
-      }
-
-      /* end point handling */
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        dd0 = dd1;
-        sd4 = *sp++;
-        sd5 = *sp++;
-        sd6 = *sp++;
-        sd7 = *sp++;
-        CHANNELEXTRACT_S16_41R(sd4, sd5, sd6, sd7, dd1);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_41(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize,
-                                       mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_S16_41_D1(sa, da, xsize, cmask);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_43.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_43.c	Fri May 13 11:31:05 2016 +0300
@@ -29,22 +29,8 @@
  * FILENAME: mlib_v_ImageChannelExtract_43.c
  *
  * FUNCTIONS
- *      mlib_v_ImageChannelExtract_U8_43R_A8D1X8
- *      mlib_v_ImageChannelExtract_U8_43R_A8D2X8
- *      mlib_v_ImageChannelExtract_U8_43R_D1
- *      mlib_v_ImageChannelExtract_U8_43R
- *      mlib_v_ImageChannelExtract_S16_43R_A8D1X4
- *      mlib_v_ImageChannelExtract_S16_43R_A8D2X4
- *      mlib_v_ImageChannelExtract_S16_43R_D1
- *      mlib_v_ImageChannelExtract_S16_43R
- *      mlib_v_ImageChannelExtract_U8_43L_A8D1X8
- *      mlib_v_ImageChannelExtract_U8_43L_A8D2X8
  *      mlib_v_ImageChannelExtract_U8_43L_D1
- *      mlib_v_ImageChannelExtract_U8_43L
- *      mlib_v_ImageChannelExtract_S16_43L_A8D1X4
- *      mlib_v_ImageChannelExtract_S16_43L_A8D2X4
  *      mlib_v_ImageChannelExtract_S16_43L_D1
- *      mlib_v_ImageChannelExtract_S16_43L
  *
  * SYNOPSIS
  *
@@ -74,705 +60,6 @@
 #include "mlib_v_ImageChannelExtract.h"
 
 /***************************************************************/
-#define EXTRACT_U8_43R_old          /* shift right */           \
-  dd2 = vis_faligndata(sd3, dd2);    /* r7-------------- */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* g7r7------------ */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* b7g7r7---------- */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* r6b7g7r7-------- */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* g6r6b7g7r7------ */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* b6g6r6b7g7r7---- */     \
-                                                                \
-  dd2 = vis_faligndata(sd2, dd2);    /* r5b6g6r6b7g7r7-- */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd2 = vis_faligndata(sd2, dd2);    /* g5r5b6g6r6b7g7r7 */     \
-                                                                \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(sd2, dd1);    /* b5-------------- */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(sd2, dd1);    /* r4b5------------ */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(sd2, dd1);    /* g4r4b5---------- */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(sd2, dd1);    /* b4g4r4b5-------- */     \
-                                                                \
-  dd1 = vis_faligndata(sd1, dd1);    /* r3b4g4r4b5------ */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(sd1, dd1);    /* g3r3b4g4r4b5---- */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(sd1, dd1);    /* b3g3r3b4g4r4b5-- */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(sd1, dd1);    /* r2b3g3r3b4g4r4b5 */     \
-                                                                \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd0 = vis_faligndata(sd1, dd0);    /* g2-------------- */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd0 = vis_faligndata(sd1, dd0);    /* b2g2------------ */     \
-                                                                \
-  dd0 = vis_faligndata(sd0, dd0);    /* r1b2g2---------- */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);    /* g1r1b2g2-------- */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);    /* b1g1r1b2g2------ */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);    /* r0b1g1r1b2g2---- */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);    /* g0r0b1g1r1b2g2-- */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);           /* b0g0r0b1g1r1b2g2 */
-
-/***************************************************************/
-#define EXTRACT_U8_43R              /* shift right */           \
-  vis_alignaddr((void *)0, 5);                                  \
-  dd2 = vis_faligndata(sd3, dd2);    /* b7g7r7---------- */     \
-  sda = vis_freg_pair(vis_read_hi(sd3), vis_read_hi(sd3));      \
-  dd2 = vis_faligndata(sda, dd2);    /* b6g6r6b7g7r7---- */     \
-                                                                \
-  vis_alignaddr((void *)0, 6);                                  \
-  dd2 = vis_faligndata(sd2, dd2);    /* g5r5b6g6r6b7g7r7 */     \
-                                                                \
-  vis_alignaddr((void *)0, 5);                                  \
-  dd1 = vis_faligndata(sd2, dd1);    /* b5g5r5---------- */     \
-  sda = vis_freg_pair(vis_read_hi(sd2), vis_read_hi(sd2));      \
-  dd1 = vis_faligndata(sda, dd1);    /* b4g4r4b5g5r5---- */     \
-  dd1 = vis_faligndata(sd1, dd1);    /* b3g3r3b4g4r4b5g5 */     \
-  sda = vis_freg_pair(vis_read_hi(sd1), vis_read_hi(sd1));      \
-  vis_alignaddr((void *)0, 7);                                  \
-  dd1 = vis_faligndata(sda, dd1);    /* r2b3g3r3b4g4r4b5 */     \
-                                                                \
-  vis_alignaddr((void *)0, 5);                                  \
-  dd0 = vis_faligndata(sda, dd0);    /* b2g2r2---------- */     \
-  dd0 = vis_faligndata(sd0, dd0);    /* b1g1r1b2g2r2---- */     \
-  sda = vis_freg_pair(vis_read_hi(sd0), vis_read_hi(sd0));      \
-  dd0 = vis_faligndata(sda, dd0);           /* b0g0r0b1g1r1b2g2 */
-
-/***************************************************************/
-#define LOAD_EXTRACT_U8_43R_STORE                               \
-  sd0 = *sp++;          /* --b0g0r0--b1g1r1 */                  \
-  sd1 = *sp++;          /* --b2g2r2--b3g3r3 */                  \
-  sd2 = *sp++;          /* --b4g4r4--b5g5r5 */                  \
-  sd3 = *sp++;          /* --b6g6r6--b7g7r7 */                  \
-  EXTRACT_U8_43R;                                               \
-  *dp++ = dd0;          /* b0g0r0b1g1r1b2g2 */                  \
-  *dp++ = dd1;          /* r2b3g3r3b4g4r4b5 */                  \
-  *dp++ = dd2;                              /* g5r5b6g6r6b7g7r7 */
-
-/***************************************************************/
-#define LOAD_EXTRACT_U8_43R                                     \
-  vis_alignaddr((void *)soff, 0);                               \
-  s0 = s4;                                                      \
-  s1 = sp[1];                                                   \
-  s2 = sp[2];                                                   \
-  s3 = sp[3];                                                   \
-  s4 = sp[4];                                                   \
-  sd0 = vis_faligndata(s0, s1);                                 \
-  sd1 = vis_faligndata(s1, s2);                                 \
-  sd2 = vis_faligndata(s2, s3);                                 \
-  sd3 = vis_faligndata(s3, s4);                                 \
-  sp += 4;                                                      \
-  dd2old = dd2;                                                 \
-  EXTRACT_U8_43R
-
-/***************************************************************/
-/*
- * Both source and destination image data are 1-d vectors and
- * 8-byte aligned. And dsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dsize)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_d64 sda;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 7); *//* only for _old */
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    LOAD_EXTRACT_U8_43R_STORE;
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned. And slb and dlb are multiple of 8.
- * The xsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src,
-                                              mlib_s32      slb,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dlb,
-                                              mlib_s32      xsize,
-                                              mlib_s32      ysize)
-{
-  mlib_d64 *sp, *dp;                                  /* 8-byte aligned pointer for pixel */
-  mlib_d64 *sl, *dl;                                  /* 8-byte aligned pointer for line */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_d64 sda;
-  mlib_s32 i, j;                                      /* indices for x, y */
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 7); *//* only for _old */
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 8-byte column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      LOAD_EXTRACT_U8_43R_STORE;
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination data are not 8-byte aligned.
- * And dsize is in pixels.
- */
-
-void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src,
-                                          mlib_u8       *dst,
-                                          mlib_s32      dsize)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *dend, *dend2;                              /* end points in dst */
-  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
-  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
-  mlib_d64 s0, s1, s2, s3, s4;                        /* 8-byte source row data */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_d64 dd2old;                                    /* the last datum of the last step */
-  mlib_d64 sda;
-  mlib_s32 soff;                                      /* offset of address in src */
-  mlib_s32 doff;                                      /* offset of address in dst */
-  mlib_s32 emask;                                     /* edge mask */
-  mlib_s32 i, n;
-
-  sa = (void *)src;
-  da = dst;
-
-  /* prepare the source address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
-  dend = da + dsize * 3 - 1;
-  dend2 = dend - 23;
-  doff = 8 - ((mlib_addr) da & 7);
-
-  /* generate edge mask for the start point */
-  emask = vis_edge8(da, dend);
-
-  /* load 32 byte, convert, store 24 bytes */
-  s4 = sp[0];                               /* initial value */
-  LOAD_EXTRACT_U8_43R;
-
-  if (dsize >= 8) {
-    if (doff == 8) {
-      vis_pst_8(dd0, dp++, emask);
-      *dp++ = dd1;
-      *dp++ = dd2;
-    }
-    else {
-      vis_alignaddr((void *)doff, 0);
-      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
-      *dp++ = vis_faligndata(dd0, dd1);
-      *dp++ = vis_faligndata(dd1, dd2);
-    }
-  }
-  else {                                    /* for very small size */
-    if (doff == 8) {
-      vis_pst_8(dd0, dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(dd1, dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(dd2, dp++, emask);
-        }
-      }
-    }
-    else {
-      vis_alignaddr((void *)doff, 0);
-      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
-          if ((mlib_addr) dp <= (mlib_addr) dend) {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask);
-          }
-        }
-      }
-    }
-  }
-
-  /* no edge handling is needed in the loop */
-  if (doff == 8) {
-    if ((mlib_addr) dp <= (mlib_addr) dend2) {
-      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_EXTRACT_U8_43R;
-        *dp++ = dd0;
-        *dp++ = dd1;
-        *dp++ = dd2;
-      }
-    }
-  }
-  else {
-    if ((mlib_addr) dp <= (mlib_addr) dend2) {
-      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_EXTRACT_U8_43R;
-        vis_alignaddr((void *)doff, 0);
-        *dp++ = vis_faligndata(dd2old, dd0);
-        *dp++ = vis_faligndata(dd0, dd1);
-        *dp++ = vis_faligndata(dd1, dd2);
-      }
-    }
-  }
-
-  if ((mlib_addr) dp <= (mlib_addr) dend) {
-    LOAD_EXTRACT_U8_43R;
-    emask = vis_edge8(dp, dend);
-    if (doff == 8) {
-      vis_pst_8(dd0, dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(dd1, dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(dd2, dp++, emask);
-        }
-      }
-    }
-    else {
-      vis_alignaddr((void *)doff, 0);
-      vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
-        }
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dst,
-                                       mlib_s32      dlb,
-                                       mlib_s32      xsize,
-                                       mlib_s32      ysize)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_U8_43R_D1(sa, da, xsize);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
-#define EXTRACT_S16_43R_old      /* shift right */              \
-                                                                \
-  dd2 = vis_faligndata(sd3, dd2);    /* r3------ */             \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* g3r3---- */             \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(sd3, dd2);    /* b3g3r3-- */             \
-                                                                \
-  dd2 = vis_faligndata(sd2, dd2);    /* r2b3g3r3 */             \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(sd2, dd1);    /* g2------ */             \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(sd2, dd1);    /* b2g2---- */             \
-                                                                \
-  dd1 = vis_faligndata(sd1, dd1);    /* r1b2g2-- */             \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(sd1, dd1);    /* g1r1b2g2 */             \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd0 = vis_faligndata(sd1, dd0);    /* b1------ */             \
-                                                                \
-  dd0 = vis_faligndata(sd0, dd0);    /* r0b1---- */             \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);    /* g0r0b1-- */             \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(sd0, dd0);           /* b0g0r0b1 */
-
-/***************************************************************/
-#define EXTRACT_S16_43R        /* shift right */                \
-                                                                \
-  vis_alignaddr((void *)0, 2);                                  \
-  dd2 = vis_faligndata(sd3, dd2);    /* b3g3r3-- */             \
-                                                                \
-  vis_alignaddr((void *)0, 6);                                  \
-  dd2 = vis_faligndata(sd2, dd2);    /* r2b3g3r3 */             \
-  vis_alignaddr((void *)0, 2);                                  \
-  dd1 = vis_faligndata(sd2, dd1);    /* b2g2r2-- */             \
-                                                                \
-  vis_alignaddr((void *)0, 4);                                  \
-  dd1 = vis_faligndata(sd1, dd1);    /* g1r1b2g2 */             \
-  vis_alignaddr((void *)0, 2);                                  \
-  dd0 = vis_faligndata(sd1, dd0);    /* b1g1r1-- */             \
-  dd0 = vis_faligndata(sd0, dd0);           /* b0g0r0b1 */
-
-/***************************************************************/
-#define LOAD_EXTRACT_S16_43R_STORE                              \
-                                                                \
-  sd0 = *sp++;          /* --b0g0r0 */                          \
-  sd1 = *sp++;          /* --b1g1r1 */                          \
-  sd2 = *sp++;          /* --b2g2r2 */                          \
-  sd3 = *sp++;          /* --b3g3r3 */                          \
-                                                                \
-  EXTRACT_S16_43R;                                              \
-                                                                \
-  *dp++ = dd0;          /* b0g0r0b1 */                          \
-  *dp++ = dd1;          /* g1r1b2g2 */                          \
-  *dp++ = dd2;                              /* r2b3g3r3 */
-
-/***************************************************************/
-#define LOAD_EXTRACT_S16_43R                                    \
-                                                                \
-  vis_alignaddr((void *)soff, 0);                               \
-  s0 = s4;                                                      \
-  s1 = sp[1];                                                   \
-  s2 = sp[2];                                                   \
-  s3 = sp[3];                                                   \
-  s4 = sp[4];                                                   \
-  sd0 = vis_faligndata(s0, s1);                                 \
-  sd1 = vis_faligndata(s1, s2);                                 \
-  sd2 = vis_faligndata(s2, s3);                                 \
-  sd3 = vis_faligndata(s3, s4);                                 \
-  sp += 4;                                                      \
-  dd2old = dd2;                                                 \
-  EXTRACT_S16_43R
-
-/***************************************************************/
-/*
- * Both source and destination image data are 1-d vectors and
- * 8-byte aligned. And size is in 4-pixels.
- */
-
-void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dsize)
-{
-  mlib_d64 *sp, *dp;                                  /* 8-byte aligned pointer for pixel */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 6); *//* only for _old */
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_EXTRACT_S16_43R_STORE;
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned. The xsize is multiple of 8.
- * slb and dlb are multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src,
-                                               mlib_s32       slb,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dlb,
-                                               mlib_s32       xsize,
-                                               mlib_s32       ysize)
-{
-  mlib_d64 *sp, *dp;                                  /* 8-byte aligned pointer for pixel */
-  mlib_d64 *sl, *dl;                                  /* 8-byte aligned pointer for line */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_s32 i, j;                                      /* indices for x, y */
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 6); *//* only for _old */
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 4-pixel column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_EXTRACT_S16_43R_STORE;
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination data are not 8-byte aligned.
- * And dsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src,
-                                           mlib_s16       *dst,
-                                           mlib_s32       dsize)
-{
-  mlib_s16 *sa, *da;                                  /* pointer for pixel */
-  mlib_s16 *dend, *dend2;                             /* end points in dst */
-  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
-  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
-  mlib_d64 s0, s1, s2, s3, s4;                        /* 8-byte source row data */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_d64 dd2old;                                    /* the last datum of the last step */
-  mlib_s32 soff;                                      /* offset of address in src */
-  mlib_s32 doff;                                      /* offset of address in dst */
-  mlib_s32 emask;                                     /* edge mask */
-  mlib_s32 i, n;
-
-  sa = (void *)src;
-  da = dst;
-
-  /* prepare the source address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
-  dend = da + dsize * 3 - 1;
-  dend2 = dend - 11;
-  doff = 8 - ((mlib_addr) da & 7);
-
-  /* generate edge mask for the start point */
-  emask = vis_edge16(da, dend);
-
-  /* load 32 byte, convert, store 24 bytes */
-  s4 = sp[0];                               /* initial value */
-  LOAD_EXTRACT_S16_43R;
-
-  if (dsize >= 4) {
-    if (doff == 8) {
-      vis_pst_16(dd0, dp++, emask);
-      *dp++ = dd1;
-      *dp++ = dd2;
-    }
-    else {
-      vis_alignaddr((void *)doff, 0);
-      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
-      *dp++ = vis_faligndata(dd0, dd1);
-      *dp++ = vis_faligndata(dd1, dd2);
-    }
-  }
-  else {                                    /* for very small size */
-    if (doff == 8) {
-      vis_pst_16(dd0, dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(dd1, dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(dd2, dp++, emask);
-        }
-      }
-    }
-    else {
-      vis_alignaddr((void *)doff, 0);
-      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
-        }
-      }
-    }
-  }
-
-  /* no edge handling is needed in the loop */
-  if (doff == 8) {
-    if ((mlib_addr) dp <= (mlib_addr) dend2) {
-      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_EXTRACT_S16_43R;
-        *dp++ = dd0;
-        *dp++ = dd1;
-        *dp++ = dd2;
-      }
-    }
-  }
-  else {
-    if ((mlib_addr) dp <= (mlib_addr) dend2) {
-      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_EXTRACT_S16_43R;
-        vis_alignaddr((void *)doff, 0);
-        *dp++ = vis_faligndata(dd2old, dd0);
-        *dp++ = vis_faligndata(dd0, dd1);
-        *dp++ = vis_faligndata(dd1, dd2);
-      }
-    }
-  }
-
-  if ((mlib_addr) dp <= (mlib_addr) dend) {
-    LOAD_EXTRACT_S16_43R;
-    emask = vis_edge16(dp, dend);
-    if (doff == 8) {
-      vis_pst_16(dd0, dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(dd1, dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(dd2, dp++, emask);
-        }
-      }
-    }
-    else {
-      vis_alignaddr((void *)doff, 0);
-      vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
-      if ((mlib_addr) dp <= (mlib_addr) dend) {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
-        if ((mlib_addr) dp <= (mlib_addr) dend) {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
-        }
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src,
-                                        mlib_s32       slb,
-                                        mlib_s16       *dst,
-                                        mlib_s32       dlb,
-                                        mlib_s32       xsize,
-                                        mlib_s32       ysize)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_S16_43R_D1(sa, da, xsize);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-#define EXTRACT_U8_43L_old      /* shift left */                \
-                                                                \
-  dd0 = vis_faligndata(dd0, sd0);    /* --------------r0 */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(dd0, sd0);    /* ------------r0g0 */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(dd0, sd0);    /* ----------r0g0b0 */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(dd0, sd0);    /* --------r0g0b0r1 */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(dd0, sd0);    /* ------r0g0b0r1g1 */     \
-  sd0 = vis_faligndata(sd0, sd0);                               \
-  dd0 = vis_faligndata(dd0, sd0);    /* ----r0g0b0r1g1b1 */     \
-                                                                \
-  dd0 = vis_faligndata(dd0, sd1);    /* --r0g0b0r1g1b1r2 */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd0 = vis_faligndata(dd0, sd1);    /* r0g0b0r1g1b1r2g2 */     \
-                                                                \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(dd1, sd1);    /* --------------b2 */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(dd1, sd1);    /* ------------b2r3 */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(dd1, sd1);    /* ----------b2r3g3 */     \
-  sd1 = vis_faligndata(sd1, sd1);                               \
-  dd1 = vis_faligndata(dd1, sd1);    /* --------b2r3g3b3 */     \
-                                                                \
-  dd1 = vis_faligndata(dd1, sd2);    /* ------b2r3g3b3r4 */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(dd1, sd2);    /* ----b2r3g3b3r4g4 */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(dd1, sd2);    /* --b2r3g3b3r4g4b4 */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd1 = vis_faligndata(dd1, sd2);    /* b2r3g3b3r4g4b4r5 */     \
-                                                                \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd2 = vis_faligndata(dd2, sd2);    /* --------------g5 */     \
-  sd2 = vis_faligndata(sd2, sd2);                               \
-  dd2 = vis_faligndata(dd2, sd2);    /* ------------g5b5 */     \
-                                                                \
-  dd2 = vis_faligndata(dd2, sd3);    /* ----------g5b5r6 */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(dd2, sd3);    /* --------g5b5r6g6 */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(dd2, sd3);    /* ------g5b5r6g6b6 */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(dd2, sd3);    /* ----g5b5r6g6b6r7 */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(dd2, sd3);    /* --g5b5r6g6b6r7g7 */     \
-  sd3 = vis_faligndata(sd3, sd3);                               \
-  dd2 = vis_faligndata(dd2, sd3);           /* g5b5r6g6b6r7g7b7 */
-
-/***************************************************************/
 #define EXTRACT_U8_43L        /* shift left */                  \
                                                                 \
   vis_alignaddr((void *)0, 3);                                  \
@@ -801,20 +88,6 @@
   dd2 = vis_faligndata(dd2, sda);           /* g5b5r6g6b6r7g7b7 */
 
 /***************************************************************/
-#define LOAD_EXTRACT_U8_43L_STORE                               \
-                                                                \
-  sd0 = *sp++;          /* r0g0b0--r1g1b1-- */                  \
-  sd1 = *sp++;          /* r2g2b2--r3g3b3-- */                  \
-  sd2 = *sp++;          /* r4g4b4--r5g5b5-- */                  \
-  sd3 = *sp++;          /* r6g6b6--r7g7b7-- */                  \
-                                                                \
-  EXTRACT_U8_43L;                                               \
-                                                                \
-  *dp++ = dd0;          /* r0g0b0r1g1b1r2g2 */                  \
-  *dp++ = dd1;          /* b2r3g3b3r4g4b4r5 */                  \
-  *dp++ = dd2;                              /* g5b5r6g6b6r7g7b7 */
-
-/***************************************************************/
 #define LOAD_EXTRACT_U8_43L                                             \
                                                                         \
   vis_alignaddr((void *)soff, 0);                                       \
@@ -835,74 +108,6 @@
 
 /***************************************************************/
 /*
- * Both source and destination image data are 1-d vectors and
- * 8-byte aligned. And dsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dsize)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_d64 sda;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 1); *//* for _old only */
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    LOAD_EXTRACT_U8_43L_STORE;
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned. And slb and dlb are multiple of 8.
- * The xsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src,
-                                              mlib_s32      slb,
-                                              mlib_u8       *dst,
-                                              mlib_s32      dlb,
-                                              mlib_s32      xsize,
-                                              mlib_s32      ysize)
-{
-  mlib_d64 *sp, *dp;                                  /* 8-byte aligned pointer for pixel */
-  mlib_d64 *sl, *dl;                                  /* 8-byte aligned pointer for line */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_d64 sda;
-  mlib_s32 i, j;                                      /* indices for x, y */
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 1); *//* for _old only */
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 8-byte column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      LOAD_EXTRACT_U8_43L_STORE;
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-/*
  * Either source or destination data are not 8-byte aligned.
  * And ssize is multiple of 8.
  */
@@ -1045,28 +250,6 @@
 }
 
 /***************************************************************/
-void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src,
-                                       mlib_s32      slb,
-                                       mlib_u8       *dst,
-                                       mlib_s32      dlb,
-                                       mlib_s32      xsize,
-                                       mlib_s32      ysize)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_U8_43L_D1(sa, da, xsize);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
 #define EXTRACT_S16_43L              /* shift left */           \
   vis_alignaddr((void *)0, 6);                                  \
   dd0 = vis_faligndata(dd0, sd0);    /* --r0g0b0 */             \
@@ -1083,20 +266,6 @@
   dd2 = vis_faligndata(dd2, sd3);           /* b2r3g3b3 */
 
 /***************************************************************/
-#define LOAD_EXTRACT_S16_43L_STORE                              \
-                                                                \
-  sd0 = *sp++;          /* r0g0b0-- */                          \
-  sd1 = *sp++;          /* r1g1b1-- */                          \
-  sd2 = *sp++;          /* r2g2b2-- */                          \
-  sd3 = *sp++;          /* r3g3b3-- */                          \
-                                                                \
-  EXTRACT_S16_43L;                                              \
-                                                                \
-  *dp++ = dd0;          /* r0g0b0r1 */                          \
-  *dp++ = dd1;          /* g1b1r2g2 */                          \
-  *dp++ = dd2;                              /* b2r3g3b3 */
-
-/***************************************************************/
 #define LOAD_EXTRACT_S16_43L                                    \
                                                                 \
   vis_alignaddr((void *)soff, 0);                               \
@@ -1115,72 +284,6 @@
 
 /***************************************************************/
 /*
- * Both source and destination image data are 1-d vectors and
- * 8-byte aligned. And dsize is multiple of 4.
- */
-
-void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dsize)
-{
-  mlib_d64 *sp, *dp;                                  /* 8-byte aligned pointer for pixel */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 2); *//* only for _old */
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_EXTRACT_S16_43L_STORE;
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned. The xsize is multiple of 4.
- * And slb and dlb are multiple of 8.
- */
-
-void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src,
-                                               mlib_s32       slb,
-                                               mlib_s16       *dst,
-                                               mlib_s32       dlb,
-                                               mlib_s32       xsize,
-                                               mlib_s32       ysize)
-{
-  mlib_d64 *sp, *dp;                                  /* 8-byte aligned pointer for pixel */
-  mlib_d64 *sl, *dl;                                  /* 8-byte aligned pointer for line */
-  mlib_d64 sd0, sd1, sd2, sd3;                        /* source data */
-  mlib_d64 dd0, dd1, dd2;                             /* dst data */
-  mlib_s32 i, j;                                      /* indices for x, y */
-
-  /* set GSR.offset for vis_faligndata()  */
-/* vis_alignaddr((void *)0, 2); *//* only for _old */
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 4-pixel column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_EXTRACT_S16_43L_STORE;
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-/*
  * Either source or destination data are not 8-byte aligned.
  * And size is in pixels.
  */
@@ -1318,25 +421,3 @@
 }
 
 /***************************************************************/
-void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src,
-                                        mlib_s32       slb,
-                                        mlib_s16       *dst,
-                                        mlib_s32       dlb,
-                                        mlib_s32       xsize,
-                                        mlib_s32       ysize)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelExtract_S16_43L_D1(sa, da, xsize);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_f.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,784 +0,0 @@
-/*
- * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-#include <stdlib.h>
-#include "mlib_image.h"
-#include "mlib_ImageCheck.h"
-
-typedef union {
-  double d64;
-  struct {
-    float f0;
-    float f1;
-  } f32s;
-} d64_2_f32;
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_U8_2_1(mlib_u8  *sl,  mlib_s32 slb,
-                                       mlib_u8  *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height)
-{
-  mlib_u8   *sp = sl;
-  mlib_u8   *dp = dl;
-  int       i, j;
-
-  for (j = 0; j < height; j++) {
-    mlib_u8  *dend = dl + width;
-    mlib_u32 *sp2;
-    while (((mlib_addr)sp & 7) > 1) {
-      *dp++ = *sp;
-      sp += 2;
-      if (dp >= dend) break;
-    }
-    if ((mlib_addr)sp & 7) {
-      sp2 = (mlib_u32 *)(sp - 1);
-#pragma pipeloop(0)
-      for (; dp <= (dend-2); dp += 2) {
-        mlib_u32 s0;
-        s0 = *sp2++;
-        dp[0] = s0 >> 16;
-        dp[1] = s0;
-      }
-      if (dp < dend) {
-        dp[0] = sp2[0] >> 16;
-      }
-    } else {
-      sp2 = (mlib_u32 *)sp;
-#pragma pipeloop(0)
-      for (; dp <= (dend-2); dp += 2) {
-        mlib_u32 s0;
-        s0 = *sp2++;
-        dp[0] = s0 >> 24;
-        dp[1] = s0 >> 8;
-      }
-      if (dp < dend) {
-        dp[0] = sp2[0] >> 24;
-      }
-    }
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_U8_3_2(mlib_u8  *sl, mlib_s32 slb,
-                                       mlib_u8 *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 count1)
-{
-  mlib_u8   *sp = sl;
-  mlib_u8   *dp = dl;
-  mlib_u32  *sp2;
-  mlib_u16  *dp2;
-  mlib_u16  *d2end;
-  mlib_u32  s0, s1, s2, s3;
-  int       i, j, off, count_off;
-
-  for (j = 0; j < height; j++) {
-    mlib_u8  *dend  = dl + 2*width;
-
-    if (count1 == 1) {
-      if (dp < dend) *dp++ = sp[0];
-      sp += 2;
-    }
-
-    if ((mlib_addr)dp & 1) {
-#pragma pipeloop(0)
-      for (; dp <= (dend-2); dp += 2) {
-        dp[0] = sp[0];
-        dp[1] = sp[1];
-        sp += 3;
-      }
-      if (dp < dend) {
-        dp[0] = sp[0];
-      }
-      sp = sl += slb;
-      dp = dl += dlb;
-      continue;
-    }
-
-    dp2 = (mlib_u16*)dp;
-    d2end = (mlib_u16*)((mlib_addr)dend &~ 1);
-    off = (mlib_addr)sp & 3;
-    sp2 = (mlib_u32 *)(sp - off);
-
-    switch (off) {
-
-      case 0:
-#pragma pipeloop(0)
-        for (; dp2 <= (d2end-4); dp2 += 4) {
-          s0 = sp2[0];
-          s1 = sp2[1];
-          s2 = sp2[2];
-          dp2[0] = s0 >> 16;
-          dp2[1] = (s0 << 8) | (s1 >> 24);
-          dp2[2] = s1;
-          dp2[3] = s2 >>  8;
-          sp2 += 3;
-        }
-        break;
-
-      case 1:
-#pragma pipeloop(0)
-        for (; dp2 <= (d2end-4); dp2 += 4) {
-          s0 = sp2[0];
-          s1 = sp2[1];
-          s2 = sp2[2];
-          dp2[0] = s0 >> 8;
-          dp2[1] = s1 >> 16;
-          dp2[2] = (s1 << 8) | (s2 >> 24);
-          dp2[3] = s2;
-          sp2 += 3;
-        }
-        break;
-
-      case 2:
-#pragma pipeloop(0)
-        s3 = sp2[0];
-        for (; dp2 <= (d2end-4); dp2 += 4) {
-          s0 = s3;
-          s1 = sp2[1];
-          s2 = sp2[2];
-          s3 = sp2[3];
-          dp2[0] = s0;
-          dp2[1] = s1 >> 8;
-          dp2[2] = s2 >> 16;
-          dp2[3] = (s2 << 8) | (s3 >> 24);
-          sp2 += 3;
-        }
-        break;
-
-      case 3:
-#pragma pipeloop(0)
-        s3 = sp2[0];
-        for (; dp2 <= (d2end-4); dp2 += 4) {
-          s0 = s3;
-          s1 = sp2[1];
-          s2 = sp2[2];
-          s3 = sp2[3];
-          dp2[0] = (s0 << 8) | (s1 >> 24);
-          dp2[1] = s1;
-          dp2[2] = s2 >>  8;
-          dp2[3] = s3 >> 16;
-          sp2 += 3;
-        }
-    }
-
-    sp = (mlib_u8 *)sp2 + off;
-    dp = (mlib_u8 *)dp2;
-    while (dp < dend) {
-      *dp++ = sp[0];
-      if (dp < dend) *dp++ = sp[1];
-      sp += 3;
-    }
-
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_U8_4_2(mlib_u8  *sl, mlib_s32 slb,
-                                       mlib_u8 *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 count1)
-{
-  mlib_u8   *sp = sl;
-  mlib_u8   *dp = dl;
-  mlib_u32  *sp2;
-  mlib_u16  *dp2;
-  mlib_u16  *d2end;
-  mlib_u32  s0, s1, s2, s3;
-  int       i, j, off, count_off;
-
-  for (j = 0; j < height; j++) {
-    mlib_u8  *dend  = dl + 2*width;
-
-    if (count1 == 1) {
-      if (dp < dend) *dp++ = sp[0];
-      sp += 3;
-    }
-
-    off = (mlib_addr)sp & 3;
-
-    if (((mlib_addr)dp & 1) || (off == 3)) {
-#pragma pipeloop(0)
-      for (; dp <= (dend-2); dp += 2) {
-        dp[0] = sp[0];
-        dp[1] = sp[1];
-        sp += 4;
-      }
-      if (dp < dend) {
-        dp[0] = sp[0];
-      }
-      sp = sl += slb;
-      dp = dl += dlb;
-      continue;
-    }
-
-    dp2 = (mlib_u16*)dp;
-    d2end = (mlib_u16*)((mlib_addr)dend &~ 1);
-    sp2 = (mlib_u32 *)(sp - off);
-
-    switch (off) {
-
-      case 0:
-#pragma pipeloop(0)
-        for (; dp2 < d2end; dp2++) {
-          s0 = sp2[0];
-          dp2[0] = s0 >> 16;
-          sp2++;
-        }
-        break;
-
-      case 1:
-#pragma pipeloop(0)
-        for (; dp2 < d2end; dp2++) {
-          s0 = sp2[0];
-          dp2[0] = s0 >> 8;
-          sp2++;
-        }
-        break;
-
-      case 2:
-#pragma pipeloop(0)
-        for (; dp2 < d2end; dp2++) {
-          s0 = sp2[0];
-          dp2[0] = s0;
-          sp2++;
-        }
-        break;
-    }
-
-    sp = (mlib_u8 *)sp2 + off;
-    dp = (mlib_u8 *)dp2;
-    if (dp < dend) {
-      *dp++ = sp[0];
-    }
-
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_32_2_1(mlib_f32 *sp, mlib_s32 slb,
-                                       mlib_f32 *dp, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height)
-{
-  mlib_d64  *sp2;
-  int       i, j, off;
-
-  for (j = 0; j < height; j++) {
-
-    if (((mlib_addr)sp & 7) == 0) {
-      sp2 = (mlib_d64 *)sp;
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        d64_2_f32 d;
-        d.d64 = sp2[i];
-        dp[i] = d.f32s.f0;
-      }
-    } else {
-      sp2 = (mlib_d64 *)(sp - 1);
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        d64_2_f32 d;
-        d.d64 = sp2[i];
-        dp[i] = d.f32s.f1;
-      }
-    }
-
-    sp += slb;
-    dp += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_32_3_1(mlib_f32 *sl, mlib_s32 slb,
-                                       mlib_f32 *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height)
-{
-  mlib_f32  *sp = sl;
-  mlib_f32  *dp = dl;
-  mlib_d64  *sp2;
-  d64_2_f32 d0;
-  int       i, j, off;
-
-  for (j = 0; j < height; j++) {
-    mlib_f32 *dend = dl + width;
-
-    if ((mlib_addr)sp & 7) {
-      dp[0] = sp[0];
-      sp += 3;
-      dp ++;
-    }
-
-    sp2 = (mlib_d64 *)sp;
-#pragma pipeloop(0)
-    for (; dp <= (dend-2); dp += 2) {
-      d64_2_f32 d0, d1;
-      d0.d64 = sp2[0];
-      d1.d64 = sp2[1];
-      dp[0] = d0.f32s.f0;
-      dp[1] = d1.f32s.f1;
-      sp2 += 3;
-    }
-
-    if (dp < dend) {
-      d0.d64 = sp2[0];
-      dp[0] = d0.f32s.f0;
-    }
-
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_32_3_2(mlib_f32 *sl, mlib_s32 slb,
-                                       mlib_f32 *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 count1)
-{
-  mlib_f32  *sp = sl;
-  mlib_f32  *dp = dl;
-  mlib_d64  *sp2;
-  d64_2_f32 d0;
-  int       i, j, off;
-
-  for (j = 0; j < height; j++) {
-    mlib_f32 *dend = dl + 2*width;
-
-    if (count1 == 1) {
-      if (dp < dend) *dp++ = sp[0];
-      sp += 2;
-    }
-
-    if ((mlib_addr)sp & 7) {
-      if (dp < dend) *dp++ = sp[0];
-      if (dp < dend) *dp++ = sp[1];
-      sp += 3;
-    }
-
-    sp2 = (mlib_d64 *)sp;
-#pragma pipeloop(0)
-    for (; dp <= (dend-4); dp += 4) {
-      d64_2_f32 d0, d1, d2;
-      d0.d64 = sp2[0];
-      d1.d64 = sp2[1];
-      d2.d64 = sp2[2];
-      dp[0] = d0.f32s.f0;
-      dp[1] = d0.f32s.f1;
-      dp[2] = d1.f32s.f1;
-      dp[3] = d2.f32s.f0;
-      sp2 += 3;
-    }
-
-    if (dp < dend) {
-      sp = (mlib_f32 *)sp2;
-      *dp++ = sp[0];
-      if (dp < dend) *dp++ = sp[1];
-      if (dp < dend) *dp++ = sp[3];
-    }
-
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_32_4_1(mlib_f32 *sp, mlib_s32 slb,
-                                       mlib_f32 *dp, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height)
-{
-  mlib_d64  *sp2;
-  int       i, j, off;
-
-  for (j = 0; j < height; j++) {
-
-    if (((mlib_addr)sp & 7) == 0) {
-      sp2 = (mlib_d64 *)sp;
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        d64_2_f32 d;
-        d.d64 = sp2[2*i];
-        dp[i] = d.f32s.f0;
-      }
-    } else {
-      sp2 = (mlib_d64 *)(sp - 1);
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        d64_2_f32 d;
-        d.d64 = sp2[2*i];
-        dp[i] = d.f32s.f1;
-      }
-    }
-
-    sp += slb;
-    dp += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_32_4_2(mlib_f32 *sl, mlib_s32 slb,
-                                       mlib_f32 *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 count1)
-{
-  mlib_f32  *sp = sl;
-  mlib_f32  *dp = dl;
-  mlib_d64  *sp2;
-  int       i, j, off;
-  d64_2_f32 d0, d1;
-
-  for (j = 0; j < height; j++) {
-    mlib_f32 *dend = dl + 2*width;
-
-    if (count1 == 1) {
-      dp[0] = sp[0];
-      sp += 3;
-      dp ++;
-    }
-
-    if (((mlib_addr)sp & 7) == 0) {
-      sp2 = (mlib_d64 *)sp;
-#pragma pipeloop(0)
-      for (; dp <= (dend-2); dp += 2) {
-        d64_2_f32 d;
-        d.d64 = sp2[0];
-        dp[0] = d.f32s.f0;
-        dp[1] = d.f32s.f1;
-        sp2 += 2;
-      }
-      if (dp < dend) {
-        d0.d64 = sp2[0];
-        dp[0] = d0.f32s.f0;
-      }
-    } else {
-      sp2 = (mlib_d64 *)(sp - 1);
-#pragma pipeloop(0)
-      for (; dp <= (dend-2); dp += 2) {
-        d64_2_f32 d0, d1;
-        d0.d64 = sp2[0];
-        d1.d64 = sp2[1];
-        dp[0] = d0.f32s.f1;
-        dp[1] = d1.f32s.f0;
-        sp2 += 2;
-      }
-      if (dp < dend) {
-        d0.d64 = sp2[0];
-        dp[0] = d0.f32s.f1;
-      }
-    }
-
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-
-void mlib_v_ImageChannelExtract_32_4_3(mlib_f32 *sl, mlib_s32 slb,
-                                       mlib_f32 *dl, mlib_s32 dlb,
-                                       mlib_s32 width, mlib_s32 height,
-                                       mlib_s32 count1)
-{
-  mlib_f32  *sp = sl;
-  mlib_f32  *dp = dl;
-  mlib_d64  *sp2;
-  int       i, j, k;
-  d64_2_f32 d0, d1;
-
-  for (j = 0; j < height; j++) {
-    mlib_f32 *dend = dl + 3*width;
-
-    for (k = 0; k < count1; k++) {
-      if (dp < dend) *dp++ = *sp++;
-    }
-    sp++;
-
-    if (((mlib_addr)sp & 7) == 0) {
-      sp2 = (mlib_d64 *)sp;
-#pragma pipeloop(0)
-      for (; dp <= (dend-3); dp += 3) {
-        d64_2_f32 d0, d1;
-        d0.d64 = sp2[0];
-        d1.d64 = sp2[1];
-        dp[0] = d0.f32s.f0;
-        dp[1] = d0.f32s.f1;
-        dp[2] = d1.f32s.f0;
-        sp2 += 2;
-      }
-      if (dp < dend) {
-        d0.d64 = sp2[0];
-        *dp++ = d0.f32s.f0;
-        if (dp < dend) *dp++ = d0.f32s.f1;
-      }
-    } else {
-      sp2 = (mlib_d64 *)(sp - 1);
-#pragma pipeloop(0)
-      for (; dp <= (dend-3); dp += 3) {
-        d64_2_f32 d0, d1;
-        d0.d64 = sp2[0];
-        d1.d64 = sp2[1];
-        dp[0] = d0.f32s.f1;
-        dp[1] = d1.f32s.f0;
-        dp[2] = d1.f32s.f1;
-        sp2 += 2;
-      }
-      if (dp < dend) {
-        d0.d64 = sp2[0];
-        d1.d64 = sp2[1];
-        *dp++ = d0.f32s.f1;
-        if (dp < dend) *dp++ = d1.f32s.f0;
-      }
-    }
-
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-/* general channel extraction: slower due to the inner loop */
-
-void mlib_v_ImageChannelExtract_U8(mlib_u8  *src, mlib_s32 slb,
-                              mlib_u8  *dst, mlib_s32 dlb,
-                              mlib_s32 channels, mlib_s32 channeld,
-                              mlib_s32 width, mlib_s32 height,
-                              mlib_s32 cmask)
-{
-  mlib_u8   *sp;              /* pointer for pixel in src */
-  mlib_u8   *sl;              /* pointer for line in src  */
-  mlib_u8   *dp;              /* pointer for pixel in dst */
-  mlib_u8   *dl;              /* pointer for line in dst  */
-  int       i, j, k;          /* indices for x, y, channel */
-  int       deltac[5] = { 0, 1, 1, 1, 1 };
-  int       inc0, inc1, inc2, inc3;
-  mlib_u8   s0, s1, s2, s3;
-
-  deltac[channeld] = 1;
-  for (i = (channels - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channeld] = channels;
-  for (i = 1; i < channeld; i++) {
-    deltac[channeld] -= deltac[i];
-  }
-
-  sp = sl = src + deltac[0];
-  dp = dl = dst;
-
-/* Only THREE CHANNEL CASE could be executed here!!! */
-
-  inc0 = deltac[1];
-  inc1 = deltac[2] + inc0;
-  inc2 = deltac[3] + inc1;
-  for (j = 0; j < height; j++) {
-    for (i = 0; i < width; i++) {
-#pragma pipeloop(0)
-      s0 = sp[0]; s1 = sp[inc0]; s2 = sp[inc1];
-      dp[0] = s0;
-      dp[1] = s1;
-      dp[2] = s2;
-      sp   += inc2;
-      dp   += 3;
-    }
-    sp = sl += slb;
-    dp = dl += dlb;
-  }
-}
-
-/***************************************************************/
-/* general channel extraction: slower due to the inner loop */
-
-void mlib_v_ImageChannelExtract_S16(mlib_u16 *src,    mlib_s32 slb,
-                                    mlib_u16 *dst,    mlib_s32 dlb,
-                                    mlib_s32 channels, mlib_s32 channeld,
-                                    mlib_s32 width,    mlib_s32 height,
-                                    mlib_s32 cmask)
-{
-  mlib_u16   *sp;              /* pointer for pixel in src */
-  mlib_u16   *sl;              /* pointer for line in src  */
-  mlib_u16   *dp;              /* pointer for pixel in dst */
-  mlib_u16   *dl;              /* pointer for line in dst  */
-  int       i, j, k;          /* indices for x, y, channel */
-  int       deltac[5] = { 0, 1, 1, 1, 1 };
-  int       inc0, inc1, inc2, inc3;
-  mlib_u16   s0, s1, s2, s3;
-
-  slb >>= 1;
-  dlb >>= 1;
-
-  deltac[channeld] = 1;
-  for (i = (channels - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channeld] = channels;
-  for (i = 1; i < channeld; i++) {
-    deltac[channeld] -= deltac[i];
-  }
-
-  sp = sl = src + deltac[0];
-  dp = dl = dst;
-
-  if (channeld == 2) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0]; s1 = sp[inc0];
-        dp[0] = s0;
-        dp[1] = s1;
-        sp   += inc1;
-        dp   += 2;
-      }
-      sp = sl = sl + slb;
-      dp = dl = dl + dlb;
-    }
-  } else
-
-  if (channeld == 3) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    inc2 = deltac[3] + inc1;
-    for (j = 0; j < height; j++) {
-      for (i = 0; i < width; i++) {
-#pragma pipeloop(0)
-        s0 = sp[0]; s1 = sp[inc0]; s2 = sp[inc1];
-        dp[0] = s0;
-        dp[1] = s1;
-        dp[2] = s2;
-        sp   += inc2;
-        dp   += 3;
-      }
-      sp = sl = sl + slb;
-      dp = dl = dl + dlb;
-    }
-  }}
-
-/***************************************************************/
-/* general channel extraction: slower due to the inner loop */
-
-void mlib_v_ImageChannelExtract_D64(mlib_d64 *src,    mlib_s32 slb,
-                                    mlib_d64 *dst,    mlib_s32 dlb,
-                                    mlib_s32 channels, mlib_s32 channeld,
-                                    mlib_s32 width,    mlib_s32 height,
-                                    mlib_s32 cmask)
-{
-  mlib_d64   *sp;              /* pointer for pixel in src */
-  mlib_d64   *sl;              /* pointer for line in src  */
-  mlib_d64   *dp;              /* pointer for pixel in dst */
-  mlib_d64   *dl;              /* pointer for line in dst  */
-  int        i, j, k;          /* indices for x, y, channel */
-  int        deltac[5] = { 0, 1, 1, 1, 1 };
-  int        inc0, inc1, inc2, inc3;
-  mlib_d64   s0, s1, s2, s3;
-
-  deltac[channeld] = 1;
-  for (i = (channels - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channeld] = channels;
-  for (i = 1; i < channeld; i++) {
-    deltac[channeld] -= deltac[i];
-  }
-
-  sp = sl = src + deltac[0];
-  dp = dl = dst;
-
-  if (channeld == 1) {
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        dp[i] = s0;
-        sp   += channels;
-      }
-      sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);
-      dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);
-    }
-  } else
-
-  if (channeld == 2) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0]; s1 = sp[inc0];
-        dp[0] = s0;
-        dp[1] = s1;
-        sp   += inc1;
-        dp   += 2;
-      }
-      sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);
-      dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);
-    }
-  } else
-
-  if (channeld == 3) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    inc2 = deltac[3] + inc1;
-    for (j = 0; j < height; j++) {
-      for (i = 0; i < width; i++) {
-#pragma pipeloop(0)
-        s0 = sp[0]; s1 = sp[inc0]; s2 = sp[inc1];
-        dp[0] = s0;
-        dp[1] = s1;
-        dp[2] = s2;
-        sp   += inc2;
-        dp   += 3;
-      }
-      sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);
-      dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);
-    }
-  }
-}
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,715 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-
-/*
- * FUNCTIONS
- *      mlib_ImageChannelInsert   - Copy the source image into the selected
- *                                                        channels of the destination image
- *
- * SYNOPSIS
- *      mlib_status mlib_ImageChannelInsert(mlib_image *dst,
- *                                                                        mlib_image *src,
- *                                                                      mlib_s32   cmask);
- *
- * ARGUMENT
- *  dst     Pointer to destination image.
- *  src     Pointer to source image.
- *  cmask   Destination channel selection mask.
- *              The least significant bit (LSB) is corresponding to the
- *              last channel in the destination image data.
- *              The bits with value 1 stand for the channels selected.
- *              If more than N channels are selected, the leftmost N
- *              channels are inserted, where N is the number of channels
- *              in the source image.
- *
- * RESTRICTION
- *              The src and dst must have the same width, height and data type.
- *              The src and dst can have 1, 2, 3 or 4 channels.
- *              The src and dst can be either MLIB_BYTE, MLIB_SHORT, MLIB_INT,
- *          MLIB_FLOAT or MLIB_DOUBLE.
- *
- * DESCRIPTION
- *          Copy the source image into the selected channels of the destination
- *              image
- */
-
-#include <stdlib.h>
-#include "mlib_image.h"
-#include "mlib_ImageCheck.h"
-
-/***************************************************************/
-/* functions defined in mlib_v_ImageChannelInsert_1.c */
-
-void
-mlib_v_ImageChannelInsert_U8(mlib_u8  *src,  mlib_s32 slb,
-                             mlib_u8  *dst,  mlib_s32 dlb,
-                             mlib_s32 channels,
-                             mlib_s32 channeld,
-                             mlib_s32 width,  mlib_s32 height,
-                             mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_D64(mlib_d64  *src,  mlib_s32 slb,
-                              mlib_d64  *dst,  mlib_s32 dlb,
-                              mlib_s32 channels,
-                              mlib_s32 channeld,
-                              mlib_s32 width,  mlib_s32 height,
-                              mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16(mlib_s16 *src,  mlib_s32 slb,
-                              mlib_s16 *dst,  mlib_s32 dlb,
-                              mlib_s32 channels,
-                              mlib_s32 channeld,
-                              mlib_s32 width,  mlib_s32 height,
-                              mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S32(mlib_s32 *src,  mlib_s32 slb,
-                              mlib_s32 *dst,  mlib_s32 dlb,
-                              mlib_s32 channels,
-                              mlib_s32 channeld,
-                              mlib_s32 width,  mlib_s32 height,
-                              mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_12_A8D1X8(mlib_u8  *src,
-                                                               mlib_u8  *dst,
-                                                         mlib_s32 dsize,
-                                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_12_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                               mlib_u8  *dst,  mlib_s32 dlb,
-                                                       mlib_s32 xsize, mlib_s32 ysize,
-                                                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_12_D1(mlib_u8  *src,
-                                                           mlib_u8  *dst,
-                                                   mlib_s32 dsize,
-                                                           mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_12(mlib_u8  *src,  mlib_s32 slb,
-                                                        mlib_u8  *dst,  mlib_s32 dlb,
-                                                mlib_s32 xsize, mlib_s32 ysize,
-                                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_13_A8D1X8(mlib_u8  *src,
-                                                               mlib_u8  *dst,
-                                                       mlib_s32 dsize,
-                                                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_13_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                               mlib_u8  *dst,  mlib_s32 dlb,
-                                                         mlib_s32 xsize, mlib_s32 ysize,
-                                                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_13_D1(mlib_u8  *src,
-                                                           mlib_u8  *dst,
-                                                     mlib_s32 dsize,
-                                                           mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_13(mlib_u8  *src,  mlib_s32 slb,
-                                                        mlib_u8  *dst,  mlib_s32 dlb,
-                                                  mlib_s32 xsize, mlib_s32 ysize,
-                                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_14_A8D1X8(mlib_u8  *src,
-                                                               mlib_u8  *dst,
-                                                       mlib_s32 dsize,
-                                                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_14_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                               mlib_u8  *dst,  mlib_s32 dlb,
-                                                       mlib_s32 xsize, mlib_s32 ysize,
-                                                               mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_14_D1(mlib_u8  *src,
-                                                           mlib_u8  *dst,
-                                                   mlib_s32 dsize,
-                                                           mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_U8_14(mlib_u8  *src,  mlib_s32 slb,
-                                                        mlib_u8  *dst,  mlib_s32 dlb,
-                                                mlib_s32 xsize, mlib_s32 ysize,
-                                                        mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_12_A8D1X4(mlib_s16 *src,
-                                                                      mlib_s16 *dst,
-                                                        mlib_s32 dsize,
-                                                                mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_12_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                      mlib_s16 *dst,  mlib_s32 dlb,
-                                                        mlib_s32 xsize, mlib_s32 ysize,
-                                                                mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_12_D1(mlib_s16 *src,
-                                                            mlib_s16 *dst,
-                                                    mlib_s32 dsize,
-                                                            mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_12(mlib_s16 *src,  mlib_s32 slb,
-                                                        mlib_s16 *dst,  mlib_s32 dlb,
-                                                  mlib_s32 xsize, mlib_s32 ysize,
-                                                  mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_13_A8D1X4(mlib_s16 *src,
-                                                                      mlib_s16 *dst,
-                                                        mlib_s32 dsize,
-                                                                mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_13_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                      mlib_s16 *dst,  mlib_s32 dlb,
-                                                        mlib_s32 xsize, mlib_s32 ysize,
-                                                                mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_13_D1(mlib_s16 *src,
-                                                            mlib_s16 *dst,
-                                                    mlib_s32 dsize,
-                                                            mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_13(mlib_s16 *src,  mlib_s32 slb,
-                                                         mlib_s16 *dst,  mlib_s32 dlb,
-                                                 mlib_s32 xsize, mlib_s32 ysize,
-                                                         mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_14_A8D1X4(mlib_s16 *src,
-                                                                      mlib_s16 *dst,
-                                                          mlib_s32 dsize,
-                                                                      mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_14_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                      mlib_s16 *dst,  mlib_s32 dlb,
-                                                          mlib_s32 xsize, mlib_s32 ysize,
-                                                                      mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_14_D1(mlib_s16 *src,
-                                                            mlib_s16 *dst,
-                                                    mlib_s32 dsize,
-                                                            mlib_s32 cmask);
-void
-mlib_v_ImageChannelInsert_S16_14(mlib_s16 *src,  mlib_s32 slb,
-                                                         mlib_s16 *dst,  mlib_s32 dlb,
-                                                 mlib_s32 xsize, mlib_s32 ysize,
-                                                         mlib_s32 cmask);
-
-/***************************************************************/
-/* functions defined in mlib_v_ImageChannelInsert_34.c */
-
-void
-mlib_v_ImageChannelInsert_U8_34R_A8D1X8(mlib_u8  *src,
-                                                                mlib_u8  *dst,
-                                                                mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_U8_34R_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                                mlib_u8  *dst,  mlib_s32 dlb,
-                                                                mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_U8_34R_D1(mlib_u8  *src,
-                                                            mlib_u8  *dst,
-                                                            mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_U8_34R(mlib_u8  *src,  mlib_s32 slb,
-                                                 mlib_u8  *dst,  mlib_s32 dlb,
-                                                         mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_S16_34R_A8D1X4(mlib_s16 *src,
-                                                                 mlib_s16 *dst,
-                                                                 mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_S16_34R_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                 mlib_s16 *dst,  mlib_s32 dlb,
-                                                                 mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_S16_34R_D1(mlib_s16 *src,
-                                                             mlib_s16 *dst,
-                                                             mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_S16_34R(mlib_s16 *src,  mlib_s32 slb,
-                                                          mlib_s16 *dst,  mlib_s32 dlb,
-                                                          mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_U8_34L_A8D1X8(mlib_u8  *src,
-                                                                mlib_u8  *dst,
-                                                                mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_U8_34L_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                                mlib_u8  *dst,  mlib_s32 dlb,
-                                                        mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_U8_34L_D1(mlib_u8  *src,
-                                                            mlib_u8  *dst,
-                                                            mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_U8_34L(mlib_u8  *src,  mlib_s32 slb,
-                                                         mlib_u8  *dst,  mlib_s32 dlb,
-                                                         mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_S16_34L_A8D1X4(mlib_s16 *src,
-                                                                 mlib_s16 *dst,
-                                                                 mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_S16_34L_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                 mlib_s16 *dst,  mlib_s32 dlb,
-                                                                 mlib_s32 xsize, mlib_s32 ysize);
-void
-mlib_v_ImageChannelInsert_S16_34L_D1(mlib_s16 *src,
-                                                             mlib_s16 *dst,
-                                                             mlib_s32 dsize);
-void
-mlib_v_ImageChannelInsert_S16_34L(mlib_s16 *src,  mlib_s32 slb,
-                                                          mlib_s16 *dst,  mlib_s32 dlb,
-                                                          mlib_s32 xsize, mlib_s32 ysize);
-
-
-/***************************************************************/
-
-#ifdef MLIB_TEST
-mlib_status
-mlib_v_ImageChannelInsert(mlib_image *dst,
-                                            mlib_image *src,
-                                          mlib_s32   cmask)
-#else
-mlib_status
-mlib_ImageChannelInsert(mlib_image *dst,
-                                        mlib_image *src,
-                                        mlib_s32   cmask)
-#endif
-{
-  const mlib_s32  X8 = 0x7;
-  const mlib_s32  X4 = 0x3;
-  const mlib_s32  X2 = 0x1;
-  const mlib_s32  A8D1   = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_ONEDVECTOR;
-  const mlib_s32  A8D2X8 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH8X;
-  const mlib_s32  A8D2X4 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH4X;
-  const mlib_s32  A8D2X2 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH2X;
-
-  void      *sp;                      /* pointer for pixel in src */
-  void      *dp;                      /* pointer for pixel in dst */
-  mlib_s32  ncmask = 0;         /* normalized channel mask */
-  mlib_s32  channels;             /* number of channels for src */
-  mlib_s32  channeld;             /* number of channels for dst */
-  mlib_s32  width, height;/* for src and dst */
-  mlib_s32  strides;              /* strides in bytes for src */
-  mlib_s32  strided;            /* strides in bytes for dst */
-  mlib_s32  flags;
-  mlib_s32  flagd;
-  mlib_s32  dsize;
-  int         i, bit1count = 0;
-
-  MLIB_IMAGE_CHECK(src);
-  MLIB_IMAGE_CHECK(dst);
-  MLIB_IMAGE_TYPE_EQUAL(src,dst);
-  MLIB_IMAGE_SIZE_EQUAL(src,dst);
-
-  channels = mlib_ImageGetChannels(src);
-  channeld = mlib_ImageGetChannels(dst);
-  width    = mlib_ImageGetWidth(src);
-  height   = mlib_ImageGetHeight(src);
-  strides  = mlib_ImageGetStride(src);
-  strided  = mlib_ImageGetStride(dst);
-  sp       = mlib_ImageGetData(src);
-  dp       = mlib_ImageGetData(dst);
-  flags    = mlib_ImageGetFlags(src);
-  flagd    = mlib_ImageGetFlags(dst);
-  dsize    = width * height;
-
-  /* normalize the cmask, and count the number of bit with value 1 */
-  for (i = (channeld - 1); i >= 0; i--) {
-    if (((cmask & (1 << i)) != 0) && (bit1count < channels)) {
-      ncmask += (1 << i);
-      bit1count++;
-    }
-  }
-
-  /* do not support the cases in which the number of selected channels is
-   * less than the nubmber of channels in the source image */
-  if (bit1count < channels) {
-    return MLIB_FAILURE;
-  }
-
-  if (((channels == 1) && (channeld == 1)) ||
-      ((channels == 2) && (channeld == 2)) ||
-      ((channels == 3) && (channeld == 3)) ||
-      ((channels == 4) && (channeld == 4))) {
-      return mlib_ImageCopy(dst, src);
-  }
-
-  switch (mlib_ImageGetType(src)) {
-    case MLIB_BYTE:
-      if (channels == 1) {
-        switch (channeld) {
-          case 2:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X8)   == 0)) {
-                mlib_v_ImageChannelInsert_U8_12_A8D1X8((mlib_u8 *)sp,
-                                                                             (mlib_u8 *)dp,
-                                                                             dsize,
-                                                                                     ncmask);
-            }
-            else if (((flags & A8D2X8) == 0) &&
-              ((flagd & A8D2X8) == 0)) {
-              mlib_v_ImageChannelInsert_U8_12_A8D2X8((mlib_u8 *)sp, strides,
-                                                                             (mlib_u8 *)dp, strided,
-                                                                             width, height,
-                                                                                     ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-               ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-                mlib_v_ImageChannelInsert_U8_12_D1((mlib_u8 *)sp,
-                                                                                 (mlib_u8 *)dp,
-                                                                                 dsize,
-                                                                                 ncmask);
-            }
-            else {
-                mlib_v_ImageChannelInsert_U8_12((mlib_u8 *)sp, strides,
-                                                                      (mlib_u8 *)dp, strided,
-                                                                      width, height,
-                                                                              ncmask);
-            }
-            break;
-
-          case 3:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X8)   == 0)) {
-                mlib_v_ImageChannelInsert_U8_13_A8D1X8((mlib_u8 *)sp,
-                                                                                 (mlib_u8 *)dp,
-                                                                               dsize,
-                                                                                           ncmask);
-            }
-            else if (((flags & A8D2X8) == 0) &&
-              ((flagd & A8D2X8) == 0)) {
-                mlib_v_ImageChannelInsert_U8_13_A8D2X8((mlib_u8 *)sp, strides,
-                                                                                     (mlib_u8 *)dp, strided,
-                                                                             width, height,
-                                                                                     ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-               ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-                mlib_v_ImageChannelInsert_U8_13_D1((mlib_u8 *)sp,
-                                                                                 (mlib_u8 *)dp,
-                                                                                 dsize,
-                                                                                 ncmask);
-            }
-            else {
-              mlib_v_ImageChannelInsert_U8_13((mlib_u8 *)sp, strides,
-                                                                      (mlib_u8 *)dp, strided,
-                                                                      width, height,
-                                                                      ncmask);
-            }
-            break;
-
-          case 4:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X8)   == 0)) {
-                  mlib_v_ImageChannelInsert_U8_14_A8D1X8((mlib_u8 *)sp,
-                                                                                   (mlib_u8 *)dp,
-                                                                                 dsize,
-                                                                                             ncmask);
-            }
-            else if (((flags & A8D2X8) == 0) &&
-               ((flagd & A8D2X8) == 0)) {
-               mlib_v_ImageChannelInsert_U8_14_A8D2X8((mlib_u8 *)sp, strides,
-                                                                      (mlib_u8 *)dp, strided,
-                                                                              width, height,
-                                                                                          ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-              ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-                mlib_v_ImageChannelInsert_U8_14_D1((mlib_u8 *)sp,
-                                                                                 (mlib_u8 *)dp,
-                                                                                 dsize,
-                                                                                 ncmask);
-            }
-            else {
-              mlib_v_ImageChannelInsert_U8_14((mlib_u8 *)sp, strides,
-                                                                      (mlib_u8 *)dp, strided,
-                                                                      width, height,
-                                                                      ncmask);
-            }
-            break;
-
-          default:
-            return MLIB_FAILURE;
-        }
-      }
-      else {
-        if ((channels == 3) && (channeld == 4) && (ncmask == 7)) {
-          if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X8)   == 0)) {
-            mlib_v_ImageChannelInsert_U8_34R_A8D1X8((mlib_u8 *)sp,
-                                                                          (mlib_u8 *)dp,
-                                                                          dsize);
-          }
-        else if (((flags & A8D2X8) == 0) &&
-               ((flagd & A8D2X8) == 0)) {
-              mlib_v_ImageChannelInsert_U8_34R_A8D2X8((mlib_u8 *)sp, strides,
-                                                                                    (mlib_u8 *)dp, strided,
-                                                                              width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-               ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelInsert_U8_34R_D1((mlib_u8 *)sp,
-                                                                          (mlib_u8 *)dp,
-                                                                          dsize);
-        }
-        else {
-              mlib_v_ImageChannelInsert_U8_34R((mlib_u8 *)sp, strides,
-                                                                      (mlib_u8 *)dp, strided,
-                                                                      width, height);
-        }
-      }
-      else if ((channels == 3) && (channeld == 4) && (ncmask == 14)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X8)   == 0)) {
-            mlib_v_ImageChannelInsert_U8_34L_A8D1X8((mlib_u8 *)sp,
-                                                                            (mlib_u8 *)dp,
-                                                                          dsize);
-              }
-        else if (((flags & A8D2X8) == 0) &&
-                 ((flagd & A8D2X8) == 0)) {
-                 mlib_v_ImageChannelInsert_U8_34L_A8D2X8((mlib_u8 *)sp, strides,
-                                                                                  (mlib_u8 *)dp, strided,
-                                                                          width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-                 ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-                 mlib_v_ImageChannelInsert_U8_34L_D1((mlib_u8 *)sp,
-                                                                      (mlib_u8 *)dp,
-                                                                      dsize);
-        }
-        else mlib_v_ImageChannelInsert_U8_34L((mlib_u8 *)sp, strides,
-                                                                   (mlib_u8 *)dp, strided,
-                                                                   width, height);
-        }
-      else {
-
-      mlib_v_ImageChannelInsert_U8((mlib_u8 *)sp, strides,
-                                                     (mlib_u8 *)dp, strided,
-                                                     channels, channeld,
-                                                     width, height,
-                                                     ncmask);
-      }
-  }
-  break;
-
-    case MLIB_SHORT:
-      if (channels == 1) {
-        switch (channeld) {
-          case 2:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X4)   == 0)) {
-              mlib_v_ImageChannelInsert_S16_12_A8D1X4((mlib_s16 *)sp,
-                                                                                    (mlib_s16 *)dp,
-                                                                                      dsize,
-                                                                                      ncmask);
-            }
-            else if (((flags & A8D2X4) == 0) &&
-               ((flagd & A8D2X4) == 0)) {
-              mlib_v_ImageChannelInsert_S16_12_A8D2X4((mlib_s16 *)sp, strides,
-                                                                              (mlib_s16 *)dp, strided,
-                                                                              width, height,
-                                                                                      ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-               ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-             mlib_v_ImageChannelInsert_S16_12_D1((mlib_s16 *)sp,
-                                                                           (mlib_s16 *)dp,
-                                                                          dsize,
-                                                                                  ncmask);
-            }
-            else {
-              mlib_v_ImageChannelInsert_S16_12((mlib_s16 *)sp, strides,
-                                                                       (mlib_s16 *)dp, strided,
-                                                                       width, height,
-                                                                       ncmask);
-            }
-            break;
-
-          case 3:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X4)   == 0)) {
-              mlib_v_ImageChannelInsert_S16_13_A8D1X4((mlib_s16 *)sp,
-                                                                              (mlib_s16 *)dp,
-                                                                                      dsize,
-                                                                                      ncmask);
-            }
-            else if (((flags & A8D2X4) == 0) &&
-               ((flagd & A8D2X4) == 0)) {
-              mlib_v_ImageChannelInsert_S16_13_A8D2X4((mlib_s16 *)sp, strides,
-                                                                              (mlib_s16 *)dp, strided,
-                                                                              width, height,
-                                                                                      ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-               ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-                mlib_v_ImageChannelInsert_S16_13_D1((mlib_s16 *)sp,
-                                                                                  (mlib_s16 *)dp,
-                                                                                  dsize,
-                                                                                  ncmask);
-            }
-            else {
-              mlib_v_ImageChannelInsert_S16_13((mlib_s16 *)sp, strides,
-                                                                       (mlib_s16 *)dp, strided,
-                                                                       width, height,
-                                                                       ncmask);
-            }
-            break;
-
-          case 4:
-            if (((flags & A8D1) == 0) &&
-                ((flagd & A8D1) == 0) &&
-                ((dsize & X4)   == 0)) {
-              mlib_v_ImageChannelInsert_S16_14_A8D1X4((mlib_s16 *)sp,
-                                                                                    (mlib_s16 *)dp,
-                                                      dsize,
-                                                      ncmask);
-            }
-            else if (((flags & A8D2X4) == 0) &&
-               ((flagd & A8D2X4) == 0)) {
-              mlib_v_ImageChannelInsert_S16_14_A8D2X4((mlib_s16 *)sp, strides,
-                                                                              (mlib_s16 *)dp, strided,
-                                                                              width, height,
-                                                                                      ncmask);
-            }
-            else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-               ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-              mlib_v_ImageChannelInsert_S16_14_D1((mlib_s16 *)sp,
-                                                                          (mlib_s16 *)dp,
-                                                                          dsize,
-                                                                                  ncmask);
-            }
-            else {
-              mlib_v_ImageChannelInsert_S16_14((mlib_s16 *)sp, strides,
-                                                                       (mlib_s16 *)dp, strided,
-                                                                       width, height,
-                                                                       ncmask);
-            }
-            break;
-          default:
-            return MLIB_FAILURE;
-        }
-      }
-      else if ((channels == 3) && (channeld == 4) && (ncmask == 7)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X4)   == 0)) {
-          mlib_v_ImageChannelInsert_S16_34R_A8D1X4((mlib_s16 *)sp,
-                                                                           (mlib_s16 *)dp,
-                                                                           dsize);
-        }
-        else if (((flags & A8D2X4) == 0) &&
-           ((flagd & A8D2X4) == 0)) {
-          mlib_v_ImageChannelInsert_S16_34R_A8D2X4((mlib_s16 *)sp, strides,
-                                                                           (mlib_s16 *)dp, strided,
-                                                                           width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-           ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-          mlib_v_ImageChannelInsert_S16_34R_D1((mlib_s16 *)sp,
-                                                                       (mlib_s16 *)dp,
-                                                                       dsize);
-        }
-        else {
-          mlib_v_ImageChannelInsert_S16_34R((mlib_s16 *)sp, strides,
-                                                                    (mlib_s16 *)dp, strided,
-                                                                     width, height);
-        }
-      }
-      else if ((channels == 3) && (channeld == 4) && (ncmask == 14)) {
-        if (((flags & A8D1) == 0) &&
-            ((flagd & A8D1) == 0) &&
-            ((dsize & X4)   == 0)) {
-          mlib_v_ImageChannelInsert_S16_34L_A8D1X4((mlib_s16 *)sp,
-                                                                           (mlib_s16 *)dp,
-                                                                           dsize);
-        }
-        else if (((flags & A8D2X4) == 0) &&
-           ((flagd & A8D2X4) == 0)) {
-          mlib_v_ImageChannelInsert_S16_34L_A8D2X4((mlib_s16 *)sp, strides,
-                                                                           (mlib_s16 *)dp, strided,
-                                                                           width, height);
-        }
-        else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) &&
-           ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) {
-          mlib_v_ImageChannelInsert_S16_34L_D1((mlib_s16 *)sp,
-                                                                       (mlib_s16 *)dp,
-                                                                       dsize);
-        }
-        else {
-          mlib_v_ImageChannelInsert_S16_34L((mlib_s16 *)sp, strides,
-                                                                    (mlib_s16 *)dp, strided,
-                                                                    width, height);
-        }
-      }
-      else {
-        mlib_v_ImageChannelInsert_S16((mlib_s16 *)sp, strides,
-                                                              (mlib_s16 *)dp, strided,
-                                                              channels,  channeld,
-                                                              width, height,
-                                                              ncmask);
-      }
-      break;
-
-    case MLIB_INT:
-        mlib_v_ImageChannelInsert_S32((mlib_s32 *)sp, strides,
-                                      (mlib_s32 *)dp, strided,
-                                      channels, channeld,
-                                      width, height,
-                                      ncmask);
-        break;
-
-    case MLIB_FLOAT:
-        mlib_v_ImageChannelInsert_S32((mlib_s32 *)sp, strides,
-                                      (mlib_s32 *)dp, strided,
-                                      channels, channeld,
-                                      width, height,
-                                      ncmask);
-        break;
-
-
-    case MLIB_DOUBLE:
-        mlib_v_ImageChannelInsert_D64((mlib_d64 *)sp, strides,
-                                      (mlib_d64 *)dp, strided,
-                                      channels, channeld,
-                                      width, height,
-                                      ncmask);
-        break;
-
-
-    case MLIB_BIT:
-    default:
-        return MLIB_FAILURE;    /* MLIB_BIT is not supported here */
-  }
-
-  return MLIB_SUCCESS;
-}
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.h	Fri May 13 11:31:05 2016 +0300
@@ -32,290 +32,21 @@
 extern "C" {
 #endif /* __cplusplus */
 
-void mlib_v_ImageChannelInsert_U8(const mlib_u8 *src,
-                                  mlib_s32      slb,
-                                  mlib_u8       *dst,
-                                  mlib_s32      dlb,
-                                  mlib_s32      channels,
-                                  mlib_s32      channeld,
-                                  mlib_s32      width,
-                                  mlib_s32      height,
-                                  mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_D64(const mlib_d64 *src,
-                                   mlib_s32       slb,
-                                   mlib_d64       *dst,
-                                   mlib_s32       dlb,
-                                   mlib_s32       channels,
-                                   mlib_s32       channeld,
-                                   mlib_s32       width,
-                                   mlib_s32       height,
-                                   mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16(const mlib_s16 *src,
-                                   mlib_s32       slb,
-                                   mlib_s16       *dst,
-                                   mlib_s32       dlb,
-                                   mlib_s32       channels,
-                                   mlib_s32       channeld,
-                                   mlib_s32       width,
-                                   mlib_s32       height,
-                                   mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S32(const mlib_s32 *src,
-                                   mlib_s32       slb,
-                                   mlib_s32       *dst,
-                                   mlib_s32       dlb,
-                                   mlib_s32       channels,
-                                   mlib_s32       channeld,
-                                   mlib_s32       width,
-                                   mlib_s32       height,
-                                   mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_U8_12_A8D1X8(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dsize,
-                                            mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_U8_12_A8D2X8(const mlib_u8 *src,
-                                            mlib_s32      slb,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dlb,
-                                            mlib_s32      xsize,
-                                            mlib_s32      ysize,
-                                            mlib_s32      cmask);
-
 void mlib_v_ImageChannelInsert_U8_12_D1(const mlib_u8 *src,
                                         mlib_u8       *dst,
                                         mlib_s32      dsize,
                                         mlib_s32      cmask);
 
-void mlib_v_ImageChannelInsert_U8_12(const mlib_u8 *src,
-                                     mlib_s32      slb,
-                                     mlib_u8       *dst,
-                                     mlib_s32      dlb,
-                                     mlib_s32      xsize,
-                                     mlib_s32      ysize,
-                                     mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_U8_13_A8D1X8(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dsize,
-                                            mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_U8_13_A8D2X8(const mlib_u8 *src,
-                                            mlib_s32      slb,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dlb,
-                                            mlib_s32      xsize,
-                                            mlib_s32      ysize,
-                                            mlib_s32      cmask);
-
 void mlib_v_ImageChannelInsert_U8_13_D1(const mlib_u8 *src,
                                         mlib_u8       *dst,
                                         mlib_s32      dsize,
                                         mlib_s32      cmask);
 
-void mlib_v_ImageChannelInsert_U8_13(const mlib_u8 *src,
-                                     mlib_s32      slb,
-                                     mlib_u8       *dst,
-                                     mlib_s32      dlb,
-                                     mlib_s32      xsize,
-                                     mlib_s32      ysize,
-                                     mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_U8_14_A8D1X8(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dsize,
-                                            mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_U8_14_A8D2X8(const mlib_u8 *src,
-                                            mlib_s32      slb,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dlb,
-                                            mlib_s32      xsize,
-                                            mlib_s32      ysize,
-                                            mlib_s32      cmask);
-
 void mlib_v_ImageChannelInsert_U8_14_D1(const mlib_u8 *src,
                                         mlib_u8       *dst,
                                         mlib_s32      dsize,
                                         mlib_s32      cmask);
 
-void mlib_v_ImageChannelInsert_U8_14(const mlib_u8 *src,
-                                     mlib_s32      slb,
-                                     mlib_u8       *dst,
-                                     mlib_s32      dlb,
-                                     mlib_s32      xsize,
-                                     mlib_s32      ysize,
-                                     mlib_s32      cmask);
-
-void mlib_v_ImageChannelInsert_S16_12_A8D1X4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dsize,
-                                             mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_12_A8D2X4(const mlib_s16 *src,
-                                             mlib_s32       slb,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dlb,
-                                             mlib_s32       xsize,
-                                             mlib_s32       ysize,
-                                             mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_12_D1(const mlib_s16 *src,
-                                         mlib_s16       *dst,
-                                         mlib_s32       dsize,
-                                         mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_12(const mlib_s16 *src,
-                                      mlib_s32       slb,
-                                      mlib_s16       *dst,
-                                      mlib_s32       dlb,
-                                      mlib_s32       xsize,
-                                      mlib_s32       ysize,
-                                      mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_13_A8D1X4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dsize,
-                                             mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_13_A8D2X4(const mlib_s16 *src,
-                                             mlib_s32       slb,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dlb,
-                                             mlib_s32       xsize,
-                                             mlib_s32       ysize,
-                                             mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_13_D1(const mlib_s16 *src,
-                                         mlib_s16       *dst,
-                                         mlib_s32       dsize,
-                                         mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_13(const mlib_s16 *src,
-                                      mlib_s32       slb,
-                                      mlib_s16       *dst,
-                                      mlib_s32       dlb,
-                                      mlib_s32       xsize,
-                                      mlib_s32       ysize,
-                                      mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_14_A8D1X4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dsize,
-                                             mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_14_A8D2X4(const mlib_s16 *src,
-                                             mlib_s32       slb,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dlb,
-                                             mlib_s32       xsize,
-                                             mlib_s32       ysize,
-                                             mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_14_D1(const mlib_s16 *src,
-                                         mlib_s16       *dst,
-                                         mlib_s32       dsize,
-                                         mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_S16_14(const mlib_s16 *src,
-                                      mlib_s32       slb,
-                                      mlib_s16       *dst,
-                                      mlib_s32       dlb,
-                                      mlib_s32       xsize,
-                                      mlib_s32       ysize,
-                                      mlib_s32       cmask);
-
-void mlib_v_ImageChannelInsert_U8_34R_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize);
-
-void mlib_v_ImageChannelInsert_U8_34R_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize);
-
-void mlib_v_ImageChannelInsert_U8_34R_D1(const mlib_u8 *src,
-                                         mlib_u8       *dst,
-                                         mlib_s32      dsize);
-
-void mlib_v_ImageChannelInsert_U8_34R(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize);
-
-void mlib_v_ImageChannelInsert_S16_34R_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize);
-
-void mlib_v_ImageChannelInsert_S16_34R_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize);
-
-void mlib_v_ImageChannelInsert_S16_34R_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize);
-
-void mlib_v_ImageChannelInsert_S16_34R(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize);
-
-void mlib_v_ImageChannelInsert_U8_34L_A8D1X8(const mlib_u8 *src,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dsize);
-
-void mlib_v_ImageChannelInsert_U8_34L_A8D2X8(const mlib_u8 *src,
-                                             mlib_s32      slb,
-                                             mlib_u8       *dst,
-                                             mlib_s32      dlb,
-                                             mlib_s32      xsize,
-                                             mlib_s32      ysize);
-
-void mlib_v_ImageChannelInsert_U8_34L_D1(const mlib_u8 *src,
-                                         mlib_u8       *dst,
-                                         mlib_s32      dsize);
-
-void mlib_v_ImageChannelInsert_U8_34L(const mlib_u8 *src,
-                                      mlib_s32      slb,
-                                      mlib_u8       *dst,
-                                      mlib_s32      dlb,
-                                      mlib_s32      xsize,
-                                      mlib_s32      ysize);
-
-void mlib_v_ImageChannelInsert_S16_34L_A8D1X4(const mlib_s16 *src,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dsize);
-
-void mlib_v_ImageChannelInsert_S16_34L_A8D2X4(const mlib_s16 *src,
-                                              mlib_s32       slb,
-                                              mlib_s16       *dst,
-                                              mlib_s32       dlb,
-                                              mlib_s32       xsize,
-                                              mlib_s32       ysize);
-
-void mlib_v_ImageChannelInsert_S16_34L_D1(const mlib_s16 *src,
-                                          mlib_s16       *dst,
-                                          mlib_s32       dsize);
-
-void mlib_v_ImageChannelInsert_S16_34L(const mlib_s16 *src,
-                                       mlib_s32       slb,
-                                       mlib_s16       *dst,
-                                       mlib_s32       dlb,
-                                       mlib_s32       xsize,
-                                       mlib_s32       ysize);
-
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c	Fri May 13 11:31:05 2016 +0300
@@ -27,34 +27,9 @@
 
 /*
  * FUNCTIONS
- *      mlib_v_ImageChannelInsert_U8
- *      mlib_v_ImageChannelInsert_U8_12_A8D1X8
- *      mlib_v_ImageChannelInsert_U8_12_A8D2X8
  *      mlib_v_ImageChannelInsert_U8_12_D1
- *      mlib_v_ImageChannelInsert_U8_12
- *      mlib_v_ImageChannelInsert_U8_13_A8D1X8
- *      mlib_v_ImageChannelInsert_U8_13_A8D2X8
  *      mlib_v_ImageChannelInsert_U8_13_D1
- *      mlib_v_ImageChannelInsert_U8_13
- *      mlib_v_ImageChannelInsert_U8_14_A8D1X8
- *      mlib_v_ImageChannelInsert_U8_14_A8D2X8
  *      mlib_v_ImageChannelInsert_U8_14_D1
- *      mlib_v_ImageChannelInsert_U8_14
- *      mlib_v_ImageChannelInsert_S16
- *      mlib_v_ImageChannelInsert_S16_12_A8D1X4
- *      mlib_v_ImageChannelInsert_S16_12_A8D2X4
- *      mlib_v_ImageChannelInsert_S16_12_D1
- *      mlib_v_ImageChannelInsert_S16_12
- *      mlib_v_ImageChannelInsert_S16_13_A8D1X4
- *      mlib_v_ImageChannelInsert_S16_13_A8D2X4
- *      mlib_v_ImageChannelInsert_S16_13_D1
- *      mlib_v_ImageChannelInsert_S16_13
- *      mlib_v_ImageChannelInsert_S16_14_A8D1X4
- *      mlib_v_ImageChannelInsert_S16_14_A8D2X4
- *      mlib_v_ImageChannelInsert_S16_14_D1
- *      mlib_v_ImageChannelInsert_S16_14
- *      mlib_v_ImageChannelInsert_S32
- *      mlib_v_ImageChannelInsert_D64
  *
  * ARGUMENT
  *      src     pointer to source image data
@@ -80,424 +55,12 @@
 #include "mlib_v_ImageChannelInsert.h"
 
 /***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-void mlib_v_ImageChannelInsert_U8(const mlib_u8 *src,
-                                  mlib_s32      slb,
-                                  mlib_u8       *dst,
-                                  mlib_s32      dlb,
-                                  mlib_s32      channels,
-                                  mlib_s32      channeld,
-                                  mlib_s32      width,
-                                  mlib_s32      height,
-                                  mlib_s32      cmask)
-{
-  mlib_u8 *sp;                                        /* pointer for pixel in src */
-  mlib_u8 *sl;                                        /* pointer for line in src */
-  mlib_u8 *dp;                                        /* pointer for pixel in dst */
-  mlib_u8 *dl;                                        /* pointer for line in dst */
-  mlib_s32 i, j, k;                                   /* indices for x, y, channel */
-  mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
-  mlib_s32 inc0, inc1, inc2;
-  mlib_u8 s0, s1, s2;
-
-  deltac[channels] = 1;
-  for (i = (channeld - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channels] = channeld;
-  for (i = 1; i < channels; i++) {
-    deltac[channels] -= deltac[i];
-  }
-
-  sp = sl = (void *)src;
-  dp = dl = dst + deltac[0];
-
-  if (channels == 2) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp += inc1;
-        sp += 2;
-      }
-
-      sp = sl += slb;
-      dp = dl += dlb;
-    }
-  }
-  else if (channels == 3) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    inc2 = deltac[3] + inc1;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        s2 = sp[2];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp[inc1] = s2;
-        dp += inc2;
-        sp += 3;
-      }
-
-      sp = sl += slb;
-      dp = dl += dlb;
-    }
-  }
-}
-
-/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-void mlib_v_ImageChannelInsert_D64(const mlib_d64 *src,
-                                   mlib_s32       slb,
-                                   mlib_d64       *dst,
-                                   mlib_s32       dlb,
-                                   mlib_s32       channels,
-                                   mlib_s32       channeld,
-                                   mlib_s32       width,
-                                   mlib_s32       height,
-                                   mlib_s32       cmask)
-{
-  mlib_d64 *sp;                                       /* pointer for pixel in src */
-  mlib_d64 *sl;                                       /* pointer for line in src */
-  mlib_d64 *dp;                                       /* pointer for pixel in dst */
-  mlib_d64 *dl;                                       /* pointer for line in dst */
-  mlib_s32 i, j, k;                                   /* indices for x, y, channel */
-  mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
-  mlib_s32 inc0, inc1, inc2;
-  mlib_d64 s0, s1, s2;
-
-  deltac[channels] = 1;
-  for (i = (channeld - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channels] = channeld;
-  for (i = 1; i < channels; i++) {
-    deltac[channels] -= deltac[i];
-  }
-
-  sp = sl = (void *)src;
-  dp = dl = dst + deltac[0];
-
-  if (channels == 1) {
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        dp[0] = s0;
-        dp += channeld;
-        sp++;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (channels == 2) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp += inc1;
-        sp += 2;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (channels == 3) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    inc2 = deltac[3] + inc1;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        s2 = sp[2];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp[inc1] = s2;
-        dp += inc2;
-        sp += 3;
-      }
-
-      sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-void mlib_v_ImageChannelInsert_S16(const mlib_s16 *src,
-                                   mlib_s32       slb,
-                                   mlib_s16       *dst,
-                                   mlib_s32       dlb,
-                                   mlib_s32       channels,
-                                   mlib_s32       channeld,
-                                   mlib_s32       width,
-                                   mlib_s32       height,
-                                   mlib_s32       cmask)
-{
-  mlib_s16 *sp;                                       /* pointer for pixel in src */
-  mlib_s16 *sl;                                       /* pointer for line in src */
-  mlib_s16 *dp;                                       /* pointer for pixel in dst */
-  mlib_s16 *dl;                                       /* pointer for line in dst */
-  mlib_s32 i, j, k;                                   /* indices for x, y, channel */
-  mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
-  mlib_s32 inc0, inc1, inc2;
-  mlib_s16 s0, s1, s2;
-
-  deltac[channels] = 1;
-  for (i = (channeld - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channels] = channeld;
-  for (i = 1; i < channels; i++) {
-    deltac[channels] -= deltac[i];
-  }
-
-  sp = sl = (void *)src;
-  dp = dl = dst + deltac[0];
-
-  if (channels == 2) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp += inc1;
-        sp += 2;
-      }
-
-      sp = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (channels == 3) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    inc2 = deltac[3] + inc1;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        s2 = sp[2];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp[inc1] = s2;
-        dp += inc2;
-        sp += 3;
-      }
-
-      sp = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-
-void mlib_v_ImageChannelInsert_S32(const mlib_s32 *src,
-                                   mlib_s32       slb,
-                                   mlib_s32       *dst,
-                                   mlib_s32       dlb,
-                                   mlib_s32       channels,
-                                   mlib_s32       channeld,
-                                   mlib_s32       width,
-                                   mlib_s32       height,
-                                   mlib_s32       cmask)
-{
-  mlib_s32 *sp;                                       /* pointer for pixel in src */
-  mlib_s32 *sl;                                       /* pointer for line in src */
-  mlib_s32 *dp;                                       /* pointer for pixel in dst */
-  mlib_s32 *dl;                                       /* pointer for line in dst */
-  mlib_s32 i, j, k;                                   /* indices for x, y, channel */
-  mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
-  mlib_s32 inc0, inc1, inc2;
-  mlib_s32 s0, s1, s2;
-
-  deltac[channels] = 1;
-  for (i = (channeld - 1), k = 0; i >= 0; i--) {
-    if ((cmask & (1 << i)) == 0)
-      deltac[k]++;
-    else
-      k++;
-  }
-
-  deltac[channels] = channeld;
-  for (i = 1; i < channels; i++) {
-    deltac[channels] -= deltac[i];
-  }
-
-  sp = sl = (void *)src;
-  dp = dl = dst + deltac[0];
-
-  if (channels == 1) {
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        dp[0] = s0;
-        dp += channeld;
-        sp++;
-      }
-
-      sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (channels == 2) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp += inc1;
-        sp += 2;
-      }
-
-      sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-  else if (channels == 3) {
-    inc0 = deltac[1];
-    inc1 = deltac[2] + inc0;
-    inc2 = deltac[3] + inc1;
-    for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
-      for (i = 0; i < width; i++) {
-        s0 = sp[0];
-        s1 = sp[1];
-        s2 = sp[2];
-        dp[0] = s0;
-        dp[inc0] = s1;
-        dp[inc1] = s2;
-        dp += inc2;
-        sp += 3;
-      }
-
-      sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
-      dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
-    }
-  }
-}
-
-/***************************************************************/
 #define INSERT_U8_12(sd0, dd0, dd1)     /* channel duplicate */ \
   dd0 = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0));        \
   dd1 = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0))
 
 /***************************************************************/
 /* insert one channel to a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * dsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelInsert_U8_12_A8D1X8(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dsize,
-                                            mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0;
-  mlib_d64 dd0, dd1;
-  mlib_s32 bmask;
-  mlib_s32 i;
-
-  bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6);
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    sd0 = *sp++;
-    INSERT_U8_12(sd0, dd0, dd1);
-    vis_pst_8(dd0, dp++, bmask);
-    vis_pst_8(dd1, dp++, bmask);
-  }
-}
-
-/***************************************************************/
-/* insert one channel to a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * xsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelInsert_U8_12_A8D2X8(const mlib_u8 *src,
-                                            mlib_s32      slb,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dlb,
-                                            mlib_s32      xsize,
-                                            mlib_s32      ysize,
-                                            mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0;
-  mlib_d64 dd0, dd1;
-  mlib_s32 bmask;
-  mlib_s32 i, j;
-
-  bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6);
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      sd0 = *sp++;
-      INSERT_U8_12(sd0, dd0, dd1);
-      vis_pst_8(dd0, dp++, bmask);
-      vis_pst_8(dd1, dp++, bmask);
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-/* insert one channel to a 2-channel image.
  */
 
 void mlib_v_ImageChannelInsert_U8_12_D1(const mlib_u8 *src,
@@ -724,56 +287,6 @@
 }
 
 /***************************************************************/
-/* insert one channel to a 2-channel image.
- */
-
-void mlib_v_ImageChannelInsert_U8_12(const mlib_u8 *src,
-                                     mlib_s32      slb,
-                                     mlib_u8       *dst,
-                                     mlib_s32      dlb,
-                                     mlib_s32      xsize,
-                                     mlib_s32      ysize,
-                                     mlib_s32      cmask)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_U8_12_D1(sa, da, xsize, cmask);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
-#define INSERT_U8_13(sd0, dd0, dd1, dd2)                        \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd0));        \
-  sdb = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sda));        \
-  sdc = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb));        \
-  sdd = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb));        \
-  dd0 = vis_fpmerge(vis_read_hi(sdc), vis_read_hi(sdd));        \
-  sde = vis_fpmerge(vis_read_lo(sdc), vis_read_lo(sdd));        \
-  dd1 = vis_freg_pair(vis_read_lo(dd0), vis_read_hi(sde));      \
-  dd2 = vis_freg_pair(vis_read_lo(sde), vis_read_lo(sde))
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_U8_A8(channeld)                       \
-  sd = *sp++;                                                   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
-  vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld
-
-/***************************************************************/
 #define LOAD_INSERT_STORE_U8(channeld)                          \
   vis_alignaddr((void *)0, off);                                \
   sd0 = sd1;                                                    \
@@ -790,58 +303,6 @@
   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld
 
 /***************************************************************/
-void mlib_v_ImageChannelInsert_U8_13_A8D1X8(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dsize,
-                                            mlib_s32      cmask)
-{
-  mlib_u8 *da;
-  mlib_d64 *sp;
-  mlib_d64 sd;
-  mlib_s32 i;
-
-  vis_alignaddr((void *)0, 1);              /* for 1-byte left shift */
-
-  sp = (mlib_d64 *) src;
-  da = dst + (2 / cmask);                   /* 4,2,1 -> 0,1,2 */
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    LOAD_INSERT_STORE_U8_A8(3);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_13_A8D2X8(const mlib_u8 *src,
-                                            mlib_s32      slb,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dlb,
-                                            mlib_s32      xsize,
-                                            mlib_s32      ysize,
-                                            mlib_s32      cmask)
-{
-  mlib_u8 *da, *dl;
-  mlib_d64 *sp, *sl;
-  mlib_d64 sd;
-  mlib_s32 i, j;
-
-  vis_alignaddr((void *)0, 1);
-
-  sp = sl = (mlib_d64 *) src;
-  da = dl = dst + (2 / cmask);              /* 4,2,1 -> 0,1,2 */
-
-  for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      LOAD_INSERT_STORE_U8_A8(3);
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
 void mlib_v_ImageChannelInsert_U8_13_D1(const mlib_u8 *src,
                                         mlib_u8       *dst,
                                         mlib_s32      dsize,
@@ -908,30 +369,6 @@
 }
 
 /***************************************************************/
-void mlib_v_ImageChannelInsert_U8_13(const mlib_u8 *src,
-                                     mlib_s32      slb,
-                                     mlib_u8       *dst,
-                                     mlib_s32      dlb,
-                                     mlib_s32      xsize,
-                                     mlib_s32      ysize,
-                                     mlib_s32      cmask)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_U8_13_D1(sa, da, xsize, cmask);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
 #define INSERT_U8_14(sd0, dd0, dd1, dd2, dd3)                   \
   sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0));        \
   sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0));        \
@@ -941,73 +378,6 @@
   dd3 = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb))
 
 /***************************************************************/
-void mlib_v_ImageChannelInsert_U8_14_A8D1X8(const mlib_u8 *src,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dsize,
-                                            mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 sd0;
-  mlib_d64 sda, sdb;
-  mlib_d64 dd0, dd1, dd2, dd3;
-  mlib_s32 bmask;
-  mlib_s32 i;
-
-  bmask = cmask | (cmask << 4);
-
-  sp = (mlib_d64 *) src;
-  dp = (mlib_d64 *) dst;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    sd0 = *sp++;
-    INSERT_U8_14(sd0, dd0, dd1, dd2, dd3);
-    vis_pst_8(dd0, dp++, bmask);
-    vis_pst_8(dd1, dp++, bmask);
-    vis_pst_8(dd2, dp++, bmask);
-    vis_pst_8(dd3, dp++, bmask);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_14_A8D2X8(const mlib_u8 *src,
-                                            mlib_s32      slb,
-                                            mlib_u8       *dst,
-                                            mlib_s32      dlb,
-                                            mlib_s32      xsize,
-                                            mlib_s32      ysize,
-                                            mlib_s32      cmask)
-{
-  mlib_d64 *sp, *dp;
-  mlib_d64 *sl, *dl;
-  mlib_d64 sd0;
-  mlib_d64 sda, sdb;
-  mlib_d64 dd0, dd1, dd2, dd3;
-  mlib_s32 bmask;
-  mlib_s32 i, j;
-
-  bmask = cmask | (cmask << 4);
-
-  sp = sl = (mlib_d64 *) src;
-  dp = dl = (mlib_d64 *) dst;
-
-  for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      sd0 = *sp++;
-      INSERT_U8_14(sd0, dd0, dd1, dd2, dd3);
-      vis_pst_8(dd0, dp++, bmask);
-      vis_pst_8(dd1, dp++, bmask);
-      vis_pst_8(dd2, dp++, bmask);
-      vis_pst_8(dd3, dp++, bmask);
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
 void mlib_v_ImageChannelInsert_U8_14_D1(const mlib_u8 *src,
                                         mlib_u8       *dst,
                                         mlib_s32      dsize,
@@ -1188,445 +558,5 @@
   }
 }
 
-/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_14(const mlib_u8 *src,
-                                     mlib_s32      slb,
-                                     mlib_u8       *dst,
-                                     mlib_s32      dlb,
-                                     mlib_s32      xsize,
-                                     mlib_s32      ysize,
-                                     mlib_s32      cmask)
-{
-  mlib_u8 *sa, *da;
-  mlib_u8 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_U8_14_D1(sa, da, xsize, cmask);
-    sa = sl += slb;
-    da = dl += dlb;
-  }
-}
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_S16_1X_A8(channeld)                   \
-  sd  = *sp++;                                                  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld;  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld;  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld;  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_S16_1X(channeld)                      \
-  vis_alignaddr((void *)0, off);                                \
-  sd0 = sd1;                                                    \
-  sd1 = *sp++;                                                  \
-  sd  = vis_faligndata(sd0, sd1);                               \
-  vis_alignaddr((void *)0, 2);                                  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld;  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld;  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld;  \
-  vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12_A8D1X4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dsize,
-                                             mlib_s32       cmask)
-{
-  mlib_s16 *da;
-  mlib_d64 *sp;
-  mlib_d64 sd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  da = dst + (2 - cmask);                   /* 2,1 -> 0,1 */
-
-  vis_alignaddr((void *)0, 2);
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_1X_A8(2);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12_A8D2X4(const mlib_s16 *src,
-                                             mlib_s32       slb,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dlb,
-                                             mlib_s32       xsize,
-                                             mlib_s32       ysize,
-                                             mlib_s32       cmask)
-{
-  mlib_s16 *da, *dl;
-  mlib_d64 *sp, *sl;
-  mlib_d64 sd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  da = dl = dst + (2 - cmask);              /* 2,1 -> 0,1 */
-
-  vis_alignaddr((void *)0, 2);
-
-  for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_INSERT_STORE_S16_1X_A8(2);
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12_D1(const mlib_s16 *src,
-                                         mlib_s16       *dst,
-                                         mlib_s32       dsize,
-                                         mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *dend;                                     /* end point in destination */
-  mlib_d64 *sp;                                       /* 8-byte aligned start points in src */
-  mlib_d64 sd0, sd1, sd;                              /* 8-byte registers for source data */
-  mlib_s32 off;                                       /* offset of address alignment in src */
-  mlib_s32 i;
-
-  sa = (void *)src;
-  da = dst + (2 - cmask);                   /* 2,1 -> 0,1 */
-
-  /* prepare the src address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  off = (mlib_addr) sa & 7;
-
-  dend = da + dsize * 2 - 1;
-
-  sd1 = *sp++;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_1X(2);
-  }
-
-  /* right end handling */
-  if ((mlib_addr) da <= (mlib_addr) dend) {
-
-    vis_alignaddr((void *)0, off);
-    sd0 = sd1;
-    sd1 = *sp++;
-    sd = vis_faligndata(sd0, sd1);
-
-    vis_alignaddr((void *)0, 2);
-    vis_st_u16(sd = vis_faligndata(sd, sd), da);
-    da += 2;
-    if ((mlib_addr) da <= (mlib_addr) dend) {
-      vis_st_u16(sd = vis_faligndata(sd, sd), da);
-      da += 2;
-      if ((mlib_addr) da <= (mlib_addr) dend) {
-        vis_st_u16(sd = vis_faligndata(sd, sd), da);
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12(const mlib_s16 *src,
-                                      mlib_s32       slb,
-                                      mlib_s16       *dst,
-                                      mlib_s32       dlb,
-                                      mlib_s32       xsize,
-                                      mlib_s32       ysize,
-                                      mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_S16_12_D1(sa, da, xsize, cmask);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13_A8D1X4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dsize,
-                                             mlib_s32       cmask)
-{
-  mlib_s16 *da;
-  mlib_d64 *sp;
-  mlib_d64 sd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  da = dst + (2 / cmask);                   /* 4,2,1 -> 0,1,2 */
-
-  vis_alignaddr((void *)0, 2);
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_1X_A8(3);
-  }
-}
 
 /***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13_A8D2X4(const mlib_s16 *src,
-                                             mlib_s32       slb,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dlb,
-                                             mlib_s32       xsize,
-                                             mlib_s32       ysize,
-                                             mlib_s32       cmask)
-{
-  mlib_s16 *da, *dl;
-  mlib_d64 *sp, *sl;
-  mlib_d64 sd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  da = dl = dst + (2 / cmask);              /* 4,2,1 -> 0,1,2 */
-
-  vis_alignaddr((void *)0, 2);
-
-  for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_INSERT_STORE_S16_1X_A8(3);
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13_D1(const mlib_s16 *src,
-                                         mlib_s16       *dst,
-                                         mlib_s32       dsize,
-                                         mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *dend;                                     /* end point in destination */
-  mlib_d64 *sp;                                       /* 8-byte aligned start points in src */
-  mlib_d64 sd0, sd1, sd;                              /* 8-byte registers for source data */
-  mlib_s32 off;                                       /* offset of address alignment in src */
-  mlib_s32 i;
-
-  sa = (void *)src;
-  da = dst + (2 / cmask);                   /* 4,2,1 -> 0,1,2 */
-
-  /* prepare the src address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  off = (mlib_addr) sa & 7;
-
-  dend = da + dsize * 3 - 1;
-
-  sd1 = *sp++;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_1X(3);
-  }
-
-  /* right end handling */
-  if ((mlib_addr) da <= (mlib_addr) dend) {
-
-    vis_alignaddr((void *)0, off);
-    sd0 = sd1;
-    sd1 = *sp++;
-    sd = vis_faligndata(sd0, sd1);
-
-    vis_alignaddr((void *)0, 2);
-    vis_st_u16(sd = vis_faligndata(sd, sd), da);
-    da += 3;
-    if ((mlib_addr) da <= (mlib_addr) dend) {
-      vis_st_u16(sd = vis_faligndata(sd, sd), da);
-      da += 3;
-      if ((mlib_addr) da <= (mlib_addr) dend) {
-        vis_st_u16(sd = vis_faligndata(sd, sd), da);
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13(const mlib_s16 *src,
-                                      mlib_s32       slb,
-                                      mlib_s16       *dst,
-                                      mlib_s32       dlb,
-                                      mlib_s32       xsize,
-                                      mlib_s32       ysize,
-                                      mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_S16_13_D1(sa, da, xsize, cmask);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-#define INSERT_S16_14(sp, dp, bmask)    /* channel duplicate */ \
-  /* obsolete: it is slower than the vis_st_u16() version*/     \
-  sd0 = *sp++;                                                  \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0));        \
-  sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0));        \
-  sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sda));        \
-  sdd = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sda));        \
-  sde = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb));        \
-  sdf = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb));        \
-  dd0 = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc));        \
-  dd1 = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sdd));        \
-  dd2 = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sde));        \
-  dd3 = vis_fpmerge(vis_read_hi(sdf), vis_read_lo(sdf));        \
-  vis_pst_16(dd0, dp++, bmask);                                 \
-  vis_pst_16(dd1, dp++, bmask);                                 \
-  vis_pst_16(dd2, dp++, bmask);                                 \
-  vis_pst_16(dd3, dp++, bmask)
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14_A8D1X4(const mlib_s16 *src,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dsize,
-                                             mlib_s32       cmask)
-{
-  mlib_s16 *da;
-  mlib_d64 *sp;
-  mlib_d64 sd;
-  mlib_s32 i;
-
-  sp = (mlib_d64 *) src;
-  da = dst + (6 / cmask + 1) / 2;           /* 8,4,2,1 -> 0,1,2,3 */
-
-  vis_alignaddr((void *)0, 2);
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_1X_A8(4);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14_A8D2X4(const mlib_s16 *src,
-                                             mlib_s32       slb,
-                                             mlib_s16       *dst,
-                                             mlib_s32       dlb,
-                                             mlib_s32       xsize,
-                                             mlib_s32       ysize,
-                                             mlib_s32       cmask)
-{
-  mlib_s16 *da, *dl;
-  mlib_d64 *sp, *sl;
-  mlib_d64 sd;
-  mlib_s32 i, j;
-
-  sp = sl = (mlib_d64 *) src;
-  da = dl = dst + (6 / cmask + 1) / 2;      /* 8,4,2,1 -> 0,1,2,3 */
-
-  vis_alignaddr((void *)0, 2);
-
-  for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_INSERT_STORE_S16_1X_A8(4);
-    }
-
-    sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14_D1(const mlib_s16 *src,
-                                         mlib_s16       *dst,
-                                         mlib_s32       dsize,
-                                         mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *dend;                                     /* end point in destination */
-  mlib_d64 *sp;                                       /* 8-byte aligned start points in src */
-  mlib_d64 sd0, sd1, sd;                              /* 8-byte registers for source data */
-  mlib_s32 off;                                       /* offset of address alignment in src */
-  mlib_s32 i;
-
-  sa = (void *)src;
-  da = dst + (6 / cmask + 1) / 2;           /* 8,4,2,1 -> 0,1,2,3 */
-
-  /* prepare the src address */
-  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  off = (mlib_addr) sa & 7;
-
-  dend = da + dsize * 4 - 1;
-
-  sd1 = *sp++;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_1X(4);
-  }
-
-  /* right end handling */
-  if ((mlib_addr) da <= (mlib_addr) dend) {
-
-    vis_alignaddr((void *)0, off);
-    sd0 = sd1;
-    sd1 = *sp++;
-    sd = vis_faligndata(sd0, sd1);
-
-    vis_alignaddr((void *)0, 2);
-    vis_st_u16(sd = vis_faligndata(sd, sd), da);
-    da += 4;
-    if ((mlib_addr) da <= (mlib_addr) dend) {
-      vis_st_u16(sd = vis_faligndata(sd, sd), da);
-      da += 4;
-      if ((mlib_addr) da <= (mlib_addr) dend) {
-        vis_st_u16(sd = vis_faligndata(sd, sd), da);
-      }
-    }
-  }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14(const mlib_s16 *src,
-                                      mlib_s32       slb,
-                                      mlib_s16       *dst,
-                                      mlib_s32       dlb,
-                                      mlib_s32       xsize,
-                                      mlib_s32       ysize,
-                                      mlib_s32       cmask)
-{
-  mlib_s16 *sa, *da;
-  mlib_s16 *sl, *dl;
-  mlib_s32 j;
-
-  sa = sl = (void *)src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_S16_14_D1(sa, da, xsize, cmask);
-    sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
-    da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
-  }
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_34.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1225 +0,0 @@
-/*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-/*
- * FILENAME: mlib_v_ImageChannelInsert_34.c
- *
- * FUNCTIONS
- *      mlib_v_ImageChannelInsert_U8_34R_A8D1X8
- *      mlib_v_ImageChannelInsert_U8_34R_A8D2X8
- *      mlib_v_ImageChannelInsert_U8_34R_D1
- *      mlib_v_ImageChannelInsert_U8_34R
- *      mlib_v_ImageChannelInsert_S16_34R_A8D1X4
- *      mlib_v_ImageChannelInsert_S16_34R_A8D2X4
- *      mlib_v_ImageChannelInsert_S16_34R_D1
- *      mlib_v_ImageChannelInsert_S16_34R
- *      mlib_v_ImageChannelInsert_U8_34L_A8D1X8
- *      mlib_v_ImageChannelInsert_U8_34L_A8D2X8
- *      mlib_v_ImageChannelInsert_U8_34L_D1
- *      mlib_v_ImageChannelInsert_U8_34L
- *      mlib_v_ImageChannelInsert_S16_34L_A8D1X4
- *      mlib_v_ImageChannelInsert_S16_34L_A8D2X4
- *      mlib_v_ImageChannelInsert_S16_34L_D1
- *      mlib_v_ImageChannelInsert_S16_34L
- *
- * SYNOPSIS
- *
- * ARGUMENT
- *      src       pointer to source image data
- *      dst       pointer to destination image data
- *          slb   source image line stride in bytes
- *          dlb   destination image line stride in bytes
- *          dsize       image data size in pixels
- *          xsize       image width in pixels
- *          ysize       image height in lines
- *          cmask channel mask
- *
- * DESCRIPTION
- *          Insert a 3-channel image into the right or left 3 channels of
- *          a 4-channel image low level functions.
- *
- *                BGR => ABGR   (34R), or       RGB => RGBA     (34L)
- *
- * NOTE
- *          These functions are separated from mlib_v_ImageChannelInsert.c
- *          for loop unrolling and structure clarity.
- */
-
-#include <stdlib.h>
-#include "vis_proto.h"
-#include "mlib_image.h"
-
-/***************************************************************/
-#define INSERT_U8_34R                                                                         \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));                    \
-  sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));                    \
-  sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));                    \
-  sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));                    \
-  sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));                    \
-  sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));                    \
-  sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));                    \
-  sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));                    \
-  sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));                    \
-  sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi));                    \
-  sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi));                    \
-  sdl = vis_fpmerge(vis_read_hi(sdh), vis_read_hi(sdh));                    \
-  sdm = vis_fpmerge(vis_read_lo(sdh), vis_read_lo(sdh));                    \
-  dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj));                    \
-  dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj));                    \
-  dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk));                    \
-  dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk));
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_U8_34R_A8                                                         \
-  sd0 = *sp++;                                  /* b0g0r0b1g1r1b2g2 */                  \
-  sd1 = *sp++;                                  /* r2b3g3r3b4g4r4b5 */                  \
-  sd2 = *sp++;                                  /* g5r5b6g6r6b7g7r7 */                  \
-  INSERT_U8_34R                                                                                           \
-  vis_pst_8(dd0, dp++, bmask);                                                                \
-  vis_pst_8(dd1, dp++, bmask);                                                                \
-  vis_pst_8(dd2, dp++, bmask);                                                                \
-  vis_pst_8(dd3, dp++, bmask);
-
-/***************************************************************/
-#define LOAD_INSERT_U8_34R                                                                      \
-  vis_alignaddr((void *)soff, 0);                                                             \
-  s0 = s3;                                                                                                    \
-  s1 = sp[1];                                                                                               \
-  s2 = sp[2];                                                                                               \
-  s3 = sp[3];                                                                                               \
-  sd0 = vis_faligndata(s0, s1);                                 \
-  sd1 = vis_faligndata(s1, s2);                                                               \
-  sd2 = vis_faligndata(s2, s3);                                                               \
-  sp += 3;                                                                                                    \
-  dd4 = dd3;                                                                  \
-  INSERT_U8_34R
-
-/***************************************************************/
-/*
- * Both source and destination image data are 1-d vectors and
- * 8-byte aligned. And dsize is multiple of 8.
- */
-
-void
-mlib_v_ImageChannelInsert_U8_34R_A8D1X8(mlib_u8  *src,
-                                                                mlib_u8  *dst,
-                                                                mlib_s32 dsize)
-{
-  mlib_d64  *sp, *dp;
-  mlib_d64  sd0, sd1, sd2;          /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  sda, sdb, sdc, sdd; /* intermediate variables */
-  mlib_d64  sde, sdf, sdg, sdh;
-  mlib_d64  sdi, sdj, sdk, sdl;
-  mlib_d64  sdm;
-  int       bmask = 0x77;
-  int       i;
-
-  sp = (mlib_d64 *)src;
-  dp = (mlib_d64 *)dst;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    LOAD_INSERT_STORE_U8_34R_A8;
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned. And slb and dlb are multiple of 8.
- * The xsize is multiple of 8.
- *
- * For each of the ysize rows the LOAD_INSERT_STORE_U8_34R_A8 step
- * runs xsize / 8 times; afterwards the row pointers advance by
- * slb / dlb, which are applied as byte strides (the addition is
- * done through an mlib_u8 * cast).
- * NOTE(review): the step macro is defined earlier in the file and
- * is expected to advance sp and dp itself - confirm.
- */
-
-void
-mlib_v_ImageChannelInsert_U8_34R_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                                mlib_u8  *dst,  mlib_s32 dlb,
-                                                                mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_d64  *sp, *dp;             /* 8-byte aligned pointer for pixel */
-  mlib_d64  *sl, *dl;             /* 8-byte aligned pointer for line */
-  mlib_d64  sd0, sd1, sd2;      /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  sda, sdb, sdc, sdd; /* intermediate variables */
-  mlib_d64  sde, sdf, sdg, sdh;
-  mlib_d64  sdi, sdj, sdk, sdl;
-  mlib_d64  sdm;
-  int         bmask = 0x77;     /* bit pattern 0111 0111; presumably the store mask of the step macro */
-  int       i, j;               /* indices for x, y */
-
-  sp = sl = (mlib_d64 *)src;
-  dp = dl = (mlib_d64 *)dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 8-byte column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      LOAD_INSERT_STORE_U8_34R_A8;
-    }
-    sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);    /* next source row */
-    dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);    /* next destination row */
-  }
-}
-
-/***************************************************************/
-/*
- * either source or destination data are not 8-byte aligned.
- *
- * The destination run is dsize * 4 bytes long (dend is its last
- * byte).  Both pointers are rounded down to an 8-byte boundary;
- * soff and doff keep the discarded low address bits.  The first
- * group of four doublewords is stored under the start edge mask,
- * whole 32-byte groups follow without edge masks while
- * dp <= dend2, and a final partial group is stored one doubleword
- * at a time with a fresh vis_edge8() mask before each store.
- *
- * When doff != 0 the converted data is shifted with
- * vis_alignaddr(0, -doff) + vis_faligndata(); dd4 supplies the
- * leading bytes that belong to the preceding group.
- * NOTE(review): LOAD_INSERT_U8_34R is defined earlier in the file;
- * it is expected to refill dd0..dd3, save the old dd3 in dd4 and
- * advance sp - confirm.
- */
-
-void
-mlib_v_ImageChannelInsert_U8_34R_D1(mlib_u8  *src,
-                                                            mlib_u8  *dst,
-                                                            mlib_s32 dsize)
-{
-  mlib_u8   *sa, *da;
-  mlib_u8   *dend, *dend2;      /* end points in dst */
-  mlib_d64  *dp;                  /* 8-byte aligned start points in dst */
-  mlib_d64  *sp;                  /* 8-byte aligned start point in src */
-  mlib_d64  s0, s1, s2, s3;     /* 8-byte source raw data */
-  mlib_d64  sd0, sd1, sd2;      /* 8-byte source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  dd4;                  /* the last datum of the last step */
-  mlib_d64  sda, sdb, sdc, sdd; /* intermediate variables */
-  mlib_d64  sde, sdf, sdg, sdh;
-  mlib_d64  sdi, sdj, sdk, sdl;
-  mlib_d64  sdm;
-  int       soff;                 /* offset of address in src */
-  int       doff;                 /* offset of address in dst */
-  int       emask;              /* edge mask */
-  int         bmask;            /* channel mask */
-  int         i, n;
-
-  sa = src;
-  da = dst;
-
-  /* prepare the source address */
-  sp    = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff  = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp    = (mlib_d64 *)((mlib_addr) da & (~7));
-  dend  = da + dsize * 4 - 1;   /* last destination byte */
-  dend2 = dend - 31;            /* highest dp at which a full 32-byte group still fits */
-  doff  = ((mlib_addr) da & 7);
-
-  /* set band mask for vis_pst_8 to store the bytes needed */
-  bmask = 0xff & (0x7777 >> doff) ;     /* repeating 0111 pattern shifted by doff */
-
-  /* generate edge mask for the start point */
-  emask = vis_edge8(da, dend);
-
-  /* load 24 bytes, convert to 32 bytes */
-  s3 = sp[0];                                   /* initial value */
-  LOAD_INSERT_U8_34R;
-
-  if (doff == 0) {                              /* dst is 8-byte aligned */
-
-    if (dsize >= 8 ) {                          /* at least 32 dst bytes: whole first group */
-      vis_pst_8(dd0, dp++, emask & bmask);
-      vis_pst_8(dd1, dp++, bmask);
-      vis_pst_8(dd2, dp++, bmask);
-      vis_pst_8(dd3, dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_8(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(dd2, dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(dd3, dp++, emask & bmask);
-          }
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;  /* number of whole 32-byte groups left */
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_U8_34R;
-        vis_pst_8(dd0, dp++, bmask);
-        vis_pst_8(dd1, dp++, bmask);
-        vis_pst_8(dd2, dp++, bmask);
-        vis_pst_8(dd3, dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {  /* trailing partial group */
-      LOAD_INSERT_U8_34R;
-      emask = vis_edge8(dp, dend);
-      vis_pst_8(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(dd2, dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(dd3, dp++, emask & bmask);
-          }
-        }
-      }
-    }
-  }
-  else {                                        /* (doff != 0) */
-    vis_alignaddr((void *)0, -doff);            /* alignment for shifting data to the dst offset */
-
-    if (dsize >= 8 ) {
-      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
-      vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
-      vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-            if ((mlib_addr) dp <= (mlib_addr) dend)  {
-              emask = vis_edge8(dp, dend);
-              vis_pst_8(vis_faligndata(dd3, dd3), dp++, emask & bmask);
-            }
-          }
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;  /* number of whole 32-byte groups left */
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_U8_34R;
-        vis_alignaddr((void *)0, -doff);        /* re-apply the dst shift after the load step */
-        vis_pst_8(vis_faligndata(dd4, dd0), dp++, bmask);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
-        vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
-        vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {  /* trailing partial group */
-      LOAD_INSERT_U8_34R;
-      vis_alignaddr((void *)0, -doff);          /* re-apply the dst shift after the load step */
-      emask = vis_edge8(dp, dend);
-      vis_pst_8(vis_faligndata(dd4, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-          }
-        }
-      }
-    }
-  }
-}
-
-/***************************************************************
- * General 2-d entry point: calls
- * mlib_v_ImageChannelInsert_U8_34R_D1() once per row, passing
- * xsize as that routine's dsize, for ysize rows.  slb and dlb are
- * added to mlib_u8 pointers, i.e. they are byte strides.
- ***************************************************************/
-
-void
-mlib_v_ImageChannelInsert_U8_34R(mlib_u8  *src,  mlib_s32 slb,
-                                                 mlib_u8  *dst,  mlib_s32 dlb,
-                                                         mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_u8   *sa, *da;
-  mlib_u8   *sl, *dl;
-  int         j;
-
-  sa = sl = src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_U8_34R_D1(sa, da, xsize);
-    sa = sl += slb;             /* next source row */
-    da = dl += dlb;             /* next destination row */
-  }
-}
-
-/***************************************************************
- * INSERT_S16_34R: builds dd0..dd3 from sd0..sd2, which the
- * expanding scope must declare.  Each vis_alignaddr() call
- * changes the offset used by the following vis_faligndata(), so
- * any alignment the caller had set up is overwritten.  The
- * trailing comments name the 16-bit lanes of each result.
- ***************************************************************/
-#define INSERT_S16_34R                                                                              \
-  vis_alignaddr((void *)0, 6);                                                                \
-  dd0 = vis_faligndata(sd0, sd0);                 /* b1b0g0r0 */                \
-  vis_alignaddr((void *)0, 4);                                                                \
-  dd1 = vis_faligndata(sd0, sd1);                 /* r0b1g1r1 */                \
-  vis_alignaddr((void *)0, 2);                                                                \
-  dd2 = vis_faligndata(sd1, sd2);                       /* r1b2g2r2 */          \
-  dd3 = sd2;                                                          /* r2b3g3r3 */
-
-/***************************************************************
- * LOAD_INSERT_STORE_S16_34R_A8: one aligned step.  Reads three
- * doublewords through sp, runs INSERT_S16_34R and writes four
- * doublewords through dp with vis_pst_16() under bmask.  Both
- * pointers are post-incremented; sp, dp, bmask, sd0..sd2 and
- * dd0..dd3 come from the expanding scope.
- ***************************************************************/
-#define LOAD_INSERT_STORE_S16_34R_A8                                                      \
-  sd0 = *sp++;                                          /* b0g0r0b1 */                      \
-  sd1 = *sp++;                                          /* g1r1b2g2 */                      \
-  sd2 = *sp++;                                          /* r2b3g3r3 */                      \
-  INSERT_S16_34R                                                                                          \
-  vis_pst_16(dd0, dp++, bmask);                                                               \
-  vis_pst_16(dd1, dp++, bmask);                                                               \
-  vis_pst_16(dd2, dp++, bmask);                                                               \
-  vis_pst_16(dd3, dp++, bmask);
-
-/***************************************************************
- * LOAD_INSERT_S16_34R: one unaligned-source step without stores.
- * s0 reuses the s3 of the previous step, so s3 must be seeded
- * with sp[0] before the first expansion.  Reads sp[1..3], forms
- * sd0..sd2 with vis_faligndata() at offset soff, advances sp by
- * three doublewords, keeps the previous dd3 in dd4 and then runs
- * INSERT_S16_34R.  On the first expansion dd4 receives a dd3 that
- * has not been assigned yet.
- * NOTE(review): sp[3] is read on every step; whether that can
- * reach past the end of the source buffer on the last step
- * depends on the callers - confirm.
- ***************************************************************/
-#define LOAD_INSERT_S16_34R                                                                       \
-  vis_alignaddr((void *)soff, 0);                                                             \
-  s0 = s3;                                                                                                    \
-  s1 = sp[1];                                                                                               \
-  s2 = sp[2];                                                                                               \
-  s3 = sp[3];                                                                                               \
-  sd0 = vis_faligndata(s0, s1);                                                               \
-  sd1 = vis_faligndata(s1, s2);                                                               \
-  sd2 = vis_faligndata(s2, s3);                                                               \
-  sp += 3;                                                                                                    \
-  dd4 = dd3;                                                                                                \
-  INSERT_S16_34R
-
-/***************************************************************/
-/*
- * both source and destination image data are 1-d vectors and
- * 8-byte aligned.  dsize is multiple of 4.
- *
- * Runs LOAD_INSERT_STORE_S16_34R_A8 dsize / 4 times; each step
- * reads three doublewords from src and stores four doublewords
- * to dst through vis_pst_16() with bmask, so a remainder of
- * dsize modulo 4 would be left unconverted.
- */
-
-void
-mlib_v_ImageChannelInsert_S16_34R_A8D1X4(mlib_s16 *src,
-                                                                 mlib_s16 *dst,
-                                                                 mlib_s32 dsize)
-{
-  mlib_d64  *sp, *dp;           /* 8-byte aligned pointer for pixel */
-  mlib_d64  sd0, sd1, sd2;      /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  int       bmask = 0x07;       /* channel mask */
-  int       i;
-
-  sp = (mlib_d64 *)src;
-  dp = (mlib_d64 *)dst;
-
-  /* set GSR.offset for vis_faligndata()  */
-  /* vis_alignaddr((void *)0, 2); */            /* only for _old */
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_34R_A8;
-  }
-}
-
-/***************************************************************/
-/*
- * either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned.  xsize is multiple of 4.
- *
- * For each of the ysize rows LOAD_INSERT_STORE_S16_34R_A8 runs
- * xsize / 4 times; afterwards the row pointers advance by
- * slb / dlb, which are applied as byte strides (the addition is
- * done through an mlib_u8 * cast).
- */
-
-void
-mlib_v_ImageChannelInsert_S16_34R_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                 mlib_s16 *dst,  mlib_s32 dlb,
-                                                                 mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_d64  *sp, *dp;           /* 8-byte aligned pointer for pixel */
-  mlib_d64  *sl, *dl;           /* 8-byte aligned pointer for line */
-  mlib_d64  sd0, sd1, sd2;      /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  int       bmask = 0x07;       /* channel mask */
-  int       i, j;               /* indices for x, y */
-
-  sp = sl = (mlib_d64 *)src;
-  dp = dl = (mlib_d64 *)dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 4-pixel column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_INSERT_STORE_S16_34R_A8;
-    }
-    sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);    /* next source row */
-    dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);    /* next destination row */
-  }
-}
-
-/***************************************************************/
-/*
- * either source or destination data are not 8-byte aligned.
- *
- * The destination run is dsize * 4 mlib_s16 elements long (dend
- * points at its last element).  Both pointers are rounded down to
- * an 8-byte boundary; soff and doff keep the discarded low
- * address bits.  The first group of doublewords is stored under
- * the start edge mask, whole 32-byte groups follow without edge
- * masks while dp <= dend2, and a final partial group is stored
- * one doubleword at a time with a fresh vis_edge16() mask before
- * each store.
- *
- * When doff != 0 the converted data is shifted with
- * vis_alignaddr(0, -doff) + vis_faligndata(); dd4 (the dd3 of the
- * previous LOAD_INSERT_S16_34R step) supplies the leading part.
- * The alignment has to be set again after every load step because
- * the macro itself calls vis_alignaddr().
- */
-
-void
-mlib_v_ImageChannelInsert_S16_34R_D1(mlib_s16 *src,
-                                                             mlib_s16 *dst,
-                                                             mlib_s32 dsize)
-{
-  mlib_s16  *sa, *da;           /* pointer for pixel */
-  mlib_s16  *dend, *dend2;      /* end points in dst */
-  mlib_d64  *dp;                /* 8-byte aligned start points in dst */
-  mlib_d64  *sp;                /* 8-byte aligned start point in src */
-  mlib_d64  s0, s1, s2, s3;     /* 8-byte source raw data */
-  mlib_d64  sd0, sd1, sd2;      /* 8-byte source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  dd4;                /* the last datum of the last step */
-  int soff;             /* offset of address in src */
-  int doff;             /* offset of address in dst */
-  int       emask;              /* edge mask */
-  int       bmask;              /* channel mask */
-  int       i, n;
-
-  sa = src;
-  da = dst;
-
-  /* prepare the source address */
-  sp    = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff  = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp    = (mlib_d64 *)((mlib_addr) da & (~7));
-  dend  = da + dsize * 4 - 1;   /* last destination element */
-  dend2 = dend - 15;            /* 15 elements (30 bytes) before dend: last start of a full 32-byte group */
-  doff  = ((mlib_addr) da & 7);
-
-  /* set channel mask for vis_pst_16 to store the words needed */
-  bmask = 0xff & (0x77 >> (doff / 2));  /* 0111 pattern shifted by the element offset */
-
-  /* generate edge mask for the start point */
-  emask = vis_edge16(da, dend);
-
-  /* load 24 byte, convert, store 32 bytes */
-  s3 = sp[0];                                   /* initial value */
-  LOAD_INSERT_S16_34R;
-
-  if (doff == 0) {                              /* dst is 8-byte aligned */
-
-    if (dsize >= 4 ) {                          /* at least 32 dst bytes: whole first group */
-      vis_pst_16(dd0, dp++, emask & bmask);
-      vis_pst_16(dd1, dp++, bmask);
-      vis_pst_16(dd2, dp++, bmask);
-      vis_pst_16(dd3, dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_16(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(dd2, dp++, emask & bmask);
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;  /* number of whole 32-byte groups left */
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_S16_34R;
-        vis_pst_16(dd0, dp++, bmask);
-        vis_pst_16(dd1, dp++, bmask);
-        vis_pst_16(dd2, dp++, bmask);
-        vis_pst_16(dd3, dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {  /* trailing partial group */
-      LOAD_INSERT_S16_34R;
-      emask = vis_edge16(dp, dend);
-      vis_pst_16(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(dd2, dp++, emask & bmask);
-        }
-      }
-    }
-  }
-  else {                                        /* (doff != 0) */
-    vis_alignaddr((void *)0, -doff);            /* alignment for shifting data to the dst offset */
-
-    if (dsize >= 4 ) {
-      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask);
-      vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask);
-      vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge16(dp, dend);
-            vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-          }
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;  /* number of whole 32-byte groups left */
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_S16_34R;
-        vis_alignaddr((void *)0, -doff);        /* the load step changed the alignment; restore the dst shift */
-        vis_pst_16(vis_faligndata(dd4, dd0), dp++, bmask);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask);
-        vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask);
-        vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {  /* trailing partial group */
-      LOAD_INSERT_S16_34R;
-      vis_alignaddr((void *)0, -doff);          /* the load step changed the alignment; restore the dst shift */
-      emask = vis_edge16(dp, dend);
-      vis_pst_16(vis_faligndata(dd4, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge16(dp, dend);
-            vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-          }
-        }
-      }
-    }
-  }
-}
-
-/***************************************************************
- * General 2-d entry point: calls
- * mlib_v_ImageChannelInsert_S16_34R_D1() once per row, passing
- * xsize as that routine's dsize, for ysize rows.  slb and dlb are
- * byte strides: they are added through an mlib_u8 * cast.
- ***************************************************************/
-
-void
-mlib_v_ImageChannelInsert_S16_34R(mlib_s16 *src,  mlib_s32 slb,
-                                                          mlib_s16 *dst,  mlib_s32 dlb,
-                                                          mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_s16  *sa, *da;
-  mlib_s16  *sl, *dl;
-  int       j;
-
-  sa = sl = src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_S16_34R_D1(sa, da, xsize);
-    sa = sl = (mlib_s16 *)((mlib_u8 *)sl + slb);        /* next source row */
-    da = dl = (mlib_s16 *)((mlib_u8 *)dl + dlb);        /* next destination row */
-  }
-}
-
-/***************************************************************
- * INSERT_U8_34L: builds dd0..dd3 from sd0..sd2 through
- * successive rounds of vis_fpmerge() on the high / low halves of
- * the intermediates sda..sdm (sdl and sdm merge sdh with itself).
- * All of these variables must be declared by the expanding scope.
- * NOTE(review): presumably this spreads 24 packed source bytes
- * over 32 destination bytes, with the filler lane later masked
- * out by the caller's bmask - confirm against the VIS fpmerge
- * definition.
- ***************************************************************/
-#define INSERT_U8_34L                                                                                 \
-  sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));                    \
-  sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));                    \
-  sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));                    \
-  sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));                    \
-  sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));                    \
-  sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));                    \
-  sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));                    \
-  sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));                    \
-  sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));                    \
-  sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi));                    \
-  sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi));                    \
-  sdl = vis_fpmerge(vis_read_hi(sdh), vis_read_hi(sdh));                    \
-  sdm = vis_fpmerge(vis_read_lo(sdh), vis_read_lo(sdh));                    \
-  dd0 = vis_fpmerge(vis_read_hi(sdj), vis_read_hi(sdl));                    \
-  dd1 = vis_fpmerge(vis_read_lo(sdj), vis_read_lo(sdl));                    \
-  dd2 = vis_fpmerge(vis_read_hi(sdk), vis_read_hi(sdm));                    \
-  dd3 = vis_fpmerge(vis_read_lo(sdk), vis_read_lo(sdm));
-
-/***************************************************************
- * LOAD_INSERT_STORE_U8_34L_A8: one aligned step.  Reads three
- * doublewords through sp, runs INSERT_U8_34L and writes four
- * doublewords through dp with vis_pst_8() under bmask.  Both
- * pointers are post-incremented; sp, dp, bmask and the data
- * variables come from the expanding scope.
- ***************************************************************/
-#define LOAD_INSERT_STORE_U8_34L_A8                                                         \
-  sd0 = *sp++;                                  /* b0g0r0b1g1r1b2g2 */                  \
-  sd1 = *sp++;                                  /* r2b3g3r3b4g4r4b5 */                  \
-  sd2 = *sp++;                                  /* g5r5b6g6r6b7g7r7 */                  \
-  INSERT_U8_34L                                                                                                       \
-  vis_pst_8(dd0, dp++, bmask);                                                                \
-  vis_pst_8(dd1, dp++, bmask);                                                                \
-  vis_pst_8(dd2, dp++, bmask);                                                                \
-  vis_pst_8(dd3, dp++, bmask);
-
-/***************************************************************
- * LOAD_INSERT_U8_34L: one unaligned-source step without stores.
- * s0 reuses the s3 of the previous step, so s3 must be seeded
- * with sp[0] before the first expansion.  Reads sp[1..3], forms
- * sd0..sd2 with vis_faligndata() at offset soff, advances sp by
- * three doublewords, keeps the previous dd3 in dd4 and then runs
- * INSERT_U8_34L.  On the first expansion dd4 receives a dd3 that
- * has not been assigned yet.
- * NOTE(review): sp[3] is read on every step; whether that can
- * reach past the end of the source buffer on the last step
- * depends on the callers - confirm.
- ***************************************************************/
-#define LOAD_INSERT_U8_34L                                                                        \
-  vis_alignaddr((void *)soff, 0);                                                             \
-  s0 = s3;                                                                                                    \
-  s1 = sp[1];                                                                                               \
-  s2 = sp[2];                                                                                               \
-  s3 = sp[3];                                                                                               \
-  sd0 = vis_faligndata(s0, s1);                                 \
-  sd1 = vis_faligndata(s1, s2);                                                               \
-  sd2 = vis_faligndata(s2, s3);                                                               \
-  sp += 3;                                                                                                    \
-  dd4 = dd3;                                                    \
-  INSERT_U8_34L
-
-/***************************************************************/
-/*
- * Both source and destination image data are 1-d vectors and
- * 8-byte aligned. And dsize is multiple of 8.
- *
- * Runs LOAD_INSERT_STORE_U8_34L_A8 dsize / 8 times; each step
- * reads three doublewords from src and stores four doublewords
- * to dst through vis_pst_8() with bmask, so a remainder of
- * dsize modulo 8 would be left unconverted.
- */
-void
-mlib_v_ImageChannelInsert_U8_34L_A8D1X8(mlib_u8  *src,
-                                                                mlib_u8  *dst,
-                                                                mlib_s32 dsize)
-{
-  mlib_d64  *sp, *dp;
-  mlib_d64  sd0, sd1, sd2;          /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  sda, sdb, sdc, sdd; /* intermediate variables */
-  mlib_d64  sde, sdf, sdg, sdh;
-  mlib_d64  sdi, sdj, sdk, sdl;
-  mlib_d64  sdm;
-  int         bmask = 0xee;     /* vis_pst_8 store mask, bit pattern 1110 1110 */
-  int         i;
-
-  sp = (mlib_d64 *)src;
-  dp = (mlib_d64 *)dst;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 8; i++) {
-    LOAD_INSERT_STORE_U8_34L_A8;
-  }
-}
-
-/***************************************************************/
-/*
- * Either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned. And slb and dlb are multiple of 8.
- * The xsize is multiple of 8.
- *
- * For each of the ysize rows LOAD_INSERT_STORE_U8_34L_A8 runs
- * xsize / 8 times; afterwards the row pointers advance by
- * slb / dlb, which are applied as byte strides (the addition is
- * done through an mlib_u8 * cast).
- */
-void
-mlib_v_ImageChannelInsert_U8_34L_A8D2X8(mlib_u8  *src,  mlib_s32 slb,
-                                                                mlib_u8  *dst,  mlib_s32 dlb,
-                                                        mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_d64  *sp, *dp;           /* 8-byte aligned pointer for pixel */
-  mlib_d64  *sl, *dl;           /* 8-byte aligned pointer for line */
-  mlib_d64  sd0, sd1, sd2;      /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  sda, sdb, sdc, sdd; /* intermediate variables */
-  mlib_d64  sde, sdf, sdg, sdh;
-  mlib_d64  sdi, sdj, sdk, sdl;
-  mlib_d64  sdm;
-  int         bmask = 0xee;     /* vis_pst_8 store mask, bit pattern 1110 1110 */
-  int       i, j;               /* indices for x, y */
-
-  sp = sl = (mlib_d64 *)src;
-  dp = dl = (mlib_d64 *)dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 8-byte column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      LOAD_INSERT_STORE_U8_34L_A8;
-    }
-    sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);    /* next source row */
-    dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);    /* next destination row */
-  }
-}
-
-/***************************************************************/
-/*
- * either source or destination data are not 8-byte aligned.
- *
- * The destination run is dsize * 4 bytes long (dend is its last
- * byte).  Both pointers are rounded down to an 8-byte boundary;
- * soff and doff keep the discarded low address bits.  The first
- * group of four doublewords is stored under the start edge mask,
- * whole 32-byte groups follow without edge masks while
- * dp <= dend2, and a final partial group is stored one doubleword
- * at a time with a fresh vis_edge8() mask before each store.
- *
- * When doff != 0 the converted data is shifted with
- * vis_alignaddr(0, -doff) + vis_faligndata(); dd4 (the dd3 of the
- * previous LOAD_INSERT_U8_34L step) supplies the leading bytes.
- * The alignment has to be set again after every load step because
- * the macro itself calls vis_alignaddr(soff, 0).
- */
-void
-mlib_v_ImageChannelInsert_U8_34L_D1(mlib_u8  *src,
-                                                            mlib_u8  *dst,
-                                                            mlib_s32 dsize)
-{
-  mlib_u8   *sa, *da;
-  mlib_u8   *dend, *dend2;      /* end points in dst */
-  mlib_d64  *dp;                /* 8-byte aligned start points in dst */
-  mlib_d64  *sp;                /* 8-byte aligned start point in src */
-  mlib_d64  s0, s1, s2, s3;     /* 8-byte source raw data */
-  mlib_d64  sd0, sd1, sd2;      /* 8-byte source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  dd4;                /* the last datum of the last step */
-  mlib_d64  sda, sdb, sdc, sdd; /* intermediate variables */
-  mlib_d64  sde, sdf, sdg, sdh;
-  mlib_d64  sdi, sdj, sdk, sdl;
-  mlib_d64  sdm;
-  int       soff;               /* offset of address in src */
-  int       doff;               /* offset of address in dst */
-  int       emask;              /* edge mask */
-  int         bmask;            /* channel mask */
-  int         i, n;
-
-  sa = src;
-  da = dst;
-
-  /* prepare the source address */
-  sp    = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff  = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp    = (mlib_d64 *)((mlib_addr) da & (~7));
-  dend  = da + dsize * 4 - 1;   /* last destination byte */
-  dend2 = dend - 31;            /* highest dp at which a full 32-byte group still fits */
-  doff  = ((mlib_addr) da & 7);
-
-  /* set band mask for vis_pst_8 to store the bytes needed */
-  bmask = 0xff & (0xeeee >> doff) ;     /* repeating 1110 pattern shifted by doff */
-
-  /* generate edge mask for the start point */
-  emask = vis_edge8(da, dend);
-
-  /* load 24 bytes, convert to 32 bytes */
-  s3 = sp[0];                                   /* initial value */
-  LOAD_INSERT_U8_34L;
-
-  if (doff == 0) {                              /* dst is 8-byte aligned */
-
-    if (dsize >= 8 ) {                          /* at least 32 dst bytes: whole first group */
-      vis_pst_8(dd0, dp++, emask & bmask);
-      vis_pst_8(dd1, dp++, bmask);
-      vis_pst_8(dd2, dp++, bmask);
-      vis_pst_8(dd3, dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_8(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(dd2, dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(dd3, dp++, emask & bmask);
-          }
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;  /* number of whole 32-byte groups left */
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_U8_34L;
-        vis_pst_8(dd0, dp++, bmask);
-        vis_pst_8(dd1, dp++, bmask);
-        vis_pst_8(dd2, dp++, bmask);
-        vis_pst_8(dd3, dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {  /* trailing partial group */
-      LOAD_INSERT_U8_34L;
-      emask = vis_edge8(dp, dend);
-      vis_pst_8(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(dd2, dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(dd3, dp++, emask & bmask);
-          }
-        }
-      }
-    }
-  }
-  else {                                        /* (doff != 0) */
-    vis_alignaddr((void *)0, -doff);            /* alignment for shifting data to the dst offset */
-
-    if (dsize >= 8 ) {
-      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
-      vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
-      vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-            if ((mlib_addr) dp <= (mlib_addr) dend)  {
-              emask = vis_edge8(dp, dend);
-              vis_pst_8(vis_faligndata(dd3, dd3), dp++, emask & bmask);
-            }
-          }
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;  /* number of whole 32-byte groups left */
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_U8_34L;
-        vis_alignaddr((void *)0, -doff);        /* the load step changed the alignment; restore the dst shift */
-        vis_pst_8(vis_faligndata(dd4, dd0), dp++, bmask);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
-        vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
-        vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {  /* trailing partial group */
-      LOAD_INSERT_U8_34L;
-      vis_alignaddr((void *)0, -doff);          /* the load step changed the alignment; restore the dst shift */
-      emask = vis_edge8(dp, dend);
-      vis_pst_8(vis_faligndata(dd4, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge8(dp, dend);
-        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge8(dp, dend);
-          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge8(dp, dend);
-            vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-          }
-        }
-      }
-    }
-  }
-}
-
-/***************************************************************
- * General 2-d entry point: calls
- * mlib_v_ImageChannelInsert_U8_34L_D1() once per row, passing
- * xsize as that routine's dsize, for ysize rows.  slb and dlb are
- * added to mlib_u8 pointers, i.e. they are byte strides.
- ***************************************************************/
-void
-mlib_v_ImageChannelInsert_U8_34L(mlib_u8  *src,  mlib_s32 slb,
-                                                         mlib_u8  *dst,  mlib_s32 dlb,
-                                                         mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_u8   *sa, *da;
-  mlib_u8   *sl, *dl;
-  int         j;
-
-  sa = sl = src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_U8_34L_D1(sa, da, xsize);
-    sa = sl += slb;             /* next source row */
-    da = dl += dlb;             /* next destination row */
-  }
-}
-
-/***************************************************************
- * INSERT_S16_34L: builds dd0..dd3 from sd0..sd2, which the
- * expanding scope must declare.  Each vis_alignaddr() call
- * changes the offset used by the following vis_faligndata(), so
- * any alignment the caller had set up is overwritten.  The
- * trailing comments name the 16-bit lanes of each result.
- ***************************************************************/
-#define INSERT_S16_34L                                                                              \
-  dd0 = sd0;                                                            /* b0g0r0b1 */        \
-  vis_alignaddr((void *)0, 6);                                                                \
-  dd1 = vis_faligndata(sd0, sd1);                       /* b1g1r1b2 */        \
-  vis_alignaddr((void *)0, 4);                                                                \
-  dd2 = vis_faligndata(sd1, sd2);                         /* b2g2r2b3 */              \
-  vis_alignaddr((void *)0, 2);                                                                \
-  dd3 = vis_faligndata(sd2, sd2);                         /* b3g3r3r2 */
-
-/***************************************************************
- * LOAD_INSERT_STORE_S16_34L_A8: one aligned step.  Reads three
- * doublewords through sp, runs INSERT_S16_34L and writes four
- * doublewords through dp with vis_pst_16() under bmask.  Both
- * pointers are post-incremented; sp, dp, bmask, sd0..sd2 and
- * dd0..dd3 come from the expanding scope.
- ***************************************************************/
-#define LOAD_INSERT_STORE_S16_34L_A8                                                      \
-  sd0 = *sp++;                                          /* b0g0r0b1 */                          \
-  sd1 = *sp++;                                          /* g1r1b2g2 */                      \
-  sd2 = *sp++;                                          /* r2b3g3r3 */                      \
-  INSERT_S16_34L                                                                                          \
-  vis_pst_16(dd0, dp++, bmask);                                                               \
-  vis_pst_16(dd1, dp++, bmask);                                                               \
-  vis_pst_16(dd2, dp++, bmask);                                                               \
-  vis_pst_16(dd3, dp++, bmask);
-
-/***************************************************************/
-#define LOAD_INSERT_S16_34L                                                                       \
-  vis_alignaddr((void *)soff, 0);                                                             \
-  s0 = s3;                                                                                                    \
-  s1 = sp[1];                                                                                               \
-  s2 = sp[2];                                                                                               \
-  s3 = sp[3];                                                                                               \
-  sd0 = vis_faligndata(s0, s1);                                                               \
-  sd1 = vis_faligndata(s1, s2);                                                               \
-  sd2 = vis_faligndata(s2, s3);                                                               \
-  sp += 3;                                                                                                    \
-  dd4 = dd3;                                                                                                \
-  INSERT_S16_34L
-
-/***************************************************************/
-/*
- * both source and destination image data are 1-d vectors and
- * 8-byte aligned.  dsize is multiple of 4.
- */
-
-void
-mlib_v_ImageChannelInsert_S16_34L_A8D1X4(mlib_s16 *src,
-                                                                 mlib_s16 *dst,
-                                                                 mlib_s32 dsize)
-{
-  mlib_d64  *sp, *dp;           /* 8-byte aligned pointer for pixel */
-  mlib_d64  sd0, sd1, sd2;      /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  int       bmask = 0x0e;       /* channel mask */
-  int       i;
-
-  sp = (mlib_d64 *)src;
-  dp = (mlib_d64 *)dst;
-
-#pragma pipeloop(0)
-  for (i = 0; i < dsize / 4; i++) {
-    LOAD_INSERT_STORE_S16_34L_A8;
-  }
-}
-
-/***************************************************************/
-/*
- * either source or destination image data are not 1-d vectors, but
- * they are 8-byte aligned.  xsize is multiple of 4.
- */
-
-void
-mlib_v_ImageChannelInsert_S16_34L_A8D2X4(mlib_s16 *src,  mlib_s32 slb,
-                                                                 mlib_s16 *dst,  mlib_s32 dlb,
-                                                                 mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_d64  *sp, *dp;           /* 8-byte aligned pointer for pixel */
-  mlib_d64  *sl, *dl;           /* 8-byte aligned pointer for line */
-  mlib_d64  sd0, sd1, sd2;      /* source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  int       bmask = 0x0e;       /* channel mask */
-  int       i, j;               /* indices for x, y */
-
-  sp = sl = (mlib_d64 *)src;
-  dp = dl = (mlib_d64 *)dst;
-
-  /* row loop */
-  for (j = 0; j < ysize; j++) {
-    /* 4-pixel column loop */
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 4; i++) {
-      LOAD_INSERT_STORE_S16_34L_A8;
-    }
-    sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb);
-    dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb);
-  }
-}
-
-/***************************************************************/
-/*
- * either source or destination data are not 8-byte aligned.
- */
-
-void
-mlib_v_ImageChannelInsert_S16_34L_D1(mlib_s16 *src,
-                                                             mlib_s16 *dst,
-                                                             mlib_s32 dsize)
-{
-  mlib_s16  *sa, *da;           /* pointer for pixel */
-  mlib_s16  *dend, *dend2;      /* end points in dst */
-  mlib_d64  *dp;                /* 8-byte aligned start points in dst */
-  mlib_d64  *sp;                /* 8-byte aligned start point in src */
-  mlib_d64  s0, s1, s2, s3;     /* 8-byte source raw data */
-  mlib_d64  sd0, sd1, sd2;      /* 8-byte source data */
-  mlib_d64  dd0, dd1, dd2, dd3; /* dst data */
-  mlib_d64  dd4;                /* the last datum of the last step */
-  int soff;             /* offset of address in src */
-  int doff;             /* offset of address in dst */
-  int       emask;              /* edge mask */
-  int       bmask;              /* channel mask */
-  int       i, n;
-
-  sa = src;
-  da = dst;
-
-  /* prepare the source address */
-  sp    = (mlib_d64 *) ((mlib_addr) sa & (~7));
-  soff  = ((mlib_addr) sa & 7);
-
-  /* prepare the destination addresses */
-  dp    = (mlib_d64 *)((mlib_addr) da & (~7));
-  dend  = da + dsize * 4 - 1;
-  dend2 = dend - 15;
-  doff  = ((mlib_addr) da & 7);
-
-  /* set channel mask for vis_pst_16 to store the words needed */
-  bmask = 0xff & (0xee >> (doff / 2));
-
-  /* generate edge mask for the start point */
-  emask = vis_edge16(da, dend);
-
-  /* load 24 byte, convert, store 32 bytes */
-  s3 = sp[0];                                   /* initial value */
-  LOAD_INSERT_S16_34L;
-
-  if (doff == 0) {                              /* dst is 8-byte aligned */
-
-    if (dsize >= 4 ) {
-      vis_pst_16(dd0, dp++, emask & bmask);
-      vis_pst_16(dd1, dp++, bmask);
-      vis_pst_16(dd2, dp++, bmask);
-      vis_pst_16(dd3, dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_16(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(dd2, dp++, emask & bmask);
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_S16_34L;
-        vis_pst_16(dd0, dp++, bmask);
-        vis_pst_16(dd1, dp++, bmask);
-        vis_pst_16(dd2, dp++, bmask);
-        vis_pst_16(dd3, dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {
-      LOAD_INSERT_S16_34L;
-      emask = vis_edge16(dp, dend);
-      vis_pst_16(dd0, dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(dd1, dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(dd2, dp++, emask & bmask);
-        }
-      }
-    }
-  }
-  else {                                        /* (doff != 0) */
-    vis_alignaddr((void *)0, -doff);
-
-    if (dsize >= 4 ) {
-      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask);
-      vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask);
-      vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask);
-    }
-    else {                                      /* for very small size */
-      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge16(dp, dend);
-            vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-          }
-        }
-      }
-    }
-
-    /* no edge handling is needed in the loop */
-    if ((mlib_addr) dp <= (mlib_addr) dend2)  {
-      n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1;
-#pragma pipeloop(0)
-      for (i = 0; i < n; i++) {
-        LOAD_INSERT_S16_34L;
-        vis_alignaddr((void *)0, -doff);
-        vis_pst_16(vis_faligndata(dd4, dd0), dp++, bmask);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask);
-        vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask);
-        vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask);
-      }
-    }
-
-    if ((mlib_addr) dp <= (mlib_addr) dend)  {
-      LOAD_INSERT_S16_34L;
-      vis_alignaddr((void *)0, -doff);
-      emask = vis_edge16(dp, dend);
-      vis_pst_16(vis_faligndata(dd4, dd0), dp++, emask & bmask);
-      if ((mlib_addr) dp <= (mlib_addr) dend)  {
-        emask = vis_edge16(dp, dend);
-        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask);
-        if ((mlib_addr) dp <= (mlib_addr) dend)  {
-          emask = vis_edge16(dp, dend);
-          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask);
-          if ((mlib_addr) dp <= (mlib_addr) dend)  {
-            emask = vis_edge16(dp, dend);
-            vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask);
-          }
-        }
-      }
-    }
-  }
-}
-
-/***************************************************************/
-
-void
-mlib_v_ImageChannelInsert_S16_34L(mlib_s16 *src,  mlib_s32 slb,
-                                                          mlib_s16 *dst,  mlib_s32 dlb,
-                                                          mlib_s32 xsize, mlib_s32 ysize)
-{
-  mlib_s16  *sa, *da;
-  mlib_s16  *sl, *dl;
-  int       j;
-
-  sa = sl = src;
-  da = dl = dst;
-
-#pragma pipeloop(0)
-  for (j = 0; j < ysize; j++) {
-    mlib_v_ImageChannelInsert_S16_34L_D1(sa, da, xsize);
-    sa = sl = (mlib_s16 *)((mlib_u8 *)sl + slb);
-    da = dl = (mlib_s16 *)((mlib_u8 *)dl + dlb);
-  }
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv.h	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv.h	Fri May 13 11:31:05 2016 +0300
@@ -34,18 +34,6 @@
 
 #if defined ( VIS ) && VIS == 0x200
 
-mlib_status mlib_conv2x2_8nw_f(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
-mlib_status mlib_conv3x3_8nw_f(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale,
-                               mlib_s32         cmask);
-
 mlib_status mlib_convMxN_8nw_f(mlib_image       *dst,
                                const mlib_image *src,
                                mlib_s32         m,
@@ -58,16 +46,6 @@
 
 #else
 
-mlib_status mlib_conv2x2_8nw_f(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale);
-
-mlib_status mlib_conv3x3_8nw_f(mlib_image       *dst,
-                               const mlib_image *src,
-                               const mlib_s32   *kern,
-                               mlib_s32         scale);
-
 mlib_status mlib_convMxN_8nw_f(mlib_image       *dst,
                                const mlib_image *src,
                                mlib_s32         m,
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_16nw.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1673 +0,0 @@
-/*
- * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-/*
- * FUNCTION
- *      Internal functions for mlib_ImageConv* on U8 type
- *      and MLIB_EDGE_DST_NO_WRITE mask
- *
- */
-
-/***************************************************************/
-
-#include <vis_proto.h>
-#include <mlib_image.h>
-#include <mlib_ImageCheck.h>
-#include <mlib_ImageColormap.h>
-
-/*
-  This defines switches between functions in
-  files: mlib_v_ImageConv_8nw.c,
-         mlib_v_ImageConvIndex3_8_16nw.c,
-         mlib_v_ImageConvIndex4_8_16nw.c,
-         mlib_v_ImageConvIndex3_8_16nw.c,
-         mlib_v_ImageConvIndex4_8_16nw.c
-*/
-
-#define CONV_INDEX
-
-#define DTYPE mlib_s16
-#define LTYPE mlib_u8
-
-/***************************************************************/
-
-#ifdef CONV_INDEX
-
-#define CONV_FUNC(KERN)                                         \
-  mlib_conv##KERN##_Index3_8_16nw(mlib_image *dst,              \
-                                  mlib_image *src,              \
-                                  mlib_s32   *kern,             \
-                                  mlib_s32   scale,             \
-                                  void       *colormap)
-
-#else
-
-#define CONV_FUNC(KERN)                         \
-  mlib_conv##KERN##_8nw_f(mlib_image *dst,      \
-                          mlib_image *src,      \
-                          mlib_s32   *kern,     \
-                          mlib_s32   scale)
-
-#endif
-
-/***************************************************************/
-
-#ifdef CONV_INDEX
-
-#define NCHAN  3
-
-#else
-
-#define NCHAN  nchan
-
-#endif
-
-/***************************************************************/
-
-#define DEF_VARS                                                \
-  DTYPE    *sl, *sp, *dl;                                       \
-  mlib_s32 hgt = mlib_ImageGetHeight(src);                      \
-  mlib_s32 wid = mlib_ImageGetWidth(src);                       \
-  mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(DTYPE);      \
-  mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(DTYPE);      \
-  DTYPE    *adr_src = (DTYPE *)mlib_ImageGetData(src);          \
-  DTYPE    *adr_dst = (DTYPE *)mlib_ImageGetData(dst);          \
-  mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0;     \
-  mlib_d64 *pbuff, *dp;                                         \
-  mlib_f32 *karr = (mlib_f32 *)kern;                            \
-  mlib_s32 gsr_scale = (31 - scale) << 3;                       \
-  mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]);  \
-  mlib_s32 i, j, l
-
-/***************************************************************/
-
-#ifdef CONV_INDEX
-
-#define DEF_EXTRA_VARS                                                  \
-  int    offset = mlib_ImageGetLutOffset(colormap);                     \
-  LTYPE  **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap);         \
-  LTYPE  *ltbl0 = lut_table[0] - offset;                                \
-  LTYPE  *ltbl1 = lut_table[1] - offset;                                \
-  LTYPE  *ltbl2 = lut_table[2] - offset;                                \
-  LTYPE  *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2
-
-#else
-
-#define DEF_EXTRA_VARS                          \
-  mlib_s32 nchan = mlib_ImageGetChannels(dst)
-
-#endif
-
-/***************************************************************/
-
-#if NCHAN == 3
-
-#define LOAD_SRC() {                                            \
-    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
-    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
-    mlib_d64 t0, t1, t2;                                        \
-                                                                \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
-                                                                \
-    buffn[i] = t0;                                              \
-    buffn[i + 1] = t1;                                          \
-    buffn[i + 2] = t2;                                          \
-                                                                \
-    sp += 8;                                                    \
-  }
-
-#else
-
-#define LOAD_SRC() {                                            \
-    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
-    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
-    mlib_d64 t0, t1, t2;                                        \
-                                                                \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
-                                                                \
-    buffn[i] = t0;                                              \
-    buffn[i + 1] = t1;                                          \
-    buffn[i + 2] = t2;                                          \
-                                                                \
-    sp += 6;                                                    \
-  }
-
-#endif
-
-/***************************************************************/
-
-static mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008,
-                                    0x00040004, 0x00020002, 0x00010001, 0x00000000,
-                                    0x00000000, 0x00000000, 0x00000000, 0x00000000,
-                                    0x00000000, 0x00000000, 0x00000000, 0x00000000 };
-
-/***************************************************************/
-
-void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, int size);
-
-/***************************************************************/
-
-#define KSIZE  2
-
-mlib_status CONV_FUNC(2x2)
-{
-  mlib_d64 *buffs[2*(KSIZE + 1)];
-  mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s0, s1;
-  mlib_d64 d0, d1, d00, d01, d10, d11;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  sl = adr_src;
-  dl = adr_dst;
-
-  ssize = NCHAN*wid;
-  dsize = (ssize + 7)/8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64));
-  if (pbuff == NULL) return MLIB_FAILURE;
-
-  for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
-  for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
-  buffd = buffs[KSIZE] + esize;
-  buffe = buffd + 2*esize;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-  xsize = ssize - NCHAN*(KSIZE - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < KSIZE; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l*sll;
-
-#ifndef CONV_INDEX
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-#endif /* CONV_INDEX */
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind;
-    mlib_f32 *pk = karr, k0, k1;
-    sp = sl + KSIZE*sll;
-
-    buff0 = buffc[0];
-    buff1 = buffc[1];
-    buffn = buffc[KSIZE];
-
-#ifndef CONV_INDEX
-    if ((((mlib_addr)(sl      )) & 7) == 0) buff0 = (mlib_d64*)sl;
-    if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll);
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#endif
-
-    k0 = pk[1];
-    k1 = pk[3];
-    vis_write_gsr(gsr_scale + NCHAN);
-
-    s01 = buff0[0];
-    s11 = buff1[0];
-#pragma pipeloop(0)
-    for (i = 0; i < (xsize + 7)/8; i++) {
-      s00 = s01;
-      s10 = s11;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s0  = vis_faligndata(s00, s01);
-      s1  = vis_faligndata(s10, s11);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d01, d11);
-      buffd[2*i] = d0;
-      buffd[2*i + 1] = d1;
-    }
-
-    k0 = pk[0];
-    k1 = pk[2];
-#ifndef CONV_INDEX
-    dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-#pragma pipeloop(0)
-    for (i = 0; i < xsize/8; i++) {
-      s0 = buff0[i];
-      s1 = buff1[i];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d00 = vis_fpadd16(d00, d10);
-      d0  = vis_fpadd16(d0, drnd);
-      d0  = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1  = vis_fpadd16(d1, drnd);
-      d1  = vis_fpadd16(d1, d01);
-      dp[i] = vis_fpack16_pair(d0, d1);
-    }
-
-    if (emask) {
-      s0 = buff0[i];
-      s1 = buff1[i];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d00 = vis_fpadd16(d00, d10);
-      d0  = vis_fpadd16(d0, drnd);
-      d0  = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1  = vis_fpadd16(d1, drnd);
-      d1  = vis_fpadd16(d1, d01);
-
-      d0 = vis_fpack16_pair(d0, d1);
-      vis_pst_8(d0, dp + i, emask);
-    }
-
-    if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-#else
-    vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      mlib_d64 d00, d01, d02, d03, d04, d05;
-      mlib_d64 d10, d11, d12, d13, d14, d15;
-      mlib_d64 d0, d1, d2, d3, d4, d5;
-      mlib_d64 s00 = buff0[i];
-      mlib_d64 s01 = buff0[i + 1];
-      mlib_d64 s02 = buff0[i + 2];
-      mlib_d64 s10 = buff1[i];
-      mlib_d64 s11 = buff1[i + 1];
-      mlib_d64 s12 = buff1[i + 2];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d2 = buffd[2*i + 2];
-      d3 = buffd[2*i + 3];
-      d4 = buffd[2*i + 4];
-      d5 = buffd[2*i + 5];
-      d00 = vis_fpadd16(d00, d10);
-      d0  = vis_fpadd16(d0, drnd);
-      d0  = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1  = vis_fpadd16(d1, drnd);
-      d1  = vis_fpadd16(d1, d01);
-      d02 = vis_fpadd16(d02, d12);
-      d2  = vis_fpadd16(d2, drnd);
-      d2  = vis_fpadd16(d2, d02);
-      d03 = vis_fpadd16(d03, d13);
-      d3  = vis_fpadd16(d3, drnd);
-      d3  = vis_fpadd16(d3, d03);
-      d04 = vis_fpadd16(d04, d14);
-      d4  = vis_fpadd16(d4, drnd);
-      d4  = vis_fpadd16(d4, d04);
-      d05 = vis_fpadd16(d05, d15);
-      d5  = vis_fpadd16(d5, drnd);
-      d5  = vis_fpadd16(d5, d05);
-
-      buffe[i    ] = vis_fpack16_pair(d0, d1);
-      buffe[i + 1] = vis_fpack16_pair(d2, d3);
-      buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-      LOAD_SRC();
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-    if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-
-#undef  KSIZE
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)
-{
-  mlib_d64 *buffs[2*(KSIZE + 1)];
-  mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2;
-  mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21;
-  mlib_s32 ik, ik_last, off, doff;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  sl = adr_src;
-#ifdef CONV_INDEX
-  dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1);
-#else
-  dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN);
-#endif
-
-  ssize = NCHAN*wid;
-  dsize = (ssize + 7)/8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64));
-  if (pbuff == NULL) return MLIB_FAILURE;
-
-  for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
-  for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
-  buffd = buffs[KSIZE] + esize;
-  buffe = buffd + 2*esize;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-  xsize = ssize - NCHAN*(KSIZE - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < KSIZE; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l*sll;
-
-#ifndef CONV_INDEX
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-#endif /* CONV_INDEX */
-  }
-
-  /* init buffer */
-#pragma pipeloop(0)
-  for (i = 0; i < (xsize + 7)/8; i++) {
-    buffd[2*i    ] = drnd;
-    buffd[2*i + 1] = drnd;
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2;
-    mlib_f32 *pk = karr, k0, k1, k2;
-    sp = sl + KSIZE*sll;
-
-    pbuff0 = buffc[0];
-    pbuff1 = buffc[1];
-    pbuff2 = buffc[2];
-    buffn  = buffc[KSIZE];
-
-#ifndef CONV_INDEX
-    if ((((mlib_addr)(sl        )) & 7) == 0) pbuff0 = (mlib_d64*)sl;
-    if ((((mlib_addr)(sl +   sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll);
-    if ((((mlib_addr)(sl + 2*sll)) & 7) == 0) pbuff2 = (mlib_d64*)(sl + 2*sll);
-
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#endif
-
-#ifdef CONV_INDEX
-    ik_last = 0;
-#else
-    ik_last = (KSIZE - 1);
-#endif
-
-    for (ik = 0; ik < KSIZE; ik++) {
-      k0 = pk[ik];
-      k1 = pk[ik + KSIZE];
-      k2 = pk[ik + 2*KSIZE];
-
-      off  = ik*NCHAN;
-      doff = off/8;
-      off &= 7;
-      buff0 = pbuff0 + doff;
-      buff1 = pbuff1 + doff;
-      buff2 = pbuff2 + doff;
-      vis_write_gsr(gsr_scale + off);
-
-      if (ik == ik_last) continue;
-      /*if (!ik_last) {
-        if ((off & 3) || (ik == (KSIZE - 1))) {
-          ik_last = ik;
-          continue;
-        }
-      }*/
-
-      if (off == 0) {
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7)/8; i++) {
-          s0 = buff0[i];
-          s1 = buff1[i];
-          s2 = buff2[i];
-
-          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-          d0 = buffd[2*i];
-          d1 = buffd[2*i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2*i] = d0;
-          buffd[2*i + 1] = d1;
-        }
-
-      } else if (off == 4) {
-        s01 = buff0[0];
-        s11 = buff1[0];
-        s21 = buff2[0];
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7)/8; i++) {
-          s00 = s01;
-          s10 = s11;
-          s20 = s21;
-          s01 = buff0[i + 1];
-          s11 = buff1[i + 1];
-          s21 = buff2[i + 1];
-
-          d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-          d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-          d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-          d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-          d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-          d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-
-          d0 = buffd[2*i];
-          d1 = buffd[2*i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2*i] = d0;
-          buffd[2*i + 1] = d1;
-        }
-
-      } else {
-        s01 = buff0[0];
-        s11 = buff1[0];
-        s21 = buff2[0];
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7)/8; i++) {
-          s00 = s01;
-          s10 = s11;
-          s20 = s21;
-          s01 = buff0[i + 1];
-          s11 = buff1[i + 1];
-          s21 = buff2[i + 1];
-          s0  = vis_faligndata(s00, s01);
-          s1  = vis_faligndata(s10, s11);
-          s2  = vis_faligndata(s20, s21);
-
-          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-          d0 = buffd[2*i];
-          d1 = buffd[2*i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2*i] = d0;
-          buffd[2*i + 1] = d1;
-        }
-      }
-    }
-
-    k0 = pk[ik_last];
-    k1 = pk[ik_last + KSIZE];
-    k2 = pk[ik_last + 2*KSIZE];
-
-    off  = ik_last*NCHAN;
-    doff = off/8;
-    off &= 7;
-    buff0 = pbuff0 + doff;
-    buff1 = pbuff1 + doff;
-    buff2 = pbuff2 + doff;
-    vis_write_gsr(gsr_scale + off);
-
-#ifndef CONV_INDEX
-    dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-    s01 = buff0[0];
-    s11 = buff1[0];
-    s21 = buff2[0];
-#pragma pipeloop(0)
-    for (i = 0; i < xsize/8; i++) {
-      s00 = s01;
-      s10 = s11;
-      s20 = s21;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s21 = buff2[i + 1];
-      s0  = vis_faligndata(s00, s01);
-      s1  = vis_faligndata(s10, s11);
-      s2  = vis_faligndata(s20, s21);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-
-      dd = vis_fpack16_pair(d0, d1);
-      dp[i] = dd;
-
-      buffd[2*i    ] = drnd;
-      buffd[2*i + 1] = drnd;
-    }
-
-    if (emask) {
-      s00 = s01;
-      s10 = s11;
-      s20 = s21;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s21 = buff2[i + 1];
-      s0  = vis_faligndata(s00, s01);
-      s1  = vis_faligndata(s10, s11);
-      s2  = vis_faligndata(s20, s21);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-
-      dd = vis_fpack16_pair(d0, d1);
-      vis_pst_8(dd, dp + i, emask);
-
-      buffd[2*i    ] = drnd;
-      buffd[2*i + 1] = drnd;
-    }
-
-    if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-#else
-    vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      mlib_d64 d00, d01, d02, d03, d04, d05;
-      mlib_d64 d10, d11, d12, d13, d14, d15;
-      mlib_d64 d20, d21, d22, d23, d24, d25;
-      mlib_d64 d0, d1, d2, d3, d4, d5;
-      mlib_d64 s00 = buff0[i];
-      mlib_d64 s01 = buff0[i + 1];
-      mlib_d64 s02 = buff0[i + 2];
-      mlib_d64 s10 = buff1[i];
-      mlib_d64 s11 = buff1[i + 1];
-      mlib_d64 s12 = buff1[i + 2];
-      mlib_d64 s20 = buff2[i];
-      mlib_d64 s21 = buff2[i + 1];
-      mlib_d64 s22 = buff2[i + 2];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
-      d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
-      d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
-      d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
-      d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d2 = buffd[2*i + 2];
-      d3 = buffd[2*i + 3];
-      d4 = buffd[2*i + 4];
-      d5 = buffd[2*i + 5];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-      d2 = vis_fpadd16(d2, d02);
-      d2 = vis_fpadd16(d2, d12);
-      d2 = vis_fpadd16(d2, d22);
-      d3 = vis_fpadd16(d3, d03);
-      d3 = vis_fpadd16(d3, d13);
-      d3 = vis_fpadd16(d3, d23);
-      d4 = vis_fpadd16(d4, d04);
-      d4 = vis_fpadd16(d4, d14);
-      d4 = vis_fpadd16(d4, d24);
-      d5 = vis_fpadd16(d5, d05);
-      d5 = vis_fpadd16(d5, d15);
-      d5 = vis_fpadd16(d5, d25);
-
-      buffe[i    ] = vis_fpack16_pair(d0, d1);
-      buffe[i + 1] = vis_fpack16_pair(d2, d3);
-      buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-      buffd[2*i    ] = drnd;
-      buffd[2*i + 1] = drnd;
-      buffd[2*i + 2] = drnd;
-      buffd[2*i + 3] = drnd;
-      buffd[2*i + 4] = drnd;
-      buffd[2*i + 5] = drnd;
-
-      LOAD_SRC();
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-    if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-
-#undef  KSIZE
-#define MAX_N   11
-
-#ifdef CONV_INDEX
-
-mlib_status mlib_convMxN_Index3_8_16nw(mlib_image *dst,
-                                       mlib_image *src,
-                                       mlib_s32   m,
-                                       mlib_s32   n,
-                                       mlib_s32   dm,
-                                       mlib_s32   dn,
-                                       mlib_s32   *kern,
-                                       mlib_s32   scale,
-                                       void       *colormap)
-
-#else
-
-mlib_status mlib_convMxN_8nw_f(mlib_image *dst,
-                               mlib_image *src,
-                               mlib_s32   m,
-                               mlib_s32   n,
-                               mlib_s32   dm,
-                               mlib_s32   dn,
-                               mlib_s32   *kern,
-                               mlib_s32   scale)
-
-#endif
-{
-  mlib_d64 *buffs_local[3*(MAX_N + 1)], **buffs = buffs_local, **buff;
-  mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3;
-  mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31;
-  mlib_d64 dd, d0, d1;
-  mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  if (n > MAX_N) {
-    buffs = mlib_malloc(3*(n + 1)*sizeof(mlib_d64*));
-    if (buffs == NULL) return MLIB_FAILURE;
-  }
-
-  buff = buffs + 2*(n + 1);
-
-  sl = adr_src;
-#ifdef CONV_INDEX
-  dl = adr_dst + dn*dll + dm;
-#else
-  dl = adr_dst + dn*dll + dm*NCHAN;
-#endif
-
-  ssize = NCHAN*wid;
-  dsize = (ssize + 7)/8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((n + 4)*esize*sizeof(mlib_d64));
-  if (pbuff == NULL) {
-    if (buffs != buffs_local) mlib_free(buffs);
-    return MLIB_FAILURE;
-  }
-
-  for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize;
-  for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i];
-  buffd = buffs[n] + esize;
-  buffe = buffd + 2*esize;
-
-  wid -= (m - 1);
-  hgt -= (n - 1);
-  xsize = ssize - NCHAN*(m - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < n; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l*sll;
-
-#ifndef CONV_INDEX
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-#endif /* CONV_INDEX */
-  }
-
-  /* init buffer */
-#pragma pipeloop(0)
-  for (i = 0; i < (xsize + 7)/8; i++) {
-    buffd[2*i    ] = drnd;
-    buffd[2*i + 1] = drnd;
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind;
-    mlib_f32 *pk = karr, k0, k1, k2, k3;
-    sp = sl + n*sll;
-
-    for (l = 0; l < n; l++) {
-      buff[l] = buffc[l];
-    }
-    buffn  = buffc[n];
-
-#ifndef CONV_INDEX
-    for (l = 0; l < n; l++) {
-      if ((((mlib_addr)(sl + l*sll)) & 7) == 0) buff[l] = (mlib_d64*)(sl + l*sll);
-    }
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#endif
-
-#ifdef CONV_INDEX
-    ik_last = 0;
-#else
-    ik_last = (m - 1);
-#endif
-
-    for (jk = 0; jk < n; jk += jk_size) {
-      jk_size = n - jk;
-#ifdef CONV_INDEX
-      if (jk_size >= 5) jk_size = 3;
-      if (jk_size == 4) jk_size = 2;
-#else
-      if (jk_size >= 6) jk_size = 4;
-      if (jk_size == 5) jk_size = 3;
-#endif
-      coff = 0;
-
-      if (jk_size == 2) {
-
-        for (ik = 0; ik < m; ik++, coff += NCHAN) {
-          if (!jk && ik == ik_last) continue;
-
-          k0 = pk[ik];
-          k1 = pk[ik + m];
-
-          doff  = coff/8;
-          buff0 = buff[jk    ] + doff;
-          buff1 = buff[jk + 1] + doff;
-
-          off = coff & 7;
-          vis_write_gsr(gsr_scale + off);
-
-          s01 = buff0[0];
-          s11 = buff1[0];
-#pragma pipeloop(0)
-          for (i = 0; i < (xsize + 7)/8; i++) {
-            s00 = s01;
-            s10 = s11;
-            s01 = buff0[i + 1];
-            s11 = buff1[i + 1];
-            s0  = vis_faligndata(s00, s01);
-            s1  = vis_faligndata(s10, s11);
-
-            d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-            d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-            d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-            d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-            d0 = buffd[2*i];
-            d1 = buffd[2*i + 1];
-            d0 = vis_fpadd16(d00, d0);
-            d0 = vis_fpadd16(d10, d0);
-            d1 = vis_fpadd16(d01, d1);
-            d1 = vis_fpadd16(d11, d1);
-            buffd[2*i] = d0;
-            buffd[2*i + 1] = d1;
-          }
-
-        }
-
-        pk += 2*m;
-
-      } else if (jk_size == 3) {
-
-        for (ik = 0; ik < m; ik++, coff += NCHAN) {
-          if (!jk && ik == ik_last) continue;
-
-          k0 = pk[ik];
-          k1 = pk[ik + m];
-          k2 = pk[ik + 2*m];
-
-          doff  = coff/8;
-          buff0 = buff[jk    ] + doff;
-          buff1 = buff[jk + 1] + doff;
-          buff2 = buff[jk + 2] + doff;
-
-          off = coff & 7;
-          vis_write_gsr(gsr_scale + off);
-
-          if (off == 0) {
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s0 = buff0[i];
-              s1 = buff1[i];
-              s2 = buff2[i];
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-              d00 = vis_fpadd16(d00, d10);
-              d0  = vis_fpadd16(d20, d0);
-              d0  = vis_fpadd16(d00, d0);
-              d01 = vis_fpadd16(d01, d11);
-              d1  = vis_fpadd16(d21, d1);
-              d1  = vis_fpadd16(d01, d1);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else if (off == 4) {
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-
-              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-
-              d00 = vis_fpadd16(d00, d10);
-              d0  = vis_fpadd16(d20, d0);
-              d0  = vis_fpadd16(d00, d0);
-              d01 = vis_fpadd16(d01, d11);
-              d1  = vis_fpadd16(d21, d1);
-              d1  = vis_fpadd16(d01, d1);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else {
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-              s0  = vis_faligndata(s00, s01);
-              s1  = vis_faligndata(s10, s11);
-              s2  = vis_faligndata(s20, s21);
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-              d00 = vis_fpadd16(d00, d10);
-              d0  = vis_fpadd16(d20, d0);
-              d0  = vis_fpadd16(d00, d0);
-              d01 = vis_fpadd16(d01, d11);
-              d1  = vis_fpadd16(d21, d1);
-              d1  = vis_fpadd16(d01, d1);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-          }
-        }
-
-        pk += 3*m;
-
-      } else { /* jk_size == 4 */
-
-        for (ik = 0; ik < m; ik++, coff += NCHAN) {
-          if (!jk && ik == ik_last) continue;
-
-          k0 = pk[ik];
-          k1 = pk[ik + m];
-          k2 = pk[ik + 2*m];
-          k3 = pk[ik + 3*m];
-
-          doff  = coff/8;
-          buff0 = buff[jk    ] + doff;
-          buff1 = buff[jk + 1] + doff;
-          buff2 = buff[jk + 2] + doff;
-          buff3 = buff[jk + 3] + doff;
-
-          off = coff & 7;
-          vis_write_gsr(gsr_scale + off);
-
-          if (off == 0) {
-
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s0 = buff0[i];
-              s1 = buff1[i];
-              s2 = buff2[i];
-              s3 = buff3[i];
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-              d00 = vis_fpadd16(d00, d10);
-              d20 = vis_fpadd16(d20, d30);
-              d0  = vis_fpadd16(d0,  d00);
-              d0  = vis_fpadd16(d0,  d20);
-              d01 = vis_fpadd16(d01, d11);
-              d21 = vis_fpadd16(d21, d31);
-              d1  = vis_fpadd16(d1,  d01);
-              d1  = vis_fpadd16(d1,  d21);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else if (off == 4) {
-
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-            s31 = buff3[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s30 = s31;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-              s31 = buff3[i + 1];
-
-              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-              d30 = vis_fmul8x16au(vis_read_lo(s30), k3);
-              d31 = vis_fmul8x16au(vis_read_hi(s31), k3);
-
-              d00 = vis_fpadd16(d00, d10);
-              d20 = vis_fpadd16(d20, d30);
-              d0  = vis_fpadd16(d0,  d00);
-              d0  = vis_fpadd16(d0,  d20);
-              d01 = vis_fpadd16(d01, d11);
-              d21 = vis_fpadd16(d21, d31);
-              d1  = vis_fpadd16(d1,  d01);
-              d1  = vis_fpadd16(d1,  d21);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else {
-
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-            s31 = buff3[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s30 = s31;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-              s31 = buff3[i + 1];
-              s0  = vis_faligndata(s00, s01);
-              s1  = vis_faligndata(s10, s11);
-              s2  = vis_faligndata(s20, s21);
-              s3  = vis_faligndata(s30, s31);
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-              d00 = vis_fpadd16(d00, d10);
-              d20 = vis_fpadd16(d20, d30);
-              d0  = vis_fpadd16(d0,  d00);
-              d0  = vis_fpadd16(d0,  d20);
-              d01 = vis_fpadd16(d01, d11);
-              d21 = vis_fpadd16(d21, d31);
-              d1  = vis_fpadd16(d1,  d01);
-              d1  = vis_fpadd16(d1,  d21);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-          }
-        }
-
-        pk += 4*m;
-      }
-    }
-
-    /*****************************************
-     *****************************************
-     **          Final iteration            **
-     *****************************************
-     *****************************************/
-
-    jk_size = n;
-#ifdef CONV_INDEX
-    if (jk_size >= 5) jk_size = 3;
-    if (jk_size == 4) jk_size = 2;
-#else
-    if (jk_size >= 6) jk_size = 4;
-    if (jk_size == 5) jk_size = 3;
-#endif
-
-    k0 = karr[ik_last];
-    k1 = karr[ik_last + m];
-    k2 = karr[ik_last + 2*m];
-    k3 = karr[ik_last + 3*m];
-
-    off  = ik_last*NCHAN;
-    doff = off/8;
-    off &= 7;
-    buff0 = buff[0] + doff;
-    buff1 = buff[1] + doff;
-    buff2 = buff[2] + doff;
-    buff3 = buff[3] + doff;
-    vis_write_gsr(gsr_scale + off);
-
-#ifndef CONV_INDEX
-    if (jk_size == 2) {
-      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-      s01 = buff0[0];
-      s11 = buff1[0];
-#pragma pipeloop(0)
-      for (i = 0; i < xsize/8; i++) {
-        s00 = s01;
-        s10 = s11;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-
-        dd = vis_fpack16_pair(d0, d1);
-        dp[i] = dd;
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if (emask) {
-        s00 = s01;
-        s10 = s11;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-
-        dd = vis_fpack16_pair(d0, d1);
-        vis_pst_8(dd, dp + i, emask);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-    } else if (jk_size == 3) {
-
-      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-      s01 = buff0[0];
-      s11 = buff1[0];
-      s21 = buff2[0];
-#pragma pipeloop(0)
-      for (i = 0; i < xsize/8; i++) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-
-        dd = vis_fpack16_pair(d0, d1);
-        dp[i] = dd;
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if (emask) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-
-        dd = vis_fpack16_pair(d0, d1);
-        vis_pst_8(dd, dp + i, emask);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-    } else /* if (jk_size == 4) */ {
-
-      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-      s01 = buff0[0];
-      s11 = buff1[0];
-      s21 = buff2[0];
-      s31 = buff3[0];
-#pragma pipeloop(0)
-      for (i = 0; i < xsize/8; i++) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s30 = s31;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s31 = buff3[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-        s3  = vis_faligndata(s30, s31);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d0 = vis_fpadd16(d0, d30);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d1 = vis_fpadd16(d1, d31);
-
-        dd = vis_fpack16_pair(d0, d1);
-        dp[i] = dd;
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if (emask) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s30 = s31;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s31 = buff3[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-        s3  = vis_faligndata(s30, s31);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d0 = vis_fpadd16(d0, d30);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d1 = vis_fpadd16(d1, d31);
-
-        dd = vis_fpack16_pair(d0, d1);
-        vis_pst_8(dd, dp + i, emask);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-    }
-
-#else /* CONV_INDEX */
-
-    if (jk_size == 2) {
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-        mlib_d64 s10 = buff1[i];
-        mlib_d64 s11 = buff1[i + 1];
-        mlib_d64 s12 = buff1[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d2 = buffd[2*i + 2];
-        d3 = buffd[2*i + 3];
-        d4 = buffd[2*i + 4];
-        d5 = buffd[2*i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d2 = vis_fpadd16(d2, d02);
-        d2 = vis_fpadd16(d2, d12);
-        d3 = vis_fpadd16(d3, d03);
-        d3 = vis_fpadd16(d3, d13);
-        d4 = vis_fpadd16(d4, d04);
-        d4 = vis_fpadd16(d4, d14);
-        d5 = vis_fpadd16(d5, d05);
-        d5 = vis_fpadd16(d5, d15);
-
-        buffe[i    ] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-        buffd[2*i + 2] = drnd;
-        buffd[2*i + 3] = drnd;
-        buffd[2*i + 4] = drnd;
-        buffd[2*i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-
-    } else /* if (jk_size == 3) */ {
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d20, d21, d22, d23, d24, d25;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-        mlib_d64 s10 = buff1[i];
-        mlib_d64 s11 = buff1[i + 1];
-        mlib_d64 s12 = buff1[i + 2];
-        mlib_d64 s20 = buff2[i];
-        mlib_d64 s21 = buff2[i + 1];
-        mlib_d64 s22 = buff2[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
-        d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
-        d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
-        d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
-        d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d2 = buffd[2*i + 2];
-        d3 = buffd[2*i + 3];
-        d4 = buffd[2*i + 4];
-        d5 = buffd[2*i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d2 = vis_fpadd16(d2, d02);
-        d2 = vis_fpadd16(d2, d12);
-        d2 = vis_fpadd16(d2, d22);
-        d3 = vis_fpadd16(d3, d03);
-        d3 = vis_fpadd16(d3, d13);
-        d3 = vis_fpadd16(d3, d23);
-        d4 = vis_fpadd16(d4, d04);
-        d4 = vis_fpadd16(d4, d14);
-        d4 = vis_fpadd16(d4, d24);
-        d5 = vis_fpadd16(d5, d05);
-        d5 = vis_fpadd16(d5, d15);
-        d5 = vis_fpadd16(d5, d25);
-
-        buffe[i    ] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-        buffd[2*i + 2] = drnd;
-        buffd[2*i + 3] = drnd;
-        buffd[2*i + 4] = drnd;
-        buffd[2*i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-    }
-#endif /* CONV_INDEX */
-
-#ifdef CONV_INDEX
-    mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-    if (buff_ind >= (n + 1)) buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-  if (buffs != buffs_local) mlib_free(buffs);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_8nw.c	Thu May 12 11:03:07 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1673 +0,0 @@
-/*
- * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-
-
-/*
- * FUNCTION
- *      Internal functions for mlib_ImageConv* on U8 type
- *      and MLIB_EDGE_DST_NO_WRITE mask
- *
- */
-
-/***************************************************************/
-
-#include <vis_proto.h>
-#include <mlib_image.h>
-#include <mlib_ImageCheck.h>
-#include <mlib_ImageColormap.h>
-
-/*
-  This defines switches between functions in
-  files: mlib_v_ImageConv_8nw.c,
-         mlib_v_ImageConvIndex3_8_8nw.c,
-         mlib_v_ImageConvIndex4_8_8nw.c,
-         mlib_v_ImageConvIndex3_8_16nw.c,
-         mlib_v_ImageConvIndex4_8_16nw.c
-*/
-
-#define CONV_INDEX
-
-#define DTYPE mlib_u8
-#define LTYPE mlib_u8
-
-/***************************************************************/
-
-#ifdef CONV_INDEX
-
-#define CONV_FUNC(KERN)                                 \
-  mlib_conv##KERN##_Index3_8_8nw(mlib_image *dst,       \
-                                 mlib_image *src,       \
-                                 mlib_s32   *kern,      \
-                                 mlib_s32   scale,      \
-                                 void       *colormap)
-
-#else
-
-#define CONV_FUNC(KERN)                         \
-  mlib_conv##KERN##_8nw_f(mlib_image *dst,      \
-                          mlib_image *src,      \
-                          mlib_s32   *kern,     \
-                          mlib_s32   scale)
-
-#endif
-
-/***************************************************************/
-
-#ifdef CONV_INDEX
-
-#define NCHAN  3
-
-#else
-
-#define NCHAN  nchan
-
-#endif
-
-/***************************************************************/
-
-#define DEF_VARS                                                \
-  DTYPE    *sl, *sp, *dl;                                       \
-  mlib_s32 hgt = mlib_ImageGetHeight(src);                      \
-  mlib_s32 wid = mlib_ImageGetWidth(src);                       \
-  mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(DTYPE);      \
-  mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(DTYPE);      \
-  DTYPE    *adr_src = (DTYPE *)mlib_ImageGetData(src);          \
-  DTYPE    *adr_dst = (DTYPE *)mlib_ImageGetData(dst);          \
-  mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0;     \
-  mlib_d64 *pbuff, *dp;                                         \
-  mlib_f32 *karr = (mlib_f32 *)kern;                            \
-  mlib_s32 gsr_scale = (31 - scale) << 3;                       \
-  mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]);  \
-  mlib_s32 i, j, l
-
-/***************************************************************/
-
-#ifdef CONV_INDEX
-
-#define DEF_EXTRA_VARS                                                  \
-  int    offset = mlib_ImageGetLutOffset(colormap);                     \
-  LTYPE  **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap);         \
-  LTYPE  *ltbl0 = lut_table[0] - offset;                                \
-  LTYPE  *ltbl1 = lut_table[1] - offset;                                \
-  LTYPE  *ltbl2 = lut_table[2] - offset;                                \
-  LTYPE  *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2
-
-#else
-
-#define DEF_EXTRA_VARS                          \
-  mlib_s32 nchan = mlib_ImageGetChannels(dst)
-
-#endif
-
-/***************************************************************/
-
-#if NCHAN == 3
-
-#define LOAD_SRC() {                                            \
-    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
-    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
-    mlib_d64 t0, t1, t2;                                        \
-                                                                \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
-                                                                \
-    buffn[i] = t0;                                              \
-    buffn[i + 1] = t1;                                          \
-    buffn[i + 2] = t2;                                          \
-                                                                \
-    sp += 8;                                                    \
-  }
-
-#else
-
-#define LOAD_SRC() {                                            \
-    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
-    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
-    mlib_d64 t0, t1, t2;                                        \
-                                                                \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
-                                                                \
-    buffn[i] = t0;                                              \
-    buffn[i + 1] = t1;                                          \
-    buffn[i + 2] = t2;                                          \
-                                                                \
-    sp += 6;                                                    \
-  }
-
-#endif
-
-/***************************************************************/
-
-static mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008,
-                                    0x00040004, 0x00020002, 0x00010001, 0x00000000,
-                                    0x00000000, 0x00000000, 0x00000000, 0x00000000,
-                                    0x00000000, 0x00000000, 0x00000000, 0x00000000 };
-
-/***************************************************************/
-
-void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, int size);
-
-/***************************************************************/
-
-#define KSIZE  2
-
-mlib_status CONV_FUNC(2x2)
-{
-  mlib_d64 *buffs[2*(KSIZE + 1)];
-  mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s0, s1;
-  mlib_d64 d0, d1, d00, d01, d10, d11;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  sl = adr_src;
-  dl = adr_dst;
-
-  ssize = NCHAN*wid;
-  dsize = (ssize + 7)/8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64));
-  if (pbuff == NULL) return MLIB_FAILURE;
-
-  for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
-  for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
-  buffd = buffs[KSIZE] + esize;
-  buffe = buffd + 2*esize;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-  xsize = ssize - NCHAN*(KSIZE - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < KSIZE; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l*sll;
-
-#ifndef CONV_INDEX
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-#endif /* CONV_INDEX */
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind;
-    mlib_f32 *pk = karr, k0, k1;
-    sp = sl + KSIZE*sll;
-
-    buff0 = buffc[0];
-    buff1 = buffc[1];
-    buffn = buffc[KSIZE];
-
-#ifndef CONV_INDEX
-    if ((((mlib_addr)(sl      )) & 7) == 0) buff0 = (mlib_d64*)sl;
-    if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll);
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#endif
-
-    k0 = pk[1];
-    k1 = pk[3];
-    vis_write_gsr(gsr_scale + NCHAN);
-
-    s01 = buff0[0];
-    s11 = buff1[0];
-#pragma pipeloop(0)
-    for (i = 0; i < (xsize + 7)/8; i++) {
-      s00 = s01;
-      s10 = s11;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s0  = vis_faligndata(s00, s01);
-      s1  = vis_faligndata(s10, s11);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d01, d11);
-      buffd[2*i] = d0;
-      buffd[2*i + 1] = d1;
-    }
-
-    k0 = pk[0];
-    k1 = pk[2];
-#ifndef CONV_INDEX
-    dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-#pragma pipeloop(0)
-    for (i = 0; i < xsize/8; i++) {
-      s0 = buff0[i];
-      s1 = buff1[i];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d00 = vis_fpadd16(d00, d10);
-      d0  = vis_fpadd16(d0, drnd);
-      d0  = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1  = vis_fpadd16(d1, drnd);
-      d1  = vis_fpadd16(d1, d01);
-      dp[i] = vis_fpack16_pair(d0, d1);
-    }
-
-    if (emask) {
-      s0 = buff0[i];
-      s1 = buff1[i];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d00 = vis_fpadd16(d00, d10);
-      d0  = vis_fpadd16(d0, drnd);
-      d0  = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1  = vis_fpadd16(d1, drnd);
-      d1  = vis_fpadd16(d1, d01);
-
-      d0 = vis_fpack16_pair(d0, d1);
-      vis_pst_8(d0, dp + i, emask);
-    }
-
-    if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-#else
-    vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      mlib_d64 d00, d01, d02, d03, d04, d05;
-      mlib_d64 d10, d11, d12, d13, d14, d15;
-      mlib_d64 d0, d1, d2, d3, d4, d5;
-      mlib_d64 s00 = buff0[i];
-      mlib_d64 s01 = buff0[i + 1];
-      mlib_d64 s02 = buff0[i + 2];
-      mlib_d64 s10 = buff1[i];
-      mlib_d64 s11 = buff1[i + 1];
-      mlib_d64 s12 = buff1[i + 2];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d2 = buffd[2*i + 2];
-      d3 = buffd[2*i + 3];
-      d4 = buffd[2*i + 4];
-      d5 = buffd[2*i + 5];
-      d00 = vis_fpadd16(d00, d10);
-      d0  = vis_fpadd16(d0, drnd);
-      d0  = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1  = vis_fpadd16(d1, drnd);
-      d1  = vis_fpadd16(d1, d01);
-      d02 = vis_fpadd16(d02, d12);
-      d2  = vis_fpadd16(d2, drnd);
-      d2  = vis_fpadd16(d2, d02);
-      d03 = vis_fpadd16(d03, d13);
-      d3  = vis_fpadd16(d3, drnd);
-      d3  = vis_fpadd16(d3, d03);
-      d04 = vis_fpadd16(d04, d14);
-      d4  = vis_fpadd16(d4, drnd);
-      d4  = vis_fpadd16(d4, d04);
-      d05 = vis_fpadd16(d05, d15);
-      d5  = vis_fpadd16(d5, drnd);
-      d5  = vis_fpadd16(d5, d05);
-
-      buffe[i    ] = vis_fpack16_pair(d0, d1);
-      buffe[i + 1] = vis_fpack16_pair(d2, d3);
-      buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-      LOAD_SRC();
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_U8_3((void*)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-    if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-
-#undef  KSIZE
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)
-{
-  mlib_d64 *buffs[2*(KSIZE + 1)];
-  mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2;
-  mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21;
-  mlib_s32 ik, ik_last, off, doff;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  sl = adr_src;
-#ifdef CONV_INDEX
-  dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1);
-#else
-  dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN);
-#endif
-
-  ssize = NCHAN*wid;
-  dsize = (ssize + 7)/8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64));
-  if (pbuff == NULL) return MLIB_FAILURE;
-
-  for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
-  for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
-  buffd = buffs[KSIZE] + esize;
-  buffe = buffd + 2*esize;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-  xsize = ssize - NCHAN*(KSIZE - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < KSIZE; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l*sll;
-
-#ifndef CONV_INDEX
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-#endif /* CONV_INDEX */
-  }
-
-  /* init buffer */
-#pragma pipeloop(0)
-  for (i = 0; i < (xsize + 7)/8; i++) {
-    buffd[2*i    ] = drnd;
-    buffd[2*i + 1] = drnd;
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2;
-    mlib_f32 *pk = karr, k0, k1, k2;
-    sp = sl + KSIZE*sll;
-
-    pbuff0 = buffc[0];
-    pbuff1 = buffc[1];
-    pbuff2 = buffc[2];
-    buffn  = buffc[KSIZE];
-
-#ifndef CONV_INDEX
-    if ((((mlib_addr)(sl        )) & 7) == 0) pbuff0 = (mlib_d64*)sl;
-    if ((((mlib_addr)(sl +   sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll);
-    if ((((mlib_addr)(sl + 2*sll)) & 7) == 0) pbuff2 = (mlib_d64*)(sl + 2*sll);
-
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#endif
-
-#ifdef CONV_INDEX
-    ik_last = 0;
-#else
-    ik_last = (KSIZE - 1);
-#endif
-
-    for (ik = 0; ik < KSIZE; ik++) {
-      k0 = pk[ik];
-      k1 = pk[ik + KSIZE];
-      k2 = pk[ik + 2*KSIZE];
-
-      off  = ik*NCHAN;
-      doff = off/8;
-      off &= 7;
-      buff0 = pbuff0 + doff;
-      buff1 = pbuff1 + doff;
-      buff2 = pbuff2 + doff;
-      vis_write_gsr(gsr_scale + off);
-
-      if (ik == ik_last) continue;
-      /*if (!ik_last) {
-        if ((off & 3) || (ik == (KSIZE - 1))) {
-          ik_last = ik;
-          continue;
-        }
-      }*/
-
-      if (off == 0) {
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7)/8; i++) {
-          s0 = buff0[i];
-          s1 = buff1[i];
-          s2 = buff2[i];
-
-          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-          d0 = buffd[2*i];
-          d1 = buffd[2*i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2*i] = d0;
-          buffd[2*i + 1] = d1;
-        }
-
-      } else if (off == 4) {
-        s01 = buff0[0];
-        s11 = buff1[0];
-        s21 = buff2[0];
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7)/8; i++) {
-          s00 = s01;
-          s10 = s11;
-          s20 = s21;
-          s01 = buff0[i + 1];
-          s11 = buff1[i + 1];
-          s21 = buff2[i + 1];
-
-          d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-          d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-          d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-          d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-          d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-          d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-
-          d0 = buffd[2*i];
-          d1 = buffd[2*i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2*i] = d0;
-          buffd[2*i + 1] = d1;
-        }
-
-      } else {
-        s01 = buff0[0];
-        s11 = buff1[0];
-        s21 = buff2[0];
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7)/8; i++) {
-          s00 = s01;
-          s10 = s11;
-          s20 = s21;
-          s01 = buff0[i + 1];
-          s11 = buff1[i + 1];
-          s21 = buff2[i + 1];
-          s0  = vis_faligndata(s00, s01);
-          s1  = vis_faligndata(s10, s11);
-          s2  = vis_faligndata(s20, s21);
-
-          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-          d0 = buffd[2*i];
-          d1 = buffd[2*i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2*i] = d0;
-          buffd[2*i + 1] = d1;
-        }
-      }
-    }
-
-    k0 = pk[ik_last];
-    k1 = pk[ik_last + KSIZE];
-    k2 = pk[ik_last + 2*KSIZE];
-
-    off  = ik_last*NCHAN;
-    doff = off/8;
-    off &= 7;
-    buff0 = pbuff0 + doff;
-    buff1 = pbuff1 + doff;
-    buff2 = pbuff2 + doff;
-    vis_write_gsr(gsr_scale + off);
-
-#ifndef CONV_INDEX
-    dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-    s01 = buff0[0];
-    s11 = buff1[0];
-    s21 = buff2[0];
-#pragma pipeloop(0)
-    for (i = 0; i < xsize/8; i++) {
-      s00 = s01;
-      s10 = s11;
-      s20 = s21;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s21 = buff2[i + 1];
-      s0  = vis_faligndata(s00, s01);
-      s1  = vis_faligndata(s10, s11);
-      s2  = vis_faligndata(s20, s21);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-
-      dd = vis_fpack16_pair(d0, d1);
-      dp[i] = dd;
-
-      buffd[2*i    ] = drnd;
-      buffd[2*i + 1] = drnd;
-    }
-
-    if (emask) {
-      s00 = s01;
-      s10 = s11;
-      s20 = s21;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s21 = buff2[i + 1];
-      s0  = vis_faligndata(s00, s01);
-      s1  = vis_faligndata(s10, s11);
-      s2  = vis_faligndata(s20, s21);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-
-      dd = vis_fpack16_pair(d0, d1);
-      vis_pst_8(dd, dp + i, emask);
-
-      buffd[2*i    ] = drnd;
-      buffd[2*i + 1] = drnd;
-    }
-
-    if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-#else
-    vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      mlib_d64 d00, d01, d02, d03, d04, d05;
-      mlib_d64 d10, d11, d12, d13, d14, d15;
-      mlib_d64 d20, d21, d22, d23, d24, d25;
-      mlib_d64 d0, d1, d2, d3, d4, d5;
-      mlib_d64 s00 = buff0[i];
-      mlib_d64 s01 = buff0[i + 1];
-      mlib_d64 s02 = buff0[i + 2];
-      mlib_d64 s10 = buff1[i];
-      mlib_d64 s11 = buff1[i + 1];
-      mlib_d64 s12 = buff1[i + 2];
-      mlib_d64 s20 = buff2[i];
-      mlib_d64 s21 = buff2[i + 1];
-      mlib_d64 s22 = buff2[i + 2];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
-      d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
-      d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
-      d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
-      d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
-
-      d0 = buffd[2*i];
-      d1 = buffd[2*i + 1];
-      d2 = buffd[2*i + 2];
-      d3 = buffd[2*i + 3];
-      d4 = buffd[2*i + 4];
-      d5 = buffd[2*i + 5];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-      d2 = vis_fpadd16(d2, d02);
-      d2 = vis_fpadd16(d2, d12);
-      d2 = vis_fpadd16(d2, d22);
-      d3 = vis_fpadd16(d3, d03);
-      d3 = vis_fpadd16(d3, d13);
-      d3 = vis_fpadd16(d3, d23);
-      d4 = vis_fpadd16(d4, d04);
-      d4 = vis_fpadd16(d4, d14);
-      d4 = vis_fpadd16(d4, d24);
-      d5 = vis_fpadd16(d5, d05);
-      d5 = vis_fpadd16(d5, d15);
-      d5 = vis_fpadd16(d5, d25);
-
-      buffe[i    ] = vis_fpack16_pair(d0, d1);
-      buffe[i + 1] = vis_fpack16_pair(d2, d3);
-      buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-      buffd[2*i    ] = drnd;
-      buffd[2*i + 1] = drnd;
-      buffd[2*i + 2] = drnd;
-      buffd[2*i + 3] = drnd;
-      buffd[2*i + 4] = drnd;
-      buffd[2*i + 5] = drnd;
-
-      LOAD_SRC();
-    }
-
-    mlib_ImageColorTrue2IndexLine_U8_U8_3((void*)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-    if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-
-#undef  KSIZE
-#define MAX_N   11
-
-#ifdef CONV_INDEX
-
-mlib_status mlib_convMxN_Index3_8_8nw(mlib_image *dst,
-                                      mlib_image *src,
-                                      mlib_s32   m,
-                                      mlib_s32   n,
-                                      mlib_s32   dm,
-                                      mlib_s32   dn,
-                                      mlib_s32   *kern,
-                                      mlib_s32   scale,
-                                      void       *colormap)
-
-#else
-
-mlib_status mlib_convMxN_8nw_f(mlib_image *dst,
-                               mlib_image *src,
-                               mlib_s32   m,
-                               mlib_s32   n,
-                               mlib_s32   dm,
-                               mlib_s32   dn,
-                               mlib_s32   *kern,
-                               mlib_s32   scale)
-
-#endif
-{
-  mlib_d64 *buffs_local[3*(MAX_N + 1)], **buffs = buffs_local, **buff;
-  mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3;
-  mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31;
-  mlib_d64 dd, d0, d1;
-  mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  if (n > MAX_N) {
-    buffs = mlib_malloc(3*(n + 1)*sizeof(mlib_d64*));
-    if (buffs == NULL) return MLIB_FAILURE;
-  }
-
-  buff = buffs + 2*(n + 1);
-
-  sl = adr_src;
-#ifdef CONV_INDEX
-  dl = adr_dst + dn*dll + dm;
-#else
-  dl = adr_dst + dn*dll + dm*NCHAN;
-#endif
-
-  ssize = NCHAN*wid;
-  dsize = (ssize + 7)/8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((n + 4)*esize*sizeof(mlib_d64));
-  if (pbuff == NULL) {
-    if (buffs != buffs_local) mlib_free(buffs);
-    return MLIB_FAILURE;
-  }
-
-  for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize;
-  for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i];
-  buffd = buffs[n] + esize;
-  buffe = buffd + 2*esize;
-
-  wid -= (m - 1);
-  hgt -= (n - 1);
-  xsize = ssize - NCHAN*(m - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < n; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l*sll;
-
-#ifndef CONV_INDEX
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-#endif /* CONV_INDEX */
-  }
-
-  /* init buffer */
-#pragma pipeloop(0)
-  for (i = 0; i < (xsize + 7)/8; i++) {
-    buffd[2*i    ] = drnd;
-    buffd[2*i + 1] = drnd;
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind;
-    mlib_f32 *pk = karr, k0, k1, k2, k3;
-    sp = sl + n*sll;
-
-    for (l = 0; l < n; l++) {
-      buff[l] = buffc[l];
-    }
-    buffn  = buffc[n];
-
-#ifndef CONV_INDEX
-    for (l = 0; l < n; l++) {
-      if ((((mlib_addr)(sl + l*sll)) & 7) == 0) buff[l] = (mlib_d64*)(sl + l*sll);
-    }
-    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
-#endif
-
-#ifdef CONV_INDEX
-    ik_last = 0;
-#else
-    ik_last = (m - 1);
-#endif
-
-    for (jk = 0; jk < n; jk += jk_size) {
-      jk_size = n - jk;
-#ifdef CONV_INDEX
-      if (jk_size >= 5) jk_size = 3;
-      if (jk_size == 4) jk_size = 2;
-#else
-      if (jk_size >= 6) jk_size = 4;
-      if (jk_size == 5) jk_size = 3;
-#endif
-      coff = 0;
-
-      if (jk_size == 2) {
-
-        for (ik = 0; ik < m; ik++, coff += NCHAN) {
-          if (!jk && ik == ik_last) continue;
-
-          k0 = pk[ik];
-          k1 = pk[ik + m];
-
-          doff  = coff/8;
-          buff0 = buff[jk    ] + doff;
-          buff1 = buff[jk + 1] + doff;
-
-          off = coff & 7;
-          vis_write_gsr(gsr_scale + off);
-
-          s01 = buff0[0];
-          s11 = buff1[0];
-#pragma pipeloop(0)
-          for (i = 0; i < (xsize + 7)/8; i++) {
-            s00 = s01;
-            s10 = s11;
-            s01 = buff0[i + 1];
-            s11 = buff1[i + 1];
-            s0  = vis_faligndata(s00, s01);
-            s1  = vis_faligndata(s10, s11);
-
-            d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-            d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-            d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-            d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-            d0 = buffd[2*i];
-            d1 = buffd[2*i + 1];
-            d0 = vis_fpadd16(d00, d0);
-            d0 = vis_fpadd16(d10, d0);
-            d1 = vis_fpadd16(d01, d1);
-            d1 = vis_fpadd16(d11, d1);
-            buffd[2*i] = d0;
-            buffd[2*i + 1] = d1;
-          }
-
-        }
-
-        pk += 2*m;
-
-      } else if (jk_size == 3) {
-
-        for (ik = 0; ik < m; ik++, coff += NCHAN) {
-          if (!jk && ik == ik_last) continue;
-
-          k0 = pk[ik];
-          k1 = pk[ik + m];
-          k2 = pk[ik + 2*m];
-
-          doff  = coff/8;
-          buff0 = buff[jk    ] + doff;
-          buff1 = buff[jk + 1] + doff;
-          buff2 = buff[jk + 2] + doff;
-
-          off = coff & 7;
-          vis_write_gsr(gsr_scale + off);
-
-          if (off == 0) {
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s0 = buff0[i];
-              s1 = buff1[i];
-              s2 = buff2[i];
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-              d00 = vis_fpadd16(d00, d10);
-              d0  = vis_fpadd16(d20, d0);
-              d0  = vis_fpadd16(d00, d0);
-              d01 = vis_fpadd16(d01, d11);
-              d1  = vis_fpadd16(d21, d1);
-              d1  = vis_fpadd16(d01, d1);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else if (off == 4) {
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-
-              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-
-              d00 = vis_fpadd16(d00, d10);
-              d0  = vis_fpadd16(d20, d0);
-              d0  = vis_fpadd16(d00, d0);
-              d01 = vis_fpadd16(d01, d11);
-              d1  = vis_fpadd16(d21, d1);
-              d1  = vis_fpadd16(d01, d1);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else {
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-              s0  = vis_faligndata(s00, s01);
-              s1  = vis_faligndata(s10, s11);
-              s2  = vis_faligndata(s20, s21);
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-              d00 = vis_fpadd16(d00, d10);
-              d0  = vis_fpadd16(d20, d0);
-              d0  = vis_fpadd16(d00, d0);
-              d01 = vis_fpadd16(d01, d11);
-              d1  = vis_fpadd16(d21, d1);
-              d1  = vis_fpadd16(d01, d1);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-          }
-        }
-
-        pk += 3*m;
-
-      } else { /* jk_size == 4 */
-
-        for (ik = 0; ik < m; ik++, coff += NCHAN) {
-          if (!jk && ik == ik_last) continue;
-
-          k0 = pk[ik];
-          k1 = pk[ik + m];
-          k2 = pk[ik + 2*m];
-          k3 = pk[ik + 3*m];
-
-          doff  = coff/8;
-          buff0 = buff[jk    ] + doff;
-          buff1 = buff[jk + 1] + doff;
-          buff2 = buff[jk + 2] + doff;
-          buff3 = buff[jk + 3] + doff;
-
-          off = coff & 7;
-          vis_write_gsr(gsr_scale + off);
-
-          if (off == 0) {
-
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s0 = buff0[i];
-              s1 = buff1[i];
-              s2 = buff2[i];
-              s3 = buff3[i];
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-              d00 = vis_fpadd16(d00, d10);
-              d20 = vis_fpadd16(d20, d30);
-              d0  = vis_fpadd16(d0,  d00);
-              d0  = vis_fpadd16(d0,  d20);
-              d01 = vis_fpadd16(d01, d11);
-              d21 = vis_fpadd16(d21, d31);
-              d1  = vis_fpadd16(d1,  d01);
-              d1  = vis_fpadd16(d1,  d21);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else if (off == 4) {
-
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-            s31 = buff3[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s30 = s31;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-              s31 = buff3[i + 1];
-
-              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-              d30 = vis_fmul8x16au(vis_read_lo(s30), k3);
-              d31 = vis_fmul8x16au(vis_read_hi(s31), k3);
-
-              d00 = vis_fpadd16(d00, d10);
-              d20 = vis_fpadd16(d20, d30);
-              d0  = vis_fpadd16(d0,  d00);
-              d0  = vis_fpadd16(d0,  d20);
-              d01 = vis_fpadd16(d01, d11);
-              d21 = vis_fpadd16(d21, d31);
-              d1  = vis_fpadd16(d1,  d01);
-              d1  = vis_fpadd16(d1,  d21);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-
-          } else {
-
-            s01 = buff0[0];
-            s11 = buff1[0];
-            s21 = buff2[0];
-            s31 = buff3[0];
-#pragma pipeloop(0)
-            for (i = 0; i < (xsize + 7)/8; i++) {
-              d0 = buffd[2*i];
-              d1 = buffd[2*i + 1];
-
-              s00 = s01;
-              s10 = s11;
-              s20 = s21;
-              s30 = s31;
-              s01 = buff0[i + 1];
-              s11 = buff1[i + 1];
-              s21 = buff2[i + 1];
-              s31 = buff3[i + 1];
-              s0  = vis_faligndata(s00, s01);
-              s1  = vis_faligndata(s10, s11);
-              s2  = vis_faligndata(s20, s21);
-              s3  = vis_faligndata(s30, s31);
-
-              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-              d00 = vis_fpadd16(d00, d10);
-              d20 = vis_fpadd16(d20, d30);
-              d0  = vis_fpadd16(d0,  d00);
-              d0  = vis_fpadd16(d0,  d20);
-              d01 = vis_fpadd16(d01, d11);
-              d21 = vis_fpadd16(d21, d31);
-              d1  = vis_fpadd16(d1,  d01);
-              d1  = vis_fpadd16(d1,  d21);
-              buffd[2*i] = d0;
-              buffd[2*i + 1] = d1;
-            }
-          }
-        }
-
-        pk += 4*m;
-      }
-    }
-
-    /*****************************************
-     *****************************************
-     **          Final iteration            **
-     *****************************************
-     *****************************************/
-
-    jk_size = n;
-#ifdef CONV_INDEX
-    if (jk_size >= 5) jk_size = 3;
-    if (jk_size == 4) jk_size = 2;
-#else
-    if (jk_size >= 6) jk_size = 4;
-    if (jk_size == 5) jk_size = 3;
-#endif
-
-    k0 = karr[ik_last];
-    k1 = karr[ik_last + m];
-    k2 = karr[ik_last + 2*m];
-    k3 = karr[ik_last + 3*m];
-
-    off  = ik_last*NCHAN;
-    doff = off/8;
-    off &= 7;
-    buff0 = buff[0] + doff;
-    buff1 = buff[1] + doff;
-    buff2 = buff[2] + doff;
-    buff3 = buff[3] + doff;
-    vis_write_gsr(gsr_scale + off);
-
-#ifndef CONV_INDEX
-    if (jk_size == 2) {
-      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-      s01 = buff0[0];
-      s11 = buff1[0];
-#pragma pipeloop(0)
-      for (i = 0; i < xsize/8; i++) {
-        s00 = s01;
-        s10 = s11;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-
-        dd = vis_fpack16_pair(d0, d1);
-        dp[i] = dd;
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if (emask) {
-        s00 = s01;
-        s10 = s11;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-
-        dd = vis_fpack16_pair(d0, d1);
-        vis_pst_8(dd, dp + i, emask);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-    } else if (jk_size == 3) {
-
-      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-      s01 = buff0[0];
-      s11 = buff1[0];
-      s21 = buff2[0];
-#pragma pipeloop(0)
-      for (i = 0; i < xsize/8; i++) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-
-        dd = vis_fpack16_pair(d0, d1);
-        dp[i] = dd;
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if (emask) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-
-        dd = vis_fpack16_pair(d0, d1);
-        vis_pst_8(dd, dp + i, emask);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-
-    } else /* if (jk_size == 4) */ {
-
-      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
-
-      s01 = buff0[0];
-      s11 = buff1[0];
-      s21 = buff2[0];
-      s31 = buff3[0];
-#pragma pipeloop(0)
-      for (i = 0; i < xsize/8; i++) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s30 = s31;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s31 = buff3[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-        s3  = vis_faligndata(s30, s31);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d0 = vis_fpadd16(d0, d30);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d1 = vis_fpadd16(d1, d31);
-
-        dd = vis_fpack16_pair(d0, d1);
-        dp[i] = dd;
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if (emask) {
-        s00 = s01;
-        s10 = s11;
-        s20 = s21;
-        s30 = s31;
-        s01 = buff0[i + 1];
-        s11 = buff1[i + 1];
-        s21 = buff2[i + 1];
-        s31 = buff3[i + 1];
-        s0  = vis_faligndata(s00, s01);
-        s1  = vis_faligndata(s10, s11);
-        s2  = vis_faligndata(s20, s21);
-        s3  = vis_faligndata(s30, s31);
-
-        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
-        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d0 = vis_fpadd16(d0, d30);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d1 = vis_fpadd16(d1, d31);
-
-        dd = vis_fpack16_pair(d0, d1);
-        vis_pst_8(dd, dp + i, emask);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-      }
-
-      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
-    }
-
-#else /* CONV_INDEX */
-
-    if (jk_size == 2) {
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-        mlib_d64 s10 = buff1[i];
-        mlib_d64 s11 = buff1[i + 1];
-        mlib_d64 s12 = buff1[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d2 = buffd[2*i + 2];
-        d3 = buffd[2*i + 3];
-        d4 = buffd[2*i + 4];
-        d5 = buffd[2*i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d2 = vis_fpadd16(d2, d02);
-        d2 = vis_fpadd16(d2, d12);
-        d3 = vis_fpadd16(d3, d03);
-        d3 = vis_fpadd16(d3, d13);
-        d4 = vis_fpadd16(d4, d04);
-        d4 = vis_fpadd16(d4, d14);
-        d5 = vis_fpadd16(d5, d05);
-        d5 = vis_fpadd16(d5, d15);
-
-        buffe[i    ] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-        buffd[2*i + 2] = drnd;
-        buffd[2*i + 3] = drnd;
-        buffd[2*i + 4] = drnd;
-        buffd[2*i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-
-    } else /* if (jk_size == 3) */ {
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d20, d21, d22, d23, d24, d25;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-        mlib_d64 s10 = buff1[i];
-        mlib_d64 s11 = buff1[i + 1];
-        mlib_d64 s12 = buff1[i + 2];
-        mlib_d64 s20 = buff2[i];
-        mlib_d64 s21 = buff2[i + 1];
-        mlib_d64 s22 = buff2[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
-        d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
-        d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
-        d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
-        d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
-
-        d0 = buffd[2*i];
-        d1 = buffd[2*i + 1];
-        d2 = buffd[2*i + 2];
-        d3 = buffd[2*i + 3];
-        d4 = buffd[2*i + 4];
-        d5 = buffd[2*i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d2 = vis_fpadd16(d2, d02);
-        d2 = vis_fpadd16(d2, d12);
-        d2 = vis_fpadd16(d2, d22);
-        d3 = vis_fpadd16(d3, d03);
-        d3 = vis_fpadd16(d3, d13);
-        d3 = vis_fpadd16(d3, d23);
-        d4 = vis_fpadd16(d4, d04);
-        d4 = vis_fpadd16(d4, d14);
-        d4 = vis_fpadd16(d4, d24);
-        d5 = vis_fpadd16(d5, d05);
-        d5 = vis_fpadd16(d5, d15);
-        d5 = vis_fpadd16(d5, d25);
-
-        buffe[i    ] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2*i    ] = drnd;
-        buffd[2*i + 1] = drnd;
-        buffd[2*i + 2] = drnd;
-        buffd[2*i + 3] = drnd;
-        buffd[2*i + 4] = drnd;
-        buffd[2*i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-    }
-#endif /* CONV_INDEX */
-
-#ifdef CONV_INDEX
-    mlib_ImageColorTrue2IndexLine_U8_U8_3((void*)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-    if (buff_ind >= (n + 1)) buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-  if (buffs != buffs_local) mlib_free(buffs);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
--- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv_8nw.c	Thu May 12 11:03:07 2016 -0700
+++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv_8nw.c	Fri May 13 11:31:05 2016 +0300
@@ -34,58 +34,16 @@
 #include "vis_proto.h"
 #include "mlib_image.h"
 #include "mlib_ImageCheck.h"
-#include "mlib_ImageColormap.h"
 #include "mlib_ImageCopy.h"
 #include "mlib_ImageConv.h"
 #include "mlib_v_ImageConv.h"
 
-/*
-  This defines switches between functions in
-  files: mlib_v_ImageConvIndex3_8_8nw.c,
-         mlib_v_ImageConvIndex4_8_8nw.c,
-         mlib_v_ImageConvIndex3_8_16nw.c,
-         mlib_v_ImageConvIndex4_8_16nw.c
-*/
-
-/*#define CONV_INDEX*/
-
 /***************************************************************/
 #define DTYPE mlib_u8
-#define LTYPE mlib_u8
 
 /***************************************************************/
-#ifdef CONV_INDEX
-
-#define CONV_FUNC(KERN)                                         \
-  mlib_conv##KERN##_Index3_8_8nw(mlib_image       *dst,         \
-                                 const mlib_image *src,         \
-                                 const mlib_s32   *kern,        \
-                                 mlib_s32         scale,        \
-                                 const void       *colormap)
-
-#else
-
-#define CONV_FUNC(KERN)                                         \
-  mlib_conv##KERN##_8nw_f(mlib_image       *dst,                \
-                          const mlib_image *src,                \
-                          const mlib_s32   *kern,               \
-                          mlib_s32         scale)
-
-#endif /* CONV_INDEX */
-
-#define ColorTrue2IndexLine mlib_ImageColorTrue2IndexLine_U8_U8_3
-
-/***************************************************************/
-#ifdef CONV_INDEX
-
-#define NCHAN  3
-
-#else
-
 #define NCHAN  nchan
 
-#endif /* CONV_INDEX */
-
 /***************************************************************/
 #define DEF_VARS                                                \
   DTYPE    *sl, *sp, *dl;                                       \
@@ -103,104 +61,9 @@
   mlib_s32 i, j, l
 
 /***************************************************************/
-#ifdef CONV_INDEX
-
-#define DEF_EXTRA_VARS                                          \
-  mlib_s32 offset = mlib_ImageGetLutOffset(colormap);           \
-  LTYPE  **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap); \
-  LTYPE  *ltbl0 = lut_table[0] - offset;                        \
-  LTYPE  *ltbl1 = lut_table[1] - offset;                        \
-  LTYPE  *ltbl2 = lut_table[2] - offset;                        \
-  LTYPE  *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2
-
-#else
-
 #define DEF_EXTRA_VARS                                          \
   mlib_s32 nchan = mlib_ImageGetChannels(dst)
 
-#endif /* CONV_INDEX */
-
-/***************************************************************/
-#if NCHAN == 3
-
-#define LOAD_SRC() {                                            \
-    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
-    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
-    mlib_d64 t0, t1, t2;                                        \
-                                                                \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
-                                                                \
-    buffn[i] = t0;                                              \
-    buffn[i + 1] = t1;                                          \
-    buffn[i + 2] = t2;                                          \
-                                                                \
-    sp += 8;                                                    \
-  }
-
-#else
-
-#define LOAD_SRC() {                                            \
-    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
-    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
-    mlib_d64 t0, t1, t2;                                        \
-                                                                \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2);            \
-    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1);            \
-    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
-    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
-                                                                \
-    buffn[i] = t0;                                              \
-    buffn[i + 1] = t1;                                          \
-    buffn[i + 2] = t2;                                          \
-                                                                \
-    sp += 6;                                                    \
-  }
-
-#endif /* NCHAN == 3 */
-
 /***************************************************************/
 static const mlib_s32 mlib_round_8[16] = {
   0x00400040, 0x00200020, 0x00100010, 0x00080008,
@@ -210,651 +73,8 @@
 };
 
 /***************************************************************/
-#define KSIZE  2
-
-mlib_status CONV_FUNC(2x2)
-{
-  mlib_d64 *buffs[2 * (KSIZE + 1)];
-  mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s0, s1;
-  mlib_d64 d0, d1, d00, d01, d10, d11;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  sl = adr_src;
-  dl = adr_dst;
-
-  ssize = NCHAN * wid;
-  dsize = (ssize + 7) / 8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((KSIZE + 4) * esize * sizeof(mlib_d64));
-
-  if (pbuff == NULL)
-    return MLIB_FAILURE;
-
-  for (i = 0; i < (KSIZE + 1); i++)
-    buffs[i] = pbuff + i * esize;
-  for (i = 0; i < (KSIZE + 1); i++)
-    buffs[(KSIZE + 1) + i] = buffs[i];
-  buffd = buffs[KSIZE] + esize;
-  buffe = buffd + 2 * esize;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-  xsize = ssize - NCHAN * (KSIZE - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < KSIZE; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l * sll;
-
-#ifndef CONV_INDEX
-
-    if ((mlib_addr) sp & 7)
-      mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize);
-
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-
-#endif /* CONV_INDEX */
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind;
-    mlib_f32 *pk = karr, k0, k1;
-    sp = sl + KSIZE * sll;
-
-    buff0 = buffc[0];
-    buff1 = buffc[1];
-    buffn = buffc[KSIZE];
-
-#ifndef CONV_INDEX
-
-    if ((((mlib_addr) (sl)) & 7) == 0)
-      buff0 = (mlib_d64 *) sl;
-
-    if ((((mlib_addr) (sl + sll)) & 7) == 0)
-      buff1 = (mlib_d64 *) (sl + sll);
-
-    if ((mlib_addr) sp & 7)
-      mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize);
-#endif /* CONV_INDEX */
-
-    k0 = pk[1];
-    k1 = pk[3];
-    vis_write_gsr(gsr_scale + NCHAN);
-
-    s01 = buff0[0];
-    s11 = buff1[0];
-#pragma pipeloop(0)
-    for (i = 0; i < (xsize + 7) / 8; i++) {
-      s00 = s01;
-      s10 = s11;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s0 = vis_faligndata(s00, s01);
-      s1 = vis_faligndata(s10, s11);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = vis_fpadd16(d00, d10);
-      d1 = vis_fpadd16(d01, d11);
-      buffd[2 * i] = d0;
-      buffd[2 * i + 1] = d1;
-    }
-
-    k0 = pk[0];
-    k1 = pk[2];
-#ifndef CONV_INDEX
-    dp = ((mlib_addr) dl & 7) ? buffe : (mlib_d64 *) dl;
-
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      s0 = buff0[i];
-      s1 = buff1[i];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = buffd[2 * i];
-      d1 = buffd[2 * i + 1];
-      d00 = vis_fpadd16(d00, d10);
-      d0 = vis_fpadd16(d0, drnd);
-      d0 = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1 = vis_fpadd16(d1, drnd);
-      d1 = vis_fpadd16(d1, d01);
-      dp[i] = vis_fpack16_pair(d0, d1);
-    }
-
-    if (emask) {
-      s0 = buff0[i];
-      s1 = buff1[i];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-
-      d0 = buffd[2 * i];
-      d1 = buffd[2 * i + 1];
-      d00 = vis_fpadd16(d00, d10);
-      d0 = vis_fpadd16(d0, drnd);
-      d0 = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1 = vis_fpadd16(d1, drnd);
-      d1 = vis_fpadd16(d1, d01);
-
-      d0 = vis_fpack16_pair(d0, d1);
-      vis_pst_8(d0, dp + i, emask);
-    }
-
-    if ((mlib_u8 *) dp != dl)
-      mlib_ImageCopy_na((void *)buffe, dl, xsize);
-
-#else
-    vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      mlib_d64 d00, d01, d02, d03, d04, d05;
-      mlib_d64 d10, d11, d12, d13, d14, d15;
-      mlib_d64 d0, d1, d2, d3, d4, d5;
-      mlib_d64 s00 = buff0[i];
-      mlib_d64 s01 = buff0[i + 1];
-      mlib_d64 s02 = buff0[i + 2];
-      mlib_d64 s10 = buff1[i];
-      mlib_d64 s11 = buff1[i + 1];
-      mlib_d64 s12 = buff1[i + 2];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-
-      d0 = buffd[2 * i];
-      d1 = buffd[2 * i + 1];
-      d2 = buffd[2 * i + 2];
-      d3 = buffd[2 * i + 3];
-      d4 = buffd[2 * i + 4];
-      d5 = buffd[2 * i + 5];
-      d00 = vis_fpadd16(d00, d10);
-      d0 = vis_fpadd16(d0, drnd);
-      d0 = vis_fpadd16(d0, d00);
-      d01 = vis_fpadd16(d01, d11);
-      d1 = vis_fpadd16(d1, drnd);
-      d1 = vis_fpadd16(d1, d01);
-      d02 = vis_fpadd16(d02, d12);
-      d2 = vis_fpadd16(d2, drnd);
-      d2 = vis_fpadd16(d2, d02);
-      d03 = vis_fpadd16(d03, d13);
-      d3 = vis_fpadd16(d3, drnd);
-      d3 = vis_fpadd16(d3, d03);
-      d04 = vis_fpadd16(d04, d14);
-      d4 = vis_fpadd16(d4, drnd);
-      d4 = vis_fpadd16(d4, d04);
-      d05 = vis_fpadd16(d05, d15);
-      d5 = vis_fpadd16(d5, drnd);
-      d5 = vis_fpadd16(d5, d05);
-
-      buffe[i] = vis_fpack16_pair(d0, d1);
-      buffe[i + 1] = vis_fpack16_pair(d2, d3);
-      buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-      LOAD_SRC();
-    }
-
-    ColorTrue2IndexLine((void *)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-
-    if (buff_ind >= (KSIZE + 1))
-      buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
-#define KSIZE  3
-
-mlib_status CONV_FUNC(3x3)
-{
-  mlib_d64 *buffs[2 * (KSIZE + 1)];
-  mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe;
-  mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2;
-  mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21;
-  mlib_s32 ik, ik_last, off, doff;
-  DEF_VARS;
-  DEF_EXTRA_VARS;
-
-  sl = adr_src;
-#ifdef CONV_INDEX
-  dl = adr_dst + ((KSIZE - 1) / 2) * (dll + 1);
-#else
-  dl = adr_dst + ((KSIZE - 1) / 2) * (dll + NCHAN);
-#endif /* CONV_INDEX */
-
-  ssize = NCHAN * wid;
-  dsize = (ssize + 7) / 8;
-  esize = dsize + 4;
-  pbuff = mlib_malloc((KSIZE + 4) * esize * sizeof(mlib_d64));
-
-  if (pbuff == NULL)
-    return MLIB_FAILURE;
-
-  for (i = 0; i < (KSIZE + 1); i++)
-    buffs[i] = pbuff + i * esize;
-  for (i = 0; i < (KSIZE + 1); i++)
-    buffs[(KSIZE + 1) + i] = buffs[i];
-  buffd = buffs[KSIZE] + esize;
-  buffe = buffd + 2 * esize;
-
-  wid -= (KSIZE - 1);
-  hgt -= (KSIZE - 1);
-  xsize = ssize - NCHAN * (KSIZE - 1);
-  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
-
-  vis_write_gsr(gsr_scale + 7);
-
-  for (l = 0; l < KSIZE; l++) {
-    mlib_d64 *buffn = buffs[l];
-    sp = sl + l * sll;
-
-#ifndef CONV_INDEX
-
-    if ((mlib_addr) sp & 7)
-      mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize);
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-
-#endif /* CONV_INDEX */
-  }
-
-  /* init buffer */
-#pragma pipeloop(0)
-  for (i = 0; i < (xsize + 7) / 8; i++) {
-    buffd[2 * i] = drnd;
-    buffd[2 * i + 1] = drnd;
-  }
-
-  for (j = 0; j < hgt; j++) {
-    mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2;
-    mlib_f32 *pk = karr, k0, k1, k2;
-    sp = sl + KSIZE * sll;
-
-    pbuff0 = buffc[0];
-    pbuff1 = buffc[1];
-    pbuff2 = buffc[2];
-    buffn = buffc[KSIZE];
-
-#ifndef CONV_INDEX
-
-    if ((((mlib_addr) (sl)) & 7) == 0)
-      pbuff0 = (mlib_d64 *) sl;
-
-    if ((((mlib_addr) (sl + sll)) & 7) == 0)
-      pbuff1 = (mlib_d64 *) (sl + sll);
-
-    if ((((mlib_addr) (sl + 2 * sll)) & 7) == 0)
-      pbuff2 = (mlib_d64 *) (sl + 2 * sll);
-
-    if ((mlib_addr) sp & 7)
-      mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize);
-#endif /* CONV_INDEX */
-
-#ifdef CONV_INDEX
-    ik_last = 0;
-#else
-    ik_last = (KSIZE - 1);
-#endif /* CONV_INDEX */
-
-    for (ik = 0; ik < KSIZE; ik++) {
-      k0 = pk[ik];
-      k1 = pk[ik + KSIZE];
-      k2 = pk[ik + 2 * KSIZE];
-
-      off = ik * NCHAN;
-      doff = off / 8;
-      off &= 7;
-      buff0 = pbuff0 + doff;
-      buff1 = pbuff1 + doff;
-      buff2 = pbuff2 + doff;
-      vis_write_gsr(gsr_scale + off);
-
-      if (ik == ik_last)
-        continue;
-      /*if (!ik_last) {
-       * if ((off & 3) || (ik == (KSIZE - 1))) {
-       * ik_last = ik;
-       * continue;
-       * }
-       * } */
-
-      if (off == 0) {
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7) / 8; i++) {
-          s0 = buff0[i];
-          s1 = buff1[i];
-          s2 = buff2[i];
-
-          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-          d0 = buffd[2 * i];
-          d1 = buffd[2 * i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2 * i] = d0;
-          buffd[2 * i + 1] = d1;
-        }
-      }
-      else if (off == 4) {
-        s01 = buff0[0];
-        s11 = buff1[0];
-        s21 = buff2[0];
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7) / 8; i++) {
-          s00 = s01;
-          s10 = s11;
-          s20 = s21;
-          s01 = buff0[i + 1];
-          s11 = buff1[i + 1];
-          s21 = buff2[i + 1];
-
-          d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
-          d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
-          d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
-          d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
-          d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
-          d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
-
-          d0 = buffd[2 * i];
-          d1 = buffd[2 * i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2 * i] = d0;
-          buffd[2 * i + 1] = d1;
-        }
-      }
-      else {
-        s01 = buff0[0];
-        s11 = buff1[0];
-        s21 = buff2[0];
-#pragma pipeloop(0)
-        for (i = 0; i < (xsize + 7) / 8; i++) {
-          s00 = s01;
-          s10 = s11;
-          s20 = s21;
-          s01 = buff0[i + 1];
-          s11 = buff1[i + 1];
-          s21 = buff2[i + 1];
-          s0 = vis_faligndata(s00, s01);
-          s1 = vis_faligndata(s10, s11);
-          s2 = vis_faligndata(s20, s21);
-
-          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-          d0 = buffd[2 * i];
-          d1 = buffd[2 * i + 1];
-          d0 = vis_fpadd16(d00, d0);
-          d0 = vis_fpadd16(d10, d0);
-          d0 = vis_fpadd16(d20, d0);
-          d1 = vis_fpadd16(d01, d1);
-          d1 = vis_fpadd16(d11, d1);
-          d1 = vis_fpadd16(d21, d1);
-          buffd[2 * i] = d0;
-          buffd[2 * i + 1] = d1;
-        }
-      }
-    }
-
-    k0 = pk[ik_last];
-    k1 = pk[ik_last + KSIZE];
-    k2 = pk[ik_last + 2 * KSIZE];
-
-    off = ik_last * NCHAN;
-    doff = off / 8;
-    off &= 7;
-    buff0 = pbuff0 + doff;
-    buff1 = pbuff1 + doff;
-    buff2 = pbuff2 + doff;
-    vis_write_gsr(gsr_scale + off);
-
-#ifndef CONV_INDEX
-    dp = ((mlib_addr) dl & 7) ? buffe : (mlib_d64 *) dl;
-
-    s01 = buff0[0];
-    s11 = buff1[0];
-    s21 = buff2[0];
-#pragma pipeloop(0)
-    for (i = 0; i < xsize / 8; i++) {
-      s00 = s01;
-      s10 = s11;
-      s20 = s21;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s21 = buff2[i + 1];
-      s0 = vis_faligndata(s00, s01);
-      s1 = vis_faligndata(s10, s11);
-      s2 = vis_faligndata(s20, s21);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-      d0 = buffd[2 * i];
-      d1 = buffd[2 * i + 1];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-
-      dd = vis_fpack16_pair(d0, d1);
-      dp[i] = dd;
-
-      buffd[2 * i] = drnd;
-      buffd[2 * i + 1] = drnd;
-    }
-
-    if (emask) {
-      s00 = s01;
-      s10 = s11;
-      s20 = s21;
-      s01 = buff0[i + 1];
-      s11 = buff1[i + 1];
-      s21 = buff2[i + 1];
-      s0 = vis_faligndata(s00, s01);
-      s1 = vis_faligndata(s10, s11);
-      s2 = vis_faligndata(s20, s21);
-
-      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
-
-      d0 = buffd[2 * i];
-      d1 = buffd[2 * i + 1];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-
-      dd = vis_fpack16_pair(d0, d1);
-      vis_pst_8(dd, dp + i, emask);
-
-      buffd[2 * i] = drnd;
-      buffd[2 * i + 1] = drnd;
-    }
-
-    if ((mlib_u8 *) dp != dl)
-      mlib_ImageCopy_na((void *)buffe, dl, xsize);
-
-#else
-    vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      mlib_d64 d00, d01, d02, d03, d04, d05;
-      mlib_d64 d10, d11, d12, d13, d14, d15;
-      mlib_d64 d20, d21, d22, d23, d24, d25;
-      mlib_d64 d0, d1, d2, d3, d4, d5;
-      mlib_d64 s00 = buff0[i];
-      mlib_d64 s01 = buff0[i + 1];
-      mlib_d64 s02 = buff0[i + 2];
-      mlib_d64 s10 = buff1[i];
-      mlib_d64 s11 = buff1[i + 1];
-      mlib_d64 s12 = buff1[i + 2];
-      mlib_d64 s20 = buff2[i];
-      mlib_d64 s21 = buff2[i + 1];
-      mlib_d64 s22 = buff2[i + 2];
-
-      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-      d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
-      d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
-      d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
-      d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
-      d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
-      d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
-
-      d0 = buffd[2 * i];
-      d1 = buffd[2 * i + 1];
-      d2 = buffd[2 * i + 2];
-      d3 = buffd[2 * i + 3];
-      d4 = buffd[2 * i + 4];
-      d5 = buffd[2 * i + 5];
-      d0 = vis_fpadd16(d0, d00);
-      d0 = vis_fpadd16(d0, d10);
-      d0 = vis_fpadd16(d0, d20);
-      d1 = vis_fpadd16(d1, d01);
-      d1 = vis_fpadd16(d1, d11);
-      d1 = vis_fpadd16(d1, d21);
-      d2 = vis_fpadd16(d2, d02);
-      d2 = vis_fpadd16(d2, d12);
-      d2 = vis_fpadd16(d2, d22);
-      d3 = vis_fpadd16(d3, d03);
-      d3 = vis_fpadd16(d3, d13);
-      d3 = vis_fpadd16(d3, d23);
-      d4 = vis_fpadd16(d4, d04);
-      d4 = vis_fpadd16(d4, d14);
-      d4 = vis_fpadd16(d4, d24);
-      d5 = vis_fpadd16(d5, d05);
-      d5 = vis_fpadd16(d5, d15);
-      d5 = vis_fpadd16(d5, d25);
-
-      buffe[i] = vis_fpack16_pair(d0, d1);
-      buffe[i + 1] = vis_fpack16_pair(d2, d3);
-      buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-      buffd[2 * i] = drnd;
-      buffd[2 * i + 1] = drnd;
-      buffd[2 * i + 2] = drnd;
-      buffd[2 * i + 3] = drnd;
-      buffd[2 * i + 4] = drnd;
-      buffd[2 * i + 5] = drnd;
-
-      LOAD_SRC();
-    }
-
-    ColorTrue2IndexLine((void *)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
-    sl += sll;
-    dl += dll;
-
-    buff_ind++;
-
-    if (buff_ind >= (KSIZE + 1))
-      buff_ind = 0;
-  }
-
-  mlib_free(pbuff);
-
-  return MLIB_SUCCESS;
-}
-
-/***************************************************************/
-#undef  KSIZE
 #define MAX_N   11
 
-#ifdef CONV_INDEX
-
-mlib_status mlib_convMxN_Index3_8_8nw(mlib_image       *dst,
-                                      const mlib_image *src,
-                                      mlib_s32         m,
-                                      mlib_s32         n,
-                                      mlib_s32         dm,
-                                      mlib_s32         dn,
-                                      const mlib_s32   *kern,
-                                      mlib_s32         scale,
-                                      const void       *colormap)
-#else
-
 mlib_status mlib_convMxN_8nw_f(mlib_image       *dst,
                                const mlib_image *src,
                                mlib_s32         m,
@@ -863,7 +83,6 @@
                                mlib_s32         dn,
                                const mlib_s32   *kern,
                                mlib_s32         scale)
-#endif /* CONV_INDEX */
 {
   mlib_d64 *buffs_local[3 * (MAX_N + 1)], **buffs = buffs_local, **buff;
   mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe;
@@ -884,11 +103,7 @@
   buff = buffs + 2 * (n + 1);
 
   sl = adr_src;
-#ifdef CONV_INDEX
-  dl = adr_dst + dn * dll + dm;
-#else
   dl = adr_dst + dn * dll + dm * NCHAN;
-#endif /* CONV_INDEX */
 
   ssize = NCHAN * wid;
   dsize = (ssize + 7) / 8;
@@ -919,17 +134,8 @@
     mlib_d64 *buffn = buffs[l];
     sp = sl + l * sll;
 
-#ifndef CONV_INDEX
-
     if ((mlib_addr) sp & 7)
       mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize);
-#else
-#pragma pipeloop(0)
-    for (i = 0; i < dsize; i += 3) {
-      LOAD_SRC();
-    }
-
-#endif /* CONV_INDEX */
   }
 
   /* init buffer */
@@ -950,7 +156,6 @@
 
     buffn = buffc[n];
 
-#ifndef CONV_INDEX
     for (l = 0; l < n; l++) {
       if ((((mlib_addr) (sl + l * sll)) & 7) == 0)
         buff[l] = (mlib_d64 *) (sl + l * sll);
@@ -958,31 +163,18 @@
 
     if ((mlib_addr) sp & 7)
       mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize);
-#endif /* CONV_INDEX */
 
-#ifdef CONV_INDEX
-    ik_last = 0;
-#else
     ik_last = (m - 1);
-#endif /* CONV_INDEX */
 
     for (jk = 0; jk < n; jk += jk_size) {
       jk_size = n - jk;
-#ifdef CONV_INDEX
-
-      if (jk_size >= 5)
-        jk_size = 3;
-
-      if (jk_size == 4)
-        jk_size = 2;
-#else
 
       if (jk_size >= 6)
         jk_size = 4;
 
       if (jk_size == 5)
         jk_size = 3;
-#endif /* CONV_INDEX */
+
       coff = 0;
 
       if (jk_size == 1) {
@@ -1335,21 +527,12 @@
      *****************************************/
 
     jk_size = n;
-#ifdef CONV_INDEX
-
-    if (jk_size >= 5)
-      jk_size = 3;
-
-    if (jk_size == 4)
-      jk_size = 2;
-#else
 
     if (jk_size >= 6)
       jk_size = 4;
 
     if (jk_size == 5)
       jk_size = 3;
-#endif /* CONV_INDEX */
 
     k0 = karr[ik_last];
     k1 = karr[ik_last + m];
@@ -1365,8 +548,6 @@
     buff3 = buff[3] + doff;
     vis_write_gsr(gsr_scale + off);
 
-#ifndef CONV_INDEX
-
     if (jk_size == 1) {
       dp = ((mlib_addr) dl & 7) ? buffe : (mlib_d64 *) dl;
 
@@ -1652,200 +833,6 @@
         mlib_ImageCopy_na((void *)buffe, dl, xsize);
     }
 
-#else /* CONV_INDEX */
-
-    if (jk_size == 1) {
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-
-        d0 = buffd[2 * i];
-        d1 = buffd[2 * i + 1];
-        d2 = buffd[2 * i + 2];
-        d3 = buffd[2 * i + 3];
-        d4 = buffd[2 * i + 4];
-        d5 = buffd[2 * i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d1 = vis_fpadd16(d1, d01);
-        d2 = vis_fpadd16(d2, d02);
-        d3 = vis_fpadd16(d3, d03);
-        d4 = vis_fpadd16(d4, d04);
-        d5 = vis_fpadd16(d5, d05);
-
-        buffe[i] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2 * i] = drnd;
-        buffd[2 * i + 1] = drnd;
-        buffd[2 * i + 2] = drnd;
-        buffd[2 * i + 3] = drnd;
-        buffd[2 * i + 4] = drnd;
-        buffd[2 * i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-    }
-    else if (jk_size == 2) {
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-        mlib_d64 s10 = buff1[i];
-        mlib_d64 s11 = buff1[i + 1];
-        mlib_d64 s12 = buff1[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-
-        d0 = buffd[2 * i];
-        d1 = buffd[2 * i + 1];
-        d2 = buffd[2 * i + 2];
-        d3 = buffd[2 * i + 3];
-        d4 = buffd[2 * i + 4];
-        d5 = buffd[2 * i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d2 = vis_fpadd16(d2, d02);
-        d2 = vis_fpadd16(d2, d12);
-        d3 = vis_fpadd16(d3, d03);
-        d3 = vis_fpadd16(d3, d13);
-        d4 = vis_fpadd16(d4, d04);
-        d4 = vis_fpadd16(d4, d14);
-        d5 = vis_fpadd16(d5, d05);
-        d5 = vis_fpadd16(d5, d15);
-
-        buffe[i] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2 * i] = drnd;
-        buffd[2 * i + 1] = drnd;
-        buffd[2 * i + 2] = drnd;
-        buffd[2 * i + 3] = drnd;
-        buffd[2 * i + 4] = drnd;
-        buffd[2 * i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-    }
-    else {                                  /* if (jk_size == 3) */
-
-      vis_write_gsr(gsr_scale + 7);
-
-#pragma pipeloop(0)
-      for (i = 0; i < dsize; i += 3) {
-        mlib_d64 d00, d01, d02, d03, d04, d05;
-        mlib_d64 d10, d11, d12, d13, d14, d15;
-        mlib_d64 d20, d21, d22, d23, d24, d25;
-        mlib_d64 d0, d1, d2, d3, d4, d5;
-        mlib_d64 s00 = buff0[i];
-        mlib_d64 s01 = buff0[i + 1];
-        mlib_d64 s02 = buff0[i + 2];
-        mlib_d64 s10 = buff1[i];
-        mlib_d64 s11 = buff1[i + 1];
-        mlib_d64 s12 = buff1[i + 2];
-        mlib_d64 s20 = buff2[i];
-        mlib_d64 s21 = buff2[i + 1];
-        mlib_d64 s22 = buff2[i + 2];
-
-        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
-        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
-        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
-        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
-        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
-        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
-        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
-        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
-        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
-        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
-        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
-        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
-        d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
-        d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
-        d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
-        d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
-        d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
-        d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
-
-        d0 = buffd[2 * i];
-        d1 = buffd[2 * i + 1];
-        d2 = buffd[2 * i + 2];
-        d3 = buffd[2 * i + 3];
-        d4 = buffd[2 * i + 4];
-        d5 = buffd[2 * i + 5];
-        d0 = vis_fpadd16(d0, d00);
-        d0 = vis_fpadd16(d0, d10);
-        d0 = vis_fpadd16(d0, d20);
-        d1 = vis_fpadd16(d1, d01);
-        d1 = vis_fpadd16(d1, d11);
-        d1 = vis_fpadd16(d1, d21);
-        d2 = vis_fpadd16(d2, d02);
-        d2 = vis_fpadd16(d2, d12);
-        d2 = vis_fpadd16(d2, d22);
-        d3 = vis_fpadd16(d3, d03);
-        d3 = vis_fpadd16(d3, d13);
-        d3 = vis_fpadd16(d3, d23);
-        d4 = vis_fpadd16(d4, d04);
-        d4 = vis_fpadd16(d4, d14);
-        d4 = vis_fpadd16(d4, d24);
-        d5 = vis_fpadd16(d5, d05);
-        d5 = vis_fpadd16(d5, d15);
-        d5 = vis_fpadd16(d5, d25);
-
-        buffe[i] = vis_fpack16_pair(d0, d1);
-        buffe[i + 1] = vis_fpack16_pair(d2, d3);
-        buffe[i + 2] = vis_fpack16_pair(d4, d5);
-
-        buffd[2 * i] = drnd;
-        buffd[2 * i + 1] = drnd;
-        buffd[2 * i + 2] = drnd;
-        buffd[2 * i + 3] = drnd;
-        buffd[2 * i + 4] = drnd;
-        buffd[2 * i + 5] = drnd;
-
-        LOAD_SRC();
-      }
-    }
-
-#endif /* CONV_INDEX */
-
-#ifdef CONV_INDEX
-    ColorTrue2IndexLine((void *)buffe, dl, wid, colormap);
-#endif /* CONV_INDEX */
-
     sl += sll;
     dl += dll;