# HG changeset patch # User vadim # Date 1463128265 -10800 # Node ID acea5f7d354bfb3ec98f9666044b29250d60b062 # Parent 3e22d8fd49125d01c25b95d75d3ee3d4427f9262 8047931: Remove unused medialib code Reviewed-by: bae diff -r 3e22d8fd4912 -r acea5f7d354b jdk/make/lib/Awt2dLibraries.gmk --- a/jdk/make/lib/Awt2dLibraries.gmk Thu May 12 11:03:07 2016 -0700 +++ b/jdk/make/lib/Awt2dLibraries.gmk Fri May 13 11:31:05 2016 +0300 @@ -96,10 +96,7 @@ mlib_c_ImageAffine_BL.c \ mlib_c_ImageAffine_BL_S16.c \ mlib_c_ImageAffine_BL_U16.c \ - mlib_c_ImageAffineIndex_BC.c \ - mlib_c_ImageAffineIndex_BL.c \ mlib_c_ImageAffine_NN.c \ - mlib_c_ImageBlendTable.c \ mlib_c_ImageConvClearEdge.c \ mlib_c_ImageConvCopyEdge.c \ mlib_c_ImageConv_f.c \ @@ -107,14 +104,6 @@ mlib_c_ImageCopy.c \ mlib_c_ImageLookUp.c \ mlib_c_ImageLookUp_f.c \ - mlib_v_ImageChannelExtract.c \ - mlib_v_ImageChannelExtract_f.c \ - mlib_v_ImageChannelInsert_34.c \ - mlib_v_ImageChannelInsert.c \ - mlib_v_ImageConvIndex3_8_16nw.c \ - mlib_v_ImageConvIndex3_8_8nw.c \ - mlib_v_ImageCopy.c \ - mlib_v_ImageCopy_blk.s \ # LIBMLIB_IMAGE_V_CFLAGS += $(filter-out -DMLIB_NO_LIBSUNMATH, $(BUILD_LIBMLIB_CFLAGS)) diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.c Fri May 13 11:31:05 2016 +0300 @@ -73,7 +73,6 @@ */ #include "mlib_ImageCheck.h" -#include "mlib_ImageColormap.h" #include "mlib_ImageAffine.h" @@ -125,18 +124,6 @@ }; /***************************************************************/ -const type_affine_i_fun mlib_AffineFunArr_bc_i[] = { - mlib_ImageAffineIndex_U8_U8_3CH_BC, - mlib_ImageAffineIndex_U8_U8_4CH_BC, - mlib_ImageAffineIndex_S16_U8_3CH_BC, - mlib_ImageAffineIndex_S16_U8_4CH_BC, - mlib_ImageAffineIndex_U8_S16_3CH_BC, - mlib_ImageAffineIndex_U8_S16_4CH_BC, - mlib_ImageAffineIndex_S16_S16_3CH_BC, - mlib_ImageAffineIndex_S16_S16_4CH_BC -}; - -/***************************************************************/ #ifdef i386 /* do not perform the coping by mlib_d64 data type for x86 */ #define MAX_T_IND 2 #else @@ -148,8 +135,7 @@ const mlib_image *src, const mlib_d64 *mtx, mlib_filter filter, - mlib_edge edge, - const void *colormap) + mlib_edge edge) { mlib_affine_param param[1]; mlib_status res; @@ -213,18 +199,6 @@ else return MLIB_FAILURE; /* unknown image type */ - if (colormap != NULL && filter != MLIB_NEAREST) { - if (t_ind != 0 && t_ind != 1) - return MLIB_FAILURE; - - if (mlib_ImageGetLutType(colormap) == MLIB_SHORT) - t_ind += 2; - t_ind = 2 * t_ind; - - if (mlib_ImageGetLutChannels(colormap) == 4) - t_ind++; - } - if (type == MLIB_BIT) { mlib_s32 s_bitoff = mlib_ImageGetBitOffset(src); mlib_s32 d_bitoff = mlib_ImageGetBitOffset(dst); @@ -253,25 +227,13 @@ case MLIB_BILINEAR: - if (colormap != NULL) { - res = mlib_AffineFunArr_bl_i[t_ind] (param, colormap); - } - else { - res = mlib_AffineFunArr_bl[4 * t_ind + (nchan - 1)] (param); - } - + res = mlib_AffineFunArr_bl[4 * t_ind + (nchan - 1)] (param); break; case MLIB_BICUBIC: case MLIB_BICUBIC2: - if (colormap != NULL) { - res = mlib_AffineFunArr_bc_i[t_ind] (param, colormap); - } - else { - res = mlib_AffineFunArr_bc[4 * t_ind + (nchan - 1)] (param); - } - + res = mlib_AffineFunArr_bc[4 * t_ind + (nchan - 1)] (param); break; } @@ -303,7 +265,7 @@ switch (edge) { case MLIB_EDGE_DST_FILL_ZERO: - mlib_ImageAffineEdgeZero(param, param_e, colormap); + mlib_ImageAffineEdgeZero(param, param_e); break; case MLIB_EDGE_OP_NEAREST: @@ -313,10 +275,10 @@ case MLIB_EDGE_SRC_EXTEND: if (filter == MLIB_BILINEAR) { - res = mlib_ImageAffineEdgeExtend_BL(param, param_e, colormap); + res = mlib_ImageAffineEdgeExtend_BL(param, param_e); } else { - res = mlib_ImageAffineEdgeExtend_BC(param, param_e, colormap); + res = mlib_ImageAffineEdgeExtend_BC(param, param_e); } break; @@ -355,7 +317,7 @@ return MLIB_FAILURE; } - return mlib_ImageAffine_alltypes(dst, src, mtx, filter, edge, NULL); + return mlib_ImageAffine_alltypes(dst, src, mtx, filter, edge); } /***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffine.h Fri May 13 11:31:05 2016 +0300 @@ -162,99 +162,25 @@ /***************************************************************/ void mlib_ImageAffineEdgeZero(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); + mlib_affine_param *param_e); void mlib_ImageAffineEdgeNearest(mlib_affine_param *param, mlib_affine_param *param_e); mlib_status mlib_ImageAffineEdgeExtend_BL(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); + mlib_affine_param *param_e); mlib_status mlib_ImageAffineEdgeExtend_BC(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); - -mlib_status mlib_ImageAffineEdgeExtend_BC2(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); - -/***************************************************************/ -typedef mlib_status (*type_affine_i_fun)(mlib_affine_param *param, const void *colormap); - -mlib_status mlib_ImageAffine_u8_u8_i_bl(mlib_affine_param *param, - const void *colormap); -mlib_status mlib_ImageAffine_u8_s16_i_bl(mlib_affine_param *param, - const void *colormap); -mlib_status mlib_ImageAffine_s16_u8_i_bl(mlib_affine_param *param, - const void *colormap); -mlib_status mlib_ImageAffine_s16_s16_i_bl(mlib_affine_param *param, - const void *colormap); - -mlib_status mlib_ImageAffine_u8_u8_i_bc(mlib_affine_param *param, - const void *colormap); -mlib_status mlib_ImageAffine_u8_s16_i_bc(mlib_affine_param *param, - const void *colormap); -mlib_status mlib_ImageAffine_s16_u8_i_bc(mlib_affine_param *param, - const void *colormap); -mlib_status mlib_ImageAffine_s16_s16_i_bc(mlib_affine_param *param, - const void *colormap); - -void mlib_ImageAffineEdgeZeroIndex(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); - -void mlib_ImageAffineEdgeExtendIndex_BL(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); - -void mlib_ImageAffineEdgeExtendIndex_BC(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); - -void mlib_ImageAffineEdgeExtendIndex_BC2(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap); - -/***************************************************************/ -#define PROT_AFFINEINDEX_BC(ITYPE, LTYPE, NCHAN) \ - mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BC(mlib_affine_param *param, \ - const void *colormap) - -PROT_AFFINEINDEX_BC(U8, U8, 3); -PROT_AFFINEINDEX_BC(U8, S16, 3); -PROT_AFFINEINDEX_BC(U8, U8, 4); -PROT_AFFINEINDEX_BC(U8, S16, 4); -PROT_AFFINEINDEX_BC(S16, U8, 3); -PROT_AFFINEINDEX_BC(S16, S16, 3); -PROT_AFFINEINDEX_BC(S16, U8, 4); -PROT_AFFINEINDEX_BC(S16, S16, 4); - -/***************************************************************/ -#define PROT_AFFINEINDEX_BL(ITYPE, LTYPE, NCHAN) \ - mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BL(mlib_affine_param *param, \ - const void *colormap) - -PROT_AFFINEINDEX_BL(U8, U8, 3); -PROT_AFFINEINDEX_BL(U8, S16, 3); -PROT_AFFINEINDEX_BL(U8, U8, 4); -PROT_AFFINEINDEX_BL(U8, S16, 4); -PROT_AFFINEINDEX_BL(S16, U8, 3); -PROT_AFFINEINDEX_BL(S16, S16, 3); -PROT_AFFINEINDEX_BL(S16, U8, 4); -PROT_AFFINEINDEX_BL(S16, S16, 4); + mlib_affine_param *param_e); /***************************************************************/ mlib_status mlib_ImageAffine_alltypes(mlib_image *dst, const mlib_image *src, const mlib_d64 *mtx, mlib_filter filter, - mlib_edge edge, - const void *colormap); + mlib_edge edge); /***************************************************************/ -extern const type_affine_i_fun mlib_AffineFunArr_bl_i[]; extern const type_affine_fun mlib_AffineFunArr_nn[]; extern const type_affine_fun mlib_AffineFunArr_bl[]; extern const type_affine_fun mlib_AffineFunArr_bc[]; diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffineEdge.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffineEdge.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageAffineEdge.c Fri May 13 11:31:05 2016 +0300 @@ -73,7 +73,6 @@ */ #include "mlib_image.h" -#include "mlib_ImageColormap.h" #include "mlib_ImageAffine.h" /***************************************************************/ @@ -218,97 +217,6 @@ } /***************************************************************/ -#define LUT(k, ind) plut[channels*sp[ind] + k] - -/***************************************************************/ -#define MLIB_EDGE_INDEX(ITYPE, DTYPE, size) \ - for (j = 0; j < size; j++) { \ - ySrc = ((Y - 32768) >> MLIB_SHIFT); \ - xSrc = ((X - 32768) >> MLIB_SHIFT); \ - \ - t = ((X - 32768) & MLIB_MASK) * scale; \ - u = ((Y - 32768) & MLIB_MASK) * scale; \ - \ - xDelta = (((xSrc + 1 - srcWidth )) >> MLIB_SIGN_SHIFT) & 1; \ - yDelta = (((ySrc + 1 - srcHeight)) >> MLIB_SIGN_SHIFT) & srcStride; \ - \ - xFlag = (xSrc >> (MLIB_SIGN_SHIFT - MLIB_SHIFT)); \ - xSrc = xSrc + (1 & xFlag); \ - xDelta = xDelta &~ xFlag; \ - \ - yFlag = (ySrc >> (MLIB_SIGN_SHIFT - MLIB_SHIFT)); \ - ySrc = ySrc + (1 & yFlag); \ - yDelta = yDelta &~ yFlag; \ - \ - sp = (ITYPE*)lineAddr[ySrc] + xSrc; \ - \ - for (k = 0; k < channels; k++) { \ - a00 = LUT(k, 0); \ - a01 = LUT(k, xDelta); \ - a10 = LUT(k, yDelta); \ - a11 = LUT(k, yDelta + xDelta); \ - pix0 = (a00 * (1 - t) + a01 * t) * (1 - u) + \ - (a10 * (1 - t) + a11 * t) * u; \ - \ - pbuff[k] = (mlib_s32)pix0; \ - } \ - pbuff += channels; \ - \ - X += dX; \ - Y += dY; \ - } - -/***************************************************************/ -#define MLIB_EDGE_INDEX_u8i(ITYPE, Left, Right) { \ - mlib_u8 *pbuff = buff; \ - \ - size = Right - Left; \ - \ - MLIB_EDGE_INDEX(ITYPE, mlib_u8, size); \ - \ - dp = (ITYPE*)data + Left; \ - \ - if (channels == 3) { \ - if (sizeof(ITYPE) == 1) { \ - mlib_ImageColorTrue2IndexLine_U8_U8_3 (buff, (void*)dp, size, colormap); \ - } else { \ - mlib_ImageColorTrue2IndexLine_U8_S16_3(buff, (void*)dp, size, colormap); \ - } \ - } else { \ - if (sizeof(ITYPE) == 1) { \ - mlib_ImageColorTrue2IndexLine_U8_U8_4 (buff, (void*)dp, size, colormap); \ - } else { \ - mlib_ImageColorTrue2IndexLine_U8_S16_4(buff, (void*)dp, size, colormap); \ - } \ - } \ -} - -/***************************************************************/ -#define MLIB_EDGE_INDEX_s16i(ITYPE, Left, Right) { \ - mlib_s16 *pbuff = buff; \ - \ - size = Right - Left; \ - \ - MLIB_EDGE_INDEX(ITYPE, mlib_s16, size); \ - \ - dp = (ITYPE*)data + Left; \ - \ - if (channels == 3) { \ - if (sizeof(ITYPE) == 1) { \ - mlib_ImageColorTrue2IndexLine_S16_U8_3 (buff, (void*)dp, size, colormap); \ - } else { \ - mlib_ImageColorTrue2IndexLine_S16_S16_3(buff, (void*)dp, size, colormap); \ - } \ - } else { \ - if (sizeof(ITYPE) == 1) { \ - mlib_ImageColorTrue2IndexLine_S16_U8_4 (buff, (void*)dp, size, colormap); \ - } else { \ - mlib_ImageColorTrue2IndexLine_S16_S16_4(buff, (void*)dp, size, colormap); \ - } \ - } \ -} - -/***************************************************************/ #define GET_FLT_TBL(X, xf0, xf1, xf2, xf3) \ filterpos = ((X - 32768) >> flt_shift) & flt_mask; \ fptr = (mlib_f32 *) ((mlib_u8 *)flt_tbl + filterpos); \ @@ -424,47 +332,6 @@ MLIB_EDGE_BC_LINE(TYPE, Left, Right, GET_FLT_BC2) /***************************************************************/ -#define MLIB_EDGE_INDEX_BC(ITYPE, DTYPE, size) \ - for (j = 0; j < size; j++) { \ - GET_FLT_TBL(X, xf0, xf1, xf2, xf3); \ - GET_FLT_TBL(Y, yf0, yf1, yf2, yf3); \ - \ - CALC_SRC_POS(X, Y, 1, srcStride); \ - \ - sp = (ITYPE*)lineAddr[ySrc] + xSrc; \ - \ - for (k = 0; k < channels; k++) { \ - c0 = LUT(k, yDelta0 + xDelta0) * xf0 + \ - LUT(k, yDelta0 ) * xf1 + \ - LUT(k, yDelta0 + xDelta1) * xf2 + \ - LUT(k, yDelta0 + xDelta2) * xf3; \ - \ - c1 = LUT(k, xDelta0) * xf0 + \ - LUT(k, 0 ) * xf1 + \ - LUT(k, xDelta1) * xf2 + \ - LUT(k, xDelta2) * xf3; \ - \ - c2 = LUT(k, yDelta1 + xDelta0) * xf0 + \ - LUT(k, yDelta1 ) * xf1 + \ - LUT(k, yDelta1 + xDelta1) * xf2 + \ - LUT(k, yDelta1 + xDelta2) * xf3; \ - \ - c3 = LUT(k, yDelta2 + xDelta0) * xf0 + \ - LUT(k, yDelta2 ) * xf1 + \ - LUT(k, yDelta2 + xDelta1) * xf2 + \ - LUT(k, yDelta2 + xDelta2) * xf3; \ - \ - val0 = c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3; \ - \ - SAT##DTYPE(pbuff[k], val0); \ - } \ - pbuff += channels; \ - \ - X += dX; \ - Y += dY; \ - } - -/***************************************************************/ #define MLIB_PROCESS_EDGES_ZERO(TYPE) { \ TYPE *dp, *dstLineEnd; \ \ @@ -586,16 +453,11 @@ /***************************************************************/ void mlib_ImageAffineEdgeZero(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap) + mlib_affine_param *param_e) { GET_EDGE_PARAMS_ZERO(); mlib_s32 zero = 0; - if (colormap != NULL) { - zero = mlib_ImageGetLutOffset(colormap); - } - switch (type) { case MLIB_BYTE: MLIB_PROCESS_EDGES_ZERO(mlib_u8); @@ -654,8 +516,7 @@ /***************************************************************/ mlib_status mlib_ImageAffineEdgeExtend_BL(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap) + mlib_affine_param *param_e) { GET_EDGE_PARAMS(); mlib_d64 scale = 1.0 / (mlib_d64) MLIB_PREC; @@ -663,79 +524,6 @@ mlib_d64 t, u, pix0; mlib_d64 a00, a01, a10, a11; - if (colormap != NULL) { - mlib_s32 max_xsize = param_e->max_xsize; - mlib_type ltype = mlib_ImageGetLutType(colormap); - mlib_d64 *plut = (mlib_d64 *) mlib_ImageGetLutDoubleData(colormap); - void *buff; - - channels = mlib_ImageGetLutChannels(colormap); - plut -= channels * mlib_ImageGetLutOffset(colormap); - - if (max_xsize == 0) { - return MLIB_SUCCESS; - } - - if (ltype == MLIB_BYTE) { - buff = mlib_malloc(channels * max_xsize); - } - else if (ltype == MLIB_SHORT) { - buff = mlib_malloc(channels * max_xsize * sizeof(mlib_s16)); - } else { - /* Unsupported type of lookup table. Report a failure */ - return MLIB_FAILURE; - } - - if (buff == NULL) - return MLIB_FAILURE; - - switch (ltype) { - case MLIB_BYTE: - switch (type) { - case MLIB_BYTE: - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_u8); - break; - - case MLIB_SHORT: - srcStride >>= 1; - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_s16); - break; - default: - /* Incompatible image type. Ignore it for now. */ - break; - } - - break; - - case MLIB_SHORT: - switch (type) { - case MLIB_BYTE: - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_u8); - break; - - case MLIB_SHORT: - srcStride >>= 1; - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_s16); - break; - default: - /* Incompatible image type. Ignore it for now. */ - break; - } - - break; - default: - /* Unsupported type of lookup table. - * Can not be here due to check on line 685, - * so just ignore it. - */ - break; - } - - mlib_free(buff); - - return MLIB_SUCCESS; - } - switch (type) { case MLIB_BYTE: MLIB_PROCESS_EDGES(MLIB_EDGE_BL, mlib_u8); @@ -775,12 +563,8 @@ } /***************************************************************/ -#undef MLIB_EDGE_INDEX -#define MLIB_EDGE_INDEX MLIB_EDGE_INDEX_BC - mlib_status mlib_ImageAffineEdgeExtend_BC(mlib_affine_param *param, - mlib_affine_param *param_e, - const void *colormap) + mlib_affine_param *param_e) { GET_EDGE_PARAMS(); mlib_d64 scale = 1.0 / (mlib_d64) MLIB_PREC; @@ -789,7 +573,6 @@ mlib_d64 xf0, xf1, xf2, xf3; mlib_d64 yf0, yf1, yf2, yf3; mlib_d64 c0, c1, c2, c3, val0; - mlib_type ltype; mlib_filter filter = param->filter; mlib_f32 *fptr; mlib_f32 const *flt_tbl; @@ -798,9 +581,7 @@ mlib_s32 yDelta0, yDelta1, yDelta2; mlib_d64 sat; - ltype = (colormap != NULL) ? mlib_ImageGetLutType(colormap) : type; - - if (ltype == MLIB_BYTE) { + if (type == MLIB_BYTE) { flt_shift = FLT_SHIFT_U8; flt_mask = FLT_MASK_U8; flt_tbl = (filter == MLIB_BICUBIC) ? mlib_filters_u8f_bc : mlib_filters_u8f_bc2; @@ -813,78 +594,6 @@ sat = (mlib_d64) 0x7FFF8000; /* saturation for U16 */ } - if (colormap != NULL) { - mlib_s32 max_xsize = param_e->max_xsize; - mlib_d64 *plut = (mlib_d64 *) mlib_ImageGetLutDoubleData(colormap); - void *buff; - - channels = mlib_ImageGetLutChannels(colormap); - plut -= channels * mlib_ImageGetLutOffset(colormap); - - if (max_xsize == 0) { - return MLIB_SUCCESS; - } - - if (ltype == MLIB_BYTE) { - buff = mlib_malloc(channels * max_xsize); - } - else if (ltype == MLIB_SHORT) { - buff = mlib_malloc(channels * max_xsize * sizeof(mlib_s16)); - } else { - /* Unsupported type of lookup table. */ - return MLIB_FAILURE; - } - - if (buff == NULL) - return MLIB_FAILURE; - - switch (ltype) { - case MLIB_BYTE: - switch (type) { - case MLIB_BYTE: - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_u8); - break; - - case MLIB_SHORT: - srcStride >>= 1; - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_u8i, mlib_s16); - break; - default: - /* Ignore incomatible image type. */ - break; - } - - break; - - case MLIB_SHORT: - switch (type) { - case MLIB_BYTE: - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_u8); - break; - - case MLIB_SHORT: - srcStride >>= 1; - MLIB_PROCESS_EDGES(MLIB_EDGE_INDEX_s16i, mlib_s16); - break; - default: - /* Ignore incomatible image type. */ - break; - } - - break; - - default: - /* Unsupported type of lookup table. - * Can not be here due to check on line 836, - * so just ignore it. - */ - break; - } - - mlib_free(buff); - - return MLIB_SUCCESS; - } switch (type) { case MLIB_BYTE: diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageCheck.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageCheck.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageCheck.h Fri May 13 11:31:05 2016 +0300 @@ -82,12 +82,6 @@ return MLIB_FAILURE; \ } -#define MLIB_IMAGE_AND_COLORMAP_ARE_COMPAT(image,colormap) \ - if ((mlib_ImageGetChannels(image) != mlib_ImageGetLutChannels(colormap)) \ - || (mlib_ImageGetLutType(colormap) != mlib_ImageGetType(image))) { \ - return MLIB_FAILURE; \ - } - #define MLIB_IMAGE_GET_ALL_PARAMS(image, type, nchan, width, height, stride, pdata) \ type = mlib_ImageGetType(image); \ nchan = mlib_ImageGetChannels(image); \ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColorTrue2Index.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColorTrue2Index.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4256 +0,0 @@ -/* - * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * FUNCTION - * mlib_ImageColorTrue2Index - convert a true color image to an indexed - * color image - * - * SYNOPSIS - * mlib_status mlib_ImageColorTrue2Index(mlib_image *dst, - * const mlib_image *src, - * const void *colormap) - * - * ARGUMENTS - * colormap Internal data structure for inverse color mapping. - * dst Pointer to destination image. - * src Pointer to source image. - * - * DESCRIPTION - * Convert a true color image to a pseudo color image with the method - * of finding the nearest matched lut entry for each pixel. - * - * The src can be an MLIB_BYTE or MLIB_SHORT image with 3 or 4 channels. - * The dst must be a 1-channel MLIB_BYTE or MLIB_SHORT image. - * - * The lut might have either 3 or 4 channels. The type of the lut can be - * one of the following: - * MLIB_BYTE in, MLIB_BYTE out (i.e., BYTE-to-BYTE) - * MLIB_BYTE in, MLIB_SHORT out (i.e., BYTE-to-SHORT) - * MLIB_SHORT in, MLIB_SHORT out (i.e., SHORT-to-SHORT) - * MLIB_SHORT in, MLIB_BYTE out (i.e., SHORT-to-BYTE) - * - * The src image and the lut must have same number of channels. - */ - -#include "mlib_image.h" -#include "mlib_ImageColormap.h" -#include "mlib_ImageCheck.h" - -/***************************************************************/ - -/*#define USE_VIS_CODE*/ - -#ifdef USE_VIS_CODE -#include "vis_proto.h" -#define VIS_ALIGNADDR(X, Y) vis_alignaddr((void *)(X), (Y)) -#endif - -/***************************************************************/ - -#define LUT_BYTE_COLORS_3CHANNELS 1000 -#define LUT_BYTE_COLORS_4CHANNELS 3000 -#define LUT_SHORT_COLORS_3CHANNELS 1000 -#define LUT_SHORT_COLORS_4CHANNELS 1000 - -/***************************************************************/ - -#define MAIN_COLORTRUE2INDEX_LOOP( FROM_TYPE, TO_TYPE, NCHANNELS ) \ - for( y = 0; y < height; y++ ) \ - { \ - mlib_ImageColorTrue2IndexLine_##FROM_TYPE##_##TO_TYPE##_##NCHANNELS( \ - sdata, ddata, width, colormap ); \ - \ - sdata += sstride; \ - ddata += dstride; \ - } - -/***************************************************************/ - -#define COLOR_CUBE_U8_3_SEARCH( TABLE_POINTER_TYPE, SHIFT, STEP ) \ -{ \ - const mlib_u8 *c0, *c1, *c2; \ - TABLE_POINTER_TYPE *table = s->table; \ - mlib_s32 bits = s->bits; \ - mlib_s32 nbits = 8 - bits; \ - mlib_s32 mask = ~( ( 1 << nbits ) - 1 ); \ - mlib_s32 j; \ - \ - c0 = src + SHIFT; \ - c1 = src + 1 + SHIFT; \ - c2 = src + 2 + SHIFT; \ - \ - switch( bits ) \ - { \ - case 1: \ - case 2: \ - { \ - mlib_s32 bits0 = 8 - bits; \ - mlib_s32 bits1 = bits0 - bits; \ - mlib_s32 bits2 = bits1 - bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) >> bits2 ) | \ - ( ( *c1 & mask ) >> bits1 ) | \ - ( ( *c2 & mask ) >> bits0 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 3: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 1 ) | \ - ( ( *c1 & mask ) >> 2 ) | \ - ( ( *c2 & mask ) >> 5 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 4: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 4 ) | \ - ( *c1 & mask ) | \ - ( ( *c2 & mask ) >> 4 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 5: \ - case 6: \ - case 7: \ - { \ - mlib_s32 bits0 = 8 - bits; \ - mlib_s32 bits1 = bits * 2 - 8; \ - mlib_s32 bits2 = bits1 + bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << bits2 ) | \ - ( ( *c1 & mask ) << bits1 ) | \ - ( ( *c2 & mask ) >> bits0 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 8: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 16 ) | \ - ( ( *c1 & mask ) << 8 ) | \ - ( *c2 & mask ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - } \ -} - -/***************************************************************/ -#define COLOR_CUBE_U8_4_SEARCH( TABLE_TYPE ) \ -{ \ - const mlib_u8 *c0, *c1, *c2, *c3; \ - TABLE_TYPE *table = s->table; \ - mlib_s32 bits = s->bits; \ - mlib_s32 nbits = 8 - bits; \ - mlib_s32 mask = ~( ( 1 << nbits ) - 1 ); \ - mlib_s32 j; \ - \ - c0 = src; \ - c1 = src + 1; \ - c2 = src + 2; \ - c3 = src + 3; \ - \ - switch( bits ) \ - { \ - case 1: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) >> 4 ) | \ - ( ( *c1 & mask ) >> 5 ) | \ - ( ( *c2 & mask ) >> 6 ) | \ - ( ( *c3 & mask ) >> 7 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 2: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( *c0 & mask ) | \ - ( ( *c1 & mask ) >> 2 ) | \ - ( ( *c2 & mask ) >> 4 ) | \ - ( ( *c3 & mask ) >> 6 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 3: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 4 ) | \ - ( ( *c1 & mask ) << 1 ) | \ - ( ( *c2 & mask ) >> 2 ) | \ - ( ( *c3 & mask ) >> 5 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 4: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 8 ) | \ - ( ( *c1 & mask ) << 4 ) | \ - ( *c2 & mask ) | \ - ( ( *c3 & mask ) >> 4 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 5: \ - case 6: \ - { \ - mlib_s32 bits3 = bits * 4 - 8; \ - mlib_s32 bits2 = bits3 - bits; \ - mlib_s32 bits1 = bits2 - bits; \ - mlib_s32 bits0 = 8 - bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << bits3 ) | \ - ( ( *c1 & mask ) << bits2 ) | \ - ( ( *c2 & mask ) << bits1 ) | \ - ( ( *c3 & mask ) >> bits0 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 7: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 20 ) | \ - ( ( *c1 & mask ) << 13 ) | \ - ( ( *c2 & mask ) << 6 ) | \ - ( ( *c3 & mask ) >> 1 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 8: /* will never be called */ \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 & mask ) << 24 ) | \ - ( ( *c1 & mask ) << 16 ) | \ - ( ( *c2 & mask ) << 8 ) | \ - ( *c3 & mask ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - } \ -} - -/***************************************************************/ -#define COLOR_CUBE_S16_3_SEARCH( TABLE_TYPE, SHIFT, STEP ) \ -{ \ - const mlib_s16 *c0, *c1, *c2; \ - mlib_s32 bits = s->bits; \ - mlib_s32 nbits = 16 - bits; \ - mlib_s32 mask = ~( ( 1 << nbits ) - 1 ); \ - TABLE_TYPE *table = s->table; \ - mlib_s32 j; \ - \ - c0 = src + SHIFT; \ - c1 = src + 1 + SHIFT; \ - c2 = src + 2 + SHIFT; \ - \ - switch( bits ) \ - { \ - case 1: \ - case 2: \ - case 3: \ - case 4: \ - case 5: \ - { \ - mlib_s32 bits0 = 16 - bits; \ - mlib_s32 bits1 = bits0 - bits; \ - mlib_s32 bits2 = bits1 - bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) >> bits2 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> bits1 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits0 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 6: \ - case 7: \ - { \ - mlib_s32 bits0 = 16 - bits; \ - mlib_s32 bits1 = bits0 - bits; \ - mlib_s32 bits2 = bits * 3 - 16; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << bits2 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> bits1 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits0 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 8: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << 8 ) | \ - ( ( *c1 - MLIB_S16_MIN ) & mask ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> 8 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - case 9: \ - case 10: \ - { \ - mlib_s32 bits0 = 16 - bits; \ - mlib_s32 bits1 = 2 * bits - 16; \ - mlib_s32 bits2 = bits1 + bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << bits2 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) << bits1 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits0 ) ]; \ - \ - c0 += STEP; \ - c1 += STEP; \ - c2 += STEP; \ - } \ - break; \ - } \ - /* Other cases may not be considered as the table size will be more \ - than 2^32 */ \ - } \ -} - -/***************************************************************/ -#define COLOR_CUBE_S16_4_SEARCH( TABLE_TYPE ) \ -{ \ - const mlib_s16 *c0, *c1, *c2, *c3; \ - TABLE_TYPE *table = s->table; \ - mlib_s32 bits = s->bits; \ - mlib_s32 nbits = 16 - bits; \ - mlib_s32 mask = ~( ( 1 << nbits ) - 1 ); \ - mlib_s32 j; \ - \ - c0 = src; \ - c1 = src + 1; \ - c2 = src + 2; \ - c3 = src + 3; \ - \ - switch( bits ) \ - { \ - case 1: \ - case 2: \ - case 3: \ - { \ - mlib_s32 bits0 = 16 - bits; \ - mlib_s32 bits1 = bits0 - bits; \ - mlib_s32 bits2 = bits1 - bits; \ - mlib_s32 bits3 = bits2 - bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) >> bits3 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> bits2 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits1 ) | \ - ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> bits0 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 4: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( *c0 - MLIB_S16_MIN ) & mask ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> 4 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> 8 ) | \ - ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> 12 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 5: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << 4 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) >> 1 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> 6 ) | \ - ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> 11 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 6: \ - case 7: \ - { \ - mlib_s32 bits0 = 16 - bits; \ - mlib_s32 bits1 = bits0 - bits; \ - mlib_s32 bits3 = bits * 4 - 16; \ - mlib_s32 bits2 = bits3 - bits; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << bits3 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) << bits2 ) | \ - ( ( ( *c2 - MLIB_S16_MIN ) & mask ) >> bits1 ) | \ - ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> bits0 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - case 8: \ - { \ - for( j = 0; j < length; j++ ) \ - { \ - dst[ j ] = table[ ( ( ( *c0 - MLIB_S16_MIN ) & mask ) << 16 ) | \ - ( ( ( *c1 - MLIB_S16_MIN ) & mask ) << 8 ) | \ - ( ( *c2 - MLIB_S16_MIN ) & mask ) | \ - ( ( ( *c3 - MLIB_S16_MIN ) & mask ) >> 8 ) ]; \ - \ - c0 += 4; \ - c1 += 4; \ - c2 += 4; \ - c3 += 4; \ - } \ - break; \ - } \ - /* Other cases may not be considered as the table size will be more \ - than 2^32 */ \ - } \ -} - -/***************************************************************/ -#define BINARY_TREE_SEARCH_RIGHT( POSITION, COLOR_MAX, SHIFT ) \ -{ \ - if( ( distance >= ( ( ( position[ POSITION ] + current_size - \ - c[ POSITION ] ) * ( position[ POSITION ] + current_size - \ - c[ POSITION ] ) ) >> SHIFT ) ) && \ - ( position[ POSITION ] + current_size != COLOR_MAX ) ) \ - continue_up = 1; \ -} - -/***************************************************************/ -#define BINARY_TREE_EXPLORE_RIGHT_3( POSITION, COLOR_MAX, IMAGE_TYPE, \ - FIRST_NEIBOUR, SECOND_NEIBOUR, SUBSTRACTION, SHIFT ) \ -{ \ - if( distance >= ( ( ( position[ POSITION ] + current_size - \ - c[ POSITION ] ) * ( position[ POSITION ] + \ - current_size - c[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - if( distance < ( ( ( COLOR_MAX - c[ POSITION ] ) * \ - ( COLOR_MAX - c[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - if( distance < ( ( ( position[ POSITION ] + \ - current_size * 2 - c[ POSITION ] ) * \ - ( position[ POSITION ] + current_size * 2 - \ - c[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - /* Check only a part of quadrant */ \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_corner += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] + current_size, pass - 1, POSITION ); \ - } \ - else /* Check whole quadrant */ \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_corner += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p ); \ - } \ - } \ - else /* Cell is on the edge of the space */ \ - { \ - if( position[ POSITION ] + current_size * 2 == \ - COLOR_MAX ) \ - { \ - /* Check only a part of quadrant */ \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_corner += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] + current_size, \ - pass - 1, POSITION ); \ - } \ - else /* Check whole quadrant */ \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_corner += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p ); \ - } \ - } \ - } \ -} - -/***************************************************************/ -#define BINARY_TREE_EXPLORE_RIGHT_4( POSITION, COLOR_MAX, IMAGE_TYPE, \ - FIRST_NEIBOUR, SECOND_NEIBOUR, THIRD_NEIBOUR, SUBSTRACTION, SHIFT ) \ -{ \ - if( distance >= ( ( ( position[ POSITION ] + current_size - \ - c[ POSITION ] ) * ( position[ POSITION ] + \ - current_size - c[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - if( distance < ( ( ( COLOR_MAX - c[ POSITION ] ) * \ - ( COLOR_MAX - c[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - if( distance < ( ( ( position[ POSITION ] + \ - current_size * 2 - c[ POSITION ] ) * \ - ( position[ POSITION ] + current_size * 2 - \ - c[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - /* Check only a part of quadrant */ \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_neibours[ THIRD_NEIBOUR ] += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] + current_size, pass - 1, POSITION ); \ - } \ - else /* Check whole quadrant */ \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_neibours[ THIRD_NEIBOUR ] += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \ - } \ - } \ - else /* Cell is on the edge of the space */ \ - { \ - if( position[ POSITION ] + current_size * 2 == \ - COLOR_MAX ) \ - { \ - /* Check only a part of quadrant */ \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_neibours[ THIRD_NEIBOUR ] += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_left_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] + current_size, \ - pass - 1, POSITION ); \ - } \ - else /* Check whole quadrant */ \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_neibours[ THIRD_NEIBOUR ] += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \ - } \ - } \ - } \ -} - -/***************************************************************/ -#define BINARY_TREE_SEARCH_LEFT( POSITION, SHIFT ) \ -{ \ - if( ( distance > ( ( ( position[ POSITION ] - c[ POSITION ] ) * \ - ( position[ POSITION ] - c[ POSITION ] ) ) >> SHIFT ) ) && \ - position[ POSITION ] ) \ - continue_up = 1; \ -} - -/***************************************************************/ -#define BINARY_TREE_EXPLORE_LEFT_3( POSITION, IMAGE_TYPE, \ - FIRST_NEIBOUR, SECOND_NEIBOUR, SUBSTRACTION, SHIFT ) \ -{ \ - if( distance > \ - ( ( ( c[ POSITION ] - position[ POSITION ] ) * \ - ( c[ POSITION ] - position[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - if( distance <= ( ( c[ POSITION ] * c[ POSITION ] ) >> SHIFT ) ) \ - { \ - if( distance <= ( ( ( c[ POSITION ] + current_size - \ - position[ POSITION ] ) * \ - ( c[ POSITION ] + current_size - \ - position[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_corner += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] - current_size, pass - 1, POSITION ); \ - } \ - else /* Check whole quadrant */ \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_corner += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p ); \ - } \ - } \ - else \ - { \ - if( !( position[ POSITION ] - current_size ) ) \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_corner += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] - current_size, pass - 1, POSITION ); \ - } \ - else \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_corner += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_3( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], p ); \ - } \ - } \ - } \ -} - -/***************************************************************/ -#define BINARY_TREE_EXPLORE_LEFT_4( POSITION, IMAGE_TYPE, \ - FIRST_NEIBOUR, SECOND_NEIBOUR, THIRD_NEIBOUR, SUBSTRACTION, SHIFT ) \ -{ \ - if( distance > \ - ( ( ( c[ POSITION ] - position[ POSITION ] ) * \ - ( c[ POSITION ] - position[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - if( distance <= ( ( c[ POSITION ] * c[ POSITION ] ) >> SHIFT ) ) \ - { \ - if( distance <= ( ( ( c[ POSITION ] + current_size - \ - position[ POSITION ] ) * \ - ( c[ POSITION ] + current_size - \ - position[ POSITION ] ) ) >> SHIFT ) ) \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_neibours[ THIRD_NEIBOUR ] += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] - current_size, pass - 1, POSITION ); \ - } \ - else /* Check whole quadrant */ \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_neibours[ THIRD_NEIBOUR ] += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \ - } \ - } \ - else \ - { \ - if( !( position[ POSITION ] - current_size ) ) \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 1; \ - check_neibours[ SECOND_NEIBOUR ] += 1; \ - check_neibours[ THIRD_NEIBOUR ] += 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Only a part of quadrant needs checking */ \ - distance = \ - mlib_search_quadrant_part_to_right_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c, p, \ - position[ POSITION ] - current_size, pass - 1, POSITION ); \ - } \ - else \ - { \ - mlib_s32 qq = q ^ ( 1 << POSITION ); \ - \ - check_neibours[ FIRST_NEIBOUR ] += 2; \ - check_neibours[ SECOND_NEIBOUR ] += 2; \ - check_neibours[ THIRD_NEIBOUR ] += 2; \ - continue_up = 1; \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. \ - Check the distance */ \ - mlib_s32 new_found_color = \ - node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ] - SUBSTRACTION, c[ 1 ], \ - p[ 1 ][ new_found_color ] - SUBSTRACTION, c[ 2 ], \ - p[ 2 ][ new_found_color ] - SUBSTRACTION, c[ 3 ], \ - p[ 3 ][ new_found_color ] - SUBSTRACTION, SHIFT ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it */ \ - distance = mlib_search_quadrant_##IMAGE_TYPE##_4( \ - node->contents.quadrants[ qq ], \ - distance, &found_color, c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \ - } \ - } \ - } \ -} - -/***************************************************************/ -#define CHECK_QUADRANT_U8_3( qq ) \ -{ \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. Check the distance */ \ - mlib_s32 new_found_color = node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_3( c[ 0 ], \ - p[ 0 ][ new_found_color ], c[ 1 ], \ - p[ 1 ][ new_found_color ], c[ 2 ], \ - p[ 2 ][ new_found_color ], 0 ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it all */ \ - distance = mlib_search_quadrant_U8_3( \ - node->contents.quadrants[ qq ], distance, &found_color, \ - c[ 0 ], c[ 1 ], c[ 2 ], p ); \ -/* Else there is just an empty cell */ \ -} - -/***************************************************************/ -#define CHECK_QUADRANT_S16_3( qq ) \ -{ \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. Check the distance */ \ - mlib_s32 new_found_color = node->contents.index[ qq ]; \ - mlib_u32 palc0, palc1, palc2, newdistance; \ - \ - palc0 = p[ 0 ][ new_found_color ] - MLIB_S16_MIN; \ - palc1 = p[ 1 ][ new_found_color ] - MLIB_S16_MIN; \ - palc2 = p[ 2 ][ new_found_color ] - MLIB_S16_MIN; \ - \ - newdistance = FIND_DISTANCE_3( c[ 0 ], palc0, \ - c[ 1 ], palc1, \ - c[ 2 ], palc2, 2 ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it all */ \ - distance = mlib_search_quadrant_S16_3( \ - node->contents.quadrants[ qq ], distance, &found_color, \ - c[ 0 ], c[ 1 ], c[ 2 ], p ); \ -/* Else there is just an empty cell */ \ -} - -/***************************************************************/ -#define BINARY_TREE_SEARCH_3( SOURCE_IMAGE, POINTER_TYPE, BITS, \ - COLOR_MAX, SUBTRACTION, POINTER_SHIFT, STEP, SHIFT ) \ -{ \ - const POINTER_TYPE *channels[ 3 ], *p[ 3 ]; \ - mlib_u32 c[ 3 ]; \ - mlib_s32 j; \ - \ - p[ 0 ] = s->lut[ 0 ]; \ - p[ 1 ] = s->lut[ 1 ]; \ - p[ 2 ] = s->lut[ 2 ]; \ - channels[ 0 ] = src + POINTER_SHIFT; \ - channels[ 1 ] = src + 1 + POINTER_SHIFT; \ - channels[ 2 ] = src + 2 + POINTER_SHIFT; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - mlib_s32 pass = BITS - 1; \ - mlib_u32 position[ 3 ] = { 0, 0, 0 }; \ - mlib_s32 we_found_it = 0; \ - struct lut_node_3 *node = s->table; \ - /* Stack pointer pointers to the first free element of stack. */ \ - /* The node we are in is in the `node' */ \ - struct \ - { \ - struct lut_node_3 *node; \ - mlib_s32 q; \ - } stack[ BITS ]; \ - mlib_s32 stack_pointer = 0; \ - \ - c[ 0 ] = *channels[ 0 ] - SUBTRACTION; \ - c[ 1 ] = *channels[ 1 ] - SUBTRACTION; \ - c[ 2 ] = *channels[ 2 ] - SUBTRACTION; \ - \ - do \ - { \ - mlib_s32 q; \ - mlib_u32 current_size = 1 << pass; \ - \ - q = ( ( c[ 0 ] >> pass ) & 1 ) | \ - ( ( ( c[ 1 ] << 1 ) >> pass ) & 2 ) | \ - ( ( ( c[ 2 ] << 2 ) >> pass ) & 4 ); \ - \ - position[ 0 ] |= c[ 0 ] & current_size; \ - position[ 1 ] |= c[ 1 ] & current_size; \ - position[ 2 ] |= c[ 2 ] & current_size; \ - \ - if( node->tag & ( 1 << q ) ) \ - { \ - /* \ - Here is a cell with one color. We need to be sure it's \ - the one that is the closest to our color \ - */ \ - mlib_s32 palindex = node->contents.index[ q ]; \ - mlib_u32 palc[ 3 ]; \ - mlib_s32 identical; \ - \ - palc[ 0 ] = p[ 0 ][ palindex ] - SUBTRACTION; \ - palc[ 1 ] = p[ 1 ][ palindex ] - SUBTRACTION; \ - palc[ 2 ] = p[ 2 ][ palindex ] - SUBTRACTION; \ - \ - identical = ( palc[ 0 ] - c[ 0 ] ) | ( palc[ 1 ] - c[ 1 ] ) | \ - ( palc[ 2 ] - c[ 2 ] ); \ - \ - if( !identical || BITS - pass == bits ) \ - { \ - /* Oh, here it is :) */ \ - dst[ j ] = palindex + s->offset; \ - we_found_it = 1; \ - } \ - else \ - { \ - mlib_u32 distance; \ - /* First index is the channel, second is the number of the \ - side */ \ - mlib_s32 found_color; \ - mlib_s32 continue_up; \ - \ - distance = FIND_DISTANCE_3( c[ 0 ], palc[ 0 ], \ - c[ 1 ], palc[ 1 ], c[ 2 ], palc[ 2 ], SHIFT ); \ - found_color = palindex; \ - \ - do \ - { \ - mlib_s32 check_corner; \ - \ - /* \ - Neibours are enumerated in a cicle: \ - 0 - between quadrants 0 and 1, \ - 1 - between quadrants 1 and 2 and \ - 2 - between quadrants 2 and 0 \ - */ \ - mlib_s32 check_neibours[ 3 ]; \ - \ - /* \ - Others are three two neibour quadrants \ - \ - Side number is [ ][ ], e.g. 2 is 0..010b, so the sides it has \ - near are: \ - [ 0 (coordinate number) ][ 0 (bit 0 in the number) ] \ - [ 1 (coordinate number) ][ 1 (bit 1 in the number) ] \ - \ - Now we can look in the three nearest quadrants. Do \ - we really need it ? Check it. \ - */ \ - \ - check_corner = check_neibours[ 0 ] = check_neibours[ 1 ] = \ - check_neibours[ 2 ] = 0; \ - continue_up = 0; \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_3( 0, SOURCE_IMAGE, 2, 0, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_3( 0, COLOR_MAX, SOURCE_IMAGE, 2, 0, \ - SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_3( 1, SOURCE_IMAGE, 0, 1, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_3( 1, COLOR_MAX, SOURCE_IMAGE, 0, 1, \ - SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_3( 2, SOURCE_IMAGE, 1, 2, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_3( 2, COLOR_MAX, SOURCE_IMAGE, 1, 2, \ - SUBTRACTION, SHIFT ); \ - } \ - \ - if( check_neibours[ 0 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 3; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( check_neibours[ 1 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 6; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( check_neibours[ 2 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 5; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( check_corner >= 3 ) \ - { \ - mlib_s32 qq = q ^ 7; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 0, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 1, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 2, SHIFT ); \ - } \ - \ - position[ 0 ] &= ~( c[ 0 ] & current_size ); \ - position[ 1 ] &= ~( c[ 1 ] & current_size ); \ - position[ 2 ] &= ~( c[ 2 ] & current_size ); \ - \ - current_size <<= 1; \ - \ - pass++; \ - \ - stack_pointer--; \ - q = stack[ stack_pointer ].q; \ - node = stack[ stack_pointer ].node; \ - } while( continue_up ); \ - \ - dst[ j ] = found_color + s->offset; \ - \ - we_found_it = 1; \ - } \ - } \ - else if( node->contents.quadrants[ q ] ) \ - { \ - /* Descend one level */ \ - stack[ stack_pointer ].node = node; \ - stack[ stack_pointer++ ].q = q; \ - node = node->contents.quadrants[ q ]; \ - } \ - else \ - { \ - /* Found the empty quadrant. Look around */ \ - mlib_u32 distance = MLIB_U32_MAX; \ - mlib_s32 found_color; \ - mlib_s32 continue_up; \ - \ - /* \ - As we had come to this level, it is warranted that there \ - are other points on this level near the empty quadrant \ - */ \ - do \ - { \ - mlib_s32 check_corner; \ - mlib_s32 check_neibours[ 3 ]; \ - \ - check_corner = check_neibours[ 0 ] = check_neibours[ 1 ] = \ - check_neibours[ 2 ] = 0; \ - continue_up = 0; \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_3( 0, SOURCE_IMAGE, 2, 0, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_3( 0, COLOR_MAX, SOURCE_IMAGE, 2, 0, \ - SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_3( 1, SOURCE_IMAGE, 0, 1, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_3( 1, COLOR_MAX, SOURCE_IMAGE, 0, 1, \ - SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_3( 2, SOURCE_IMAGE, 1, 2, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_3( 2, COLOR_MAX, SOURCE_IMAGE, 1, 2, \ - SUBTRACTION, SHIFT ); \ - } \ - \ - if( check_neibours[ 0 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 3; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( check_neibours[ 1 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 6; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( check_neibours[ 2 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 5; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( check_corner >= 3 ) \ - { \ - mlib_s32 qq = q ^ 7; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_3( qq ); \ - } \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 0, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 1, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 2, SHIFT ); \ - } \ - \ - position[ 0 ] &= ~( c[ 0 ] & current_size ); \ - position[ 1 ] &= ~( c[ 1 ] & current_size ); \ - position[ 2 ] &= ~( c[ 2 ] & current_size ); \ - \ - current_size <<= 1; \ - \ - pass++; \ - \ - stack_pointer--; \ - q = stack[ stack_pointer ].q; \ - node = stack[ stack_pointer ].node; \ - } while( continue_up ); \ - \ - dst[ j ] = found_color + s->offset; \ - we_found_it = 1; \ - } \ - \ - pass--; \ - \ - } while( !we_found_it ); \ - \ - channels[ 0 ] += STEP; \ - channels[ 1 ] += STEP; \ - channels[ 2 ] += STEP; \ - } \ -} - -/***************************************************************/ -#define CHECK_QUADRANT_U8_4( qq ) \ -{ \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. Check the distance */ \ - mlib_s32 new_found_color = node->contents.index[ qq ]; \ - mlib_u32 newdistance = FIND_DISTANCE_4( c[ 0 ], \ - p[ 0 ][ new_found_color ], c[ 1 ], \ - p[ 1 ][ new_found_color ], c[ 2 ], \ - p[ 2 ][ new_found_color ], c[ 3 ], \ - p[ 3 ][ new_found_color ], 0 ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it all */ \ - distance = mlib_search_quadrant_U8_4( \ - node->contents.quadrants[ qq ], distance, &found_color, \ - c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \ -/* Else there is just an empty cell */ \ -} - -/***************************************************************/ -#define CHECK_QUADRANT_S16_4( qq ) \ -{ \ - if( node->tag & ( 1 << qq ) ) \ - { \ - /* Here is another color cell. Check the distance */ \ - mlib_s32 new_found_color = node->contents.index[ qq ]; \ - mlib_u32 palc0, palc1, palc2, palc3, newdistance; \ - \ - palc0 = p[ 0 ][ new_found_color ] - MLIB_S16_MIN; \ - palc1 = p[ 1 ][ new_found_color ] - MLIB_S16_MIN; \ - palc2 = p[ 2 ][ new_found_color ] - MLIB_S16_MIN; \ - palc3 = p[ 3 ][ new_found_color ] - MLIB_S16_MIN; \ - \ - newdistance = FIND_DISTANCE_4( c[ 0 ], palc0, \ - c[ 1 ], palc1, \ - c[ 2 ], palc2, \ - c[ 3 ], palc3, 2 ); \ - \ - if( newdistance < distance ) \ - { \ - found_color = new_found_color; \ - distance = newdistance; \ - } \ - } \ - else if( node->contents.quadrants[ qq ] ) \ - /* Here is a full node. Just explore it all */ \ - distance = mlib_search_quadrant_S16_4( \ - node->contents.quadrants[ qq ], distance, &found_color, \ - c[ 0 ], c[ 1 ], c[ 2 ], c[ 3 ], p ); \ -/* Else there is just an empty cell */ \ -} - -/***************************************************************/ -#define BINARY_TREE_SEARCH_4( SOURCE_IMAGE, POINTER_TYPE, BITS, \ - COLOR_MAX, SUBTRACTION, SHIFT ) \ -{ \ - const POINTER_TYPE *channels[ 4 ], *p[ 4 ]; \ - mlib_u32 c[ 4 ]; \ - mlib_s32 j; \ - \ - p[ 0 ] = s->lut[ 0 ]; \ - p[ 1 ] = s->lut[ 1 ]; \ - p[ 2 ] = s->lut[ 2 ]; \ - p[ 3 ] = s->lut[ 3 ]; \ - channels[ 0 ] = src; \ - channels[ 1 ] = src + 1; \ - channels[ 2 ] = src + 2; \ - channels[ 3 ] = src + 3; \ - \ - for( j = 0; j < length; j++ ) \ - { \ - mlib_s32 pass = BITS - 1; \ - mlib_u32 position[ 4 ] = { 0, 0, 0, 0 }; \ - mlib_s32 we_found_it = 0; \ - struct lut_node_4 *node = s->table; \ - /* Stack pointer pointers to the first free element of stack. */ \ - /* The node we are in is in the `node' */ \ - struct \ - { \ - struct lut_node_4 *node; \ - mlib_s32 q; \ - } stack[ BITS ]; \ - mlib_s32 stack_pointer = 0; \ - \ - c[ 0 ] = *channels[ 0 ] - SUBTRACTION; \ - c[ 1 ] = *channels[ 1 ] - SUBTRACTION; \ - c[ 2 ] = *channels[ 2 ] - SUBTRACTION; \ - c[ 3 ] = *channels[ 3 ] - SUBTRACTION; \ - \ - do \ - { \ - mlib_s32 q; \ - mlib_u32 current_size = 1 << pass; \ - \ - q = ( ( c[ 0 ] >> pass ) & 1 ) | \ - ( ( ( c[ 1 ] << 1 ) >> pass ) & 2 ) | \ - ( ( ( c[ 2 ] << 2 ) >> pass ) & 4 ) | \ - ( ( ( c[ 3 ] << 3 ) >> pass ) & 8 ); \ - \ - position[ 0 ] |= c[ 0 ] & current_size; \ - position[ 1 ] |= c[ 1 ] & current_size; \ - position[ 2 ] |= c[ 2 ] & current_size; \ - position[ 3 ] |= c[ 3 ] & current_size; \ - \ - if( node->tag & ( 1 << q ) ) \ - { \ - /* \ - Here is a cell with one color. We need to be sure it's \ - the one that is the closest to our color \ - */ \ - mlib_s32 palindex = node->contents.index[ q ]; \ - mlib_u32 palc[ 4 ]; \ - mlib_s32 identical; \ - \ - palc[ 0 ] = p[ 0 ][ palindex ] - SUBTRACTION; \ - palc[ 1 ] = p[ 1 ][ palindex ] - SUBTRACTION; \ - palc[ 2 ] = p[ 2 ][ palindex ] - SUBTRACTION; \ - palc[ 3 ] = p[ 3 ][ palindex ] - SUBTRACTION; \ - \ - identical = ( palc[ 0 ] - c[ 0 ] ) | ( palc[ 1 ] - c[ 1 ] ) | \ - ( palc[ 2 ] - c[ 2 ] ) | ( palc[ 3 ] - c[ 3 ] ); \ - \ - if( !identical || BITS - pass == bits ) \ - { \ - /* Oh, here it is :) */ \ - dst[ j ] = palindex + s->offset; \ - we_found_it = 1; \ - } \ - else \ - { \ - mlib_u32 distance; \ - /* First index is the channel, second is the number of the \ - side */ \ - mlib_s32 found_color; \ - mlib_s32 continue_up; \ - \ - distance = FIND_DISTANCE_4( c[ 0 ], palc[ 0 ], \ - c[ 1 ], palc[ 1 ], c[ 2 ], palc[ 2 ], c[ 3 ], palc[ 3 ], SHIFT ); \ - found_color = palindex; \ - \ - do \ - { \ - mlib_s32 check_corner; \ - mlib_s32 check_neibours[ 6 ]; \ - mlib_s32 check_far_neibours[ 4 ]; \ - \ - /* \ - Check neibours: quadrants that are different by 2 bits \ - from the quadrant, that we are in: \ - 3 - 0 \ - 5 - 1 \ - 6 - 2 \ - 9 - 3 \ - 10 - 4 \ - 12 - 5 \ - Far quadrants: different by 3 bits: \ - 7 - 0 \ - 11 - 1 \ - 13 - 2 \ - 14 - 3 \ - */ \ - \ - check_neibours[ 0 ] = check_neibours[ 1 ] = \ - check_neibours[ 2 ] = check_neibours[ 3 ] = \ - check_neibours[ 4 ] = check_neibours[ 5 ] = 0; \ - continue_up = 0; \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 0, SOURCE_IMAGE, 0, 1, 3, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 0, COLOR_MAX, SOURCE_IMAGE, \ - 0, 1, 3, SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 1, SOURCE_IMAGE, 0, 2, 4, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 1, COLOR_MAX, SOURCE_IMAGE, \ - 0, 2, 4, SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 2, SOURCE_IMAGE, 1, 2, 5, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 2, COLOR_MAX, SOURCE_IMAGE, \ - 1, 2, 5, SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 8 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 3, SOURCE_IMAGE, 3, 4, 5, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 3, COLOR_MAX, SOURCE_IMAGE, \ - 3, 4, 5, SUBTRACTION, SHIFT ); \ - } \ - \ - check_far_neibours[ 0 ] = check_neibours[ 0 ] + \ - check_neibours[ 1 ] + check_neibours[ 2 ]; \ - check_far_neibours[ 1 ] = check_neibours[ 0 ] + \ - check_neibours[ 3 ] + check_neibours[ 4 ]; \ - check_far_neibours[ 2 ] = check_neibours[ 1 ] + \ - check_neibours[ 3 ] + check_neibours[ 5 ]; \ - check_far_neibours[ 3 ] = check_neibours[ 2 ] + \ - check_neibours[ 4 ] + check_neibours[ 5 ]; \ - \ - check_corner = check_far_neibours[ 0 ] + \ - check_far_neibours[ 1 ] + \ - check_far_neibours[ 2 ] + \ - check_far_neibours[ 3 ]; \ - \ - if( check_neibours[ 0 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 3; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 1 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 5; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 2 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 6; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 3 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 9; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 4 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 10; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 5 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 12; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 0 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 7; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 1 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 11; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 2 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 13; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 3 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 14; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_corner >= 4 ) \ - { \ - mlib_s32 qq = q ^ 15; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 0, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 1, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 2, SHIFT ); \ - } \ - \ - if( q & 8 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 3, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 3, SHIFT ); \ - } \ - \ - position[ 0 ] &= ~( c[ 0 ] & current_size ); \ - position[ 1 ] &= ~( c[ 1 ] & current_size ); \ - position[ 2 ] &= ~( c[ 2 ] & current_size ); \ - position[ 3 ] &= ~( c[ 3 ] & current_size ); \ - \ - current_size <<= 1; \ - \ - pass++; \ - \ - stack_pointer--; \ - q = stack[ stack_pointer ].q; \ - node = stack[ stack_pointer ].node; \ - } while( continue_up ); \ - \ - dst[ j ] = found_color + s->offset; \ - we_found_it = 1; \ - } \ - } \ - else if( node->contents.quadrants[ q ] ) \ - { \ - /* Descend one level */ \ - stack[ stack_pointer ].node = node; \ - stack[ stack_pointer++ ].q = q; \ - node = node->contents.quadrants[ q ]; \ - } \ - else \ - { \ - /* Found the empty quadrant. Look around */ \ - mlib_u32 distance = MLIB_U32_MAX; \ - mlib_s32 found_color; \ - mlib_s32 continue_up; \ - \ - /* \ - As we had come to this level, it is warranted that there \ - are other points on this level near the empty quadrant \ - */ \ - do \ - { \ - mlib_s32 check_corner; \ - mlib_s32 check_neibours[ 6 ]; \ - mlib_s32 check_far_neibours[ 4 ]; \ - \ - /* \ - Check neibours: quadrants that are different by 2 bits \ - from the quadrant, that we are in: \ - 3 - 0 \ - 5 - 1 \ - 6 - 2 \ - 9 - 3 \ - 10 - 4 \ - 12 - 5 \ - Far quadrants: different by 3 bits: \ - 7 - 0 \ - 11 - 1 \ - 13 - 2 \ - 14 - 3 \ - */ \ - \ - check_neibours[ 0 ] = check_neibours[ 1 ] = \ - check_neibours[ 2 ] = check_neibours[ 3 ] = \ - check_neibours[ 4 ] = check_neibours[ 5 ] = 0; \ - continue_up = 0; \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 0, SOURCE_IMAGE, 0, 1, 3, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 0, COLOR_MAX, SOURCE_IMAGE, \ - 0, 1, 3, SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 1, SOURCE_IMAGE, 0, 2, 4, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 1, COLOR_MAX, SOURCE_IMAGE, \ - 0, 2, 4, SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 2, SOURCE_IMAGE, 1, 2, 5, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 2, COLOR_MAX, SOURCE_IMAGE, \ - 1, 2, 5, SUBTRACTION, SHIFT ); \ - } \ - \ - if( q & 8 ) \ - { \ - BINARY_TREE_EXPLORE_LEFT_4( 3, SOURCE_IMAGE, 3, 4, 5, \ - SUBTRACTION, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_EXPLORE_RIGHT_4( 3, COLOR_MAX, SOURCE_IMAGE, \ - 3, 4, 5, SUBTRACTION, SHIFT ); \ - } \ - \ - check_far_neibours[ 0 ] = check_neibours[ 0 ] + \ - check_neibours[ 1 ] + check_neibours[ 2 ]; \ - check_far_neibours[ 1 ] = check_neibours[ 0 ] + \ - check_neibours[ 3 ] + check_neibours[ 4 ]; \ - check_far_neibours[ 2 ] = check_neibours[ 1 ] + \ - check_neibours[ 3 ] + check_neibours[ 5 ]; \ - check_far_neibours[ 3 ] = check_neibours[ 2 ] + \ - check_neibours[ 4 ] + check_neibours[ 5 ]; \ - \ - check_corner = check_far_neibours[ 0 ] + \ - check_far_neibours[ 1 ] + \ - check_far_neibours[ 2 ] + \ - check_far_neibours[ 3 ]; \ - \ - if( check_neibours[ 0 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 3; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 1 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 5; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 2 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 6; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 3 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 9; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 4 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 10; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_neibours[ 5 ] >= 2 ) \ - { \ - mlib_s32 qq = q ^ 12; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 0 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 7; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 1 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 11; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 2 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 13; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_far_neibours[ 3 ] >= 3 ) \ - { \ - mlib_s32 qq = q ^ 14; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( check_corner >= 4 ) \ - { \ - mlib_s32 qq = q ^ 15; \ - CHECK_QUADRANT_##SOURCE_IMAGE##_4( qq ); \ - } \ - \ - if( q & 1 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 0, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 0, SHIFT ); \ - } \ - \ - if( q & 2 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 1, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 1, SHIFT ); \ - } \ - \ - if( q & 4 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 2, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 2, SHIFT ); \ - } \ - \ - if( q & 8 ) \ - { \ - BINARY_TREE_SEARCH_RIGHT( 3, COLOR_MAX, SHIFT ); \ - } \ - else \ - { \ - BINARY_TREE_SEARCH_LEFT( 3, SHIFT ); \ - } \ - \ - position[ 0 ] &= ~( c[ 0 ] & current_size ); \ - position[ 1 ] &= ~( c[ 1 ] & current_size ); \ - position[ 2 ] &= ~( c[ 2 ] & current_size ); \ - position[ 3 ] &= ~( c[ 3 ] & current_size ); \ - \ - current_size <<= 1; \ - \ - pass++; \ - \ - stack_pointer--; \ - q = stack[ stack_pointer ].q; \ - node = stack[ stack_pointer ].node; \ - } while( continue_up ); \ - \ - dst[ j ] = found_color + s->offset; \ - we_found_it = 1; \ - } \ - \ - pass--; \ - \ - } while( !we_found_it ); \ - \ - channels[ 0 ] += 4; \ - channels[ 1 ] += 4; \ - channels[ 2 ] += 4; \ - channels[ 3 ] += 4; \ - } \ -} - -/***************************************************************/ -#define FIND_NEAREST_U8_3_C( SHIFT, STEP ) \ - mlib_s32 i, k, k_min, min_dist, diff, mask; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s -> lutlength; \ - mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s ); \ - mlib_d64 col0, col1, col2; \ - mlib_d64 dist, len0, len1, len2; \ - \ - for ( i = 0; i < length; i++ ) { \ - col0 = src[ STEP * i + SHIFT ]; \ - col1 = src[ STEP * i + 1 + SHIFT ]; \ - col2 = src[ STEP * i + 2 + SHIFT ]; \ - min_dist = MLIB_S32_MAX; \ - k_min = 1; \ - len0 = double_lut[ 0 ] - col0; \ - len1 = double_lut[ 1 ] - col1; \ - len2 = double_lut[ 2 ] - col2; \ - \ - for ( k = 1; k <= entries; k++ ) { \ - dist = len0 * len0; \ - len0 = double_lut[ 3 * k ] - col0; \ - dist += len1 * len1; \ - len1 = double_lut[ 3 * k + 1 ] - col1; \ - dist += len2 * len2; \ - len2 = double_lut[ 3 * k + 2 ] - col2; \ - diff = ( mlib_s32 )dist - min_dist; \ - mask = diff >> 31; \ - min_dist += diff & mask; \ - k_min += ( k - k_min ) & mask; \ - } \ - \ - dst[ i ] = k_min + offset; \ - } - -/***************************************************************/ -#define FIND_NEAREST_U8_4_C \ - mlib_s32 i, k, k_min, min_dist, diff, mask; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s -> lutlength; \ - mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s ); \ - mlib_d64 col0, col1, col2, col3; \ - mlib_d64 dist, len0, len1, len2, len3; \ - \ - for ( i = 0; i < length; i++ ) { \ - col0 = src[ 4 * i ]; \ - col1 = src[ 4 * i + 1 ]; \ - col2 = src[ 4 * i + 2 ]; \ - col3 = src[ 4 * i + 3 ]; \ - min_dist = MLIB_S32_MAX; \ - k_min = 1; \ - len0 = double_lut[ 0 ] - col0; \ - len1 = double_lut[ 1 ] - col1; \ - len2 = double_lut[ 2 ] - col2; \ - len3 = double_lut[ 3 ] - col3; \ - \ - for ( k = 1; k <= entries; k++ ) { \ - dist = len0 * len0; \ - len0 = double_lut[ 4 * k ] - col0; \ - dist += len1 * len1; \ - len1 = double_lut[ 4 * k + 1 ] - col1; \ - dist += len2 * len2; \ - len2 = double_lut[ 4 * k + 2 ] - col2; \ - dist += len3 * len3; \ - len3 = double_lut[ 4 * k + 3 ] - col3; \ - diff = ( mlib_s32 )dist - min_dist; \ - mask = diff >> 31; \ - min_dist += diff & mask; \ - k_min += ( k - k_min ) & mask; \ - } \ - \ - dst[ i ] = k_min + offset; \ - } - -/***************************************************************/ -#define FSQR_S16_HI(dsrc) \ - vis_fpadd32( vis_fmuld8ulx16( vis_read_hi( dsrc ), vis_read_hi( dsrc ) ), \ - vis_fmuld8sux16( vis_read_hi( dsrc ), vis_read_hi( dsrc ) ) ) - -/***************************************************************/ -#define FSQR_S16_LO(dsrc) \ - vis_fpadd32( vis_fmuld8ulx16( vis_read_lo( dsrc ), vis_read_lo( dsrc) ), \ - vis_fmuld8sux16( vis_read_lo( dsrc ), vis_read_lo( dsrc ) ) ) - -/***************************************************************/ -#define FIND_NEAREST_U8_3 \ -{ \ - mlib_d64 *dpsrc, dsrc, dsrc1, ddist, ddist1, ddist2, ddist3; \ - mlib_d64 dcolor, dind, dres, dres1, dpind[1], dpmin[1]; \ - mlib_d64 done = vis_to_double_dup( 1 ), \ - dmax = vis_to_double_dup( MLIB_S32_MAX ); \ - mlib_f32 *lut = ( mlib_f32 * )mlib_ImageGetLutNormalTable( s ); \ - mlib_f32 fone = vis_to_float( 0x100 ); \ - mlib_s32 i, k, mask; \ - mlib_s32 gsr[1]; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s->lutlength; \ - \ - gsr[0] = vis_read_gsr(); \ - for( i = 0; i <= ( length-2 ); i += 2 ) \ - { \ - dpsrc = VIS_ALIGNADDR( src, -1 ); \ - src += 6; \ - dsrc = dpsrc[ 0 ]; \ - dsrc1 = dpsrc[ 1 ]; \ - dsrc1 = vis_faligndata( dsrc, dsrc1 ); \ - dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - VIS_ALIGNADDR( dpsrc, 3 ); \ - dsrc1 = vis_faligndata( dsrc1, dsrc1 ); \ - dsrc1 = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - dpind[ 0 ] = dind = done; \ - dpmin[ 0 ] = dmax; \ - dcolor = vis_fmul8x16al( lut[ 0 ], fone ); \ - for( k = 1; k <= entries; k++ ) \ - { \ - ddist1 = vis_fpsub16( dcolor, dsrc ); \ - ddist = FSQR_S16_HI( ddist1 ); \ - ddist1 = FSQR_S16_LO( ddist1 ); \ - dres = vis_fpadd32( ddist, ddist1 ); \ - ddist3 = vis_fpsub16( dcolor, dsrc1 ); \ - ddist2 = FSQR_S16_HI( ddist3 ); \ - ddist3 = FSQR_S16_LO( ddist3 ); \ - dres1 = vis_fpadd32( ddist2, ddist3 ); \ - dcolor = vis_fmul8x16al( lut[ k ], fone ); \ - dres = vis_freg_pair( \ - vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ), \ - vis_fpadd32s( vis_read_hi( dres1 ), vis_read_lo( dres1 ) ) ); \ - mask = vis_fcmplt32( dres, dpmin[ 0 ] ); \ - vis_pst_32( dind, ( void * )dpind, mask ); \ - dind = vis_fpadd32( dind, done ); \ - vis_pst_32( dres, ( void * )dpmin, mask ); \ - } \ - dst[ i ] = ( ( mlib_s32 * )dpind )[ 0 ] + offset; \ - dst[ i + 1 ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset; \ - } \ - if( i < length ) \ - { \ - dpsrc = VIS_ALIGNADDR( src, -1 ); \ - dsrc = dpsrc[ 0 ]; \ - dsrc1 = dpsrc[ 1 ]; \ - dsrc1 = vis_faligndata( dsrc, dsrc1 ); \ - dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - dpind[ 0 ] = dind = done; \ - dpmin[ 0 ] = dmax; \ - for( k = 0; k < entries; k++ ) \ - { \ - dcolor = vis_fmul8x16al( lut[ k ], fone ); \ - ddist1 = vis_fpsub16( dcolor, dsrc ); \ - ddist = FSQR_S16_HI( ddist1 ); \ - ddist1 = FSQR_S16_LO( ddist1 ); \ - dres = vis_fpadd32( ddist, ddist1 ); \ - dres = vis_write_lo( dres, \ - vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ) ); \ - mask = vis_fcmplt32( dres, dpmin[ 0 ] ); \ - vis_pst_32( dind, ( void * )dpind, mask ); \ - dind = vis_fpadd32( dind, done ); \ - vis_pst_32( dres, ( void * )dpmin, mask ); \ - } \ - dst[ i ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset; \ - } \ - vis_write_gsr(gsr[0]); \ -} - -/***************************************************************/ -#define FIND_NEAREST_U8_3_IN4 \ -{ \ - mlib_d64 *dpsrc, dsrc, dsrc1, ddist, ddist1, ddist2, ddist3; \ - mlib_d64 dcolor, dind, dres, dres1, dpind[1], dpmin[1]; \ - mlib_d64 done = vis_to_double_dup( 1 ), \ - dmax = vis_to_double_dup( MLIB_S32_MAX ); \ - mlib_f32 *lut = ( mlib_f32 * )mlib_ImageGetLutNormalTable( s ); \ - mlib_f32 fone = vis_to_float( 0x100 ); \ - mlib_s32 i, k, mask, gsr[1]; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s->lutlength; \ - \ - gsr[0] = vis_read_gsr(); \ - dpsrc = VIS_ALIGNADDR( src, 0 ); \ - for( i = 0; i <= ( length-2 ); i += 2 ) \ - { \ - dsrc = dpsrc[ 0 ]; \ - dsrc1 = dpsrc[ 1 ]; \ - dsrc1 = vis_faligndata( dsrc, dsrc1 ); \ - dpsrc++; \ - dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - dsrc1 = vis_fmul8x16al( vis_read_lo( dsrc1 ), fone ); \ - dpind[ 0 ] = dind = done; \ - dpmin[ 0 ] = dmax; \ - dcolor = vis_fmul8x16al( lut[ 0 ], fone ); \ - for( k = 1; k <= entries; k++ ) \ - { \ - ddist1 = vis_fpsub16( dcolor, dsrc ); \ - ddist = FSQR_S16_HI( ddist1 ); \ - ddist1 = FSQR_S16_LO( ddist1 ); \ - dres = vis_fpadd32( ddist, ddist1 ); \ - ddist3 = vis_fpsub16( dcolor, dsrc1 ); \ - ddist2 = FSQR_S16_HI( ddist3 ); \ - ddist3 = FSQR_S16_LO( ddist3 ); \ - dres1 = vis_fpadd32( ddist2, ddist3 ); \ - dcolor = vis_fmul8x16al( lut[ k ], fone ); \ - dres = vis_freg_pair( \ - vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ), \ - vis_fpadd32s( vis_read_hi( dres1 ), vis_read_lo( dres1 ) ) ); \ - mask = vis_fcmplt32( dres, dpmin[ 0 ] ); \ - vis_pst_32( dind, ( void * )dpind, mask ); \ - dind = vis_fpadd32( dind, done ); \ - vis_pst_32( dres, ( void * )dpmin, mask ); \ - } \ - dst[ i ] = ( ( mlib_s32 * )dpind )[ 0 ] + offset; \ - dst[ i + 1 ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset; \ - } \ - if( i < length ) \ - { \ - dsrc = dpsrc[ 0 ]; \ - dsrc1 = dpsrc[ 1 ]; \ - dsrc1 = vis_faligndata( dsrc, dsrc1 ); \ - dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - dpind[ 0 ] = dind = done; \ - dpmin[ 0 ] = dmax; \ - for( k = 0; k < entries; k++ ) \ - { \ - dcolor = vis_fmul8x16al( lut[ k ], fone ); \ - ddist1 = vis_fpsub16( dcolor, dsrc ); \ - ddist = FSQR_S16_HI( ddist1 ); \ - ddist1 = FSQR_S16_LO( ddist1 ); \ - dres = vis_fpadd32( ddist, ddist1 ); \ - dres = vis_write_lo( dres, \ - vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ) ); \ - mask = vis_fcmplt32( dres, dpmin[ 0 ] ); \ - vis_pst_32( dind, ( void * )dpind, mask ); \ - dind = vis_fpadd32( dind, done ); \ - vis_pst_32( dres, ( void * )dpmin, mask ); \ - } \ - dst[ i ] = ( ( mlib_s32 * )dpind)[ 1 ] + offset; \ - } \ - vis_write_gsr(gsr[0]); \ -} - -/***************************************************************/ -#define FIND_NEAREST_U8_4 \ -{ \ - mlib_d64 *dpsrc, dsrc, dsrc1, ddist, ddist1, ddist2, ddist3; \ - mlib_d64 dcolor, dind, dres, dres1, dpind[ 1 ], dpmin[ 1 ]; \ - mlib_d64 done = vis_to_double_dup( 1 ), \ - dmax = vis_to_double_dup( MLIB_S32_MAX ); \ - mlib_f32 *lut = ( mlib_f32 * )mlib_ImageGetLutNormalTable( s ); \ - mlib_f32 fone = vis_to_float( 0x100 ); \ - mlib_s32 i, k, mask, gsr[1]; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s->lutlength; \ - \ - gsr[0] = vis_read_gsr(); \ - dpsrc = VIS_ALIGNADDR( src, 0 ); \ - for( i = 0; i <= ( length-2 ); i += 2 ) \ - { \ - dsrc = dpsrc[ 0 ]; \ - dsrc1 = dpsrc[ 1 ]; \ - dsrc1 = vis_faligndata( dsrc, dsrc1 ); \ - dpsrc++; \ - dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - dsrc1 = vis_fmul8x16al( vis_read_lo( dsrc1 ), fone ); \ - dpind[ 0 ] = dind = done; \ - dpmin[ 0 ] = dmax; \ - dcolor = vis_fmul8x16al(lut[0], fone); \ - for( k = 1; k <= entries; k++ ) \ - { \ - ddist1 = vis_fpsub16( dcolor, dsrc ); \ - ddist = FSQR_S16_HI( ddist1 ); \ - ddist1 = FSQR_S16_LO( ddist1 ); \ - dres = vis_fpadd32( ddist, ddist1 ); \ - ddist3 = vis_fpsub16( dcolor, dsrc1 ); \ - ddist2 = FSQR_S16_HI( ddist3 ); \ - ddist3 = FSQR_S16_LO( ddist3 ); \ - dres1 = vis_fpadd32( ddist2, ddist3 ); \ - dcolor = vis_fmul8x16al( lut[ k ], fone ); \ - dres = vis_freg_pair( \ - vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ), \ - vis_fpadd32s( vis_read_hi( dres1 ), vis_read_lo( dres1 ) ) ); \ - mask = vis_fcmplt32( dres, dpmin[ 0 ] ); \ - vis_pst_32( dind, ( void * )dpind, mask ); \ - dind = vis_fpadd32( dind, done ); \ - vis_pst_32( dres, ( void * )dpmin, mask ); \ - } \ - dst[ i ] = ( ( mlib_s32 * )dpind )[ 0 ] + offset; \ - dst[ i + 1 ] = ( ( mlib_s32 * )dpind )[ 1 ] + offset; \ - } \ - if( i < length ) \ - { \ - dsrc = dpsrc[ 0 ]; \ - dsrc1 = dpsrc[ 1 ]; \ - dsrc1 = vis_faligndata( dsrc, dsrc1 ); \ - dsrc = vis_fmul8x16al( vis_read_hi( dsrc1 ), fone ); \ - dpind[ 0 ] = dind = done; \ - dpmin[ 0 ] = dmax; \ - for( k = 0; k < entries; k++ ) \ - { \ - dcolor = vis_fmul8x16al( lut[ k ], fone ); \ - ddist1 = vis_fpsub16( dcolor, dsrc ); \ - ddist = FSQR_S16_HI( ddist1 ); \ - ddist1 = FSQR_S16_LO( ddist1 ); \ - dres = vis_fpadd32( ddist, ddist1 ); \ - dres = vis_write_lo( dres, \ - vis_fpadd32s( vis_read_hi( dres ), vis_read_lo( dres ) ) ); \ - mask = vis_fcmplt32( dres, dpmin[ 0 ] ); \ - vis_pst_32( dind, ( void * )dpind, mask ); \ - dind = vis_fpadd32( dind, done ); \ - vis_pst_32( dres, ( void * )dpmin, mask ); \ - } \ - dst[ i ] = ( ( mlib_s32 * )dpind )[ 1 ] + offset; \ - } \ - vis_write_gsr(gsr[0]); \ -} - -/***************************************************************/ -#define FIND_NEAREST_S16_3( SHIFT, STEP ) \ - mlib_s32 i, k, k_min, min_dist, diff, mask; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s->lutlength; \ - mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s ); \ - mlib_d64 col0, col1, col2; \ - mlib_d64 dist, len0, len1, len2; \ - \ - for( i = 0; i < length; i++ ) \ - { \ - col0 = src[ STEP * i + SHIFT ]; \ - col1 = src[ STEP * i + 1 + SHIFT ]; \ - col2 = src[ STEP * i + 2 + SHIFT ]; \ - min_dist = MLIB_S32_MAX; \ - k_min = 1; \ - len0 = double_lut[ 0 ] - col0; \ - len1 = double_lut[ 1 ] - col1; \ - len2 = double_lut[ 2 ] - col2; \ - for( k = 1; k <= entries; k++ ) \ - { \ - dist = len0 * len0; \ - len0 = double_lut[ 3 * k ] - col0; \ - dist += len1 * len1; \ - len1 = double_lut[ 3 * k + 1 ] - col1; \ - dist += len2 * len2; \ - len2 = double_lut[ 3 * k + 2 ] - col2; \ - diff = ( mlib_s32 )( dist * 0.125 ) - min_dist; \ - mask = diff >> 31; \ - min_dist += diff & mask; \ - k_min += ( k - k_min ) & mask; \ - } \ - dst[ i ] = k_min + offset; \ - } - -/***************************************************************/ -#define FIND_NEAREST_S16_4 \ - mlib_s32 i, k, k_min, min_dist, diff, mask; \ - mlib_s32 offset = mlib_ImageGetLutOffset( s ) - 1; \ - mlib_s32 entries = s->lutlength; \ - mlib_d64 *double_lut = mlib_ImageGetLutDoubleData( s ); \ - mlib_d64 col0, col1, col2, col3; \ - mlib_d64 dist, len0, len1, len2, len3; \ - \ - for( i = 0; i < length; i++ ) \ - { \ - col0 = src[ 4 * i ]; \ - col1 = src[ 4 * i + 1 ]; \ - col2 = src[ 4 * i + 2 ]; \ - col3 = src[ 4 * i + 3 ]; \ - min_dist = MLIB_S32_MAX; \ - k_min = 1; \ - len0 = double_lut[ 0 ] - col0; \ - len1 = double_lut[ 1 ] - col1; \ - len2 = double_lut[ 2 ] - col2; \ - len3 = double_lut[ 3 ] - col3; \ - for( k = 1; k <= entries; k++ ) \ - { \ - dist = len0 * len0; \ - len0 = double_lut[ 4 * k ] - col0; \ - dist += len1 * len1; \ - len1 = double_lut[ 4 * k + 1 ] - col1; \ - dist += len2 * len2; \ - len2 = double_lut[ 4 * k + 2 ] - col2; \ - dist += len3 * len3; \ - len3 = double_lut[ 4 * k + 3 ] - col3; \ - diff = ( mlib_s32 )( dist * 0.125 ) - min_dist; \ - mask = diff >> 31; \ - min_dist += diff & mask; \ - k_min += ( k - k_min ) & mask; \ - } \ - dst[ i ] = k_min + offset; \ - } - -/***************************************************************/ -mlib_status mlib_ImageColorTrue2Index(mlib_image *dst, - const mlib_image *src, - const void *colormap) -{ - mlib_s32 y, width, height, sstride, dstride, schann; - mlib_colormap *s = (mlib_colormap *)colormap; - mlib_s32 channels; - mlib_type stype, dtype; - - MLIB_IMAGE_CHECK(src); - MLIB_IMAGE_CHECK(dst); - MLIB_IMAGE_SIZE_EQUAL(src, dst); - MLIB_IMAGE_HAVE_CHAN(dst, 1); - - if (!colormap) - return MLIB_NULLPOINTER; - - channels = s->channels; - stype = mlib_ImageGetType(src); - dtype = mlib_ImageGetType(dst); - width = mlib_ImageGetWidth(src); - height = mlib_ImageGetHeight(src); - sstride = mlib_ImageGetStride(src); - dstride = mlib_ImageGetStride(dst); - schann = mlib_ImageGetChannels(src); - - if (stype != s->intype || dtype != s->outtype) - return MLIB_FAILURE; - - if (channels != schann) - return MLIB_FAILURE; - - switch (stype) { - case MLIB_BYTE: - { - mlib_u8 *sdata = mlib_ImageGetData(src); - - switch (dtype) { - case MLIB_BYTE: - { - mlib_u8 *ddata = mlib_ImageGetData(dst); - - switch (channels) { - case 3: - { - MAIN_COLORTRUE2INDEX_LOOP(U8, U8, 3); - return MLIB_SUCCESS; - } - - case 4: - { - MAIN_COLORTRUE2INDEX_LOOP(U8, U8, 4); - return MLIB_SUCCESS; - } - - default: - return MLIB_FAILURE; - } - } - - case MLIB_SHORT: - { - mlib_s16 *ddata = mlib_ImageGetData(dst); - - dstride /= 2; - switch (channels) { - case 3: - { - MAIN_COLORTRUE2INDEX_LOOP(U8, S16, 3); - return MLIB_SUCCESS; - } - - case 4: - { - MAIN_COLORTRUE2INDEX_LOOP(U8, S16, 4); - return MLIB_SUCCESS; - } - - default: - return MLIB_FAILURE; - } - } - default: - /* Unsupported type of destination image */ - return MLIB_FAILURE; - } - } - - case MLIB_SHORT: - { - mlib_s16 *sdata = mlib_ImageGetData(src); - - sstride /= 2; - switch (dtype) { - case MLIB_BYTE: - { - mlib_u8 *ddata = mlib_ImageGetData(dst); - - switch (channels) { - case 3: - { - MAIN_COLORTRUE2INDEX_LOOP(S16, U8, 3); - return MLIB_SUCCESS; - } - - case 4: - { - MAIN_COLORTRUE2INDEX_LOOP(S16, U8, 4); - return MLIB_SUCCESS; - } - - default: - return MLIB_FAILURE; - } - } - - case MLIB_SHORT: - { - mlib_s16 *ddata = mlib_ImageGetData(dst); - - dstride /= 2; - switch (channels) { - case 3: - { - MAIN_COLORTRUE2INDEX_LOOP(S16, S16, 3); - return MLIB_SUCCESS; - } - - case 4: - { - MAIN_COLORTRUE2INDEX_LOOP(S16, S16, 4); - return MLIB_SUCCESS; - } - - default: - return MLIB_FAILURE; - } - } - default: - /* Unsupported type of destination image */ - return MLIB_FAILURE; - } - } - - default: - return MLIB_FAILURE; - } -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_U8_3(struct lut_node_3 *node, - mlib_u32 distance, - mlib_s32 *found_color, - mlib_u32 c0, - mlib_u32 c1, - mlib_u32 c2, - const mlib_u8 **base) -{ - mlib_s32 i; - - for (i = 0; i < 8; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newdistance = FIND_DISTANCE_3(c0, newpalc0, c1, newpalc1, c2, newpalc2, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) - distance = - mlib_search_quadrant_U8_3(node->contents.quadrants[i], distance, - found_color, c0, c1, c2, base); - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_left_U8_3(struct lut_node_3 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_u8 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[3][4] = { - {0, 2, 4, 6}, - {0, 1, 4, 5}, - {0, 1, 2, 3} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance < (position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) { /* Search half of quadrant */ - for (i = 0; i < 4; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_left_U8_3(node->contents.quadrants[qq], - distance, found_color, c, base, - position, pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 8; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_left_U8_3(node->contents.quadrants[i], - distance, found_color, c, - base, - position + current_size, - pass - 1, dir_bit); - else - /* Here we should check all */ - distance = - mlib_search_quadrant_U8_3(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], base); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_right_U8_3(struct lut_node_3 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_u8 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[3][4] = { - {1, 3, 5, 7}, - {2, 3, 6, 7}, - {4, 5, 6, 7} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance <= (c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) { /* Search half of quadrant */ - for (i = 0; i < 4; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_right_U8_3(node->contents.quadrants[qq], - distance, found_color, c, - base, position + current_size, - pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 8; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* Here we should check all */ - distance = - mlib_search_quadrant_U8_3(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], base); - else - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_right_U8_3(node->contents.quadrants[i], - distance, found_color, c, - base, position, pass - 1, dir_bit); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_S16_3(struct lut_node_3 *node, - mlib_u32 distance, - mlib_s32 *found_color, - mlib_u32 c0, - mlib_u32 c1, - mlib_u32 c2, - const mlib_s16 **base) -{ - mlib_s32 i; - - for (i = 0; i < 8; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_3(c0, newpalc0, c1, newpalc1, c2, newpalc2, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) - distance = - mlib_search_quadrant_S16_3(node->contents.quadrants[i], distance, - found_color, c0, c1, c2, base); - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_left_S16_3(struct lut_node_3 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_s16 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[3][4] = { - {0, 2, 4, 6}, - {0, 1, 4, 5}, - {0, 1, 2, 3} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance < (((position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) >> 2)) { /* Search half of quadrant */ - for (i = 0; i < 4; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_left_S16_3(node->contents.quadrants[qq], - distance, found_color, c, - base, position, pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 8; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_left_S16_3(node->contents.quadrants[i], - distance, found_color, c, - base, - position + current_size, - pass - 1, dir_bit); - else - /* Here we should check all */ - distance = - mlib_search_quadrant_S16_3(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], base); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_right_S16_3(struct lut_node_3 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_s16 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[3][4] = { - {1, 3, 5, 7}, - {2, 3, 6, 7}, - {4, 5, 6, 7} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance <= (((c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) >> 2)) { /* Search half of quadrant */ - for (i = 0; i < 4; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_right_S16_3(node->contents.quadrants[qq], - distance, found_color, c, - base, - position + current_size, - pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 8; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_3(c[0], newpalc0, c[1], newpalc1, c[2], newpalc2, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* Here we should check all */ - distance = - mlib_search_quadrant_S16_3(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], base); - else - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_right_S16_3(node->contents. - quadrants[i], distance, - found_color, c, base, - position, pass - 1, dir_bit); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_U8_4(struct lut_node_4 *node, - mlib_u32 distance, - mlib_s32 *found_color, - mlib_u32 c0, - mlib_u32 c1, - mlib_u32 c2, - mlib_u32 c3, - const mlib_u8 **base) -{ - mlib_s32 i; - - for (i = 0; i < 16; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newpalc3 = base[3][newindex]; - newdistance = FIND_DISTANCE_4(c0, newpalc0, - c1, newpalc1, c2, newpalc2, c3, newpalc3, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) - distance = - mlib_search_quadrant_U8_4(node->contents.quadrants[i], distance, - found_color, c0, c1, c2, c3, base); - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_left_U8_4(struct lut_node_4 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_u8 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[4][8] = { - {0, 2, 4, 6, 8, 10, 12, 14}, - {0, 1, 4, 5, 8, 9, 12, 13}, - {0, 1, 2, 3, 8, 9, 10, 11}, - {0, 1, 2, 3, 4, 5, 6, 7} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance < (position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) { /* Search half of quadrant */ - for (i = 0; i < 8; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newpalc3 = base[3][newindex]; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_left_U8_4(node->contents.quadrants[qq], - distance, found_color, c, base, - position, pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 16; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newpalc3 = base[3][newindex]; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_left_U8_4(node->contents.quadrants[i], - distance, found_color, c, - base, - position + current_size, - pass - 1, dir_bit); - else - /* Here we should check all */ - distance = - mlib_search_quadrant_U8_4(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], c[3], base); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_right_U8_4(struct lut_node_4 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_u8 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[4][8] = { - {1, 3, 5, 7, 9, 11, 13, 15}, - {2, 3, 6, 7, 10, 11, 14, 15}, - {4, 5, 6, 7, 12, 13, 14, 15}, - {8, 9, 10, 11, 12, 13, 14, 15} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance <= (c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) { /* Search half of quadrant */ - for (i = 0; i < 8; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newpalc3 = base[3][newindex]; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_right_U8_4(node->contents.quadrants[qq], - distance, found_color, c, - base, position + current_size, - pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 16; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex]; - newpalc1 = base[1][newindex]; - newpalc2 = base[2][newindex]; - newpalc3 = base[3][newindex]; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 0); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* Here we should check all */ - distance = - mlib_search_quadrant_U8_4(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], c[3], base); - else - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_right_U8_4(node->contents.quadrants[i], - distance, found_color, c, - base, position, pass - 1, dir_bit); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_S16_4(struct lut_node_4 *node, - mlib_u32 distance, - mlib_s32 *found_color, - mlib_u32 c0, - mlib_u32 c1, - mlib_u32 c2, - mlib_u32 c3, - const mlib_s16 **base) -{ - mlib_s32 i; - - for (i = 0; i < 16; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newpalc3 = base[3][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_4(c0, newpalc0, - c1, newpalc1, c2, newpalc2, c3, newpalc3, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) - distance = - mlib_search_quadrant_S16_4(node->contents.quadrants[i], distance, - found_color, c0, c1, c2, c3, base); - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_left_S16_4(struct lut_node_4 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_s16 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[4][8] = { - {0, 2, 4, 6, 8, 10, 12, 14}, - {0, 1, 4, 5, 8, 9, 12, 13}, - {0, 1, 2, 3, 8, 9, 10, 11}, - {0, 1, 2, 3, 4, 5, 6, 7} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance < (((position + current_size - c[dir_bit]) * (position + current_size - c[dir_bit])) >> 2)) { /* Search half of quadrant */ - for (i = 0; i < 8; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newpalc3 = base[3][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_left_S16_4(node->contents.quadrants[qq], - distance, found_color, c, - base, position, pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 16; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newpalc3 = base[3][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_left_S16_4(node->contents.quadrants[i], - distance, found_color, c, - base, - position + current_size, - pass - 1, dir_bit); - else - /* Here we should check all */ - distance = - mlib_search_quadrant_S16_4(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], c[3], base); - } - } - } - - return distance; -} - -/***************************************************************/ -mlib_u32 mlib_search_quadrant_part_to_right_S16_4(struct lut_node_4 *node, - mlib_u32 distance, - mlib_s32 *found_color, - const mlib_u32 *c, - const mlib_s16 **base, - mlib_u32 position, - mlib_s32 pass, - mlib_s32 dir_bit) -{ - mlib_u32 current_size = 1 << pass; - mlib_s32 i; - static mlib_s32 opposite_quadrants[4][8] = { - {1, 3, 5, 7, 9, 11, 13, 15}, - {2, 3, 6, 7, 10, 11, 14, 15}, - {4, 5, 6, 7, 12, 13, 14, 15}, - {8, 9, 10, 11, 12, 13, 14, 15} - }; - -/* Search only quadrant's half untill it is necessary to check the - whole quadrant */ - - if (distance <= (((c[dir_bit] - position - current_size) * (c[dir_bit] - position - current_size)) >> 2)) { /* Search half of quadrant */ - for (i = 0; i < 8; i++) { - mlib_s32 qq = opposite_quadrants[dir_bit][i]; - - if (node->tag & (1 << qq)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[qq]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newpalc3 = base[3][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[qq]) - distance = - mlib_search_quadrant_part_to_right_S16_4(node->contents.quadrants[qq], - distance, found_color, c, - base, - position + current_size, - pass - 1, dir_bit); - } - } - else { /* Search whole quadrant */ - - mlib_s32 mask = 1 << dir_bit; - - for (i = 0; i < 16; i++) { - - if (node->tag & (1 << i)) { - /* Here is alone color cell. Check the distance */ - mlib_s32 newindex = node->contents.index[i]; - mlib_u32 newpalc0, newpalc1, newpalc2, newpalc3; - mlib_u32 newdistance; - - newpalc0 = base[0][newindex] - MLIB_S16_MIN; - newpalc1 = base[1][newindex] - MLIB_S16_MIN; - newpalc2 = base[2][newindex] - MLIB_S16_MIN; - newpalc3 = base[3][newindex] - MLIB_S16_MIN; - newdistance = FIND_DISTANCE_4(c[0], newpalc0, - c[1], newpalc1, c[2], newpalc2, c[3], newpalc3, 2); - - if (distance > newdistance) { - *found_color = newindex; - distance = newdistance; - } - } - else if (node->contents.quadrants[i]) { - - if (i & mask) - /* Here we should check all */ - distance = - mlib_search_quadrant_S16_4(node->contents.quadrants[i], distance, - found_color, c[0], c[1], c[2], c[3], base); - else - /* This quadrant may require partial checking */ - distance = - mlib_search_quadrant_part_to_right_S16_4(node->contents. - quadrants[i], distance, - found_color, c, base, - position, pass - 1, dir_bit); - } - } - } - - return distance; -} - -/***************************************************************/ - -#define TAB_SIZE_mlib_u8 256 -#define TAB_SIZE_mlib_s16 1024 - -#define SRC_mlib_u8(i) src[i] -#define SRC_mlib_s16(i) (((mlib_u16*)src)[i] >> 6) - -/***************************************************************/ - -#define DIMENSIONS_SEARCH_3(STYPE, DTYPE, STEP) \ -{ \ - DTYPE *tab0 = ((mlib_colormap *)state)->table; \ - DTYPE *tab1 = tab0 + TAB_SIZE_##STYPE; \ - DTYPE *tab2 = tab1 + TAB_SIZE_##STYPE; \ - mlib_s32 i; \ - \ - for (i = 0; i < length; i++) { \ - dst[i] = tab0[SRC_##STYPE(0)] + tab1[SRC_##STYPE(1)] + \ - tab2[SRC_##STYPE(2)]; \ - src += STEP; \ - } \ -} - -/***************************************************************/ - -#define DIMENSIONS_SEARCH_4(STYPE, DTYPE) \ -{ \ - DTYPE *tab0 = ((mlib_colormap *)state)->table; \ - DTYPE *tab1 = tab0 + TAB_SIZE_##STYPE; \ - DTYPE *tab2 = tab1 + TAB_SIZE_##STYPE; \ - DTYPE *tab3 = tab2 + TAB_SIZE_##STYPE; \ - mlib_s32 i; \ - \ - for (i = 0; i < length; i++) { \ - dst[i] = tab0[SRC_##STYPE(0)] + tab1[SRC_##STYPE(1)] + \ - tab2[SRC_##STYPE(2)] + tab3[SRC_##STYPE(3)]; \ - src += 4; \ - } \ -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_U8_U8_3(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - - switch (s->method) { -#if LUT_BYTE_COLORS_3CHANNELS <= 256 - case LUT_BINARY_TREE_SEARCH: - { - mlib_s32 bits = s->bits; - BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0, 3, 0); - } - break; - -#endif /* LUT_BYTE_COLORS_3CHANNELS <= 256 */ - case LUT_COLOR_CUBE_SEARCH: - { - COLOR_CUBE_U8_3_SEARCH(mlib_u8, 0, 3); - } - break; - - case LUT_STUPID_SEARCH: - { -#ifdef USE_VIS_CODE - FIND_NEAREST_U8_3; -#else - FIND_NEAREST_U8_3_C(0, 3); -#endif - } - break; - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_3(mlib_u8, mlib_u8, 3) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - - switch (s->method) { -#if LUT_BYTE_COLORS_3CHANNELS <= 256 - case LUT_BINARY_TREE_SEARCH: - { - mlib_s32 bits = s->bits; - BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 1, 4, 0); - break; - } - -#endif /* LUT_BYTE_COLORS_3CHANNELS <= 256 */ - case LUT_COLOR_CUBE_SEARCH: - { - COLOR_CUBE_U8_3_SEARCH(mlib_u8, 1, 4); - break; - } - - case LUT_STUPID_SEARCH: - { -#ifdef USE_VIS_CODE - FIND_NEAREST_U8_3_IN4; -#else - FIND_NEAREST_U8_3_C(1, 4); -#endif - break; - } - - case LUT_COLOR_DIMENSIONS: - src++; - DIMENSIONS_SEARCH_3(mlib_u8, mlib_u8, 4) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_U8_U8_4(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - - switch (s->method) { -#if LUT_BYTE_COLORS_4CHANNELS <= 256 - case LUT_BINARY_TREE_SEARCH: - { - mlib_s32 bits = s->bits; - BINARY_TREE_SEARCH_4(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0); - break; - } - -#endif /* LUT_BYTE_COLORS_4CHANNELS <= 256 */ - case LUT_COLOR_CUBE_SEARCH: - { - COLOR_CUBE_U8_4_SEARCH(mlib_u8); - break; - } - - case LUT_STUPID_SEARCH: - { -#ifdef USE_VIS_CODE - FIND_NEAREST_U8_4; -#else - FIND_NEAREST_U8_4_C; -#endif - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_4(mlib_u8, mlib_u8) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_U8_S16_3(const mlib_u8 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - mlib_s32 bits = s->bits; - - switch (s->method) { - case LUT_BINARY_TREE_SEARCH: - { - BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0, 3, 0); - break; - } - - case LUT_COLOR_CUBE_SEARCH: - { - switch (s->indexsize) { - case 1: - { - COLOR_CUBE_U8_3_SEARCH(mlib_u8, 0, 3); - break; - } - - case 2: - { - COLOR_CUBE_U8_3_SEARCH(mlib_s16, 0, 3); - break; - } - } - - break; - } - - case LUT_STUPID_SEARCH: - { -#ifdef USE_VIS_CODE - FIND_NEAREST_U8_3; -#else - FIND_NEAREST_U8_3_C(0, 3); -#endif - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_3(mlib_u8, mlib_s16, 3) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4(const mlib_u8 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - mlib_s32 bits = s->bits; - - switch (s->method) { - case LUT_BINARY_TREE_SEARCH: - { - BINARY_TREE_SEARCH_3(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 1, 4, 0); - break; - } - - case LUT_COLOR_CUBE_SEARCH: - { - switch (s->indexsize) { - case 1: - { - COLOR_CUBE_U8_3_SEARCH(mlib_u8, 1, 4); - break; - } - - case 2: - { - COLOR_CUBE_U8_3_SEARCH(mlib_s16, 1, 4); - break; - } - } - - break; - } - - case LUT_STUPID_SEARCH: - { -#ifdef USE_VIS_CODE - FIND_NEAREST_U8_3_IN4; -#else - FIND_NEAREST_U8_3_C(1, 4); -#endif - break; - } - - case LUT_COLOR_DIMENSIONS: - src++; - DIMENSIONS_SEARCH_3(mlib_u8, mlib_s16, 4) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_U8_S16_4(const mlib_u8 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - mlib_s32 bits = s->bits; - - switch (s->method) { - case LUT_BINARY_TREE_SEARCH: - { - BINARY_TREE_SEARCH_4(U8, mlib_u8, 8, (MLIB_U8_MAX + 1), 0, 0); - break; - } - - case LUT_COLOR_CUBE_SEARCH: - { - switch (s->indexsize) { - case 1: - { - COLOR_CUBE_U8_4_SEARCH(mlib_u8); - break; - } - - case 2: - { - COLOR_CUBE_U8_4_SEARCH(mlib_s16); - break; - } - } - - break; - } - - case LUT_STUPID_SEARCH: - { -#ifdef USE_VIS_CODE - FIND_NEAREST_U8_4; -#else - FIND_NEAREST_U8_4_C; -#endif - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_4(mlib_u8, mlib_s16) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_S16_S16_3(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - mlib_s32 bits = s->bits; - - switch (s->method) { - case LUT_BINARY_TREE_SEARCH: - { - BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2), - MLIB_S16_MIN, 0, 3, 2); - break; - } - - case LUT_COLOR_CUBE_SEARCH: - { - switch (s->indexsize) { - case 1: - { - COLOR_CUBE_S16_3_SEARCH(mlib_u8, 0, 3); - break; - } - - case 2: - { - COLOR_CUBE_S16_3_SEARCH(mlib_s16, 0, 3); - break; - } - } - - break; - } - - case LUT_STUPID_SEARCH: - { - FIND_NEAREST_S16_3(0, 3); - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_3(mlib_s16, mlib_s16, 3) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - mlib_s32 bits = s->bits; - - switch (s->method) { - case LUT_BINARY_TREE_SEARCH: - { - BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2), - MLIB_S16_MIN, 1, 4, 2); - break; - } - - case LUT_COLOR_CUBE_SEARCH: - { - switch (s->indexsize) { - case 1: - { - COLOR_CUBE_S16_3_SEARCH(mlib_u8, 1, 4); - break; - } - - case 2: - { - COLOR_CUBE_S16_3_SEARCH(mlib_s16, 1, 4); - break; - } - } - - break; - } - - case LUT_STUPID_SEARCH: - { - FIND_NEAREST_S16_3(1, 4); - break; - } - - case LUT_COLOR_DIMENSIONS: - src++; - DIMENSIONS_SEARCH_3(mlib_s16, mlib_s16, 4) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_S16_S16_4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - mlib_s32 bits = s->bits; - - switch (s->method) { - case LUT_BINARY_TREE_SEARCH: - { - BINARY_TREE_SEARCH_4(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2), - MLIB_S16_MIN, 2); - break; - } - - case LUT_COLOR_CUBE_SEARCH: - { - switch (s->indexsize) { - case 1: - { - COLOR_CUBE_S16_4_SEARCH(mlib_u8); - break; - } - - case 2: - { - COLOR_CUBE_S16_4_SEARCH(mlib_s16); - break; - } - } - - break; - } - - case LUT_STUPID_SEARCH: - { - FIND_NEAREST_S16_4; - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_4(mlib_s16, mlib_s16) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_S16_U8_3(const mlib_s16 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - - switch (s->method) { -#if LUT_SHORT_COLORS_3CHANNELS <= 256 - case LUT_BINARY_TREE_SEARCH: - { - mlib_s32 bits = s->bits; - BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2), - MLIB_S16_MIN, 0, 3, 2); - break; - } - -#endif /* LUT_SHORT_COLORS_3CHANNELS <= 256 */ - case LUT_COLOR_CUBE_SEARCH: - { - COLOR_CUBE_S16_3_SEARCH(mlib_u8, 0, 3); - break; - } - - case LUT_STUPID_SEARCH: - { - FIND_NEAREST_S16_3(0, 3); - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_3(mlib_s16, mlib_u8, 3) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4(const mlib_s16 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - - switch (s->method) { -#if LUT_SHORT_COLORS_3CHANNELS <= 256 - case LUT_BINARY_TREE_SEARCH: - { - mlib_s32 bits = s->bits; - BINARY_TREE_SEARCH_3(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2), - MLIB_S16_MIN, 1, 4, 2); - break; - } - -#endif /* LUT_SHORT_COLORS_3CHANNELS <= 256 */ - case LUT_COLOR_CUBE_SEARCH: - { - COLOR_CUBE_S16_3_SEARCH(mlib_u8, 1, 4); - break; - } - - case LUT_STUPID_SEARCH: - { - FIND_NEAREST_S16_3(1, 4); - break; - } - - case LUT_COLOR_DIMENSIONS: - src++; - DIMENSIONS_SEARCH_3(mlib_s16, mlib_u8, 4) - break; - } -} - -/***************************************************************/ -void mlib_ImageColorTrue2IndexLine_S16_U8_4(const mlib_s16 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *state) -{ - mlib_colormap *s = (mlib_colormap *)state; - - switch (s->method) { -#if LUT_SHORT_COLORS_4CHANNELS <= 256 - case LUT_BINARY_TREE_SEARCH: - { - mlib_s32 bits = s->bits; - BINARY_TREE_SEARCH_4(S16, mlib_s16, 16, ((MLIB_S16_MAX + 1) * 2), - MLIB_S16_MIN, 2); - break; - } - -#endif /* LUT_SHORT_COLORS_4CHANNELS <= 256 */ - case LUT_COLOR_CUBE_SEARCH: - { - COLOR_CUBE_S16_4_SEARCH(mlib_u8); - break; - } - - case LUT_STUPID_SEARCH: - { - FIND_NEAREST_S16_4; - break; - } - - case LUT_COLOR_DIMENSIONS: - DIMENSIONS_SEARCH_4(mlib_s16, mlib_u8) - break; - } -} - -/***************************************************************/ - -#ifndef VIS - -void mlib_c_ImageThresh1_U81_1B(void *psrc, - void *pdst, - mlib_s32 src_stride, - mlib_s32 dst_stride, - mlib_s32 width, - mlib_s32 height, - void *thresh, - void *ghigh, - void *glow, - mlib_s32 dbit_off); - -/***************************************************************/ - -void mlib_ImageColorTrue2IndexLine_U8_BIT_1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 bit_offset, - mlib_s32 length, - const void *state) -{ - mlib_u8 *lut = ((mlib_colormap *)state)->table; - mlib_s32 thresh[1]; - mlib_s32 ghigh[1]; - mlib_s32 glow[1]; - - thresh[0] = lut[2]; - - glow[0] = lut[0] - lut[1]; - ghigh[0] = lut[1] - lut[0]; - - mlib_c_ImageThresh1_U81_1B((void*)src, dst, 0, 0, length, 1, - thresh, ghigh, glow, bit_offset); -} - -#else - -/***************************************************************/ - -void mlib_v_ImageThresh1B_U8_1(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 dbit_off, - const mlib_s32 *th, - mlib_s32 hc, - mlib_s32 lc); - -/***************************************************************/ - -void mlib_ImageColorTrue2IndexLine_U8_BIT_1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 bit_offset, - mlib_s32 length, - const void *state) -{ - mlib_u8 *lut = ((mlib_colormap *)state)->table; - mlib_s32 thresh[4]; - mlib_s32 ghigh[1]; - mlib_s32 glow[1]; - - thresh[0] = thresh[1] = thresh[2] = thresh[3] = lut[2]; - - glow[0] = (lut[1] < lut[0]) ? 0xFF : 0; - ghigh[0] = (lut[1] < lut[0]) ? 0 : 0xFF; - - mlib_v_ImageThresh1B_U8_1((void*)src, 0, dst, 0, length, 1, - bit_offset, thresh, ghigh[0], glow[0]); -} - -/***************************************************************/ - -#endif diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColormap.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageColormap.h Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,221 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -#ifndef __MLIB_IMAGECOLORMAP_H -#define __MLIB_IMAGECOLORMAP_H - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -typedef struct { - void **lut; - mlib_s32 channels; - mlib_type intype; - mlib_s32 offset; - void *table; - mlib_s32 bits; - mlib_s32 method; - mlib_s32 lutlength; - mlib_s32 indexsize; - mlib_type outtype; - void *normal_table; - mlib_d64 *double_lut; -} mlib_colormap; - -/***************************************************************/ -#define LUT_COLOR_CUBE_SEARCH 0 -#define LUT_BINARY_TREE_SEARCH 1 -#define LUT_STUPID_SEARCH 2 -#define LUT_COLOR_DIMENSIONS 3 - -/***************************************************************/ - -/* Bit set in the tag denotes that the corresponding quadrant is a - palette index, not node. If the bit is clear, this means that that - is a pointer to the down level node. If the bit is clear and the - corresponding quadrant is NULL, then there is no way down there and - this quadrant is clear. */ - -struct lut_node_3 { - mlib_u8 tag; - union { - struct lut_node_3 *quadrants[8]; - long index[8]; - } contents; -}; - -struct lut_node_4 { - mlib_u16 tag; - union { - struct lut_node_4 *quadrants[16]; - long index[16]; - } contents; -}; - -/***************************************************************/ - -#define mlib_ImageGetLutData(colormap) \ - ((void **)((( mlib_colormap *)( colormap))->lut)) - -/***************************************************************/ -#define mlib_ImageGetLutNormalTable(colormap) \ - ((void *)((( mlib_colormap *)( colormap))->normal_table)) - -/***************************************************************/ -#define mlib_ImageGetLutInversTable(colormap) \ - ((void *)((( mlib_colormap *)( colormap))->table)) - -/***************************************************************/ -#define mlib_ImageGetLutChannels(colormap) \ - ((mlib_s32)((( mlib_colormap *)( colormap))->channels)) - -/***************************************************************/ -#define mlib_ImageGetLutType(colormap) \ - ((mlib_type)((( mlib_colormap *)( colormap))->intype)) - -/***************************************************************/ -#define mlib_ImageGetIndexSize(colormap) \ - ((mlib_s32)((( mlib_colormap *)( colormap))->indexsize)) - -/***************************************************************/ -#define mlib_ImageGetOutType(colormap) \ - ((mlib_type)((( mlib_colormap *)( colormap))->outtype)) - -/***************************************************************/ -#define mlib_ImageGetLutOffset(colormap) \ - ((mlib_s32)((( mlib_colormap *)( colormap))->offset)) - -/***************************************************************/ -#define mlib_ImageGetBits(colormap) \ - ((mlib_s32)((( mlib_colormap *)( colormap))->bits)) - -/***************************************************************/ -#define mlib_ImageGetMethod(colormap) \ - ((mlib_s32)((( mlib_colormap *)( colormap))->method)) - -/***************************************************************/ -#define mlib_ImageGetLutDoubleData(colormap) \ - ((mlib_d64 *)((( mlib_colormap *)( colormap))->double_lut)) - -/***************************************************************/ -#define FIND_DISTANCE_3( x1, x2, y1, y2, z1, z2, SHIFT ) \ - (( ( ( ( x1 ) - ( x2 ) ) * ( ( x1 ) - ( x2 ) ) ) >> SHIFT ) + \ - ( ( ( ( y1 ) - ( y2 ) ) * ( ( y1 ) - ( y2 ) ) ) >> SHIFT ) + \ - ( ( ( ( z1 ) - ( z2 ) ) * ( ( z1 ) - ( z2 ) ) ) >> SHIFT ) ) - -/***************************************************************/ -#define FIND_DISTANCE_4( x1, x2, y1, y2, z1, z2, w1, w2, SHIFT ) \ - (( ( ( ( x1 ) - ( x2 ) ) * ( ( x1 ) - ( x2 ) ) ) >> SHIFT ) + \ - ( ( ( ( y1 ) - ( y2 ) ) * ( ( y1 ) - ( y2 ) ) ) >> SHIFT ) + \ - ( ( ( ( z1 ) - ( z2 ) ) * ( ( z1 ) - ( z2 ) ) ) >> SHIFT ) + \ - ( ( ( ( w1 ) - ( w2 ) ) * ( ( w1 ) - ( w2 ) ) ) >> SHIFT ) ) - -/***************************************************************/ - -void mlib_ImageColorTrue2IndexLine_U8_BIT_1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 bit_offset, - mlib_s32 length, - const void *state); - - -void mlib_ImageColorTrue2IndexLine_U8_U8_3(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_U8_U8_4(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_U8_S16_3(const mlib_u8 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4(const mlib_u8 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_U8_S16_4(const mlib_u8 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_S16_S16_3(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_S16_S16_4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_S16_U8_3(const mlib_s16 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4(const mlib_s16 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *colormap); - - -void mlib_ImageColorTrue2IndexLine_S16_U8_4(const mlib_s16 *src, - mlib_u8 *dst, - mlib_s32 length, - const void *colormap); - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* __MLIB_IMAGECOLORMAP_H */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv.h Fri May 13 11:31:05 2016 +0300 @@ -41,466 +41,6 @@ } #endif /* FREE_AND_RETURN_STATUS */ -void mlib_ImageXor80_aa(mlib_u8 *dl, - mlib_s32 wid, - mlib_s32 hgt, - mlib_s32 str); - -void mlib_ImageXor80(mlib_u8 *dl, - mlib_s32 wid, - mlib_s32 hgt, - mlib_s32 str, - mlib_s32 nchan, - mlib_s32 cmask); - -mlib_status mlib_conv2x2ext_d64(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv2x2ext_f32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv2x2ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2ext_s32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2nw_d64(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv2x2nw_f32(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv2x2nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2nw_s32(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv2x2nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_bit(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_d64(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_f32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_s32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_bit(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_d64(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_f32(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_s32(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4ext_d64(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv4x4ext_f32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv4x4ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4ext_s32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4nw_d64(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv4x4nw_f32(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv4x4nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4nw_s32(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv4x4nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5ext_d64(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv5x5ext_f32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv5x5ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5ext_s32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5nw_d64(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv5x5nw_f32(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask); - -mlib_status mlib_conv5x5nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5nw_s32(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv5x5nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7ext_s32(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7nw_s32(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv7x7nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - mlib_status mlib_convMxNext_s32(mlib_image *dst, const mlib_image *src, const mlib_s32 *kernel, diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv2x2_f.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv2x2_f.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1118 +0,0 @@ -/* - * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -/* - * FUNCTION - * Internal functions for mlib_ImageConv2x2 on U8/S16/U16 types - * and MLIB_EDGE_DST_NO_WRITE mask. - */ - -#include "mlib_image.h" -#include "mlib_ImageConv.h" -#include "mlib_c_ImageConv.h" - -/***************************************************************/ -#ifdef i386 /* do not copy by mlib_d64 data type for x86 */ - -typedef struct { - mlib_s32 int0, int1; -} two_int; - -#define TYPE_64BIT two_int - -#else /* i386 */ - -#define TYPE_64BIT mlib_d64 - -#endif /* i386 ( do not copy by mlib_d64 data type for x86 ) */ - -/***************************************************************/ -#define LOAD_KERNEL_INTO_DOUBLE() \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3] - -/***************************************************************/ -#define GET_SRC_DST_PARAMETERS(type) \ - hgt = mlib_ImageGetHeight(src); \ - wid = mlib_ImageGetWidth(src); \ - nchannel = mlib_ImageGetChannels(src); \ - sll = mlib_ImageGetStride(src) / sizeof(type); \ - dll = mlib_ImageGetStride(dst) / sizeof(type); \ - adr_src = (type *)mlib_ImageGetData(src); \ - adr_dst = (type *)mlib_ImageGetData(dst) - -/***************************************************************/ -#ifndef MLIB_USE_FTOI_CLAMPING - -#define CLAMP_S32(x) \ - (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : \ - (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x))) - -#else - -#define CLAMP_S32(x) ((mlib_s32)(x)) - -#endif /* MLIB_USE_FTOI_CLAMPING */ - -/***************************************************************/ -#if defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG) - -/* NB: Explicit cast to DTYPE is necessary to avoid warning from Microsoft VC compiler. - And we need to explicitly define cast behavior if source exceeds destination range. - (it is undefined according to C99 spec). We use mask here because this macro is typically - used to extract bit regions. */ - -#define STORE2(res0, res1) \ - dp[0 ] = (DTYPE) ((res1) & DTYPE_MASK); \ - dp[chan1] = (DTYPE) ((res0) & DTYPE_MASK) - -#else - -#define STORE2(res0, res1) \ - dp[0 ] = (DTYPE) ((res0) & DTYPE_MASK); \ - dp[chan1] = (DTYPE) ((res1) & DTYPE_MASK) - -#endif /* defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG) */ - -/***************************************************************/ -#ifdef _NO_LONGLONG - -#define LOAD_BUFF(buff) \ - buff[i ] = sp[0]; \ - buff[i + 1] = sp[chan1] - -#else /* _NO_LONGLONG */ - -#ifdef _LITTLE_ENDIAN - -#define LOAD_BUFF(buff) \ - *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | ((mlib_s64)sp[0] & 0xffffffff) - -#else /* _LITTLE_ENDIAN */ - -#define LOAD_BUFF(buff) \ - *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | ((mlib_s64)sp[chan1] & 0xffffffff) - -#endif /* _LITTLE_ENDIAN */ - -#endif /* _NO_LONGLONG */ - -/***************************************************************/ -typedef union { - TYPE_64BIT d64; - struct { - mlib_s32 i0, i1; - } i32s; -} d64_2x32; - -/***************************************************************/ -#define D_KER 1 - -#define BUFF_LINE 256 - -/***************************************************************/ -#define XOR_80(x) x ^= 0x80 - -void mlib_ImageXor80_aa(mlib_u8 *dl, - mlib_s32 wid, - mlib_s32 hgt, - mlib_s32 str) -{ - mlib_u8 *dp, *dend; -#ifdef _NO_LONGLONG - mlib_u32 cadd = 0x80808080; -#else /* _NO_LONGLONG */ - mlib_u64 cadd = MLIB_U64_CONST(0x8080808080808080); -#endif /* _NO_LONGLONG */ - mlib_s32 j; - - if (wid == str) { - wid *= hgt; - hgt = 1; - } - - for (j = 0; j < hgt; j++) { - dend = dl + wid; - - for (dp = dl; ((mlib_addr)dp & 7) && (dp < dend); dp++) XOR_80(dp[0]); - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (; dp <= (dend - 8); dp += 8) { -#ifdef _NO_LONGLONG - *((mlib_s32*)dp) ^= cadd; - *((mlib_s32*)dp+1) ^= cadd; -#else /* _NO_LONGLONG */ - *((mlib_u64*)dp) ^= cadd; -#endif /* _NO_LONGLONG */ - } - - for (; (dp < dend); dp++) XOR_80(dp[0]); - - dl += str; - } -} - -/***************************************************************/ -void mlib_ImageXor80(mlib_u8 *dl, - mlib_s32 wid, - mlib_s32 hgt, - mlib_s32 str, - mlib_s32 nchan, - mlib_s32 cmask) -{ - mlib_s32 i, j, c; - - for (j = 0; j < hgt; j++) { - for (c = 0; c < nchan; c++) { - if (cmask & (1 << (nchan - 1 - c))) { - mlib_u8 *dp = dl + c; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) XOR_80(dp[i*nchan]); - } - } - - dl += str; - } -} - -/***************************************************************/ -#define DTYPE mlib_s16 -#define DTYPE_MASK 0xffff - -mlib_status mlib_c_conv2x2nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff_arr[2*BUFF_LINE]; - mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; - DTYPE *adr_src, *sl, *sp, *sl1; - DTYPE *adr_dst, *dl, *dp; - mlib_d64 k0, k1, k2, k3, scalef = 65536.0; - mlib_d64 p00, p01, p02, - p10, p11, p12; - mlib_s32 wid, hgt, sll, dll, wid1; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c; - LOAD_KERNEL_INTO_DOUBLE(); - GET_SRC_DST_PARAMETERS(DTYPE); - - wid1 = (wid + 1) &~ 1; - - if (wid1 > BUFF_LINE) { - pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffo = pbuff; - buff0 = buffo + wid1; - buff1 = buff0 + wid1; - buff2 = buff1 + wid1; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= D_KER; - hgt -= D_KER; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + D_KER; i++) { - buff0[i - 1] = (mlib_s32)sl[i*chan1]; - buff1[i - 1] = (mlib_s32)sl1[i*chan1]; - } - - sl += (D_KER + 1)*sll; - - for (j = 0; j < hgt; j++) { - sp = sl; - dp = dl; - - buff2[-1] = (mlib_s32)sp[0]; - sp += chan1; - - p02 = buff0[-1]; - p12 = buff1[-1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ - d64_2x32 sd0, sd1, dd; - - p00 = p02; p10 = p12; - - sd0.d64 = *(TYPE_64BIT*)(buff0 + i); - sd1.d64 = *(TYPE_64BIT*)(buff1 + i); - p01 = (mlib_d64)sd0.i32s.i0; - p02 = (mlib_d64)sd0.i32s.i1; - p11 = (mlib_d64)sd1.i32s.i0; - p12 = (mlib_d64)sd1.i32s.i1; - - LOAD_BUFF(buff2); - - dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); - dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3); - *(TYPE_64BIT*)(buffo + i) = dd.d64; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; - STORE2(o64_1 >> 16, o64_2 >> 16); - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); - STORE2(o64 >> 48, o64 >> 16); - -#endif /* _NO_LONGLONG */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i - 1]; p10 = buff1[i - 1]; - p01 = buff0[i]; p11 = buff1[i]; - - buff2[i] = (mlib_s32)sp[0]; - - buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); - dp[0] = buffo[i] >> 16; - - sp += chan1; - dp += chan1; - } - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -mlib_status mlib_c_conv2x2ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff_arr[2*BUFF_LINE]; - mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; - DTYPE *adr_src, *sl, *sp, *sl1; - DTYPE *adr_dst, *dl, *dp; - mlib_d64 k0, k1, k2, k3, scalef = 65536.0; - mlib_d64 p00, p01, p02, - p10, p11, p12; - mlib_s32 wid, hgt, sll, dll, wid1; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c, swid; - LOAD_KERNEL_INTO_DOUBLE(); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + D_KER; - - wid1 = (swid + 1) &~ 1; - - if (wid1 > BUFF_LINE) { - pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffo = pbuff; - buff0 = buffo + wid1; - buff1 = buff0 + wid1; - buff2 = buff1 + wid1; - - swid -= dx_r; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((hgt - dy_b) > 0) sl1 = sl + sll; - else sl1 = sl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i - 1] = (mlib_s32)sl[i*chan1]; - buff1[i - 1] = (mlib_s32)sl1[i*chan1]; - } - - if (dx_r != 0) { - buff0[swid - 1] = buff0[swid - 2]; - buff1[swid - 1] = buff1[swid - 2]; - } - - if ((hgt - dy_b) > 1) sl = sl1 + sll; - else sl = sl1; - - for (j = 0; j < hgt; j++) { - sp = sl; - dp = dl; - - buff2[-1] = (mlib_s32)sp[0]; - sp += chan1; - - p02 = buff0[-1]; - p12 = buff1[-1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ - d64_2x32 sd0, sd1, dd; - - p00 = p02; p10 = p12; - - sd0.d64 = *(TYPE_64BIT*)(buff0 + i); - sd1.d64 = *(TYPE_64BIT*)(buff1 + i); - p01 = (mlib_d64)sd0.i32s.i0; - p02 = (mlib_d64)sd0.i32s.i1; - p11 = (mlib_d64)sd1.i32s.i0; - p12 = (mlib_d64)sd1.i32s.i1; - - LOAD_BUFF(buff2); - - dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); - dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3); - *(TYPE_64BIT*)(buffo + i) = dd.d64; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; - STORE2(o64_1 >> 16, o64_2 >> 16); - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); - STORE2(o64 >> 48, o64 >> 16); - -#endif /* _NO_LONGLONG */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i - 1]; p10 = buff1[i - 1]; - p01 = buff0[i]; p11 = buff1[i]; - - buff2[i] = (mlib_s32)sp[0]; - - buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); - dp[0] = buffo[i] >> 16; - - sp += chan1; - dp += chan1; - } - - if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2]; - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef DTYPE -#define DTYPE mlib_u16 - -mlib_status mlib_c_conv2x2nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff_arr[2*BUFF_LINE]; - mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; - DTYPE *adr_src, *sl, *sp, *sl1; - DTYPE *adr_dst, *dl, *dp; - mlib_d64 k0, k1, k2, k3, scalef = 65536.0; - mlib_d64 p00, p01, p02, - p10, p11, p12; - mlib_s32 wid, hgt, sll, dll, wid1; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c; - mlib_d64 doff = 0x7FFF8000; - LOAD_KERNEL_INTO_DOUBLE(); - GET_SRC_DST_PARAMETERS(DTYPE); - - wid1 = (wid + 1) &~ 1; - - if (wid1 > BUFF_LINE) { - pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffo = pbuff; - buff0 = buffo + wid1; - buff1 = buff0 + wid1; - buff2 = buff1 + wid1; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= D_KER; - hgt -= D_KER; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + D_KER; i++) { - buff0[i - 1] = (mlib_s32)sl[i*chan1]; - buff1[i - 1] = (mlib_s32)sl1[i*chan1]; - } - - sl += (D_KER + 1)*sll; - - for (j = 0; j < hgt; j++) { - sp = sl; - dp = dl; - - buff2[-1] = (mlib_s32)sp[0]; - sp += chan1; - - p02 = buff0[-1]; - p12 = buff1[-1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ - d64_2x32 sd0, sd1, dd; - - p00 = p02; p10 = p12; - - sd0.d64 = *(TYPE_64BIT*)(buff0 + i); - sd1.d64 = *(TYPE_64BIT*)(buff1 + i); - p01 = (mlib_d64)sd0.i32s.i0; - p02 = (mlib_d64)sd0.i32s.i1; - p11 = (mlib_d64)sd1.i32s.i0; - p12 = (mlib_d64)sd1.i32s.i1; - - LOAD_BUFF(buff2); - - dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); - dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff); - *(TYPE_64BIT*)(buffo + i) = dd.d64; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; - o64_1 = o64_1 ^ 0x80000000U; - o64_2 = o64_2 ^ 0x80000000U; - STORE2(o64_1 >> 16, o64_2 >> 16); - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); - o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000); - STORE2(o64 >> 48, o64 >> 16); - -#endif /* _NO_LONGLONG */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i - 1]; p10 = buff1[i - 1]; - p01 = buff0[i]; p11 = buff1[i]; - - buff2[i] = (mlib_s32)sp[0]; - - buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); - dp[0] = (buffo[i] >> 16) ^ 0x8000; - - sp += chan1; - dp += chan1; - } - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -mlib_status mlib_c_conv2x2ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff_arr[2*BUFF_LINE]; - mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; - DTYPE *adr_src, *sl, *sp, *sl1; - DTYPE *adr_dst, *dl, *dp; - mlib_d64 k0, k1, k2, k3, scalef = 65536.0; - mlib_d64 p00, p01, p02, - p10, p11, p12; - mlib_s32 wid, hgt, sll, dll, wid1; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c, swid; - mlib_d64 doff = 0x7FFF8000; - LOAD_KERNEL_INTO_DOUBLE(); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + D_KER; - - wid1 = (swid + 1) &~ 1; - - if (wid1 > BUFF_LINE) { - pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffo = pbuff; - buff0 = buffo + wid1; - buff1 = buff0 + wid1; - buff2 = buff1 + wid1; - - swid -= dx_r; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((hgt - dy_b) > 0) sl1 = sl + sll; - else sl1 = sl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i - 1] = (mlib_s32)sl[i*chan1]; - buff1[i - 1] = (mlib_s32)sl1[i*chan1]; - } - - if (dx_r != 0) { - buff0[swid - 1] = buff0[swid - 2]; - buff1[swid - 1] = buff1[swid - 2]; - } - - if ((hgt - dy_b) > 1) sl = sl1 + sll; - else sl = sl1; - - for (j = 0; j < hgt; j++) { - sp = sl; - dp = dl; - - buff2[-1] = (mlib_s32)sp[0]; - sp += chan1; - - p02 = buff0[-1]; - p12 = buff1[-1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ - d64_2x32 sd0, sd1, dd; - - p00 = p02; p10 = p12; - - sd0.d64 = *(TYPE_64BIT*)(buff0 + i); - sd1.d64 = *(TYPE_64BIT*)(buff1 + i); - p01 = (mlib_d64)sd0.i32s.i0; - p02 = (mlib_d64)sd0.i32s.i1; - p11 = (mlib_d64)sd1.i32s.i0; - p12 = (mlib_d64)sd1.i32s.i1; - - LOAD_BUFF(buff2); - - dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); - dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff); - *(TYPE_64BIT*)(buffo + i) = dd.d64; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; - o64_1 = o64_1 ^ 0x80000000U; - o64_2 = o64_2 ^ 0x80000000U; - STORE2(o64_1 >> 16, o64_2 >> 16); - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); - o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000); - STORE2(o64 >> 48, o64 >> 16); - -#endif /* _NO_LONGLONG */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i - 1]; p10 = buff1[i - 1]; - p01 = buff0[i]; p11 = buff1[i]; - - buff2[i] = (mlib_s32)sp[0]; - - buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); - dp[0] = (buffo[i] >> 16) ^ 0x8000; - - sp += chan1; - dp += chan1; - } - - if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2]; - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef DTYPE -#define DTYPE mlib_u8 - -mlib_status mlib_c_conv2x2nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff_arr[2*BUFF_LINE]; - mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; - DTYPE *adr_src, *sl, *sp, *sl1; - DTYPE *adr_dst, *dl, *dp; - mlib_d64 k0, k1, k2, k3, scalef = (1 << 24); - mlib_d64 p00, p01, p02, - p10, p11, p12; - mlib_s32 wid, hgt, sll, dll, wid1; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c; - LOAD_KERNEL_INTO_DOUBLE(); - GET_SRC_DST_PARAMETERS(DTYPE); - - wid1 = (wid + 1) &~ 1; - - if (wid1 > BUFF_LINE) { - pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffo = pbuff; - buff0 = buffo + wid1; - buff1 = buff0 + wid1; - buff2 = buff1 + wid1; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= D_KER; - hgt -= D_KER; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + D_KER; i++) { - buff0[i - 1] = (mlib_s32)sl[i*chan1]; - buff1[i - 1] = (mlib_s32)sl1[i*chan1]; - } - - sl += (D_KER + 1)*sll; - - for (j = 0; j < hgt; j++) { - sp = sl; - dp = dl; - - buff2[-1] = (mlib_s32)sp[0]; - sp += chan1; - - p02 = buff0[-1]; - p12 = buff1[-1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ - d64_2x32 sd0, sd1, dd; - - p00 = p02; p10 = p12; - - sd0.d64 = *(TYPE_64BIT*)(buff0 + i); - sd1.d64 = *(TYPE_64BIT*)(buff1 + i); - p01 = (mlib_d64)sd0.i32s.i0; - p02 = (mlib_d64)sd0.i32s.i1; - p11 = (mlib_d64)sd1.i32s.i0; - p12 = (mlib_d64)sd1.i32s.i1; - - LOAD_BUFF(buff2); - - dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); - dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31)); - *(TYPE_64BIT*)(buffo + i) = dd.d64; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; - STORE2(o64_1 >> 24, o64_2 >> 24); - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); - STORE2(o64 >> 56, o64 >> 24); - -#endif /* _NO_LONGLONG */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i - 1]; p10 = buff1[i - 1]; - p01 = buff0[i]; p11 = buff1[i]; - - buff2[i] = (mlib_s32)sp[0]; - - buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); - dp[0] = (buffo[i] >> 24); - - sp += chan1; - dp += chan1; - } - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - - if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -mlib_status mlib_c_conv2x2ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff_arr[4*BUFF_LINE]; - mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; - DTYPE *adr_src, *sl, *sp, *sl1; - DTYPE *adr_dst, *dl, *dp; - mlib_d64 k0, k1, k2, k3, scalef = (1 << 24); - mlib_d64 p00, p01, p02, - p10, p11, p12; - mlib_s32 wid, hgt, sll, dll, wid1; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c, swid; - LOAD_KERNEL_INTO_DOUBLE(); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + D_KER; - - wid1 = (swid + 1) &~ 1; - - if (wid1 > BUFF_LINE) { - pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffo = pbuff; - buff0 = buffo + wid1; - buff1 = buff0 + wid1; - buff2 = buff1 + wid1; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - swid -= dx_r; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((hgt - dy_b) > 0) sl1 = sl + sll; - else sl1 = sl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i - 1] = (mlib_s32)sl[i*chan1]; - buff1[i - 1] = (mlib_s32)sl1[i*chan1]; - } - - if (dx_r != 0) { - buff0[swid - 1] = buff0[swid - 2]; - buff1[swid - 1] = buff1[swid - 2]; - } - - if ((hgt - dy_b) > 1) sl = sl1 + sll; - else sl = sl1; - - for (j = 0; j < hgt; j++) { - sp = sl; - dp = dl; - - buff2[-1] = (mlib_s32)sp[0]; - sp += chan1; - - p02 = buff0[-1]; - p12 = buff1[-1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ - d64_2x32 sd0, sd1, dd; - - p00 = p02; p10 = p12; - - sd0.d64 = *(TYPE_64BIT*)(buff0 + i); - sd1.d64 = *(TYPE_64BIT*)(buff1 + i); - p01 = (mlib_d64)sd0.i32s.i0; - p02 = (mlib_d64)sd0.i32s.i1; - p11 = (mlib_d64)sd1.i32s.i0; - p12 = (mlib_d64)sd1.i32s.i1; - - LOAD_BUFF(buff2); - - dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); - dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31)); - *(TYPE_64BIT*)(buffo + i) = dd.d64; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; - STORE2(o64_1 >> 24, o64_2 >> 24); - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); - STORE2(o64 >> 56, o64 >> 24); - -#endif /* _NO_LONGLONG */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i - 1]; p10 = buff1[i - 1]; - p01 = buff0[i]; p11 = buff1[i]; - - buff2[i] = (mlib_s32)sp[0]; - - buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); - dp[0] = (buffo[i] >> 24); - - sp += chan1; - dp += chan1; - } - - if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2]; - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - - if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c Fri May 13 11:31:05 2016 +0300 @@ -80,9 +80,6 @@ #endif /* IMG_TYPE == 1 */ /***************************************************************/ -#define KSIZE1 (KSIZE - 1) - -/***************************************************************/ #define PARAM \ mlib_image *dst, \ const mlib_image *src, \ @@ -163,9 +160,6 @@ #endif /* _NO_LONGLONG */ /***************************************************************/ -#define MLIB_D2_24 16777216.0f - -/***************************************************************/ typedef union { mlib_d64 d64; struct { @@ -175,52 +169,6 @@ } d64_2x32; /***************************************************************/ -#define BUFF_LINE 256 - -/***************************************************************/ -#define DEF_VARS(type) \ - type *adr_src, *sl, *sp, *sl1; \ - type *adr_dst, *dl, *dp; \ - FTYPE *pbuff = buff; \ - mlib_s32 *buffi, *buffo; \ - mlib_s32 wid, hgt, sll, dll; \ - mlib_s32 nchannel, chan1, chan2; \ - mlib_s32 i, j, c, swid - -/***************************************************************/ -#define LOAD_KERNEL3() \ - FTYPE scalef = DSCALE; \ - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \ - FTYPE p00, p01, p02, p03, \ - p10, p11, p12, p13, \ - p20, p21, p22, p23; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \ - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8] - -/***************************************************************/ -#define LOAD_KERNEL(SIZE) \ - FTYPE scalef = DSCALE; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j] - -/***************************************************************/ #define GET_SRC_DST_PARAMETERS(type) \ hgt = mlib_ImageGetHeight(src); \ wid = mlib_ImageGetWidth(src); \ @@ -278,1334 +226,6 @@ #endif /* __sparc */ /***************************************************************/ -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3) -{ - FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT; - DEF_VARS(DTYPE); - DTYPE *sl2; -#ifndef __sparc - mlib_s32 d0, d1; -#endif /* __sparc */ - LOAD_KERNEL3(); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buffo = (mlib_s32*)(buff3 + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((hgt - dy_b) > 0) sl2 = sl1 + sll; - else sl2 = sl1; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl2 + sll; - else sl = sl2; - - for (j = 0; j < hgt; j++) { - FTYPE s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef __sparc -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - d64_2x32 dd; - - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff3[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff3[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - -#ifndef __sparc - - d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - -#else /* __sparc */ - - dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - *(FTYPE *)(buffo + i) = dd.d64; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64_1), FROM_S32(o64_2)); -#else - STORE2(o64_1 >> 24, o64_2 >> 24); -#endif /* IMG_TYPE != 1 */ - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64 >> 32), FROM_S32(o64)); -#else - STORE2(o64 >> 56, o64 >> 24); -#endif /* IMG_TYPE != 1 */ -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buffi[i] = (mlib_s32)sp[0]; - buff3[i + dx_l] = (FTYPE)buffi[i]; - -#ifndef __sparc - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - dp[0] = FROM_S32(d0); - -#else /* __sparc */ - - buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); -#if IMG_TYPE != 1 - dp[0] = FROM_S32(buffo[i]); -#else - dp[0] = buffo[i] >> 24; -#endif /* IMG_TYPE != 1 */ -#endif /* __sparc */ - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buffi[i] = (mlib_s32)sp[0]; - buff3[i + dx_l] = (FTYPE)buffi[i]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff3[i] = buff3[dx_l]; - for (i = 0; i < dx_r; i++) buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - -#ifdef __sparc -#if IMG_TYPE == 1 - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - -#endif /* IMG_TYPE == 1 */ -#endif /* __sparc */ - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(3x3) -{ - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2, *sp_1, *sp_2; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, delta_chan; - mlib_s32 i, j, c; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_s32 p02, p03, - p12, p13, - p22, p23; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - /* keep kernel in regs */ - k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1; - k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1; - k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - chan1 = nchannel; - chan2 = chan1 + chan1; - delta_chan = 0; - - if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan = chan1; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sp_1 = sl; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_2 = sl; - - if ((hgt - dy_b) > 0) sl += sll; - - for (j = 0; j < hgt; j++) { - mlib_s32 s0, s1; - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sp_1; - sp_1 = sp_2; - sp_2 = sl; - - sp1 = sp_1; - sp2 = sp_2; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[delta_chan]; - p13 = sp1[delta_chan]; - p23 = sp2[delta_chan]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += (chan1 + delta_chan); - sp1 += (chan1 + delta_chan); - sp2 += (chan1 + delta_chan); - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 + - p13 * k5 + p22 * k7 + p23 * k8) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - p02 = p03; p12 = p13; p22 = p23; - - for (; i < wid - dx_r; i++) { - p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0]; - pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0); - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - p02 = p03; p12 = p13; p22 = p23; - sp0 += chan1; - sp1 += chan1; - sp2 += chan1; - dp += chan1; - } - - sp0 -= chan1; - sp1 -= chan1; - sp2 -= chan1; - - for (; i < wid; i++) { - p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0]; - pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0); - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - p02 = p03; p12 = p13; p22 = p23; - dp += chan1; - } - - if (j < hgt - dy_b - 1) sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7; - FTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - DEF_VARS(DTYPE); - DTYPE *sl2, *sl3; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buff4 = buff3 + swid; - buffd = buff4 + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((hgt - dy_b) > 0) sl3 = sl2 + sll; - else sl3 = sl2; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - buff3[i] = (FTYPE)sl3[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - buff3[i + dx_l] = (FTYPE)sl3[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl3 + sll; - else sl = sl3; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff4[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff4[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i + dx_l] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buff4[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff4[i] = buff4[dx_l]; - for (i = 0; i < dx_r; i++) buff4[swid + dx_l + i] = buff4[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - FTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - DEF_VARS(DTYPE); - DTYPE *sl2, *sl3, *sl4; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buff4 = buff3 + swid; - buff5 = buff4 + swid; - buffd = buff5 + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll; - else sl3 = sl2; - - if ((hgt - dy_b) > 0) sl4 = sl3 + sll; - else sl4 = sl3; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - buff3[i] = (FTYPE)sl3[0]; - buff4[i] = (FTYPE)sl4[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - buff3[i + dx_l] = (FTYPE)sl3[i*chan1]; - buff4[i + dx_l] = (FTYPE)sl4[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - buff4[swid + dx_l + i] = buff4[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl4 + sll; - else sl = sl4; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - LOAD_BUFF(buffi); - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - dd.d64 = *(FTYPE *)(buffi + i); - buff5[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff5[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i + dx_l] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buff5[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff5[i] = buff5[dx_l]; - for (i = 0; i < dx_r; i++) buff5[swid + dx_l + i] = buff5[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(5x5) -{ - mlib_s32 buff[BUFF_LINE]; - mlib_s32 *buffd; - mlib_s32 k[KSIZE*KSIZE]; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_s32 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2, *sp3, *sp4; - DTYPE *sp_1, *sp_2, *sp_3, *sp_4; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 *pbuff = buff; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, chan4; - mlib_s32 delta_chan1, delta_chan2, delta_chan3; - mlib_s32 i, j, c; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc(sizeof(mlib_s32)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffd = pbuff; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan1 = chan1; - else delta_chan1 = 0; - - if ((2 > dx_l) && (2 < wid + KSIZE1 - dx_r)) delta_chan2 = delta_chan1 + chan1; - else delta_chan2 = delta_chan1; - - if ((3 > dx_l) && (3 < wid + KSIZE1 - dx_r)) delta_chan3 = delta_chan2 + chan1; - else delta_chan3 = delta_chan2; - - chan4 = chan1 + delta_chan3; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sp_1 = sl; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_2 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_3 = sl; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_4 = sl; - - if ((hgt - dy_b) > 0) sl += sll; - - for (j = 0; j < hgt; j++) { - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sp_1; - sp_1 = sp_2; - sp_2 = sp_3; - sp_3 = sp_4; - sp_4 = sl; - - sp1 = sp_1; - sp2 = sp_2; - sp3 = sp_3; - sp4 = sp_4; - - /* - * First loop - */ - - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[delta_chan1]; p13 = sp1[delta_chan1]; - p04 = sp0[delta_chan2]; p14 = sp1[delta_chan2]; - p05 = sp0[delta_chan3]; p15 = sp1[delta_chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - p11 = p12; p12 = p13; p13 = p14; p14 = p15; - - for (; i < wid - dx_r; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - - sp0 += chan1; - sp1 += chan1; - } - - sp0 -= chan1; - sp1 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = sp2[0]; p12 = sp3[0]; - p03 = sp2[delta_chan1]; p13 = sp3[delta_chan1]; - p04 = sp2[delta_chan2]; p14 = sp3[delta_chan2]; - p05 = sp2[delta_chan3]; p15 = sp3[delta_chan3]; - - sp2 += chan4; - sp3 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp2[0]; p14 = sp3[0]; - p05 = sp2[chan1]; p15 = sp3[chan1]; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp2 += chan2; - sp3 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - p11 = p12; p12 = p13; p13 = p14; p14 = p15; - - for (; i < wid - dx_r; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp2[0]; p14 = sp3[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - - sp2 += chan1; - sp3 += chan1; - } - - sp2 -= chan1; - sp3 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp2[0]; p14 = sp3[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = sp4[0]; - p03 = sp4[delta_chan1]; - p04 = sp4[delta_chan2]; - p05 = sp4[delta_chan3]; - - sp4 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp4[0]; p05 = sp4[chan1]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 + - p04 * k3 + p05 * k4) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - dp += chan2; - sp4 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - - for (; i < wid - dx_r; i++) { - p00 = p01; p01 = p02; p02 = p03; p03 = p04; - - p04 = sp4[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0); - - dp += chan1; - sp4 += chan1; - } - - sp4 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p01 = p02; p02 = p03; p03 = p04; - - p04 = sp4[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0); - - dp += chan1; - } - - /* next line */ - - if (j < hgt - dy_b - 1) sl += sll; - dl += dll; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#if IMG_TYPE == 1 - -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 l, m, buff_ind; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6; - FTYPE p0, p1, p2, p3, p4, p5, p6, p7; - DTYPE *sl2, *sl3, *sl4, *sl5, *sl6; - DEF_VARS(DTYPE); - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*swid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll; - else sl3 = sl2; - - if ((4 > dy_t) && (4 < hgt + KSIZE1 - dy_b)) sl4 = sl3 + sll; - else sl4 = sl3; - - if ((5 > dy_t) && (5 < hgt + KSIZE1 - dy_b)) sl5 = sl4 + sll; - else sl5 = sl4; - - if ((hgt - dy_b) > 0) sl6 = sl5 + sll; - else sl6 = sl5; - - for (i = 0; i < dx_l; i++) { - buffs[0][i] = (FTYPE)sl[0]; - buffs[1][i] = (FTYPE)sl1[0]; - buffs[2][i] = (FTYPE)sl2[0]; - buffs[3][i] = (FTYPE)sl3[0]; - buffs[4][i] = (FTYPE)sl4[0]; - buffs[5][i] = (FTYPE)sl5[0]; - buffs[6][i] = (FTYPE)sl6[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buffs[0][i + dx_l] = (FTYPE)sl[i*chan1]; - buffs[1][i + dx_l] = (FTYPE)sl1[i*chan1]; - buffs[2][i + dx_l] = (FTYPE)sl2[i*chan1]; - buffs[3][i + dx_l] = (FTYPE)sl3[i*chan1]; - buffs[4][i + dx_l] = (FTYPE)sl4[i*chan1]; - buffs[5][i + dx_l] = (FTYPE)sl5[i*chan1]; - buffs[6][i + dx_l] = (FTYPE)sl6[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buffs[0][swid + dx_l + i] = buffs[0][swid + dx_l - 1]; - buffs[1][swid + dx_l + i] = buffs[1][swid + dx_l - 1]; - buffs[2][swid + dx_l + i] = buffs[2][swid + dx_l - 1]; - buffs[3][swid + dx_l + i] = buffs[3][swid + dx_l - 1]; - buffs[4][swid + dx_l + i] = buffs[4][swid + dx_l - 1]; - buffs[5][swid + dx_l + i] = buffs[5][swid + dx_l - 1]; - buffs[6][swid + dx_l + i] = buffs[6][swid + dx_l - 1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - if ((hgt - dy_b) > 1) sl = sl6 + sll; - else sl = sl6; - - for (j = 0; j < hgt; j++) { - FTYPE **buffc = buffs + buff_ind; - FTYPE *buffn = buffc[KSIZE]; - FTYPE *pk = k; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l]; - d64_2x32 dd; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; - buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); - d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - FTYPE *pk = k, s = 0; - mlib_s32 d0; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - d0 = D2I(s); - dp[0] = FROM_S32(d0); - - buffn[i + dx_l] = (FTYPE)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buffn[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l]; - for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* IMG_TYPE == 1 */ - -/***************************************************************/ #define MAX_KER 7 #define MAX_N 15 #define BUFF_SIZE 1600 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c Fri May 13 11:31:05 2016 +0300 @@ -144,9 +144,6 @@ } d64_2x32; /***************************************************************/ -#define BUFF_LINE 256 - -/***************************************************************/ #define DEF_VARS(type) \ type *adr_src, *sl, *sp = NULL; \ type *adr_dst, *dl, *dp = NULL; \ @@ -156,39 +153,6 @@ mlib_s32 i, j, c /***************************************************************/ -#define LOAD_KERNEL3() \ - FTYPE scalef = DSCALE; \ - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \ - FTYPE p00, p01, p02, p03, \ - p10, p11, p12, p13, \ - p20, p21, p22, p23; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \ - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8] - -/***************************************************************/ -#define LOAD_KERNEL(SIZE) \ - FTYPE scalef = DSCALE; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j] - -/***************************************************************/ #define GET_SRC_DST_PARAMETERS(type) \ hgt = mlib_ImageGetHeight(src); \ wid = mlib_ImageGetWidth(src); \ @@ -246,1162 +210,6 @@ #endif /* __sparc */ /***************************************************************/ -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2; -#ifndef __sparc - mlib_s32 d0, d1; -#endif /* __sparc */ - LOAD_KERNEL3(); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buffo = (mlib_s32*)(buff3 + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - FTYPE s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef __sparc -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - d64_2x32 dd; - - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff3[i ] = (FTYPE)dd.i32s.i0; - buff3[i + 1] = (FTYPE)dd.i32s.i1; - -#ifndef __sparc - d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - -#else /* __sparc */ - - dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - *(FTYPE *)(buffo + i) = dd.d64; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64_1), FROM_S32(o64_2)); -#else - STORE2(o64_1 >> 24, o64_2 >> 24); -#endif /* IMG_TYPE != 1 */ - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64 >> 32), FROM_S32(o64)); -#else - STORE2(o64 >> 56, o64 >> 24); -#endif /* IMG_TYPE != 1 */ -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buffi[i] = (mlib_s32)sp[0]; - buff3[i] = (FTYPE)buffi[i]; - -#ifndef __sparc - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - dp[0] = FROM_S32(d0); - -#else /* __sparc */ - - buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); -#if IMG_TYPE != 1 - dp[0] = FROM_S32(buffo[i]); -#else - dp[0] = buffo[i] >> 24; -#endif /* IMG_TYPE != 1 */ -#endif /* __sparc */ - - sp += chan1; - dp += chan1; - } - - buffi[wid] = (mlib_s32)sp[0]; - buff3[wid] = (FTYPE)buffi[wid]; - buffi[wid + 1] = (mlib_s32)sp[chan1]; - buff3[wid + 1] = (FTYPE)buffi[wid + 1]; - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - -#ifdef __sparc -#if IMG_TYPE == 1 - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - -#endif /* IMG_TYPE == 1 */ -#endif /* __sparc */ - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_s32 p02, p03, - p12, p13, - p22, p23; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - /* keep kernel in regs */ - k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1; - k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1; - k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - mlib_s32 s0, s1; - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - sp2 = sp1 + sll; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[chan1]; - p13 = sp1[chan1]; - p23 = sp2[chan1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 + - p13 * k5 + p22 * k7 + p23 * k8) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - if (wid & 1) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0); - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7; - FTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2, *sl3; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buffd = buff4 + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - buff3[i] = (FTYPE)sl3[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff4[i ] = (FTYPE)dd.i32s.i0; - buff4[i + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - buff4[wid ] = (FTYPE)sp[0]; - buff4[wid + 1] = (FTYPE)sp[chan1]; - buff4[wid + 2] = (FTYPE)sp[chan2]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - FTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2, *sl3, *sl4; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buff5 = buff4 + wid; - buffd = buff5 + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - buff3[i] = (FTYPE)sl3[i*chan1]; - buff4[i] = (FTYPE)sl4[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - LOAD_BUFF(buffi); - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - p14 = buff3[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - dd.d64 = *(FTYPE *)(buffi + i); - buff5[i ] = (FTYPE)dd.i32s.i0; - buff5[i + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - sp = sl; - dp = dl; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - buff5[wid ] = (FTYPE)sp[0]; - buff5[wid + 1] = (FTYPE)sp[chan1]; - buff5[wid + 2] = (FTYPE)sp[chan2]; - buff5[wid + 3] = (FTYPE)sp[chan2 + chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_s32 buff[BUFF_LINE]; - mlib_s32 *buffd; - mlib_s32 k[KSIZE*KSIZE]; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_s32 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DTYPE *adr_src, *sl, *sp0, *sp1; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 *pbuff = buff; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, chan3, chan4; - mlib_s32 i, j, c; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc(sizeof(mlib_s32)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffd = pbuff; - - chan1 = nchannel; - chan2 = chan1 + chan1; - chan3 = chan2 + chan1; - chan4 = chan3 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - mlib_s32 pix0, pix1; - /* - * First loop - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - dp = dl; - sp0 = sl + 4*sll; - - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = sp0[0]; - p03 = sp0[chan1]; - p04 = sp0[chan2]; - p05 = sp0[chan3]; - - sp0 += chan2 + chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; p05 = sp0[chan1]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 + - p04 * k3 + p05 * k4) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - dp += chan2; - sp0 += chan2; - } - - if (wid & 1) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0); - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#if IMG_TYPE == 1 - -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 l, m, buff_ind; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6; - FTYPE p0, p1, p2, p3, p4, p5, p6, p7; - DTYPE *sl2, *sl3, *sl4, *sl5, *sl6; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; - sl5 = sl4 + sll; - sl6 = sl5 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buffs[0][i] = (FTYPE)sl[i*chan1]; - buffs[1][i] = (FTYPE)sl1[i*chan1]; - buffs[2][i] = (FTYPE)sl2[i*chan1]; - buffs[3][i] = (FTYPE)sl3[i*chan1]; - buffs[4][i] = (FTYPE)sl4[i*chan1]; - buffs[5][i] = (FTYPE)sl5[i*chan1]; - buffs[6][i] = (FTYPE)sl6[i*chan1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - FTYPE **buffc = buffs + buff_ind; - FTYPE *buffn = buffc[KSIZE]; - FTYPE *pk = k; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l]; - d64_2x32 dd; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buffn[i ] = (FTYPE)dd.i32s.i0; - buffn[i + 1] = (FTYPE)dd.i32s.i1; - - d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); - d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - FTYPE *pk = k, s = 0; - mlib_s32 d0; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - d0 = D2I(s); - dp[0] = FROM_S32(d0); - - buffn[i] = (FTYPE)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* IMG_TYPE == 1 */ - -/***************************************************************/ #define MAX_KER 7 #define MAX_N 15 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_32nw.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_32nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_32nw.c Fri May 13 11:31:05 2016 +0300 @@ -35,8 +35,6 @@ #include "mlib_ImageConv.h" /***************************************************************/ -#define BUFF_LINE 256 - #define CACHE_SIZE (64*1024) /***************************************************************/ @@ -83,837 +81,6 @@ mlib_s32 i, j, c /***************************************************************/ -#define CALC_SCALE() \ - scalef = 1.0; \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon) - -/***************************************************************/ -#undef KSIZE -#define KSIZE 2 - -mlib_status CONV_FUNC(2x2)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff[(KSIZE + 1)*BUFF_LINE]; - mlib_d64 k0, k1, k2, k3; - mlib_d64 p00, p01, p02, p03, - p10, p11, p12, p13; - mlib_d64 d2; - DEF_VARS(mlib_s32); - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - /* keep kernel in regs */ - CALC_SCALE(); - k0 = scalef * kern[0]; k1 = scalef * kern[1]; - k2 = scalef * kern[2]; k3 = scalef * kern[3]; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (mlib_d64)sl[i*chan1]; - buff1[i] = (mlib_d64)sl1[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - p03 = buff0[0]; - p13 = buff1[0]; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 3); i += 3) { - - p00 = p03; p10 = p13; - - p01 = buff0[i + 1]; p11 = buff1[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - - buff2[i ] = (mlib_d64)sp[0]; - buff2[i + 1] = (mlib_d64)sp[chan1]; - buff2[i + 2] = (mlib_d64)sp[chan2]; - - d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; - d1 = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3; - d2 = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3; - - CLAMP_S32(dp[0 ], d0); - CLAMP_S32(dp[chan1], d1); - CLAMP_S32(dp[chan2], d2); - - sp += chan3; - dp += chan3; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; - - buff2[i] = (mlib_d64)sp[0]; - - d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; - CLAMP_S32(dp[0], d0); - - sp += chan1; - dp += chan1; - } - - buff2[wid] = (mlib_d64)sp[0]; - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff[(KSIZE + 1)*BUFF_LINE], *buff3; - mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_d64 p00, p01, p02, p03, - p10, p11, p12, p13, - p20, p21, p22, p23; - mlib_s32 *sl2; - DEF_VARS(mlib_s32); - mlib_s32 chan2 = chan1 + chan1; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - CALC_SCALE(); - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8]; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (mlib_d64)sl[i*chan1]; - buff1[i] = (mlib_d64)sl1[i*chan1]; - buff2[i] = (mlib_d64)sl2[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - mlib_d64 s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - sp = sl; - dp = dl; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - buff3[i ] = (mlib_d64)sp[0]; - buff3[i + 1] = (mlib_d64)sp[chan1]; - - d0 = s0 + p02 * k2 + p12 * k5 + p22 * k8; - d1 = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8; - - CLAMP_S32(dp[0 ], d0); - CLAMP_S32(dp[chan1], d1); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buff3[i] = (mlib_d64)sp[0]; - - d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - CLAMP_S32(dp[0], d0); - - sp += chan1; - dp += chan1; - } - - buff3[wid ] = (mlib_d64)sp[0]; - buff3[wid + 1] = (mlib_d64)sp[chan1]; - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5; - mlib_d64 k[KSIZE*KSIZE]; - mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7; - mlib_d64 p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - mlib_s32 *sl2, *sl3; - DEF_VARS(mlib_s32); - mlib_s32 chan2 = chan1 + chan1; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buff5 = buff4 + wid; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - CALC_SCALE(); - for (j = 0; j < 16; j++) k[j] = scalef * kern[j]; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (mlib_d64)sl[i*chan1]; - buff1[i] = (mlib_d64)sl1[i*chan1]; - buff2[i] = (mlib_d64)sl2[i*chan1]; - buff3[i] = (mlib_d64)sl3[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - buff4[i] = (mlib_d64)sp[0]; - buff4[i + 1] = (mlib_d64)sp[chan1]; - - buff5[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buff5[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buff5[i]); - d1 = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buff5[i + 1]); - - CLAMP_S32(dp[0 ], d0); - CLAMP_S32(dp[chan1], d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i] = (mlib_d64)sp[0]; - - d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - CLAMP_S32(dp[0], d0); - - sp += chan1; - dp += chan1; - } - - buff4[wid ] = (mlib_d64)sp[0]; - buff4[wid + 1] = (mlib_d64)sp[chan1]; - buff4[wid + 2] = (mlib_d64)sp[chan2]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5, *buff6; - mlib_d64 k[KSIZE*KSIZE]; - mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_d64 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - mlib_s32 *sl2, *sl3, *sl4; - DEF_VARS(mlib_s32); - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buff5 = buff4 + wid; - buff6 = buff5 + wid; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - CALC_SCALE(); - for (j = 0; j < 25; j++) k[j] = scalef * kern[j]; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (mlib_d64)sl[i*chan1]; - buff1[i] = (mlib_d64)sl1[i*chan1]; - buff2[i] = (mlib_d64)sl2[i*chan1]; - buff3[i] = (mlib_d64)sl3[i*chan1]; - buff4[i] = (mlib_d64)sl4[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buff6[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buff6[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - buff6[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buff6[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - sp = sl; - dp = dl; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - buff5[i ] = (mlib_d64)sp[0]; - buff5[i + 1] = (mlib_d64)sp[chan1]; - - d0 = p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buff6[i]; - d1 = p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buff6[i + 1]; - - CLAMP_S32(dp[0 ], d0); - CLAMP_S32(dp[chan1], d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i] = (mlib_d64)sp[0]; - - d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - CLAMP_S32(dp[0], d0); - - sp += chan1; - dp += chan1; - } - - buff5[wid ] = (mlib_d64)sp[0]; - buff5[wid + 1] = (mlib_d64)sp[chan1]; - buff5[wid + 2] = (mlib_d64)sp[chan2]; - buff5[wid + 3] = (mlib_d64)sp[chan3]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - mlib_d64 k[KSIZE*KSIZE]; - mlib_d64 k0, k1, k2, k3, k4, k5, k6; - mlib_d64 p0, p1, p2, p3, p4, p5, p6, p7; - mlib_d64 d0, d1; - mlib_s32 l, m, buff_ind, *sl2, *sl3, *sl4, *sl5, *sl6; - mlib_d64 scalef; - DEF_VARS_MxN(mlib_s32); - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 *sl1; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + wid; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - CALC_SCALE(); - for (j = 0; j < 49; j++) k[j] = scalef * kern[j]; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; - sl5 = sl4 + sll; - sl6 = sl5 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buffs[0][i] = (mlib_d64)sl[i*chan1]; - buffs[1][i] = (mlib_d64)sl1[i*chan1]; - buffs[2][i] = (mlib_d64)sl2[i*chan1]; - buffs[3][i] = (mlib_d64)sl3[i*chan1]; - buffs[4][i] = (mlib_d64)sl4[i*chan1]; - buffs[5][i] = (mlib_d64)sl5[i*chan1]; - buffs[6][i] = (mlib_d64)sl6[i*chan1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind; - mlib_d64 *buffn = buffc[KSIZE]; - mlib_d64 *pk = k; - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buff = buffc[l]; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffn[i ] = (mlib_d64)sp[0]; - buffn[i + 1] = (mlib_d64)sp[chan1]; - - d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]; - d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]; - - CLAMP_S32(dp[0 ], d0); - CLAMP_S32(dp[chan1], d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - mlib_d64 *pk = k, s = 0; - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - CLAMP_S32(dp[0], s); - - buffn[i] = (mlib_d64)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ #define FTYPE mlib_d64 #define DTYPE mlib_s32 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8ext.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8ext.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8ext.c Fri May 13 11:31:05 2016 +0300 @@ -80,9 +80,6 @@ #endif /* IMG_TYPE == 1 */ /***************************************************************/ -#define KSIZE1 (KSIZE - 1) - -/***************************************************************/ #define PARAM \ mlib_image *dst, \ const mlib_image *src, \ @@ -126,21 +123,6 @@ #define D2I(x) CLAMP_S32((x) SAT_OFF) /***************************************************************/ -#ifdef _LITTLE_ENDIAN - -#define STORE2(res0, res1) \ - dp[0 ] = res1; \ - dp[chan1] = res0 - -#else - -#define STORE2(res0, res1) \ - dp[0 ] = res0; \ - dp[chan1] = res1 - -#endif /* _LITTLE_ENDIAN */ - -/***************************************************************/ #ifdef _NO_LONGLONG #define LOAD_BUFF(buff) \ @@ -163,9 +145,6 @@ #endif /* _NO_LONGLONG */ /***************************************************************/ -#define MLIB_D2_24 16777216.0f - -/***************************************************************/ typedef union { mlib_d64 d64; struct { @@ -175,52 +154,6 @@ } d64_2x32; /***************************************************************/ -#define BUFF_LINE 256 - -/***************************************************************/ -#define DEF_VARS(type) \ - type *adr_src, *sl, *sp, *sl1; \ - type *adr_dst, *dl, *dp; \ - FTYPE *pbuff = buff; \ - mlib_s32 *buffi, *buffo; \ - mlib_s32 wid, hgt, sll, dll; \ - mlib_s32 nchannel, chan1, chan2; \ - mlib_s32 i, j, c, swid - -/***************************************************************/ -#define LOAD_KERNEL3() \ - FTYPE scalef = DSCALE; \ - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \ - FTYPE p00, p01, p02, p03, \ - p10, p11, p12, p13, \ - p20, p21, p22, p23; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \ - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8] - -/***************************************************************/ -#define LOAD_KERNEL(SIZE) \ - FTYPE scalef = DSCALE; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j] - -/***************************************************************/ #define GET_SRC_DST_PARAMETERS(type) \ hgt = mlib_ImageGetHeight(src); \ wid = mlib_ImageGetWidth(src); \ @@ -278,1334 +211,6 @@ #endif /* __sparc */ /***************************************************************/ -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3) -{ - FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT; - DEF_VARS(DTYPE); - DTYPE *sl2; -#ifndef __sparc - mlib_s32 d0, d1; -#endif /* __sparc */ - LOAD_KERNEL3(); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buffo = (mlib_s32*)(buff3 + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((hgt - dy_b) > 0) sl2 = sl1 + sll; - else sl2 = sl1; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl2 + sll; - else sl = sl2; - - for (j = 0; j < hgt; j++) { - FTYPE s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef __sparc -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - d64_2x32 dd; - - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff3[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff3[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - -#ifndef __sparc - - d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - -#else /* __sparc */ - - dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - *(FTYPE *)(buffo + i) = dd.d64; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64_1), FROM_S32(o64_2)); -#else - STORE2(o64_1 >> 24, o64_2 >> 24); -#endif /* IMG_TYPE != 1 */ - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64 >> 32), FROM_S32(o64)); -#else - STORE2(o64 >> 56, o64 >> 24); -#endif /* IMG_TYPE != 1 */ -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buffi[i] = (mlib_s32)sp[0]; - buff3[i + dx_l] = (FTYPE)buffi[i]; - -#ifndef __sparc - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - dp[0] = FROM_S32(d0); - -#else /* __sparc */ - - buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); -#if IMG_TYPE != 1 - dp[0] = FROM_S32(buffo[i]); -#else - dp[0] = buffo[i] >> 24; -#endif /* IMG_TYPE != 1 */ -#endif /* __sparc */ - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buffi[i] = (mlib_s32)sp[0]; - buff3[i + dx_l] = (FTYPE)buffi[i]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff3[i] = buff3[dx_l]; - for (i = 0; i < dx_r; i++) buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - -#ifdef __sparc -#if IMG_TYPE == 1 - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - -#endif /* IMG_TYPE == 1 */ -#endif /* __sparc */ - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(3x3) -{ - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2, *sp_1, *sp_2; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, delta_chan; - mlib_s32 i, j, c; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_s32 p02, p03, - p12, p13, - p22, p23; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - /* keep kernel in regs */ - k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1; - k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1; - k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - chan1 = nchannel; - chan2 = chan1 + chan1; - delta_chan = 0; - - if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan = chan1; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sp_1 = sl; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_2 = sl; - - if ((hgt - dy_b) > 0) sl += sll; - - for (j = 0; j < hgt; j++) { - mlib_s32 s0, s1; - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sp_1; - sp_1 = sp_2; - sp_2 = sl; - - sp1 = sp_1; - sp2 = sp_2; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[delta_chan]; - p13 = sp1[delta_chan]; - p23 = sp2[delta_chan]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += (chan1 + delta_chan); - sp1 += (chan1 + delta_chan); - sp2 += (chan1 + delta_chan); - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 + - p13 * k5 + p22 * k7 + p23 * k8) >> shift2; - - CLAMP_STORE(dp[0], pix0) - CLAMP_STORE(dp[chan1], pix1) - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - p02 = p03; p12 = p13; p22 = p23; - - for (; i < wid - dx_r; i++) { - p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0]; - pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0) - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - p02 = p03; p12 = p13; p22 = p23; - sp0 += chan1; - sp1 += chan1; - sp2 += chan1; - dp += chan1; - } - - sp0 -= chan1; - sp1 -= chan1; - sp2 -= chan1; - - for (; i < wid; i++) { - p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0]; - pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0) - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - p02 = p03; p12 = p13; p22 = p23; - dp += chan1; - } - - if (j < hgt - dy_b - 1) sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7; - FTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - DEF_VARS(DTYPE); - DTYPE *sl2, *sl3; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buff4 = buff3 + swid; - buffd = buff4 + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((hgt - dy_b) > 0) sl3 = sl2 + sll; - else sl3 = sl2; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - buff3[i] = (FTYPE)sl3[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - buff3[i + dx_l] = (FTYPE)sl3[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl3 + sll; - else sl = sl3; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff4[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff4[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i + dx_l] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buff4[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff4[i] = buff4[dx_l]; - for (i = 0; i < dx_r; i++) buff4[swid + dx_l + i] = buff4[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - FTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - DEF_VARS(DTYPE); - DTYPE *sl2, *sl3, *sl4; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buff4 = buff3 + swid; - buff5 = buff4 + swid; - buffd = buff5 + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll; - else sl3 = sl2; - - if ((hgt - dy_b) > 0) sl4 = sl3 + sll; - else sl4 = sl3; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - buff3[i] = (FTYPE)sl3[0]; - buff4[i] = (FTYPE)sl4[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - buff3[i + dx_l] = (FTYPE)sl3[i*chan1]; - buff4[i + dx_l] = (FTYPE)sl4[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - buff4[swid + dx_l + i] = buff4[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl4 + sll; - else sl = sl4; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - LOAD_BUFF(buffi); - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - dd.d64 = *(FTYPE *)(buffi + i); - buff5[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff5[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i + dx_l] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buff5[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff5[i] = buff5[dx_l]; - for (i = 0; i < dx_r; i++) buff5[swid + dx_l + i] = buff5[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(5x5) -{ - mlib_s32 buff[BUFF_LINE]; - mlib_s32 *buffd; - mlib_s32 k[KSIZE*KSIZE]; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_s32 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2, *sp3, *sp4; - DTYPE *sp_1, *sp_2, *sp_3, *sp_4; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 *pbuff = buff; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, chan4; - mlib_s32 delta_chan1, delta_chan2, delta_chan3; - mlib_s32 i, j, c; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc(sizeof(mlib_s32)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffd = pbuff; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan1 = chan1; - else delta_chan1 = 0; - - if ((2 > dx_l) && (2 < wid + KSIZE1 - dx_r)) delta_chan2 = delta_chan1 + chan1; - else delta_chan2 = delta_chan1; - - if ((3 > dx_l) && (3 < wid + KSIZE1 - dx_r)) delta_chan3 = delta_chan2 + chan1; - else delta_chan3 = delta_chan2; - - chan4 = chan1 + delta_chan3; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sp_1 = sl; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_2 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_3 = sl; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_4 = sl; - - if ((hgt - dy_b) > 0) sl += sll; - - for (j = 0; j < hgt; j++) { - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sp_1; - sp_1 = sp_2; - sp_2 = sp_3; - sp_3 = sp_4; - sp_4 = sl; - - sp1 = sp_1; - sp2 = sp_2; - sp3 = sp_3; - sp4 = sp_4; - - /* - * First loop - */ - - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[delta_chan1]; p13 = sp1[delta_chan1]; - p04 = sp0[delta_chan2]; p14 = sp1[delta_chan2]; - p05 = sp0[delta_chan3]; p15 = sp1[delta_chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - p11 = p12; p12 = p13; p13 = p14; p14 = p15; - - for (; i < wid - dx_r; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - - sp0 += chan1; - sp1 += chan1; - } - - sp0 -= chan1; - sp1 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = sp2[0]; p12 = sp3[0]; - p03 = sp2[delta_chan1]; p13 = sp3[delta_chan1]; - p04 = sp2[delta_chan2]; p14 = sp3[delta_chan2]; - p05 = sp2[delta_chan3]; p15 = sp3[delta_chan3]; - - sp2 += chan4; - sp3 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp2[0]; p14 = sp3[0]; - p05 = sp2[chan1]; p15 = sp3[chan1]; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp2 += chan2; - sp3 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - p11 = p12; p12 = p13; p13 = p14; p14 = p15; - - for (; i < wid - dx_r; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp2[0]; p14 = sp3[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - - sp2 += chan1; - sp3 += chan1; - } - - sp2 -= chan1; - sp3 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp2[0]; p14 = sp3[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = sp4[0]; - p03 = sp4[delta_chan1]; - p04 = sp4[delta_chan2]; - p05 = sp4[delta_chan3]; - - sp4 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp4[0]; p05 = sp4[chan1]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 + - p04 * k3 + p05 * k4) >> shift2; - - CLAMP_STORE(dp[0], pix0) - CLAMP_STORE(dp[chan1], pix1) - - dp += chan2; - sp4 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - - for (; i < wid - dx_r; i++) { - p00 = p01; p01 = p02; p02 = p03; p03 = p04; - - p04 = sp4[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0) - - dp += chan1; - sp4 += chan1; - } - - sp4 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p01 = p02; p02 = p03; p03 = p04; - - p04 = sp4[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0) - - dp += chan1; - } - - /* next line */ - - if (j < hgt - dy_b - 1) sl += sll; - dl += dll; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#if IMG_TYPE == 1 - -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 l, m, buff_ind; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6; - FTYPE p0, p1, p2, p3, p4, p5, p6, p7; - DTYPE *sl2, *sl3, *sl4, *sl5, *sl6; - DEF_VARS(DTYPE); - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*swid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll; - else sl3 = sl2; - - if ((4 > dy_t) && (4 < hgt + KSIZE1 - dy_b)) sl4 = sl3 + sll; - else sl4 = sl3; - - if ((5 > dy_t) && (5 < hgt + KSIZE1 - dy_b)) sl5 = sl4 + sll; - else sl5 = sl4; - - if ((hgt - dy_b) > 0) sl6 = sl5 + sll; - else sl6 = sl5; - - for (i = 0; i < dx_l; i++) { - buffs[0][i] = (FTYPE)sl[0]; - buffs[1][i] = (FTYPE)sl1[0]; - buffs[2][i] = (FTYPE)sl2[0]; - buffs[3][i] = (FTYPE)sl3[0]; - buffs[4][i] = (FTYPE)sl4[0]; - buffs[5][i] = (FTYPE)sl5[0]; - buffs[6][i] = (FTYPE)sl6[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buffs[0][i + dx_l] = (FTYPE)sl[i*chan1]; - buffs[1][i + dx_l] = (FTYPE)sl1[i*chan1]; - buffs[2][i + dx_l] = (FTYPE)sl2[i*chan1]; - buffs[3][i + dx_l] = (FTYPE)sl3[i*chan1]; - buffs[4][i + dx_l] = (FTYPE)sl4[i*chan1]; - buffs[5][i + dx_l] = (FTYPE)sl5[i*chan1]; - buffs[6][i + dx_l] = (FTYPE)sl6[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buffs[0][swid + dx_l + i] = buffs[0][swid + dx_l - 1]; - buffs[1][swid + dx_l + i] = buffs[1][swid + dx_l - 1]; - buffs[2][swid + dx_l + i] = buffs[2][swid + dx_l - 1]; - buffs[3][swid + dx_l + i] = buffs[3][swid + dx_l - 1]; - buffs[4][swid + dx_l + i] = buffs[4][swid + dx_l - 1]; - buffs[5][swid + dx_l + i] = buffs[5][swid + dx_l - 1]; - buffs[6][swid + dx_l + i] = buffs[6][swid + dx_l - 1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - if ((hgt - dy_b) > 1) sl = sl6 + sll; - else sl = sl6; - - for (j = 0; j < hgt; j++) { - FTYPE **buffc = buffs + buff_ind; - FTYPE *buffn = buffc[KSIZE]; - FTYPE *pk = k; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l]; - d64_2x32 dd; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; - buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); - d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - FTYPE *pk = k, s = 0; - mlib_s32 d0; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - d0 = D2I(s); - dp[0] = FROM_S32(d0); - - buffn[i + dx_l] = (FTYPE)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buffn[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l]; - for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* IMG_TYPE == 1 */ - -/***************************************************************/ #define MAX_KER 7 #define MAX_N 15 #define BUFF_SIZE 1600 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c Fri May 13 11:31:05 2016 +0300 @@ -145,9 +145,6 @@ } d64_2x32; /***************************************************************/ -#define BUFF_LINE 256 - -/***************************************************************/ #define DEF_VARS(type) \ type *adr_src, *sl, *sp = NULL; \ type *adr_dst, *dl, *dp = NULL; \ @@ -157,39 +154,6 @@ mlib_s32 i, j, c /***************************************************************/ -#define LOAD_KERNEL3() \ - FTYPE scalef = DSCALE; \ - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \ - FTYPE p00, p01, p02, p03, \ - p10, p11, p12, p13, \ - p20, p21, p22, p23; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \ - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8] - -/***************************************************************/ -#define LOAD_KERNEL(SIZE) \ - FTYPE scalef = DSCALE; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j] - -/***************************************************************/ #define GET_SRC_DST_PARAMETERS(type) \ hgt = mlib_ImageGetHeight(src); \ wid = mlib_ImageGetWidth(src); \ @@ -247,1162 +211,6 @@ #endif /* __sparc */ /***************************************************************/ -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2; -#ifndef __sparc - mlib_s32 d0, d1; -#endif /* __sparc */ - LOAD_KERNEL3(); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buffo = (mlib_s32*)(buff3 + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - FTYPE s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef __sparc -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - d64_2x32 dd; - - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff3[i ] = (FTYPE)dd.i32s.i0; - buff3[i + 1] = (FTYPE)dd.i32s.i1; - -#ifndef __sparc - d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - -#else /* __sparc */ - - dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - *(FTYPE *)(buffo + i) = dd.d64; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64_1), FROM_S32(o64_2)); -#else - STORE2(o64_1 >> 24, o64_2 >> 24); -#endif /* IMG_TYPE != 1 */ - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64 >> 32), FROM_S32(o64)); -#else - STORE2(o64 >> 56, o64 >> 24); -#endif /* IMG_TYPE != 1 */ -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buffi[i] = (mlib_s32)sp[0]; - buff3[i] = (FTYPE)buffi[i]; - -#ifndef __sparc - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - dp[0] = FROM_S32(d0); - -#else /* __sparc */ - - buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); -#if IMG_TYPE != 1 - dp[0] = FROM_S32(buffo[i]); -#else - dp[0] = buffo[i] >> 24; -#endif /* IMG_TYPE != 1 */ -#endif /* __sparc */ - - sp += chan1; - dp += chan1; - } - - buffi[wid] = (mlib_s32)sp[0]; - buff3[wid] = (FTYPE)buffi[wid]; - buffi[wid + 1] = (mlib_s32)sp[chan1]; - buff3[wid + 1] = (FTYPE)buffi[wid + 1]; - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - -#ifdef __sparc -#if IMG_TYPE == 1 - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - -#endif /* IMG_TYPE == 1 */ -#endif /* __sparc */ - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_s32 p02, p03, - p12, p13, - p22, p23; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - /* keep kernel in regs */ - k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1; - k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1; - k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - mlib_s32 s0, s1; - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - sp2 = sp1 + sll; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[chan1]; - p13 = sp1[chan1]; - p23 = sp2[chan1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 + - p13 * k5 + p22 * k7 + p23 * k8) >> shift2; - - CLAMP_STORE(dp[0], pix0) - CLAMP_STORE(dp[chan1], pix1) - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - if (wid & 1) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0) - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7; - FTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2, *sl3; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buffd = buff4 + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - buff3[i] = (FTYPE)sl3[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff4[i ] = (FTYPE)dd.i32s.i0; - buff4[i + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - buff4[wid ] = (FTYPE)sp[0]; - buff4[wid + 1] = (FTYPE)sp[chan1]; - buff4[wid + 2] = (FTYPE)sp[chan2]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - FTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2, *sl3, *sl4; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buff5 = buff4 + wid; - buffd = buff5 + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - buff3[i] = (FTYPE)sl3[i*chan1]; - buff4[i] = (FTYPE)sl4[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - LOAD_BUFF(buffi); - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - p14 = buff3[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - dd.d64 = *(FTYPE *)(buffi + i); - buff5[i ] = (FTYPE)dd.i32s.i0; - buff5[i + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - sp = sl; - dp = dl; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - buff5[wid ] = (FTYPE)sp[0]; - buff5[wid + 1] = (FTYPE)sp[chan1]; - buff5[wid + 2] = (FTYPE)sp[chan2]; - buff5[wid + 3] = (FTYPE)sp[chan2 + chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_s32 buff[BUFF_LINE]; - mlib_s32 *buffd; - mlib_s32 k[KSIZE*KSIZE]; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_s32 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DTYPE *adr_src, *sl, *sp0, *sp1; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 *pbuff = buff; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, chan3, chan4; - mlib_s32 i, j, c; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc(sizeof(mlib_s32)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffd = pbuff; - - chan1 = nchannel; - chan2 = chan1 + chan1; - chan3 = chan2 + chan1; - chan4 = chan3 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - mlib_s32 pix0, pix1; - /* - * First loop - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - dp = dl; - sp0 = sl + 4*sll; - - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = sp0[0]; - p03 = sp0[chan1]; - p04 = sp0[chan2]; - p05 = sp0[chan3]; - - sp0 += chan2 + chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; p05 = sp0[chan1]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 + - p04 * k3 + p05 * k4) >> shift2; - - CLAMP_STORE(dp[0], pix0) - CLAMP_STORE(dp[chan1], pix1) - - dp += chan2; - sp0 += chan2; - } - - if (wid & 1) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0) - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#if IMG_TYPE == 1 - -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 l, m, buff_ind; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6; - FTYPE p0, p1, p2, p3, p4, p5, p6, p7; - DTYPE *sl2, *sl3, *sl4, *sl5, *sl6; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; - sl5 = sl4 + sll; - sl6 = sl5 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buffs[0][i] = (FTYPE)sl[i*chan1]; - buffs[1][i] = (FTYPE)sl1[i*chan1]; - buffs[2][i] = (FTYPE)sl2[i*chan1]; - buffs[3][i] = (FTYPE)sl3[i*chan1]; - buffs[4][i] = (FTYPE)sl4[i*chan1]; - buffs[5][i] = (FTYPE)sl5[i*chan1]; - buffs[6][i] = (FTYPE)sl6[i*chan1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - FTYPE **buffc = buffs + buff_ind; - FTYPE *buffn = buffc[KSIZE]; - FTYPE *pk = k; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l]; - d64_2x32 dd; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buffn[i ] = (FTYPE)dd.i32s.i0; - buffn[i + 1] = (FTYPE)dd.i32s.i1; - - d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); - d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - FTYPE *pk = k, s = 0; - mlib_s32 d0; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - d0 = D2I(s); - dp[0] = FROM_S32(d0); - - buffn[i] = (FTYPE)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* IMG_TYPE == 1 */ - -/***************************************************************/ #define MAX_KER 7 #define MAX_N 15 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_D64nw.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_D64nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_D64nw.c Fri May 13 11:31:05 2016 +0300 @@ -75,495 +75,6 @@ mlib_s32 i = 0, j, c /***************************************************************/ -#undef KSIZE -#define KSIZE 2 - -mlib_status CONV_FUNC(2x2)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask) -{ - DEF_VARS(DTYPE); - DTYPE *sp0, *sp1; - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - mlib_s32 chan4 = chan3 + chan1; - DTYPE k0, k1, k2, k3; - DTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14; - - /* keep kernel in regs */ - k0 = (DTYPE)kern[0]; k1 = (DTYPE)kern[1]; - k2 = (DTYPE)kern[2]; k3 = (DTYPE)kern[3]; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - dl = adr_dst + c; - sl = adr_src + c; - - for (j = 0; j < hgt; j++) { - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - - p04 = sp0[0]; - p14 = sp1[0]; - - sp0 += chan1; - sp1 += chan1; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 4); i += 4) { - p00 = p04; p10 = p14; - - p01 = sp0[0]; p11 = sp1[0]; - p02 = sp0[chan1]; p12 = sp1[chan1]; - p03 = sp0[chan2]; p13 = sp1[chan2]; - p04 = sp0[chan3]; p14 = sp1[chan3]; - - dp[0 ] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; - dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3; - dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3; - dp[chan3] = p03 * k0 + p04 * k1 + p13 * k2 + p14 * k3; - - dp += chan4; - sp0 += chan4; - sp1 += chan4; - } - - if (i < wid) { - p00 = p04; p10 = p14; - p01 = sp0[0]; p11 = sp1[0]; - dp[0] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; - - if ((i + 1) < wid) { - p02 = sp0[chan1]; p12 = sp1[chan1]; - dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3; - - if ((i + 2) < wid) { - p03 = sp0[chan2]; p13 = sp1[chan2]; - dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3; - } - } - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask) -{ - DEF_VARS(DTYPE); - mlib_s32 chan2 = chan1 + chan1; - DTYPE *sp0, *sp1; - DTYPE *sp2; - DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; - DTYPE p02, p03, p12, p13, p22, p23; - - /* keep kernel in regs */ - k0 = (DTYPE)kern[0]; k1 = (DTYPE)kern[1]; k2 = (DTYPE)kern[2]; - k3 = (DTYPE)kern[3]; k4 = (DTYPE)kern[4]; k5 = (DTYPE)kern[5]; - k6 = (DTYPE)kern[6]; k7 = (DTYPE)kern[7]; k8 = (DTYPE)kern[8]; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - DTYPE s0, s1; - - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - sp2 = sp1 + sll; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[chan1]; - p13 = sp1[chan1]; - p23 = sp2[chan1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - dp[0 ] = s0 + p02 * k2 + p12 * k5 + p22 * k8; - dp[chan1] = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - if (wid & 1) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - dp[0] = s0 + p02 * k2 + p12 * k5 + p22 * k8; - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *k, - mlib_s32 cmask) -{ - DTYPE k0, k1, k2, k3, k4, k5, k6, k7; - DTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14; - DEF_VARS(DTYPE); - DTYPE *sp0, *sp1; - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - /* - * First loop on two first lines of kernel - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; - k4 = (DTYPE)k[4]; k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - - sp0 += chan3; - sp1 += chan3; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - p03 = sp0[0]; p13 = sp1[0]; - p04 = sp0[chan1]; p14 = sp1[chan1]; - - dp[0 ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = sp0[0]; p13 = sp1[0]; - - dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - } - - /* - * Second loop on two last lines of kernel - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[ 8]; k1 = (DTYPE)k[ 9]; k2 = (DTYPE)k[10]; k3 = (DTYPE)k[11]; - k4 = (DTYPE)k[12]; k5 = (DTYPE)k[13]; k6 = (DTYPE)k[14]; k7 = (DTYPE)k[15]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - - sp0 += chan3; - sp1 += chan3; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - p03 = sp0[0]; p13 = sp1[0]; - p04 = sp0[chan1]; p14 = sp1[chan1]; - - dp[0 ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = sp0[0]; p13 = sp1[0]; - - dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *k, - mlib_s32 cmask) -{ - DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - DTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DEF_VARS(DTYPE); - DTYPE *sp0, *sp1; - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - mlib_s32 chan4 = chan3 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - /* - * First loop - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; k4 = (DTYPE)k[4]; - k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; k8 = (DTYPE)k[8]; k9 = (DTYPE)k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - dp[ 0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[10]; k1 = (DTYPE)k[11]; k2 = (DTYPE)k[12]; k3 = (DTYPE)k[13]; k4 = (DTYPE)k[14]; - k5 = (DTYPE)k[15]; k6 = (DTYPE)k[16]; k7 = (DTYPE)k[17]; k8 = (DTYPE)k[18]; k9 = (DTYPE)k[19]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - dp[ 0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - dp = dl; - sp0 = sl + 4*sll; - - k0 = (DTYPE)k[20]; k1 = (DTYPE)k[21]; k2 = (DTYPE)k[22]; k3 = (DTYPE)k[23]; k4 = (DTYPE)k[24]; - - p02 = sp0[0]; - p03 = sp0[chan1]; - p04 = sp0[chan2]; - p05 = sp0[chan3]; - - sp0 += chan2 + chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; p05 = sp0[chan1]; - - dp[0 ] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4; - dp[chan1] += p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4; - - dp += chan2; - sp0 += chan2; - } - - if (wid & 1) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; - - dp[0] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4; - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ #define BUFF_SIZE 1600 #define CACHE_SIZE (64*1024) diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c Fri May 13 11:31:05 2016 +0300 @@ -75,495 +75,6 @@ mlib_s32 i, j, c /***************************************************************/ -#undef KSIZE -#define KSIZE 2 - -mlib_status CONV_FUNC(2x2)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask) -{ - DEF_VARS(DTYPE); - DTYPE *sp0, *sp1; - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - mlib_s32 chan4 = chan3 + chan1; - DTYPE k0, k1, k2, k3; - DTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14; - - /* keep kernel in regs */ - k0 = (DTYPE)kern[0]; k1 = (DTYPE)kern[1]; - k2 = (DTYPE)kern[2]; k3 = (DTYPE)kern[3]; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - dl = adr_dst + c; - sl = adr_src + c; - - for (j = 0; j < hgt; j++) { - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - - p04 = sp0[0]; - p14 = sp1[0]; - - sp0 += chan1; - sp1 += chan1; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 4); i += 4) { - p00 = p04; p10 = p14; - - p01 = sp0[0]; p11 = sp1[0]; - p02 = sp0[chan1]; p12 = sp1[chan1]; - p03 = sp0[chan2]; p13 = sp1[chan2]; - p04 = sp0[chan3]; p14 = sp1[chan3]; - - dp[0 ] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; - dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3; - dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3; - dp[chan3] = p03 * k0 + p04 * k1 + p13 * k2 + p14 * k3; - - dp += chan4; - sp0 += chan4; - sp1 += chan4; - } - - if (i < wid) { - p00 = p04; p10 = p14; - p01 = sp0[0]; p11 = sp1[0]; - dp[0] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; - - if ((i + 1) < wid) { - p02 = sp0[chan1]; p12 = sp1[chan1]; - dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3; - - if ((i + 2) < wid) { - p03 = sp0[chan2]; p13 = sp1[chan2]; - dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3; - } - } - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *kern, - mlib_s32 cmask) -{ - DEF_VARS(DTYPE); - mlib_s32 chan2 = chan1 + chan1; - DTYPE *sp0, *sp1; - DTYPE *sp2; - DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; - DTYPE p02, p03, p12, p13, p22, p23; - - /* keep kernel in regs */ - k0 = (DTYPE)kern[0]; k1 = (DTYPE)kern[1]; k2 = (DTYPE)kern[2]; - k3 = (DTYPE)kern[3]; k4 = (DTYPE)kern[4]; k5 = (DTYPE)kern[5]; - k6 = (DTYPE)kern[6]; k7 = (DTYPE)kern[7]; k8 = (DTYPE)kern[8]; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - DTYPE s0, s1; - - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - sp2 = sp1 + sll; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[chan1]; - p13 = sp1[chan1]; - p23 = sp2[chan1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - dp[0 ] = s0 + p02 * k2 + p12 * k5 + p22 * k8; - dp[chan1] = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - if (wid & 1) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - dp[0] = s0 + p02 * k2 + p12 * k5 + p22 * k8; - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *k, - mlib_s32 cmask) -{ - DTYPE k0, k1, k2, k3, k4, k5, k6, k7; - DTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14; - DEF_VARS(DTYPE); - DTYPE *sp0, *sp1; - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - /* - * First loop on two first lines of kernel - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; - k4 = (DTYPE)k[4]; k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - - sp0 += chan3; - sp1 += chan3; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - p03 = sp0[0]; p13 = sp1[0]; - p04 = sp0[chan1]; p14 = sp1[chan1]; - - dp[0 ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = sp0[0]; p13 = sp1[0]; - - dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - } - - /* - * Second loop on two last lines of kernel - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[ 8]; k1 = (DTYPE)k[ 9]; k2 = (DTYPE)k[10]; k3 = (DTYPE)k[11]; - k4 = (DTYPE)k[12]; k5 = (DTYPE)k[13]; k6 = (DTYPE)k[14]; k7 = (DTYPE)k[15]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - - sp0 += chan3; - sp1 += chan3; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - p03 = sp0[0]; p13 = sp1[0]; - p04 = sp0[chan1]; p14 = sp1[chan1]; - - dp[0 ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = sp0[0]; p13 = sp1[0]; - - dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *k, - mlib_s32 cmask) -{ - DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - DTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DEF_VARS(DTYPE); - DTYPE *sp0, *sp1; - mlib_s32 chan2 = chan1 + chan1; - mlib_s32 chan3 = chan1 + chan2; - mlib_s32 chan4 = chan3 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - /* - * First loop - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; k4 = (DTYPE)k[4]; - k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; k8 = (DTYPE)k[8]; k9 = (DTYPE)k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - dp[ 0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = (DTYPE)k[10]; k1 = (DTYPE)k[11]; k2 = (DTYPE)k[12]; k3 = (DTYPE)k[13]; k4 = (DTYPE)k[14]; - k5 = (DTYPE)k[15]; k6 = (DTYPE)k[16]; k7 = (DTYPE)k[17]; k8 = (DTYPE)k[18]; k9 = (DTYPE)k[19]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - dp[ 0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - dp = dl; - sp0 = sl + 4*sll; - - k0 = (DTYPE)k[20]; k1 = (DTYPE)k[21]; k2 = (DTYPE)k[22]; k3 = (DTYPE)k[23]; k4 = (DTYPE)k[24]; - - p02 = sp0[0]; - p03 = sp0[chan1]; - p04 = sp0[chan2]; - p05 = sp0[chan3]; - - sp0 += chan2 + chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; p05 = sp0[chan1]; - - dp[0 ] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4; - dp[chan1] += p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4; - - dp += chan2; - sp0 += chan2; - } - - if (wid & 1) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; - - dp[0] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4; - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ #define BUFF_SIZE 1600 #define CACHE_SIZE (64*1024) diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16ext.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16ext.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16ext.c Fri May 13 11:31:05 2016 +0300 @@ -80,9 +80,6 @@ #endif /* IMG_TYPE == 1 */ /***************************************************************/ -#define KSIZE1 (KSIZE - 1) - -/***************************************************************/ #define PARAM \ mlib_image *dst, \ const mlib_image *src, \ @@ -163,9 +160,6 @@ #endif /* _NO_LONGLONG */ /***************************************************************/ -#define MLIB_D2_24 16777216.0f - -/***************************************************************/ typedef union { mlib_d64 d64; struct { @@ -175,9 +169,6 @@ } d64_2x32; /***************************************************************/ -#define BUFF_LINE 256 - -/***************************************************************/ #define DEF_VARS(type) \ type *adr_src, *sl, *sp, *sl1; \ type *adr_dst, *dl, *dp; \ @@ -188,39 +179,6 @@ mlib_s32 i, j, c, swid /***************************************************************/ -#define LOAD_KERNEL3() \ - FTYPE scalef = DSCALE; \ - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \ - FTYPE p00, p01, p02, p03, \ - p10, p11, p12, p13, \ - p20, p21, p22, p23; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \ - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8] - -/***************************************************************/ -#define LOAD_KERNEL(SIZE) \ - FTYPE scalef = DSCALE; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j] - -/***************************************************************/ #define GET_SRC_DST_PARAMETERS(type) \ hgt = mlib_ImageGetHeight(src); \ wid = mlib_ImageGetWidth(src); \ @@ -278,1334 +236,6 @@ #endif /* __sparc */ /***************************************************************/ -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3) -{ - FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT; - DEF_VARS(DTYPE); - DTYPE *sl2; -#ifndef __sparc - mlib_s32 d0, d1; -#endif /* __sparc */ - LOAD_KERNEL3(); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buffo = (mlib_s32*)(buff3 + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((hgt - dy_b) > 0) sl2 = sl1 + sll; - else sl2 = sl1; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl2 + sll; - else sl = sl2; - - for (j = 0; j < hgt; j++) { - FTYPE s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef __sparc -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - d64_2x32 dd; - - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff3[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff3[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - -#ifndef __sparc - - d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - -#else /* __sparc */ - - dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - *(FTYPE *)(buffo + i) = dd.d64; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64_1), FROM_S32(o64_2)); -#else - STORE2(o64_1 >> 24, o64_2 >> 24); -#endif /* IMG_TYPE != 1 */ - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64 >> 32), FROM_S32(o64)); -#else - STORE2(o64 >> 56, o64 >> 24); -#endif /* IMG_TYPE != 1 */ -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buffi[i] = (mlib_s32)sp[0]; - buff3[i + dx_l] = (FTYPE)buffi[i]; - -#ifndef __sparc - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - dp[0] = FROM_S32(d0); - -#else /* __sparc */ - - buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); -#if IMG_TYPE != 1 - dp[0] = FROM_S32(buffo[i]); -#else - dp[0] = buffo[i] >> 24; -#endif /* IMG_TYPE != 1 */ -#endif /* __sparc */ - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buffi[i] = (mlib_s32)sp[0]; - buff3[i + dx_l] = (FTYPE)buffi[i]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff3[i] = buff3[dx_l]; - for (i = 0; i < dx_r; i++) buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - -#ifdef __sparc -#if IMG_TYPE == 1 - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - -#endif /* IMG_TYPE == 1 */ -#endif /* __sparc */ - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(3x3) -{ - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2, *sp_1, *sp_2; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, delta_chan; - mlib_s32 i, j, c; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_s32 p02, p03, - p12, p13, - p22, p23; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - /* keep kernel in regs */ - k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1; - k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1; - k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - chan1 = nchannel; - chan2 = chan1 + chan1; - delta_chan = 0; - - if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan = chan1; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sp_1 = sl; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_2 = sl; - - if ((hgt - dy_b) > 0) sl += sll; - - for (j = 0; j < hgt; j++) { - mlib_s32 s0, s1; - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sp_1; - sp_1 = sp_2; - sp_2 = sl; - - sp1 = sp_1; - sp2 = sp_2; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[delta_chan]; - p13 = sp1[delta_chan]; - p23 = sp2[delta_chan]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += (chan1 + delta_chan); - sp1 += (chan1 + delta_chan); - sp2 += (chan1 + delta_chan); - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 + - p13 * k5 + p22 * k7 + p23 * k8) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - p02 = p03; p12 = p13; p22 = p23; - - for (; i < wid - dx_r; i++) { - p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0]; - pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0); - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - p02 = p03; p12 = p13; p22 = p23; - sp0 += chan1; - sp1 += chan1; - sp2 += chan1; - dp += chan1; - } - - sp0 -= chan1; - sp1 -= chan1; - sp2 -= chan1; - - for (; i < wid; i++) { - p03 = sp0[0]; p13 = sp1[0]; p23 = sp2[0]; - pix0 = (s0 + p03 * k2 + p13 * k5 + p23 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0); - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - p02 = p03; p12 = p13; p22 = p23; - dp += chan1; - } - - if (j < hgt - dy_b - 1) sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7; - FTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - DEF_VARS(DTYPE); - DTYPE *sl2, *sl3; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buff4 = buff3 + swid; - buffd = buff4 + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((hgt - dy_b) > 0) sl3 = sl2 + sll; - else sl3 = sl2; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - buff3[i] = (FTYPE)sl3[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - buff3[i + dx_l] = (FTYPE)sl3[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl3 + sll; - else sl = sl3; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff4[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff4[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i + dx_l] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buff4[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff4[i] = buff4[dx_l]; - for (i = 0; i < dx_r; i++) buff4[swid + dx_l + i] = buff4[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - FTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - DEF_VARS(DTYPE); - DTYPE *sl2, *sl3, *sl4; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (swid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*swid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + swid; - buff2 = buff1 + swid; - buff3 = buff2 + swid; - buff4 = buff3 + swid; - buff5 = buff4 + swid; - buffd = buff5 + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll; - else sl3 = sl2; - - if ((hgt - dy_b) > 0) sl4 = sl3 + sll; - else sl4 = sl3; - - for (i = 0; i < dx_l; i++) { - buff0[i] = (FTYPE)sl[0]; - buff1[i] = (FTYPE)sl1[0]; - buff2[i] = (FTYPE)sl2[0]; - buff3[i] = (FTYPE)sl3[0]; - buff4[i] = (FTYPE)sl4[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buff0[i + dx_l] = (FTYPE)sl[i*chan1]; - buff1[i + dx_l] = (FTYPE)sl1[i*chan1]; - buff2[i + dx_l] = (FTYPE)sl2[i*chan1]; - buff3[i + dx_l] = (FTYPE)sl3[i*chan1]; - buff4[i + dx_l] = (FTYPE)sl4[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buff0[swid + dx_l + i] = buff0[swid + dx_l - 1]; - buff1[swid + dx_l + i] = buff1[swid + dx_l - 1]; - buff2[swid + dx_l + i] = buff2[swid + dx_l - 1]; - buff3[swid + dx_l + i] = buff3[swid + dx_l - 1]; - buff4[swid + dx_l + i] = buff4[swid + dx_l - 1]; - } - - if ((hgt - dy_b) > 1) sl = sl4 + sll; - else sl = sl4; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - LOAD_BUFF(buffi); - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - dd.d64 = *(FTYPE *)(buffi + i); - buff5[i + dx_l ] = (FTYPE)dd.i32s.i0; - buff5[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i + dx_l] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buff5[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buff5[i] = buff5[dx_l]; - for (i = 0; i < dx_r; i++) buff5[swid + dx_l + i] = buff5[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(5x5) -{ - mlib_s32 buff[BUFF_LINE]; - mlib_s32 *buffd; - mlib_s32 k[KSIZE*KSIZE]; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_s32 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2, *sp3, *sp4; - DTYPE *sp_1, *sp_2, *sp_3, *sp_4; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 *pbuff = buff; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, chan4; - mlib_s32 delta_chan1, delta_chan2, delta_chan3; - mlib_s32 i, j, c; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc(sizeof(mlib_s32)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffd = pbuff; - - chan1 = nchannel; - chan2 = chan1 + chan1; - - if ((1 > dx_l) && (1 < wid + KSIZE1 - dx_r)) delta_chan1 = chan1; - else delta_chan1 = 0; - - if ((2 > dx_l) && (2 < wid + KSIZE1 - dx_r)) delta_chan2 = delta_chan1 + chan1; - else delta_chan2 = delta_chan1; - - if ((3 > dx_l) && (3 < wid + KSIZE1 - dx_r)) delta_chan3 = delta_chan2 + chan1; - else delta_chan3 = delta_chan2; - - chan4 = chan1 + delta_chan3; - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sp_1 = sl; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_2 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_3 = sl; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl += sll; - sp_4 = sl; - - if ((hgt - dy_b) > 0) sl += sll; - - for (j = 0; j < hgt; j++) { - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sp_1; - sp_1 = sp_2; - sp_2 = sp_3; - sp_3 = sp_4; - sp_4 = sl; - - sp1 = sp_1; - sp2 = sp_2; - sp3 = sp_3; - sp4 = sp_4; - - /* - * First loop - */ - - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[delta_chan1]; p13 = sp1[delta_chan1]; - p04 = sp0[delta_chan2]; p14 = sp1[delta_chan2]; - p05 = sp0[delta_chan3]; p15 = sp1[delta_chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - p11 = p12; p12 = p13; p13 = p14; p14 = p15; - - for (; i < wid - dx_r; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - - sp0 += chan1; - sp1 += chan1; - } - - sp0 -= chan1; - sp1 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = sp2[0]; p12 = sp3[0]; - p03 = sp2[delta_chan1]; p13 = sp3[delta_chan1]; - p04 = sp2[delta_chan2]; p14 = sp3[delta_chan2]; - p05 = sp2[delta_chan3]; p15 = sp3[delta_chan3]; - - sp2 += chan4; - sp3 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp2[0]; p14 = sp3[0]; - p05 = sp2[chan1]; p15 = sp3[chan1]; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp2 += chan2; - sp3 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - p11 = p12; p12 = p13; p13 = p14; p14 = p15; - - for (; i < wid - dx_r; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp2[0]; p14 = sp3[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - - sp2 += chan1; - sp3 += chan1; - } - - sp2 -= chan1; - sp3 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p10 = p11; - p01 = p02; p11 = p12; - p02 = p03; p12 = p13; - p03 = p04; p13 = p14; - - p04 = sp2[0]; p14 = sp3[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = sp4[0]; - p03 = sp4[delta_chan1]; - p04 = sp4[delta_chan2]; - p05 = sp4[delta_chan3]; - - sp4 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - dx_r - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp4[0]; p05 = sp4[chan1]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 + - p04 * k3 + p05 * k4) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - dp += chan2; - sp4 += chan2; - } - - p01 = p02; p02 = p03; p03 = p04; p04 = p05; - - for (; i < wid - dx_r; i++) { - p00 = p01; p01 = p02; p02 = p03; p03 = p04; - - p04 = sp4[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0); - - dp += chan1; - sp4 += chan1; - } - - sp4 -= chan1; - - for (; i < wid; i++) { - p00 = p01; p01 = p02; p02 = p03; p03 = p04; - - p04 = sp4[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0); - - dp += chan1; - } - - /* next line */ - - if (j < hgt - dy_b - 1) sl += sll; - dl += dll; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#if IMG_TYPE == 1 - -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 l, m, buff_ind; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6; - FTYPE p0, p1, p2, p3, p4, p5, p6, p7; - DTYPE *sl2, *sl3, *sl4, *sl5, *sl6; - DEF_VARS(DTYPE); - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - swid = wid + KSIZE1; - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE )*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*swid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + swid; - buffo = (mlib_s32*)(buffd + swid); - buffi = buffo + (swid &~ 1); - - swid -= (dx_l + dx_r); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - if ((1 > dy_t) && (1 < hgt + KSIZE1 - dy_b)) sl1 = sl + sll; - else sl1 = sl; - - if ((2 > dy_t) && (2 < hgt + KSIZE1 - dy_b)) sl2 = sl1 + sll; - else sl2 = sl1; - - if ((3 > dy_t) && (3 < hgt + KSIZE1 - dy_b)) sl3 = sl2 + sll; - else sl3 = sl2; - - if ((4 > dy_t) && (4 < hgt + KSIZE1 - dy_b)) sl4 = sl3 + sll; - else sl4 = sl3; - - if ((5 > dy_t) && (5 < hgt + KSIZE1 - dy_b)) sl5 = sl4 + sll; - else sl5 = sl4; - - if ((hgt - dy_b) > 0) sl6 = sl5 + sll; - else sl6 = sl5; - - for (i = 0; i < dx_l; i++) { - buffs[0][i] = (FTYPE)sl[0]; - buffs[1][i] = (FTYPE)sl1[0]; - buffs[2][i] = (FTYPE)sl2[0]; - buffs[3][i] = (FTYPE)sl3[0]; - buffs[4][i] = (FTYPE)sl4[0]; - buffs[5][i] = (FTYPE)sl5[0]; - buffs[6][i] = (FTYPE)sl6[0]; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < swid; i++) { - buffs[0][i + dx_l] = (FTYPE)sl[i*chan1]; - buffs[1][i + dx_l] = (FTYPE)sl1[i*chan1]; - buffs[2][i + dx_l] = (FTYPE)sl2[i*chan1]; - buffs[3][i + dx_l] = (FTYPE)sl3[i*chan1]; - buffs[4][i + dx_l] = (FTYPE)sl4[i*chan1]; - buffs[5][i + dx_l] = (FTYPE)sl5[i*chan1]; - buffs[6][i + dx_l] = (FTYPE)sl6[i*chan1]; - } - - for (i = 0; i < dx_r; i++) { - buffs[0][swid + dx_l + i] = buffs[0][swid + dx_l - 1]; - buffs[1][swid + dx_l + i] = buffs[1][swid + dx_l - 1]; - buffs[2][swid + dx_l + i] = buffs[2][swid + dx_l - 1]; - buffs[3][swid + dx_l + i] = buffs[3][swid + dx_l - 1]; - buffs[4][swid + dx_l + i] = buffs[4][swid + dx_l - 1]; - buffs[5][swid + dx_l + i] = buffs[5][swid + dx_l - 1]; - buffs[6][swid + dx_l + i] = buffs[6][swid + dx_l - 1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - if ((hgt - dy_b) > 1) sl = sl6 + sll; - else sl = sl6; - - for (j = 0; j < hgt; j++) { - FTYPE **buffc = buffs + buff_ind; - FTYPE *buffn = buffc[KSIZE]; - FTYPE *pk = k; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l]; - d64_2x32 dd; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; - buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; - - d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); - d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - FTYPE *pk = k, s = 0; - mlib_s32 d0; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - d0 = D2I(s); - dp[0] = FROM_S32(d0); - - buffn[i + dx_l] = (FTYPE)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (; i < swid; i++) { - buffn[i + dx_l] = (FTYPE)sp[0]; - sp += chan1; - } - - for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l]; - for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1]; - - /* next line */ - - if (j < hgt - dy_b - 2) sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* IMG_TYPE == 1 */ - -/***************************************************************/ #define MAX_KER 7 #define MAX_N 15 #define BUFF_SIZE 1600 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16nw.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_u16nw.c Fri May 13 11:31:05 2016 +0300 @@ -144,9 +144,6 @@ } d64_2x32; /***************************************************************/ -#define BUFF_LINE 256 - -/***************************************************************/ #define DEF_VARS(type) \ type *adr_src, *sl, *sp = NULL; \ type *adr_dst, *dl, *dp = NULL; \ @@ -156,39 +153,6 @@ mlib_s32 i, j, c /***************************************************************/ -#define LOAD_KERNEL3() \ - FTYPE scalef = DSCALE; \ - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \ - FTYPE p00, p01, p02, p03, \ - p10, p11, p12, p13, \ - p20, p21, p22, p23; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - /* keep kernel in regs */ \ - k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ - k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \ - k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8] - -/***************************************************************/ -#define LOAD_KERNEL(SIZE) \ - FTYPE scalef = DSCALE; \ - \ - while (scalef_expon > 30) { \ - scalef /= (1 << 30); \ - scalef_expon -= 30; \ - } \ - \ - scalef /= (1 << scalef_expon); \ - \ - for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j] - -/***************************************************************/ #define GET_SRC_DST_PARAMETERS(type) \ hgt = mlib_ImageGetHeight(src); \ wid = mlib_ImageGetWidth(src); \ @@ -246,1162 +210,6 @@ #endif /* __sparc */ /***************************************************************/ -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2; -#ifndef __sparc - mlib_s32 d0, d1; -#endif /* __sparc */ - LOAD_KERNEL3(); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buffo = (mlib_s32*)(buff3 + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - FTYPE s0, s1; - - p02 = buff0[0]; - p12 = buff1[0]; - p22 = buff2[0]; - - p03 = buff0[1]; - p13 = buff1[1]; - p23 = buff2[1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp = sl; - dp = dl; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { -#ifdef __sparc -#ifdef _NO_LONGLONG - mlib_s32 o64_1, o64_2; -#else /* _NO_LONGLONG */ - mlib_s64 o64; -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - d64_2x32 dd; - - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff3[i ] = (FTYPE)dd.i32s.i0; - buff3[i + 1] = (FTYPE)dd.i32s.i1; - -#ifndef __sparc - d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - -#else /* __sparc */ - - dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8); - dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8); - *(FTYPE *)(buffo + i) = dd.d64; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - -#ifdef _NO_LONGLONG - - o64_1 = buffo[i]; - o64_2 = buffo[i+1]; -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64_1), FROM_S32(o64_2)); -#else - STORE2(o64_1 >> 24, o64_2 >> 24); -#endif /* IMG_TYPE != 1 */ - -#else /* _NO_LONGLONG */ - - o64 = *(mlib_s64*)(buffo + i); -#if IMG_TYPE != 1 - STORE2(FROM_S32(o64 >> 32), FROM_S32(o64)); -#else - STORE2(o64 >> 56, o64 >> 24); -#endif /* IMG_TYPE != 1 */ -#endif /* _NO_LONGLONG */ -#endif /* __sparc */ - - sp += chan2; - dp += chan2; - } - - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; - - buffi[i] = (mlib_s32)sp[0]; - buff3[i] = (FTYPE)buffi[i]; - -#ifndef __sparc - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); - - dp[0] = FROM_S32(d0); - -#else /* __sparc */ - - buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + - p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); -#if IMG_TYPE != 1 - dp[0] = FROM_S32(buffo[i]); -#else - dp[0] = buffo[i] >> 24; -#endif /* IMG_TYPE != 1 */ -#endif /* __sparc */ - - sp += chan1; - dp += chan1; - } - - buffi[wid] = (mlib_s32)sp[0]; - buff3[wid] = (FTYPE)buffi[wid]; - buffi[wid + 1] = (mlib_s32)sp[chan1]; - buff3[wid + 1] = (FTYPE)buffi[wid + 1]; - - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buffT; - } - } - -#ifdef __sparc -#if IMG_TYPE == 1 - { - mlib_s32 amask = (1 << nchannel) - 1; - - if ((cmask & amask) != amask) { - mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); - } else { - mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); - } - } - -#endif /* IMG_TYPE == 1 */ -#endif /* __sparc */ - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(3x3)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - DTYPE *adr_src, *sl, *sp0, *sp1, *sp2; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2; - mlib_s32 i, j, c; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8; - mlib_s32 p02, p03, - p12, p13, - p22, p23; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - /* keep kernel in regs */ - k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1; - k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1; - k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - mlib_s32 s0, s1; - mlib_s32 pix0, pix1; - - dp = dl; - sp0 = sl; - sp1 = sp0 + sll; - sp2 = sp1 + sll; - - p02 = sp0[0]; - p12 = sp1[0]; - p22 = sp2[0]; - - p03 = sp0[chan1]; - p13 = sp1[chan1]; - p23 = sp2[chan1]; - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1]; - - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 + - p13 * k5 + p22 * k7 + p23 * k8) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; - s1 = p03 * k0 + p13 * k3 + p23 * k6; - - sp0 += chan2; - sp1 += chan2; - sp2 += chan2; - dp += chan2; - } - - if (wid & 1) { - p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0]; - pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2; - CLAMP_STORE(dp[0], pix0); - } - - sl += sll; - dl += dll; - } - } - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#undef KSIZE -#define KSIZE 4 - -mlib_status CONV_FUNC(4x4)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7; - FTYPE p00, p01, p02, p03, p04, - p10, p11, p12, p13, p14, - p20, p21, p22, p23, - p30, p31, p32, p33; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2, *sl3; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buffd = buff4 + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - buff3[i] = (FTYPE)sl3[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop on two first lines of kernel - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; - k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff1[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buff4[i ] = (FTYPE)dd.i32s.i0; - buff4[i + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop on two last lines of kernel - */ - k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; - k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + - p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + - p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - - buff4[i] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + - p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + - p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + - p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - buff4[wid ] = (FTYPE)sp[0]; - buff4[wid + 1] = (FTYPE)sp[chan1]; - buff4[wid + 2] = (FTYPE)sp[chan2]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 5 - -mlib_status CONV_FUNC(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE]; - FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - FTYPE p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15, - p20, p21, p22, p23, p24, - p30, p31, p32, p33, p34, - p40, p41, p42, p43, p44; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - DTYPE *sl2, *sl3, *sl4; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buff0 = pbuff; - buff1 = buff0 + wid; - buff2 = buff1 + wid; - buff3 = buff2 + wid; - buff4 = buff3 + wid; - buff5 = buff4 + wid; - buffd = buff5 + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buff0[i] = (FTYPE)sl[i*chan1]; - buff1[i] = (FTYPE)sl1[i*chan1]; - buff2[i] = (FTYPE)sl2[i*chan1]; - buff3[i] = (FTYPE)sl3[i*chan1]; - buff4[i] = (FTYPE)sl4[i*chan1]; - } - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - d64_2x32 dd; - - /* - * First loop - */ - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - sp = sl; - dp = dl; - - p02 = buff0[0]; - p12 = buff1[0]; - p03 = buff0[1]; - p13 = buff1[1]; - p04 = buff0[2]; - p14 = buff1[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - - LOAD_BUFF(buffi); - - p03 = buff0[i + 3]; p13 = buff1[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; - p05 = buff0[i + 5]; p15 = buff1[i + 5]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * Second loop - */ - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - sp = sl; - dp = dl; - - p02 = buff2[0]; - p12 = buff3[0]; - p03 = buff2[1]; - p13 = buff3[1]; - p04 = buff2[2]; - p14 = buff3[2]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - - p02 = buff2[i + 2]; p12 = buff3[i + 2]; - p03 = buff2[i + 3]; p13 = buff3[i + 3]; - p04 = buff2[i + 4]; p14 = buff3[i + 4]; - p05 = buff2[i + 5]; p15 = buff3[i + 5]; - - dd.d64 = *(FTYPE *)(buffi + i); - buff5[i ] = (FTYPE)dd.i32s.i0; - buff5[i + 1] = (FTYPE)dd.i32s.i1; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp += chan2; - dp += chan2; - } - - /* - * 3 loop - */ - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - sp = sl; - dp = dl; - - p02 = buff4[0]; - p03 = buff4[1]; - p04 = buff4[2]; - p05 = buff4[3]; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = buff4[i + 4]; p05 = buff4[i + 5]; - - d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]); - d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - sp += chan2; - dp += chan2; - } - - /* last pixels */ - for (; i < wid; i++) { - p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; - p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; - p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; - p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; - p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; - - p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; - p43 = buff4[i + 3]; p44 = buff4[i + 4]; - - buff5[i] = (FTYPE)sp[0]; - - buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + - p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + - p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + - p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + - p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); - - dp[0] = FROM_S32(buffo[i]); - - sp += chan1; - dp += chan1; - } - - buff5[wid ] = (FTYPE)sp[0]; - buff5[wid + 1] = (FTYPE)sp[chan1]; - buff5[wid + 2] = (FTYPE)sp[chan2]; - buff5[wid + 3] = (FTYPE)sp[chan2 + chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buffT = buff0; - buff0 = buff1; - buff1 = buff2; - buff2 = buff3; - buff3 = buff4; - buff4 = buff5; - buff5 = buffT; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#ifndef __sparc /* for x86, using integer multiplies is faster */ - -mlib_status CONV_FUNC_I(5x5)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - mlib_s32 buff[BUFF_LINE]; - mlib_s32 *buffd; - mlib_s32 k[KSIZE*KSIZE]; - mlib_s32 shift1, shift2; - mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - mlib_s32 p00, p01, p02, p03, p04, p05, - p10, p11, p12, p13, p14, p15; - DTYPE *adr_src, *sl, *sp0, *sp1; - DTYPE *adr_dst, *dl, *dp; - mlib_s32 *pbuff = buff; - mlib_s32 wid, hgt, sll, dll; - mlib_s32 nchannel, chan1, chan2, chan3, chan4; - mlib_s32 i, j, c; - -#if IMG_TYPE != 1 - shift1 = 16; -#else - shift1 = 8; -#endif /* IMG_TYPE != 1 */ - - shift2 = scalef_expon - shift1; - - for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1; - - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc(sizeof(mlib_s32)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - buffd = pbuff; - - chan1 = nchannel; - chan2 = chan1 + chan1; - chan3 = chan2 + chan1; - chan4 = chan3 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < chan1; c++) { - if (!(cmask & (1 << (chan1 - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - for (j = 0; j < hgt; j++) { - mlib_s32 pix0, pix1; - /* - * First loop - */ - sp0 = sl; - sp1 = sp0 + sll; - dp = dl; - - k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; - k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * Second loop - */ - sp0 = sl + 2*sll; - sp1 = sp0 + sll; - dp = dl; - - k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; - k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; - - p02 = sp0[0]; p12 = sp1[0]; - p03 = sp0[chan1]; p13 = sp1[chan1]; - p04 = sp0[chan2]; p14 = sp1[chan2]; - p05 = sp0[chan3]; p15 = sp1[chan3]; - - sp0 += chan4; - sp1 += chan4; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - p05 = sp0[chan1]; p15 = sp1[chan1]; - - buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + - p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); - - sp0 += chan2; - sp1 += chan2; - dp += chan2; - } - - if (wid & 1) { - p00 = p02; p10 = p12; - p01 = p03; p11 = p13; - p02 = p04; p12 = p14; - p03 = p05; p13 = p15; - - p04 = sp0[0]; p14 = sp1[0]; - - buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + - p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); - } - - /* - * 3 loop - */ - dp = dl; - sp0 = sl + 4*sll; - - k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; - - p02 = sp0[0]; - p03 = sp0[chan1]; - p04 = sp0[chan2]; - p05 = sp0[chan3]; - - sp0 += chan2 + chan2; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; p05 = sp0[chan1]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 + - p04 * k3 + p05 * k4) >> shift2; - - CLAMP_STORE(dp[0], pix0); - CLAMP_STORE(dp[chan1], pix1); - - dp += chan2; - sp0 += chan2; - } - - if (wid & 1) { - p00 = p02; p01 = p03; p02 = p04; p03 = p05; - - p04 = sp0[0]; - - pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 + - p03 * k3 + p04 * k4) >> shift2; - CLAMP_STORE(dp[0], pix0); - } - - /* next line */ - sl += sll; - dl += dll; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* __sparc ( for x86, using integer multiplies is faster ) */ - -/***************************************************************/ -#if IMG_TYPE == 1 - -#undef KSIZE -#define KSIZE 7 - -mlib_status CONV_FUNC(7x7)(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scalef_expon, - mlib_s32 cmask) -{ - FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; - FTYPE k[KSIZE*KSIZE]; - mlib_s32 l, m, buff_ind; - mlib_s32 d0, d1; - FTYPE k0, k1, k2, k3, k4, k5, k6; - FTYPE p0, p1, p2, p3, p4, p5, p6, p7; - DTYPE *sl2, *sl3, *sl4, *sl5, *sl6; - DEF_VARS(DTYPE); - DTYPE *sl1; - mlib_s32 chan2; - mlib_s32 *buffo, *buffi; - LOAD_KERNEL(KSIZE*KSIZE); - GET_SRC_DST_PARAMETERS(DTYPE); - - if (wid > BUFF_LINE) { - pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid; - for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; - buffd = buffs[KSIZE] + wid; - buffo = (mlib_s32*)(buffd + wid); - buffi = buffo + (wid &~ 1); - - chan1 = nchannel; - chan2 = chan1 + chan1; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - - adr_dst += ((KSIZE - 1)/2)*(dll + chan1); - - for (c = 0; c < nchannel; c++) { - if (!(cmask & (1 << (nchannel - 1 - c)))) continue; - - sl = adr_src + c; - dl = adr_dst + c; - - sl1 = sl + sll; - sl2 = sl1 + sll; - sl3 = sl2 + sll; - sl4 = sl3 + sll; - sl5 = sl4 + sll; - sl6 = sl5 + sll; -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid + (KSIZE - 1); i++) { - buffs[0][i] = (FTYPE)sl[i*chan1]; - buffs[1][i] = (FTYPE)sl1[i*chan1]; - buffs[2][i] = (FTYPE)sl2[i*chan1]; - buffs[3][i] = (FTYPE)sl3[i*chan1]; - buffs[4][i] = (FTYPE)sl4[i*chan1]; - buffs[5][i] = (FTYPE)sl5[i*chan1]; - buffs[6][i] = (FTYPE)sl6[i*chan1]; - } - - buff_ind = 0; - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i < wid; i++) buffd[i] = 0.0; - - sl += KSIZE*sll; - - for (j = 0; j < hgt; j++) { - FTYPE **buffc = buffs + buff_ind; - FTYPE *buffn = buffc[KSIZE]; - FTYPE *pk = k; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l]; - d64_2x32 dd; - - sp = sl; - dp = dl; - - p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; - p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; - - k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; - k4 = *pk++; k5 = *pk++; k6 = *pk++; - - if (l < (KSIZE - 1)) { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; - buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; - } - - } else { -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (i = 0; i <= (wid - 2); i += 2) { - p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; - - p6 = buff[i + 6]; p7 = buff[i + 7]; - - LOAD_BUFF(buffi); - - dd.d64 = *(FTYPE *)(buffi + i); - buffn[i ] = (FTYPE)dd.i32s.i0; - buffn[i + 1] = (FTYPE)dd.i32s.i1; - - d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); - d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); - - dp[0 ] = FROM_S32(d0); - dp[chan1] = FROM_S32(d1); - - buffd[i ] = 0.0; - buffd[i + 1] = 0.0; - - sp += chan2; - dp += chan2; - } - } - } - - /* last pixels */ - for (; i < wid; i++) { - FTYPE *pk = k, s = 0; - mlib_s32 d0; - - for (l = 0; l < KSIZE; l++) { - FTYPE *buff = buffc[l] + i; - - for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); - } - - d0 = D2I(s); - dp[0] = FROM_S32(d0); - - buffn[i] = (FTYPE)sp[0]; - - sp += chan1; - dp += chan1; - } - - for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1]; - - /* next line */ - sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= KSIZE + 1) buff_ind = 0; - } - } - - if (pbuff != buff) mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -#endif /* IMG_TYPE == 1 */ - -/***************************************************************/ #define MAX_KER 7 #define MAX_N 15 diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BC.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BC.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,413 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -#include "mlib_image.h" -#include "mlib_ImageAffine.h" -#include "mlib_ImageColormap.h" - -/***************************************************************/ -#define MLIB_LIMIT 512 -#define MLIB_SHIFT 16 -#define MLIB_PREC (1 << MLIB_SHIFT) -#define MLIB_MASK (MLIB_PREC - 1) - -/***************************************************************/ -#define DTYPE MLIB_TYPE - -/***************************************************************/ -#define DECLAREVAR_IND() \ - DECLAREVAR0(); \ - mlib_s32 *warp_tbl = param -> warp_tbl; \ - mlib_s32 xSrc, ySrc; \ - mlib_s32 srcYStride = param -> srcYStride; \ - mlib_s32 max_xsize = param -> max_xsize; \ - mlib_filter filter = param -> filter; \ - MLIB_TYPE *sp, *dl; \ - mlib_d64 xf0, xf1, xf2, xf3; \ - mlib_d64 yf0, yf1, yf2, yf3; \ - mlib_d64 c0, c1, c2, c3, val0; \ - mlib_s32 filterpos; \ - mlib_f32 *fptr; \ - mlib_d64 s0, s1, s2, s3; \ - mlib_s32 i, size - -/***************************************************************/ -#define GET_FILTERS_KOEF() \ - filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; \ - fptr = (mlib_f32 *) ((mlib_u8 *)mlib_filters_table + filterpos); \ - \ - xf0 = fptr[0]; \ - xf1 = fptr[1]; \ - xf2 = fptr[2]; \ - xf3 = fptr[3]; \ - \ - filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; \ - fptr = (mlib_f32 *) ((mlib_u8 *)mlib_filters_table + filterpos); \ - \ - yf0 = fptr[0]; \ - yf1 = fptr[1]; \ - yf2 = fptr[2]; \ - yf3 = fptr[3] - -/***************************************************************/ -#define GET_POINTER() \ - xSrc = (X >> MLIB_SHIFT)-1; \ - ySrc = (Y >> MLIB_SHIFT)-1; \ - sp = ((MLIB_TYPE **)lineAddr)[ySrc] + xSrc - -/***************************************************************/ -#define LOAD_FIRST_ROW(nchan, chan) \ - s0 = *(lut + sp[0]*nchan + chan); \ - s1 = *(lut + sp[1]*nchan + chan); \ - s2 = *(lut + sp[2]*nchan + chan); \ - s3 = *(lut + sp[3]*nchan + chan) - -/***************************************************************/ -#define COUNT_NEXT_ROW(dst, nchan, chan) \ - sp = (MLIB_TYPE*)((mlib_addr)sp + srcYStride); \ - dst = ((*(lut + sp[0]*nchan + chan))*xf0 + \ - (*(lut + sp[1]*nchan + chan))*xf1 + \ - (*(lut + sp[2]*nchan + chan))*xf2 + \ - (*(lut + sp[3]*nchan + chan))*xf3) - -/***************************************************************/ -#ifdef MLIB_USE_FTOI_CLAMPING - -/***********/ -#define STORE_SAT_VALUE_U8(ind) \ - dp[ind] = ((mlib_s32)(val0 - (mlib_d64)0x7F800000) >> 24) ^ 0x80 - -/***********/ -#define STORE_SAT_VALUE_S16(ind) \ - dp[ind] = ((mlib_s32)(val0)) >> 16 - -#else - -/***********/ -#define STORE_SAT_VALUE_U8(ind) \ - val0 -= (mlib_d64)0x7F800000; \ - if (val0 >= MLIB_S32_MAX) \ - dp[ind] = MLIB_U8_MAX; \ - else if (val0 <= MLIB_S32_MIN) \ - dp[ind] = MLIB_U8_MIN; \ - else \ - dp[ind] = ((mlib_s32)val0 >> 24) ^ 0x80 - -/***********/ -#define STORE_SAT_VALUE_S16(ind) \ - if (val0 >= MLIB_S32_MAX) \ - dp[ind] = MLIB_S16_MAX; \ - else if (val0 <= MLIB_S32_MIN) \ - dp[ind] = MLIB_S16_MIN; \ - else \ - dp[ind] = (mlib_s32)val0 >> 16 - -#endif /* MLIB_USE_FTOI_CLAMPING */ - -/***************************************************************/ -#define MAKE_BC_3CH(lut_format) \ - X += dX; \ - Y += dY; \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 3, 0); \ - COUNT_NEXT_ROW(c2, 3, 0); \ - COUNT_NEXT_ROW(c3, 3, 0); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(3, 1); \ - STORE_SAT_VALUE_##lut_format(0); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 3, 1); \ - COUNT_NEXT_ROW(c2, 3, 1); \ - COUNT_NEXT_ROW(c3, 3, 1); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(3, 2); \ - STORE_SAT_VALUE_##lut_format(1); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 3, 2); \ - COUNT_NEXT_ROW(c2, 3, 2); \ - COUNT_NEXT_ROW(c3, 3, 2); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - GET_FILTERS_KOEF(); \ - GET_POINTER(); \ - LOAD_FIRST_ROW(3, 0); \ - STORE_SAT_VALUE_##lut_format(2); - -/***************************************************************/ -#define MAKE_LAST_PIXEL_BC_3CH(lut_format) \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 3, 0); \ - COUNT_NEXT_ROW(c2, 3, 0); \ - COUNT_NEXT_ROW(c3, 3, 0); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(3, 1); \ - STORE_SAT_VALUE_##lut_format(0); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 3, 1); \ - COUNT_NEXT_ROW(c2, 3, 1); \ - COUNT_NEXT_ROW(c3, 3, 1); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(3, 2); \ - STORE_SAT_VALUE_##lut_format(1); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 3, 2); \ - COUNT_NEXT_ROW(c2, 3, 2); \ - COUNT_NEXT_ROW(c3, 3, 2); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - STORE_SAT_VALUE_##lut_format(2); - -/***************************************************************/ -#define MAKE_BC_4CH(lut_format) \ - X += dX; \ - Y += dY; \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 0); \ - COUNT_NEXT_ROW(c2, 4, 0); \ - COUNT_NEXT_ROW(c3, 4, 0); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(4, 1); \ - STORE_SAT_VALUE_##lut_format(0); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 1); \ - COUNT_NEXT_ROW(c2, 4, 1); \ - COUNT_NEXT_ROW(c3, 4, 1); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(4, 2); \ - STORE_SAT_VALUE_##lut_format(1); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 2); \ - COUNT_NEXT_ROW(c2, 4, 2); \ - COUNT_NEXT_ROW(c3, 4, 2); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(4, 3); \ - STORE_SAT_VALUE_##lut_format(2); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 3); \ - COUNT_NEXT_ROW(c2, 4, 3); \ - COUNT_NEXT_ROW(c3, 4, 3); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - GET_FILTERS_KOEF(); \ - GET_POINTER(); \ - LOAD_FIRST_ROW(4, 0); \ - STORE_SAT_VALUE_##lut_format(3); - -/***************************************************************/ -#define MAKE_LAST_PIXEL_BC_4CH(lut_format) \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 0); \ - COUNT_NEXT_ROW(c2, 4, 0); \ - COUNT_NEXT_ROW(c3, 4, 0); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(4, 1); \ - STORE_SAT_VALUE_##lut_format(0); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 1); \ - COUNT_NEXT_ROW(c2, 4, 1); \ - COUNT_NEXT_ROW(c3, 4, 1); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(4, 2); \ - STORE_SAT_VALUE_##lut_format(1); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 2); \ - COUNT_NEXT_ROW(c2, 4, 2); \ - COUNT_NEXT_ROW(c3, 4, 2); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - sp = (MLIB_TYPE*)((mlib_addr)sp - 3*srcYStride); \ - LOAD_FIRST_ROW(4, 3); \ - STORE_SAT_VALUE_##lut_format(2); \ - c0 = (s0*xf0 + s1*xf1 + s2*xf2 + s3*xf3); \ - COUNT_NEXT_ROW(c1, 4, 3); \ - COUNT_NEXT_ROW(c2, 4, 3); \ - COUNT_NEXT_ROW(c3, 4, 3); \ - val0 = (c0*yf0 + c1*yf1 + c2*yf2 + c3*yf3); \ - STORE_SAT_VALUE_##lut_format(3); - -/***************************************************************/ -#define FILTER_U8 ((filter == MLIB_BICUBIC) ? mlib_filters_u8f_bc : mlib_filters_u8f_bc2) -#define FILTER_S16 ((filter == MLIB_BICUBIC) ? mlib_filters_s16f_bc : mlib_filters_s16f_bc2) - -/***************************************************************/ -#define mlib_U8 mlib_u8 -#define mlib_S16 mlib_s16 - -/***************************************************************/ -#define FUNC_AFFINEINDEX_BC_0(ITYPE, LTYPE, NCHAN) \ - mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BC(mlib_affine_param *param, \ - const void *colormap) \ - { \ - DECLAREVAR_IND(); \ - mlib_##LTYPE buff_lcl[NCHAN * MLIB_LIMIT], *pbuff = buff_lcl, *dp; \ - mlib_d64 *lut = ((mlib_d64*)mlib_ImageGetLutDoubleData(colormap) - \ - NCHAN * mlib_ImageGetLutOffset(colormap)); \ - const mlib_f32 *mlib_filters_table = FILTER_##LTYPE; \ - \ - if (max_xsize > MLIB_LIMIT) { \ - pbuff = mlib_malloc(NCHAN * sizeof(mlib_##LTYPE) * max_xsize); \ - if (pbuff == NULL) return MLIB_FAILURE; \ - } \ - \ - for (j = yStart; j <= yFinish; j++) { \ - \ - NEW_LINE(1); \ - dp = pbuff; \ - \ - GET_FILTERS_KOEF(); \ - GET_POINTER(); \ - LOAD_FIRST_ROW(NCHAN, 0); - - /* pragma pipeloop(0) must be here */ - -/***************************************************************/ -#define FUNC_AFFINEINDEX_BC_1(ITYPE, LTYPE, NCHAN) \ - \ - for (i = 0; i < (xRight - xLeft); i++, dp += NCHAN) { \ - MAKE_BC_##NCHAN##CH(LTYPE); \ - } \ - \ - MAKE_LAST_PIXEL_BC_##NCHAN##CH(LTYPE); \ - \ - mlib_ImageColorTrue2IndexLine_##LTYPE##_##ITYPE##_##NCHAN \ - (pbuff, dl, xRight - xLeft + 1, colormap); \ - } \ - \ - if (pbuff != buff_lcl) mlib_free(pbuff); \ - \ - return MLIB_SUCCESS; \ - } - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_u8 - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(U8, U8, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(U8, U8, 3) - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 3 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(U8, S16, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(U8, S16, 3) - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(U8, U8, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(U8, U8, 4) - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 3 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(U8, S16, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(U8, S16, 4) - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_s16 - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(S16, U8, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(S16, U8, 3) - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 3 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(S16, S16, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(S16, S16, 3) - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(S16, U8, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(S16, U8, 4) - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 3 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 4) - -FUNC_AFFINEINDEX_BC_0(S16, S16, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BC_1(S16, S16, 4) - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BL.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageAffineIndex_BL.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,280 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -#include "mlib_image.h" -#include "mlib_ImageAffine.h" -#include "mlib_ImageColormap.h" - -/***************************************************************/ -#define MLIB_LIMIT 512 - -/***************************************************************/ -#define DTYPE MLIB_TYPE - -/***************************************************************/ -#define DECLAREVAR_IND() \ - DECLAREVAR0(); \ - mlib_s32 *warp_tbl = param -> warp_tbl; \ - mlib_s32 xSrc, ySrc; \ - mlib_s32 srcYStride = param -> srcYStride; \ - mlib_s32 max_xsize = param -> max_xsize; \ - MLIB_TYPE *sp0, *sp1; \ - MLIB_TYPE *dl; \ - mlib_d64 scale = 1.0 / 65536.0; \ - mlib_s32 i, size - -/***************************************************************/ -#define DECLARE_INTERNAL_VAR_3CH() \ - mlib_d64 fdx, fdy; \ - mlib_d64 a00_0, a01_0, a10_0, a11_0; \ - mlib_d64 a00_1, a01_1, a10_1, a11_1; \ - mlib_d64 a00_2, a01_2, a10_2, a11_2; \ - mlib_d64 pix0_0, pix1_0, res0; \ - mlib_d64 pix0_1, pix1_1, res1; \ - mlib_d64 pix0_2, pix1_2, res2 - -/***************************************************************/ -#define DECLARE_INTERNAL_VAR_4CH() \ - mlib_d64 fdx, fdy; \ - mlib_d64 a00_0, a01_0, a10_0, a11_0; \ - mlib_d64 a00_1, a01_1, a10_1, a11_1; \ - mlib_d64 a00_2, a01_2, a10_2, a11_2; \ - mlib_d64 a00_3, a01_3, a10_3, a11_3; \ - mlib_d64 pix0_0, pix1_0, res0; \ - mlib_d64 pix0_1, pix1_1, res1; \ - mlib_d64 pix0_2, pix1_2, res2; \ - mlib_d64 pix0_3, pix1_3, res3 - -/***************************************************************/ -#define GET_PIXELS_POINTERS() \ - fdx = (X & MLIB_MASK) * scale; \ - fdy = (Y & MLIB_MASK) * scale; \ - ySrc = MLIB_POINTER_SHIFT(Y); Y += dY; \ - xSrc = X >> MLIB_SHIFT; X += dX; \ - sp0 = MLIB_POINTER_GET(lineAddr, ySrc) + xSrc; \ - sp1 = (MLIB_TYPE *)((mlib_u8 *)sp0 + srcYStride) - -/***************************************************************/ -#define GET_COLOR_POINTERS(ind) \ - pcolor00 = (lut + sp0[0]*ind); \ - pcolor10 = (lut + sp1[0]*ind); \ - pcolor01 = (lut + sp0[1]*ind); \ - pcolor11 = (lut + sp1[1]*ind) - -/***************************************************************/ -#define COUNT_BL_U8(ind) \ - pix0_##ind = a00_##ind + fdy * (a10_##ind - a00_##ind); \ - pix1_##ind = a01_##ind + fdy * (a11_##ind - a01_##ind); \ - res##ind = pix0_##ind + fdx * (pix1_##ind - pix0_##ind) + 0.5 - -/***************************************************************/ -#define COUNT_BL_U8_3CH() \ - COUNT_BL_U8(0); \ - COUNT_BL_U8(1); \ - COUNT_BL_U8(2); - -/***************************************************************/ -#define COUNT_BL_U8_4CH() \ - COUNT_BL_U8_3CH(); \ - COUNT_BL_U8(3); - -/***************************************************************/ -#define COUNT_BL_S16(ind) \ - pix0_##ind = a00_##ind + fdy * (a10_##ind - a00_##ind); \ - pix1_##ind = a01_##ind + fdy * (a11_##ind - a01_##ind); \ - res##ind = pix0_##ind + fdx * (pix1_##ind - pix0_##ind) - -/***************************************************************/ -#define COUNT_BL_S16_3CH() \ - COUNT_BL_S16(0); \ - COUNT_BL_S16(1); \ - COUNT_BL_S16(2); - -/***************************************************************/ -#define COUNT_BL_S16_4CH() \ - COUNT_BL_S16_3CH(); \ - COUNT_BL_S16(3); - -/***************************************************************/ -#define LOAD(ind) \ - a00_##ind = pcolor00[ind]; \ - a01_##ind = pcolor01[ind]; \ - a10_##ind = pcolor10[ind]; \ - a11_##ind = pcolor11[ind] - -/***************************************************************/ -#define LOAD_3CH() \ - LOAD(0); \ - LOAD(1); \ - LOAD(2); - -/***************************************************************/ -#define LOAD_4CH() \ - LOAD_3CH(); \ - LOAD(3); - -/***************************************************************/ -#define STORE_INTO_INTERM_BUF_3CH(LTYPE) \ - dp[0] = (mlib_##LTYPE)res0; \ - dp[1] = (mlib_##LTYPE)res1; \ - dp[2] = (mlib_##LTYPE)res2 - -/***************************************************************/ -#define STORE_INTO_INTERM_BUF_4CH(LTYPE) \ - dp[0] = (mlib_##LTYPE)res0; \ - dp[1] = (mlib_##LTYPE)res1; \ - dp[2] = (mlib_##LTYPE)res2; \ - dp[3] = (mlib_##LTYPE)res3 - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_u8 - -/***************************************************************/ -#define mlib_U8 mlib_u8 -#define mlib_S16 mlib_s16 - -/***************************************************************/ -#define FUNC_AFFINEINDEX_BL_0(ITYPE, LTYPE, NCHAN) \ - mlib_status mlib_ImageAffineIndex_##ITYPE##_##LTYPE##_##NCHAN##CH_BL(mlib_affine_param *param, \ - const void *colormap) \ - { \ - DECLAREVAR_IND(); \ - mlib_##LTYPE *dp, buff_lcl[NCHAN*MLIB_LIMIT], *pbuff = buff_lcl; \ - mlib_d64 *pcolor00, *pcolor10, *pcolor01, *pcolor11; \ - mlib_d64 *lut = mlib_ImageGetLutDoubleData(colormap); \ - \ - lut -= NCHAN*mlib_ImageGetLutOffset(colormap); \ - \ - if (max_xsize > MLIB_LIMIT) { \ - pbuff = mlib_malloc(NCHAN * sizeof(mlib_##LTYPE) * max_xsize); \ - if (pbuff == NULL) return MLIB_FAILURE; \ - } \ - \ - for (j = yStart; j <= yFinish; j++) { \ - DECLARE_INTERNAL_VAR_##NCHAN##CH(); \ - \ - NEW_LINE(1); \ - dp = pbuff; \ - \ - GET_PIXELS_POINTERS(); \ - GET_COLOR_POINTERS(NCHAN); \ - LOAD_##NCHAN##CH(); - - /* pragma pipeloop(0) must be here */ - -/***************************************************************/ -#define FUNC_AFFINEINDEX_BL_1(ITYPE, LTYPE, NCHAN) \ - for (i = 0; i < (xRight - xLeft); i++, dp += NCHAN) { \ - COUNT_BL_##LTYPE##_##NCHAN##CH(); \ - \ - GET_PIXELS_POINTERS(); \ - GET_COLOR_POINTERS(NCHAN); \ - LOAD_##NCHAN##CH(); \ - \ - STORE_INTO_INTERM_BUF_##NCHAN##CH(LTYPE); \ - } \ - \ - COUNT_BL_##LTYPE##_##NCHAN##CH(); \ - STORE_INTO_INTERM_BUF_##NCHAN##CH(LTYPE); \ - \ - mlib_ImageColorTrue2IndexLine_##LTYPE##_##ITYPE##_##NCHAN \ - (pbuff, dl, xRight - xLeft + 1, colormap); \ - } \ - \ - if (pbuff != buff_lcl) mlib_free(pbuff); \ - \ - return MLIB_SUCCESS; \ - } - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_u8 - -FUNC_AFFINEINDEX_BL_0(U8, U8, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(U8, U8, 3) - -FUNC_AFFINEINDEX_BL_0(U8, S16, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(U8, S16, 3) - -FUNC_AFFINEINDEX_BL_0(U8, U8, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(U8, U8, 4) - -FUNC_AFFINEINDEX_BL_0(U8, S16, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(U8, S16, 4) - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_s16 - -FUNC_AFFINEINDEX_BL_0(S16, U8, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(S16, U8, 3) - -FUNC_AFFINEINDEX_BL_0(S16, S16, 3) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(S16, S16, 3) - -FUNC_AFFINEINDEX_BL_0(S16, U8, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(S16, U8, 4) - -FUNC_AFFINEINDEX_BL_0(S16, S16, 4) -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ -FUNC_AFFINEINDEX_BL_1(S16, S16, 4) - -/***************************************************************/ -const type_affine_i_fun mlib_AffineFunArr_bl_i[] = { - mlib_ImageAffineIndex_U8_U8_3CH_BL, - mlib_ImageAffineIndex_U8_U8_4CH_BL, - mlib_ImageAffineIndex_S16_U8_3CH_BL, - mlib_ImageAffineIndex_S16_U8_4CH_BL, - mlib_ImageAffineIndex_U8_S16_3CH_BL, - mlib_ImageAffineIndex_U8_S16_4CH_BL, - mlib_ImageAffineIndex_S16_S16_3CH_BL, - mlib_ImageAffineIndex_S16_S16_4CH_BL -}; -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,263 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include -#include "mlib_c_ImageBlendTable.h" - -const mlib_f32 mlib_c_blend_u8[] = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, - 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, - 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, - 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, - 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, - 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, - 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, - 96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, - 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, - 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, - 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, - 128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, - 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, - 144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, - 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, - 160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, - 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, - 176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, - 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, - 192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, - 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, - 208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, - 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f, - 224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, - 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f, - 240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, - 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f, - 256.0f -}; - -const mlib_f32 mlib_c_blend_Q8[] = { - 0.00000000f, 0.00390625f, 0.00781250f, 0.01171875f, - 0.01562500f, 0.01953125f, 0.02343750f, 0.02734375f, - 0.03125000f, 0.03515625f, 0.03906250f, 0.04296875f, - 0.04687500f, 0.05078125f, 0.05468750f, 0.05859375f, - 0.06250000f, 0.06640625f, 0.07031250f, 0.07421875f, - 0.07812500f, 0.08203125f, 0.08593750f, 0.08984375f, - 0.09375000f, 0.09765625f, 0.10156250f, 0.10546875f, - 0.10937500f, 0.11328125f, 0.11718750f, 0.12109375f, - 0.12500000f, 0.12890625f, 0.13281250f, 0.13671875f, - 0.14062500f, 0.14453125f, 0.14843750f, 0.15234375f, - 0.15625000f, 0.16015625f, 0.16406250f, 0.16796875f, - 0.17187500f, 0.17578125f, 0.17968750f, 0.18359375f, - 0.18750000f, 0.19140625f, 0.19531250f, 0.19921875f, - 0.20312500f, 0.20703125f, 0.21093750f, 0.21484375f, - 0.21875000f, 0.22265625f, 0.22656250f, 0.23046875f, - 0.23437500f, 0.23828125f, 0.24218750f, 0.24609375f, - 0.25000000f, 0.25390625f, 0.25781250f, 0.26171875f, - 0.26562500f, 0.26953125f, 0.27343750f, 0.27734375f, - 0.28125000f, 0.28515625f, 0.28906250f, 0.29296875f, - 0.29687500f, 0.30078125f, 0.30468750f, 0.30859375f, - 0.31250000f, 0.31640625f, 0.32031250f, 0.32421875f, - 0.32812500f, 0.33203125f, 0.33593750f, 0.33984375f, - 0.34375000f, 0.34765625f, 0.35156250f, 0.35546875f, - 0.35937500f, 0.36328125f, 0.36718750f, 0.37109375f, - 0.37500000f, 0.37890625f, 0.38281250f, 0.38671875f, - 0.39062500f, 0.39453125f, 0.39843750f, 0.40234375f, - 0.40625000f, 0.41015625f, 0.41406250f, 0.41796875f, - 0.42187500f, 0.42578125f, 0.42968750f, 0.43359375f, - 0.43750000f, 0.44140625f, 0.44531250f, 0.44921875f, - 0.45312500f, 0.45703125f, 0.46093750f, 0.46484375f, - 0.46875000f, 0.47265625f, 0.47656250f, 0.48046875f, - 0.48437500f, 0.48828125f, 0.49218750f, 0.49609375f, - 0.50000000f, 0.50390625f, 0.50781250f, 0.51171875f, - 0.51562500f, 0.51953125f, 0.52343750f, 0.52734375f, - 0.53125000f, 0.53515625f, 0.53906250f, 0.54296875f, - 0.54687500f, 0.55078125f, 0.55468750f, 0.55859375f, - 0.56250000f, 0.56640625f, 0.57031250f, 0.57421875f, - 0.57812500f, 0.58203125f, 0.58593750f, 0.58984375f, - 0.59375000f, 0.59765625f, 0.60156250f, 0.60546875f, - 0.60937500f, 0.61328125f, 0.61718750f, 0.62109375f, - 0.62500000f, 0.62890625f, 0.63281250f, 0.63671875f, - 0.64062500f, 0.64453125f, 0.64843750f, 0.65234375f, - 0.65625000f, 0.66015625f, 0.66406250f, 0.66796875f, - 0.67187500f, 0.67578125f, 0.67968750f, 0.68359375f, - 0.68750000f, 0.69140625f, 0.69531250f, 0.69921875f, - 0.70312500f, 0.70703125f, 0.71093750f, 0.71484375f, - 0.71875000f, 0.72265625f, 0.72656250f, 0.73046875f, - 0.73437500f, 0.73828125f, 0.74218750f, 0.74609375f, - 0.75000000f, 0.75390625f, 0.75781250f, 0.76171875f, - 0.76562500f, 0.76953125f, 0.77343750f, 0.77734375f, - 0.78125000f, 0.78515625f, 0.78906250f, 0.79296875f, - 0.79687500f, 0.80078125f, 0.80468750f, 0.80859375f, - 0.81250000f, 0.81640625f, 0.82031250f, 0.82421875f, - 0.82812500f, 0.83203125f, 0.83593750f, 0.83984375f, - 0.84375000f, 0.84765625f, 0.85156250f, 0.85546875f, - 0.85937500f, 0.86328125f, 0.86718750f, 0.87109375f, - 0.87500000f, 0.87890625f, 0.88281250f, 0.88671875f, - 0.89062500f, 0.89453125f, 0.89843750f, 0.90234375f, - 0.90625000f, 0.91015625f, 0.91406250f, 0.91796875f, - 0.92187500f, 0.92578125f, 0.92968750f, 0.93359375f, - 0.93750000f, 0.94140625f, 0.94531250f, 0.94921875f, - 0.95312500f, 0.95703125f, 0.96093750f, 0.96484375f, - 0.96875000f, 0.97265625f, 0.97656250f, 0.98046875f, - 0.98437500f, 0.98828125f, 0.99218750f, 0.99609375f, - 1.00000000f, 1.00390625f, 1.00781250f, 1.01171875f, - 1.01562500f, 1.01953125f, 1.02343750f, 1.02734375f, - 1.03125000f, 1.03515625f, 1.03906250f, 1.04296875f, - 1.04687500f, 1.05078125f, 1.05468750f, 1.05859375f, - 1.06250000f, 1.06640625f, 1.07031250f, 1.07421875f, - 1.07812500f, 1.08203125f, 1.08593750f, 1.08984375f, - 1.09375000f, 1.09765625f, 1.10156250f, 1.10546875f, - 1.10937500f, 1.11328125f, 1.11718750f, 1.12109375f, - 1.12500000f, 1.12890625f, 1.13281250f, 1.13671875f, - 1.14062500f, 1.14453125f, 1.14843750f, 1.15234375f, - 1.15625000f, 1.16015625f, 1.16406250f, 1.16796875f, - 1.17187500f, 1.17578125f, 1.17968750f, 1.18359375f, - 1.18750000f, 1.19140625f, 1.19531250f, 1.19921875f, - 1.20312500f, 1.20703125f, 1.21093750f, 1.21484375f, - 1.21875000f, 1.22265625f, 1.22656250f, 1.23046875f, - 1.23437500f, 1.23828125f, 1.24218750f, 1.24609375f, - 1.25000000f, 1.25390625f, 1.25781250f, 1.26171875f, - 1.26562500f, 1.26953125f, 1.27343750f, 1.27734375f, - 1.28125000f, 1.28515625f, 1.28906250f, 1.29296875f, - 1.29687500f, 1.30078125f, 1.30468750f, 1.30859375f, - 1.31250000f, 1.31640625f, 1.32031250f, 1.32421875f, - 1.32812500f, 1.33203125f, 1.33593750f, 1.33984375f, - 1.34375000f, 1.34765625f, 1.35156250f, 1.35546875f, - 1.35937500f, 1.36328125f, 1.36718750f, 1.37109375f, - 1.37500000f, 1.37890625f, 1.38281250f, 1.38671875f, - 1.39062500f, 1.39453125f, 1.39843750f, 1.40234375f, - 1.40625000f, 1.41015625f, 1.41406250f, 1.41796875f, - 1.42187500f, 1.42578125f, 1.42968750f, 1.43359375f, - 1.43750000f, 1.44140625f, 1.44531250f, 1.44921875f, - 1.45312500f, 1.45703125f, 1.46093750f, 1.46484375f, - 1.46875000f, 1.47265625f, 1.47656250f, 1.48046875f, - 1.48437500f, 1.48828125f, 1.49218750f, 1.49609375f, - 1.50000000f, 1.50390625f, 1.50781250f, 1.51171875f, - 1.51562500f, 1.51953125f, 1.52343750f, 1.52734375f, - 1.53125000f, 1.53515625f, 1.53906250f, 1.54296875f, - 1.54687500f, 1.55078125f, 1.55468750f, 1.55859375f, - 1.56250000f, 1.56640625f, 1.57031250f, 1.57421875f, - 1.57812500f, 1.58203125f, 1.58593750f, 1.58984375f, - 1.59375000f, 1.59765625f, 1.60156250f, 1.60546875f, - 1.60937500f, 1.61328125f, 1.61718750f, 1.62109375f, - 1.62500000f, 1.62890625f, 1.63281250f, 1.63671875f, - 1.64062500f, 1.64453125f, 1.64843750f, 1.65234375f, - 1.65625000f, 1.66015625f, 1.66406250f, 1.66796875f, - 1.67187500f, 1.67578125f, 1.67968750f, 1.68359375f, - 1.68750000f, 1.69140625f, 1.69531250f, 1.69921875f, - 1.70312500f, 1.70703125f, 1.71093750f, 1.71484375f, - 1.71875000f, 1.72265625f, 1.72656250f, 1.73046875f, - 1.73437500f, 1.73828125f, 1.74218750f, 1.74609375f, - 1.75000000f, 1.75390625f, 1.75781250f, 1.76171875f, - 1.76562500f, 1.76953125f, 1.77343750f, 1.77734375f, - 1.78125000f, 1.78515625f, 1.78906250f, 1.79296875f, - 1.79687500f, 1.80078125f, 1.80468750f, 1.80859375f, - 1.81250000f, 1.81640625f, 1.82031250f, 1.82421875f, - 1.82812500f, 1.83203125f, 1.83593750f, 1.83984375f, - 1.84375000f, 1.84765625f, 1.85156250f, 1.85546875f, - 1.85937500f, 1.86328125f, 1.86718750f, 1.87109375f, - 1.87500000f, 1.87890625f, 1.88281250f, 1.88671875f, - 1.89062500f, 1.89453125f, 1.89843750f, 1.90234375f, - 1.90625000f, 1.91015625f, 1.91406250f, 1.91796875f, - 1.92187500f, 1.92578125f, 1.92968750f, 1.93359375f, - 1.93750000f, 1.94140625f, 1.94531250f, 1.94921875f, - 1.95312500f, 1.95703125f, 1.96093750f, 1.96484375f, - 1.96875000f, 1.97265625f, 1.97656250f, 1.98046875f, - 1.98437500f, 1.98828125f, 1.99218750f, 1.99609375f, - 2.00000000f -}; - -const mlib_f32 mlib_c_blend_u8_sat[] = { - 0.0f, 8388608.0f, 16777216.0f, 25165824.0f, - 33554432.0f, 41943040.0f, 50331648.0f, 58720256.0f, - 67108864.0f, 75497472.0f, 83886080.0f, 92274688.0f, - 100663296.0f, 109051904.0f, 117440512.0f, 125829120.0f, - 134217728.0f, 142606336.0f, 150994944.0f, 159383552.0f, - 167772160.0f, 176160768.0f, 184549376.0f, 192937984.0f, - 201326592.0f, 209715200.0f, 218103808.0f, 226492416.0f, - 234881024.0f, 243269632.0f, 251658240.0f, 260046848.0f, - 268435456.0f, 276824064.0f, 285212672.0f, 293601280.0f, - 301989888.0f, 310378496.0f, 318767104.0f, 327155712.0f, - 335544320.0f, 343932928.0f, 352321536.0f, 360710144.0f, - 369098752.0f, 377487360.0f, 385875968.0f, 394264576.0f, - 402653184.0f, 411041792.0f, 419430400.0f, 427819008.0f, - 436207616.0f, 444596224.0f, 452984832.0f, 461373440.0f, - 469762048.0f, 478150656.0f, 486539264.0f, 494927872.0f, - 503316480.0f, 511705088.0f, 520093696.0f, 528482304.0f, - 536870912.0f, 545259520.0f, 553648128.0f, 562036736.0f, - 570425344.0f, 578813952.0f, 587202560.0f, 595591168.0f, - 603979776.0f, 612368384.0f, 620756992.0f, 629145600.0f, - 637534208.0f, 645922816.0f, 654311424.0f, 662700032.0f, - 671088640.0f, 679477248.0f, 687865856.0f, 696254464.0f, - 704643072.0f, 713031680.0f, 721420288.0f, 729808896.0f, - 738197504.0f, 746586112.0f, 754974720.0f, 763363328.0f, - 771751936.0f, 780140544.0f, 788529152.0f, 796917760.0f, - 805306368.0f, 813694976.0f, 822083584.0f, 830472192.0f, - 838860800.0f, 847249408.0f, 855638016.0f, 864026624.0f, - 872415232.0f, 880803840.0f, 889192448.0f, 897581056.0f, - 905969664.0f, 914358272.0f, 922746880.0f, 931135488.0f, - 939524096.0f, 947912704.0f, 956301312.0f, 964689920.0f, - 973078528.0f, 981467136.0f, 989855744.0f, 998244352.0f, - 1006632960.0f, 1015021568.0f, 1023410176.0f, 1031798784.0f, - 1040187392.0f, 1048576000.0f, 1056964608.0f, 1065353216.0f, - 1073741824.0f, 1082130432.0f, 1090519040.0f, 1098907648.0f, - 1107296256.0f, 1115684864.0f, 1124073472.0f, 1132462080.0f, - 1140850688.0f, 1149239296.0f, 1157627904.0f, 1166016512.0f, - 1174405120.0f, 1182793728.0f, 1191182336.0f, 1199570944.0f, - 1207959552.0f, 1216348160.0f, 1224736768.0f, 1233125376.0f, - 1241513984.0f, 1249902592.0f, 1258291200.0f, 1266679808.0f, - 1275068416.0f, 1283457024.0f, 1291845632.0f, 1300234240.0f, - 1308622848.0f, 1317011456.0f, 1325400064.0f, 1333788672.0f, - 1342177280.0f, 1350565888.0f, 1358954496.0f, 1367343104.0f, - 1375731712.0f, 1384120320.0f, 1392508928.0f, 1400897536.0f, - 1409286144.0f, 1417674752.0f, 1426063360.0f, 1434451968.0f, - 1442840576.0f, 1451229184.0f, 1459617792.0f, 1468006400.0f, - 1476395008.0f, 1484783616.0f, 1493172224.0f, 1501560832.0f, - 1509949440.0f, 1518338048.0f, 1526726656.0f, 1535115264.0f, - 1543503872.0f, 1551892480.0f, 1560281088.0f, 1568669696.0f, - 1577058304.0f, 1585446912.0f, 1593835520.0f, 1602224128.0f, - 1610612736.0f, 1619001344.0f, 1627389952.0f, 1635778560.0f, - 1644167168.0f, 1652555776.0f, 1660944384.0f, 1669332992.0f, - 1677721600.0f, 1686110208.0f, 1694498816.0f, 1702887424.0f, - 1711276032.0f, 1719664640.0f, 1728053248.0f, 1736441856.0f, - 1744830464.0f, 1753219072.0f, 1761607680.0f, 1769996288.0f, - 1778384896.0f, 1786773504.0f, 1795162112.0f, 1803550720.0f, - 1811939328.0f, 1820327936.0f, 1828716544.0f, 1837105152.0f, - 1845493760.0f, 1853882368.0f, 1862270976.0f, 1870659584.0f, - 1879048192.0f, 1887436800.0f, 1895825408.0f, 1904214016.0f, - 1912602624.0f, 1920991232.0f, 1929379840.0f, 1937768448.0f, - 1946157056.0f, 1954545664.0f, 1962934272.0f, 1971322880.0f, - 1979711488.0f, 1988100096.0f, 1996488704.0f, 2004877312.0f, - 2013265920.0f, 2021654528.0f, 2030043136.0f, 2038431744.0f, - 2046820352.0f, 2055208960.0f, 2063597568.0f, 2071986176.0f, - 2080374784.0f, 2088763392.0f, 2097152000.0f, 2105540608.0f, - 2113929216.0f, 2122317824.0f, 2130706432.0f, 2139095040.0f, - 2147483648.0f -}; diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageBlendTable.h Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * These tables are used by C versions of the - * mlib_ImageBlend_... functions. - */ - -#ifndef MLIB_C_IMAGE_BLEND_TABLE_H -#define MLIB_C_IMAGE_BLEND_TABLE_H - -#include "mlib_image.h" - -extern const mlib_f32 mlib_c_blend_u8[]; -extern const mlib_f32 mlib_U82F32[]; -extern const mlib_f32 mlib_c_blend_Q8[]; -extern const mlib_f32 mlib_c_blend_u8_sat[]; - -#endif /* MLIB_C_IMAGEF_BLEND_TABLE_H */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv.h Fri May 13 11:31:05 2016 +0300 @@ -41,118 +41,6 @@ } #endif /* FREE_AND_RETURN_STATUS */ -mlib_status mlib_c_conv2x2ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv2x2ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv2x2ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv2x2nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv2x2nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv2x2nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv3x3ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv3x3nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv4x4ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv4x4nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv5x5ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv5x5nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv7x7ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_c_conv7x7nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - mlib_status mlib_c_convMxNnw_u8(mlib_image *dst, const mlib_image *src, const mlib_s32 *kernel, @@ -177,102 +65,6 @@ #if ! defined ( __sparc ) /* for x86, using integer multiplies is faster */ -mlib_status mlib_i_conv3x3ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv3x3ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv3x3ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv3x3nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv3x3nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv3x3nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv5x5ext_s16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv5x5ext_u16(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv5x5ext_u8(mlib_image *dst, - const mlib_image *src, - mlib_s32 dx_l, - mlib_s32 dx_r, - mlib_s32 dy_t, - mlib_s32 dy_b, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv5x5nw_s16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv5x5nw_u16(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_i_conv5x5nw_u8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - mlib_status mlib_i_convMxNnw_s16(mlib_image *dst, const mlib_image *src, const mlib_s32 *kernel, diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv_f.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv_f.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageConv_f.c Fri May 13 11:31:05 2016 +0300 @@ -29,34 +29,6 @@ #include "mlib_c_ImageConv.h" /***************************************************************/ -#define MLIB_PARAMS_CONV_NW \ - mlib_image *dst, \ - const mlib_image *src, \ - const mlib_s32 *kern, \ - mlib_s32 scale, \ - mlib_s32 cmask - -/***************************************************************/ -#define MLIB_CALL_PARAMS_CONV_NW \ - dst, src, kern, scale, cmask - -/***************************************************************/ -#define MLIB_PARAMS_CONV_EXT \ - mlib_image *dst, \ - const mlib_image *src, \ - mlib_s32 dx_l, \ - mlib_s32 dx_r, \ - mlib_s32 dy_t, \ - mlib_s32 dy_b, \ - const mlib_s32 *kern, \ - mlib_s32 scale, \ - mlib_s32 cmask - -/***************************************************************/ -#define MLIB_CALL_PARAMS_CONV_EXT \ - dst, src, dx_l, dx_r, dy_t, dy_b, kern, scale, cmask - -/***************************************************************/ #define MLIB_PARAMS_CONV_MN_NW \ mlib_image *dst, \ const mlib_image *src, \ @@ -92,52 +64,6 @@ /***************************************************************/ -mlib_status mlib_conv2x2nw_u8(MLIB_PARAMS_CONV_NW) -{ - return mlib_c_conv2x2nw_u8(MLIB_CALL_PARAMS_CONV_NW); -} - -/***************************************************************/ -mlib_status mlib_conv3x3nw_u8(MLIB_PARAMS_CONV_NW) -{ -#ifdef __sparc - return mlib_c_conv3x3nw_u8(MLIB_CALL_PARAMS_CONV_NW); -#else - - if (mlib_ImageConvVersion(3, 3, scale, MLIB_BYTE) == 0) - return mlib_c_conv3x3nw_u8(MLIB_CALL_PARAMS_CONV_NW); - else - return mlib_i_conv3x3nw_u8(MLIB_CALL_PARAMS_CONV_NW); -#endif /* __sparc */ -} - -/***************************************************************/ -mlib_status mlib_conv4x4nw_u8(MLIB_PARAMS_CONV_NW) -{ - return mlib_c_conv4x4nw_u8(MLIB_CALL_PARAMS_CONV_NW); -} - -/***************************************************************/ -mlib_status mlib_conv5x5nw_u8(MLIB_PARAMS_CONV_NW) -{ -#ifdef __sparc - return mlib_c_conv5x5nw_u8(MLIB_CALL_PARAMS_CONV_NW); -#else - - if (mlib_ImageConvVersion(5, 5, scale, MLIB_BYTE) == 0) - return mlib_c_conv5x5nw_u8(MLIB_CALL_PARAMS_CONV_NW); - else - return mlib_i_conv5x5nw_u8(MLIB_CALL_PARAMS_CONV_NW); -#endif /* __sparc */ -} - -/***************************************************************/ -mlib_status mlib_conv7x7nw_u8(MLIB_PARAMS_CONV_NW) -{ - return mlib_c_conv7x7nw_u8(MLIB_CALL_PARAMS_CONV_NW); -} - -/***************************************************************/ mlib_status mlib_convMxNnw_u8(MLIB_PARAMS_CONV_MN_NW) { #ifdef __sparc @@ -152,52 +78,6 @@ } /***************************************************************/ -mlib_status mlib_conv2x2ext_u8(MLIB_PARAMS_CONV_EXT) -{ - return mlib_c_conv2x2ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -} - -/***************************************************************/ -mlib_status mlib_conv3x3ext_u8(MLIB_PARAMS_CONV_EXT) -{ -#ifdef __sparc - return mlib_c_conv3x3ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -#else - - if (mlib_ImageConvVersion(3, 3, scale, MLIB_BYTE) == 0) - return mlib_c_conv3x3ext_u8(MLIB_CALL_PARAMS_CONV_EXT); - else - return mlib_i_conv3x3ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -#endif /* __sparc */ -} - -/***************************************************************/ -mlib_status mlib_conv4x4ext_u8(MLIB_PARAMS_CONV_EXT) -{ - return mlib_c_conv4x4ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -} - -/***************************************************************/ -mlib_status mlib_conv5x5ext_u8(MLIB_PARAMS_CONV_EXT) -{ -#ifdef __sparc - return mlib_c_conv5x5ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -#else - - if (mlib_ImageConvVersion(5, 5, scale, MLIB_BYTE) == 0) - return mlib_c_conv5x5ext_u8(MLIB_CALL_PARAMS_CONV_EXT); - else - return mlib_i_conv5x5ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -#endif /* __sparc */ -} - -/***************************************************************/ -mlib_status mlib_conv7x7ext_u8(MLIB_PARAMS_CONV_EXT) -{ - return mlib_c_conv7x7ext_u8(MLIB_CALL_PARAMS_CONV_EXT); -} - -/***************************************************************/ mlib_status mlib_convMxNext_u8(MLIB_PARAMS_CONV_MN_EXT) { #ifdef __sparc @@ -212,27 +92,3 @@ } /***************************************************************/ -mlib_status mlib_conv2x2nw_s16(MLIB_PARAMS_CONV_NW) -{ - return mlib_c_conv2x2nw_s16(MLIB_CALL_PARAMS_CONV_NW); -} - -/***************************************************************/ -mlib_status mlib_conv2x2nw_u16(MLIB_PARAMS_CONV_NW) -{ - return mlib_c_conv2x2nw_u16(MLIB_CALL_PARAMS_CONV_NW); -} - -/***************************************************************/ -mlib_status mlib_conv2x2ext_s16(MLIB_PARAMS_CONV_EXT) -{ - return mlib_c_conv2x2ext_s16(MLIB_CALL_PARAMS_CONV_EXT); -} - -/***************************************************************/ -mlib_status mlib_conv2x2ext_u16(MLIB_PARAMS_CONV_EXT) -{ - return mlib_c_conv2x2ext_u16(MLIB_CALL_PARAMS_CONV_EXT); -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1.h Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -#ifndef __MLIB_C_IMAGETHRESH1_H -#define __MLIB_C_IMAGETHRESH1_H - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/***************************************************************/ -#define PARAMS \ - void *psrc, \ - void *pdst, \ - mlib_s32 src_stride, \ - mlib_s32 dst_stride, \ - mlib_s32 width, \ - mlib_s32 height, \ - void *__thresh, \ - void *__ghigh, \ - void *__glow - -void mlib_c_ImageThresh1_D641(PARAMS); -void mlib_c_ImageThresh1_D642(PARAMS); -void mlib_c_ImageThresh1_D643(PARAMS); -void mlib_c_ImageThresh1_D644(PARAMS); -void mlib_c_ImageThresh1_D641_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_D642_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_D643_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_D644_1B(PARAMS, mlib_s32 dbit_off); - -void mlib_c_ImageThresh1_F321(PARAMS); -void mlib_c_ImageThresh1_F322(PARAMS); -void mlib_c_ImageThresh1_F323(PARAMS); -void mlib_c_ImageThresh1_F324(PARAMS); -void mlib_c_ImageThresh1_F321_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_F322_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_F323_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_F324_1B(PARAMS, mlib_s32 dbit_off); - -void mlib_c_ImageThresh1_S321(PARAMS); -void mlib_c_ImageThresh1_S322(PARAMS); -void mlib_c_ImageThresh1_S323(PARAMS); -void mlib_c_ImageThresh1_S324(PARAMS); -void mlib_c_ImageThresh1_S321_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_S322_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_S323_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_S324_1B(PARAMS, mlib_s32 dbit_off); - -void mlib_c_ImageThresh1_S161(PARAMS); -void mlib_c_ImageThresh1_S162(PARAMS); -void mlib_c_ImageThresh1_S163(PARAMS); -void mlib_c_ImageThresh1_S164(PARAMS); -void mlib_c_ImageThresh1_S161_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_S162_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_S163_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_S164_1B(PARAMS, mlib_s32 dbit_off); - -void mlib_c_ImageThresh1_U161(PARAMS); -void mlib_c_ImageThresh1_U162(PARAMS); -void mlib_c_ImageThresh1_U163(PARAMS); -void mlib_c_ImageThresh1_U164(PARAMS); -void mlib_c_ImageThresh1_U161_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_U162_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_U163_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_U164_1B(PARAMS, mlib_s32 dbit_off); - -void mlib_c_ImageThresh1_U81(PARAMS); -void mlib_c_ImageThresh1_U82(PARAMS); -void mlib_c_ImageThresh1_U83(PARAMS); -void mlib_c_ImageThresh1_U84(PARAMS); -void mlib_c_ImageThresh1_U81_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_U82_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_U83_1B(PARAMS, mlib_s32 dbit_off); -void mlib_c_ImageThresh1_U84_1B(PARAMS, mlib_s32 dbit_off); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* __MLIB_C_IMAGETHRESH1_H */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1_U8.c --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_c_ImageThresh1_U8.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,848 +0,0 @@ -/* - * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -/* - * FUNCTION - * mlib_ImageThresh1 - thresholding - * - * SYNOPSIS - * mlib_status mlib_ImageThresh1(mlib_image *dst, - * const mlib_image *src, - * const mlib_s32 *thresh, - * const mlib_s32 *ghigh, - * const mlib_s32 *glow); - * - * ARGUMENT - * dst pointer to output image - * src pointer to input image - * thresh array of thresholds - * ghigh array of values above thresholds - * glow array of values below thresholds - * - * RESTRICTION - * The images must have the same size, and the same number - * of channels. - * The images can have 1, 2, 3, or 4 channels. - * The images can be in MLIB_BYTE, MLIB_SHORT or MLIB_INT data type. - * The type of the output image can be MLIB_BIT, or the same as the - * type of the input image. - * - * DESCRIPTION - * If the pixel band value is above the threshold for that channel, - * set the destination to the ghigh value for that channel. - * Otherwise, set the destination to the glow value for that channel. - * - * +- glow[c] src[x][y][c] <= thresh[c] - * dst[x][y][c] = | - * +- ghigh[c] src[x][y][c] > thresh[c] - */ - -#include "mlib_image.h" -#include "mlib_ImageCheck.h" -#include "mlib_c_ImageThresh1.h" - -/***************************************************************/ -#define STYPE mlib_u8 -#define TTYPE mlib_s32 -#define T_SHIFT 31 - -/***************************************************************/ -#define DO_THRESH(s0, th, gl, gh) \ - (((gh) & (((th) - (TTYPE)(s0)) >> T_SHIFT)) | \ - ((gl) &~ (((th) - (TTYPE)(s0)) >> T_SHIFT))) - -/***************************************************************/ -#define THRESH1_CMP_SHIFT(s0, th, sh) \ - ((((th) - (s0)) >> T_SHIFT) & (1 << (sh))) - -/***************************************************************/ -#define STRIP(pd, ps, w, h, ch, th, gh, gl) { \ - STYPE s0; \ - for ( i = 0; i < h; i++ ) { \ - for (j = 0; j < w; j ++) { \ - for (k = 0; k < ch; k++) { \ - s0 = ((STYPE*)ps)[i*src_stride + j*ch + k]; \ - ((STYPE*)pd)[i*dst_stride + j*ch + k] = \ - (s0 <= th[k]) ? gl[k]: gh[k]; \ - } \ - } \ - } \ - } - -/***************************************************************/ -#define INIT_THRESH0(n) \ - thresh0 = thresh[n]; \ - ghigh0 = ghigh[n]; \ - glow0 = glow[n] - -/***************************************************************/ -#define INIT_THRESH1(n) \ - thresh1 = thresh[n]; \ - ghigh1 = ghigh[n]; \ - glow1 = glow[n] - -/***************************************************************/ -#define INIT_THRESH2(n) \ - thresh2 = thresh[n]; \ - ghigh2 = ghigh[n]; \ - glow2 = glow[n] - -/***************************************************************/ -#define INIT_THRESH3(n) \ - thresh3 = thresh[n]; \ - ghigh3 = ghigh[n]; \ - glow3 = glow[n] - -/***************************************************************/ -#define THRESH0(s0) DO_THRESH(s0, thresh0, glow0, ghigh0) -#define THRESH1(s0) DO_THRESH(s0, thresh1, glow1, ghigh1) -#define THRESH2(s0) DO_THRESH(s0, thresh2, glow2, ghigh2) -#define THRESH3(s0) DO_THRESH(s0, thresh3, glow3, ghigh3) - -/***************************************************************/ -void mlib_c_ImageThresh1_U81(PARAMS) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - STYPE *pdst_row = pdst; - TTYPE thresh0; - TTYPE ghigh0; - TTYPE glow0; - mlib_s32 i, j, k; - - if (width < 16) { - STRIP(pdst, psrc, width, height, 1, thresh, ghigh, glow); - return; - } - - INIT_THRESH0(0); - - for (i = 0; i < height; i++) { - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (j = 0; j <= (width - 8); j += 8) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH0(psrc_row[j + 1]); - pdst_row[j + 2] = THRESH0(psrc_row[j + 2]); - pdst_row[j + 3] = THRESH0(psrc_row[j + 3]); - pdst_row[j + 4] = THRESH0(psrc_row[j + 4]); - pdst_row[j + 5] = THRESH0(psrc_row[j + 5]); - pdst_row[j + 6] = THRESH0(psrc_row[j + 6]); - pdst_row[j + 7] = THRESH0(psrc_row[j + 7]); - } - - for (; j < width; j++) { - pdst_row[j] = THRESH0(psrc_row[j]); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U82(PARAMS) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - STYPE *pdst_row = pdst; - TTYPE thresh0, thresh1; - TTYPE ghigh0, ghigh1; - TTYPE glow0, glow1; - mlib_s32 i, j, k; - - if (width < 16) { - STRIP(pdst, psrc, width, height, 2, thresh, ghigh, glow); - return; - } - - INIT_THRESH0(0); - INIT_THRESH1(1); - width <<= 1; - - for (i = 0; i < height; i++) { - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (j = 0; j <= (width - 8); j += 8) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH1(psrc_row[j + 1]); - pdst_row[j + 2] = THRESH0(psrc_row[j + 2]); - pdst_row[j + 3] = THRESH1(psrc_row[j + 3]); - pdst_row[j + 4] = THRESH0(psrc_row[j + 4]); - pdst_row[j + 5] = THRESH1(psrc_row[j + 5]); - pdst_row[j + 6] = THRESH0(psrc_row[j + 6]); - pdst_row[j + 7] = THRESH1(psrc_row[j + 7]); - } - - for (; j < width; j += 2) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH1(psrc_row[j + 1]); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U83(PARAMS) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - STYPE *pdst_row = pdst; - TTYPE thresh0, thresh1, thresh2; - TTYPE ghigh0, ghigh1, ghigh2; - TTYPE glow0, glow1, glow2; - mlib_s32 i, j, k; - - if (width < 16) { - STRIP(pdst, psrc, width, height, 3, thresh, ghigh, glow); - return; - } - - width = 3 * width; - INIT_THRESH0(0); - INIT_THRESH1(1); - INIT_THRESH2(2); - - for (i = 0; i < height; i++) { - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (j = 0; j <= (width - 12); j += 12) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH1(psrc_row[j + 1]); - pdst_row[j + 2] = THRESH2(psrc_row[j + 2]); - pdst_row[j + 3] = THRESH0(psrc_row[j + 3]); - pdst_row[j + 4] = THRESH1(psrc_row[j + 4]); - pdst_row[j + 5] = THRESH2(psrc_row[j + 5]); - pdst_row[j + 6] = THRESH0(psrc_row[j + 6]); - pdst_row[j + 7] = THRESH1(psrc_row[j + 7]); - pdst_row[j + 8] = THRESH2(psrc_row[j + 8]); - pdst_row[j + 9] = THRESH0(psrc_row[j + 9]); - pdst_row[j + 10] = THRESH1(psrc_row[j + 10]); - pdst_row[j + 11] = THRESH2(psrc_row[j + 11]); - } - - for (; j < width; j += 3) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH1(psrc_row[j + 1]); - pdst_row[j + 2] = THRESH2(psrc_row[j + 2]); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U84(PARAMS) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - STYPE *pdst_row = pdst; - TTYPE thresh0, thresh1, thresh2, thresh3; - TTYPE ghigh0, ghigh1, ghigh2, ghigh3; - TTYPE glow0, glow1, glow2, glow3; - mlib_s32 i, j, k; - - if (width < 16) { - STRIP(pdst, psrc, width, height, 4, thresh, ghigh, glow); - return; - } - - INIT_THRESH0(0); - INIT_THRESH1(1); - INIT_THRESH2(2); - INIT_THRESH3(3); - - width *= 4; - - for (i = 0; i < height; i++) { - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (j = 0; j <= (width - 8); j += 8) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH1(psrc_row[j + 1]); - pdst_row[j + 2] = THRESH2(psrc_row[j + 2]); - pdst_row[j + 3] = THRESH3(psrc_row[j + 3]); - pdst_row[j + 4] = THRESH0(psrc_row[j + 4]); - pdst_row[j + 5] = THRESH1(psrc_row[j + 5]); - pdst_row[j + 6] = THRESH2(psrc_row[j + 6]); - pdst_row[j + 7] = THRESH3(psrc_row[j + 7]); - } - - if (j < width) { - pdst_row[j] = THRESH0(psrc_row[j]); - pdst_row[j + 1] = THRESH1(psrc_row[j + 1]); - pdst_row[j + 2] = THRESH2(psrc_row[j + 2]); - pdst_row[j + 3] = THRESH3(psrc_row[j + 3]); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U81_1B(PARAMS, - mlib_s32 dbit_off) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - mlib_u8 *pdst_row = pdst; - TTYPE thresh0 = thresh[0]; - mlib_s32 mhigh, mlow, emask, dst0; - mlib_s32 i, j, jbit, l; - - mhigh = (ghigh[0] > 0) ? 0xff : 0; - mlow = (glow[0] > 0) ? 0xff : 0; - - for (i = 0; i < height; i++) { - j = 0; - jbit = 0; - - if (dbit_off) { - mlib_s32 nume = 8 - dbit_off; - - if (nume > width) - nume = width; - dst0 = 0; - emask = 0; - - for (; j < nume; j++) { - emask |= (1 << (7 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - } - - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask); - jbit++; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (; j <= (width - 16); j += 16) { - dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh0, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh0, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh0, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh0, 0); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - *(pdst_row + jbit) = (mlib_u8) dst0; - jbit++; - dst0 = THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh0, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh0, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh0, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh0, 0); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - *(pdst_row + jbit) = (mlib_u8) dst0; - jbit++; - } - - if (width - j >= 8) { - dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh0, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh0, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh0, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh0, 0); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - *(pdst_row + jbit) = (mlib_u8) dst0; - jbit++; - j += 8; - } - - if (j < width) { - dst0 = 0; - l = 7; - for (; j < width; j++) { - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l); - l--; - } - - emask = (0xFF << (l + 1)); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[jbit] = (dst0 & emask) | (pdst_row[jbit] & ~emask); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U82_1B(PARAMS, - mlib_s32 dbit_off) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - mlib_u8 *pdst_row = pdst; - TTYPE thresh0 = thresh[0], thresh1 = thresh[1]; - mlib_s32 mhigh0, mlow0, mhigh, mlow, emask, dst0; - mlib_s32 i, j, jbit, l; - - mhigh0 = (ghigh[0] > 0) ? 0xaaa : 0; - mhigh0 |= (ghigh[1] > 0) ? 0x555 : 0; - mlow0 = (glow[0] > 0) ? 0xaaa : 0; - mlow0 |= (glow[1] > 0) ? 0x555 : 0; - - width *= 2; - - for (i = 0; i < height; i++) { - thresh0 = thresh[0]; - thresh1 = thresh[1]; - - j = 0; - jbit = 0; - mhigh = mhigh0 >> (dbit_off & 1); - mlow = mlow0 >> (dbit_off & 1); - - if (dbit_off) { - mlib_s32 nume = 8 - dbit_off; - - if (nume > width) - nume = width; - dst0 = 0; - emask = 0; - - for (; j <= (nume - 2); j += 2) { - emask |= (3 << (6 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6 - (dbit_off + j)); - } - - if (j < nume) { - emask |= (1 << (7 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - /* swap threshes */ - thresh0 = thresh[1]; - thresh1 = thresh[0]; - j++; - } - - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask); - jbit++; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (; j <= (width - 16); j += 16) { - dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh1, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh1, 0); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - *(pdst_row + jbit) = (mlib_u8) dst0; - jbit++; - dst0 = THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh1, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh1, 0); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - *(pdst_row + jbit) = (mlib_u8) dst0; - jbit++; - } - - if (width - j >= 8) { - dst0 = THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh1, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh1, 0); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - *(pdst_row + jbit) = (mlib_u8) dst0; - jbit++; - j += 8; - } - - if (j < width) { - dst0 = 0; - l = 7; - for (; j <= (width - 2); j += 2) { - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, l - 1); - l -= 2; - } - - if (j < width) { - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l); - l--; - } - - emask = (0xFF << (l + 1)); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[jbit] = (dst0 & emask) | (pdst_row[jbit] & ~emask); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U83_1B(PARAMS, - mlib_s32 dbit_off) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - mlib_u8 *pdst_row = pdst; - TTYPE thresh0, thresh1, thresh2, threshT; - mlib_s32 mhigh = 0, mlow = 0; - mlib_s32 mhigh0, mlow0, mhigh1, mlow1, mhigh2, mlow2, emask, dst0, dst1; - mlib_s32 i, j, jbit, k, l; - - if (ghigh[0] > 0) - mhigh = 0x492492; - - if (ghigh[1] > 0) - mhigh |= 0x249249; - - if (ghigh[2] > 0) - mhigh |= 0x924924; - - if (glow[0] > 0) - mlow = 0x492492; - - if (glow[1] > 0) - mlow |= 0x249249; - - if (glow[2] > 0) - mlow |= 0x924924; - - width = 3 * width; - - for (i = 0; i < height; i++) { - thresh0 = thresh[0]; - thresh1 = thresh[1]; - thresh2 = thresh[2]; - - j = 0; - jbit = 0; - mhigh0 = mhigh >> (dbit_off & 7); - mlow0 = mlow >> (dbit_off & 7); - mhigh1 = mhigh0 >> 1; - mlow1 = mlow0 >> 1; - mhigh2 = mhigh0 >> 2; - mlow2 = mlow0 >> 2; - - if (dbit_off) { - mlib_s32 nume = 8 - dbit_off; - - if (nume > width) - nume = width; - dst0 = 0; - emask = 0; - - for (; j <= (nume - 3); j += 3) { - emask |= (7 << (5 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6 - (dbit_off + j)); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5 - (dbit_off + j)); - } - - for (; j < nume; j++) { - emask |= (1 << (7 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - /* swap threshes */ - threshT = thresh0; - thresh0 = thresh1; - thresh1 = thresh2; - thresh2 = threshT; - } - - dst0 = (mhigh0 & dst0) | (mlow0 & ~dst0); - pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask); - jbit++; - - mhigh0 = mhigh >> (9 - nume); - mlow0 = mlow >> (9 - nume); - mhigh1 = mhigh0 >> 1; - mlow1 = mlow0 >> 1; - mhigh2 = mhigh0 >> 2; - mlow2 = mlow0 >> 2; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (; j <= (width - 24); j += 24) { - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh0, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh1, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh2, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh0, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh1, 0)); - dst0 = (mhigh0 & dst0) | (mlow0 & ~dst0); - *(pdst_row + jbit) = dst0; - jbit++; - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh2, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh0, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh1, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh2, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh2, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh0, 0)); - dst0 = (mhigh1 & dst0) | (mlow1 & ~dst0); - *(pdst_row + jbit) = dst0; - jbit++; - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j + 16], thresh1, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 17], thresh2, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 18], thresh0, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 19], thresh1, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 20], thresh2, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 21], thresh0, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 22], thresh1, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 23], thresh2, 0)); - dst0 = (mhigh2 & dst0) | (mlow2 & ~dst0); - *(pdst_row + jbit) = dst0; - jbit++; - } - - if (j < width) { - k = width - j; - dst0 = 0; - l = 31; - for (; j < width; j += 3) { - dst0 |= (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, l) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, l - 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, l - 2)); - l -= 3; - } - - l = (k + 7) >> 3; - k = (l << 3) - k; - emask = (0xFF << k); - - if (l == 3) { - dst1 = dst0 >> 24; - dst1 = (mhigh0 & dst1) | (mlow0 & ~dst1); - pdst_row[jbit] = dst1; - dst1 = (dst0 >> 16); - dst1 = (mhigh1 & dst1) | (mlow1 & ~dst1); - pdst_row[jbit + 1] = dst1; - dst1 = (dst0 >> 8); - dst1 = (mhigh2 & dst1) | (mlow2 & ~dst1); - pdst_row[jbit + 2] = (dst1 & emask) | (pdst_row[jbit + 2] & ~emask); - } - else if (l == 2) { - dst1 = dst0 >> 24; - dst1 = (mhigh0 & dst1) | (mlow0 & ~dst1); - pdst_row[jbit] = dst1; - dst1 = (dst0 >> 16); - dst1 = (mhigh1 & dst1) | (mlow1 & ~dst1); - pdst_row[jbit + 1] = (dst1 & emask) | (pdst_row[jbit + 1] & ~emask); - } - else { - dst1 = dst0 >> 24; - dst1 = (mhigh0 & dst1) | (mlow0 & ~dst1); - pdst_row[jbit] = (dst1 & emask) | (pdst_row[jbit] & ~emask); - } - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ -void mlib_c_ImageThresh1_U84_1B(PARAMS, - mlib_s32 dbit_off) -{ - mlib_s32 *thresh = (void *)__thresh; - mlib_s32 *ghigh = (void *)__ghigh; - mlib_s32 *glow = (void *)__glow; - STYPE *psrc_row = psrc; - mlib_u8 *pdst_row = pdst; - TTYPE thresh0, thresh1, thresh2, thresh3, threshT; - mlib_s32 mhigh0, mlow0, mhigh, mlow, emask, dst0; - mlib_s32 i, j, jbit; - - mhigh0 = (ghigh[0] > 0) ? 0x8888 : 0; - mhigh0 |= (ghigh[1] > 0) ? 0x4444 : 0; - mhigh0 |= (ghigh[2] > 0) ? 0x2222 : 0; - mhigh0 |= (ghigh[3] > 0) ? 0x1111 : 0; - - mlow0 = (glow[0] > 0) ? 0x8888 : 0; - mlow0 |= (glow[1] > 0) ? 0x4444 : 0; - mlow0 |= (glow[2] > 0) ? 0x2222 : 0; - mlow0 |= (glow[3] > 0) ? 0x1111 : 0; - - width *= 4; - - for (i = 0; i < height; i++) { - thresh0 = thresh[0]; - thresh1 = thresh[1]; - thresh2 = thresh[2]; - thresh3 = thresh[3]; - - j = 0; - jbit = 0; - mhigh = mhigh0 >> dbit_off; - mlow = mlow0 >> dbit_off; - - if (dbit_off) { - mlib_s32 nume = 8 - dbit_off; - - if (nume > width) - nume = width; - dst0 = 0; - emask = 0; - - for (; j <= (nume - 4); j += 4) { - emask |= (0xf << (4 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6 - (dbit_off + j)); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5 - (dbit_off + j)); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4 - (dbit_off + j)); - } - - for (; j < nume; j++) { - emask |= (1 << (7 - (dbit_off + j))); - dst0 |= THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7 - (dbit_off + j)); - /* swap threshes */ - threshT = thresh0; - thresh0 = thresh1; - thresh1 = thresh2; - thresh2 = thresh3; - thresh3 = threshT; - } - - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[0] = (dst0 & emask) | (pdst_row[0] & ~emask); - jbit++; - } - -#ifdef __SUNPRO_C -#pragma pipeloop(0) -#endif /* __SUNPRO_C */ - for (; j <= (width - 16); j += 16) { - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh2, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh3, 0)); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[jbit] = dst0; - jbit++; - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j + 8], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 9], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 10], thresh2, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 11], thresh3, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 12], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 13], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 14], thresh2, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 15], thresh3, 0)); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[jbit] = dst0; - jbit++; - } - - if (j <= width - 8) { - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh2, 1) | - THRESH1_CMP_SHIFT(psrc_row[j + 7], thresh3, 0)); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[jbit] = dst0; - jbit++; - j += 8; - } - - if (j < width) { - dst0 = (THRESH1_CMP_SHIFT(psrc_row[j], thresh0, 7) | - THRESH1_CMP_SHIFT(psrc_row[j + 1], thresh1, 6) | - THRESH1_CMP_SHIFT(psrc_row[j + 2], thresh2, 5) | - THRESH1_CMP_SHIFT(psrc_row[j + 3], thresh3, 4) | - THRESH1_CMP_SHIFT(psrc_row[j + 4], thresh0, 3) | - THRESH1_CMP_SHIFT(psrc_row[j + 5], thresh1, 2) | - THRESH1_CMP_SHIFT(psrc_row[j + 6], thresh2, 1)); - - emask = (0xFF << (8 - (width - j))); - dst0 = (mhigh & dst0) | (mlow & ~dst0); - pdst_row[jbit] = (dst0 & emask) | (pdst_row[jbit] & ~emask); - } - - psrc_row += src_stride; - pdst_row += dst_stride; - } -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_image.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_image.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_image.h Fri May 13 11:31:05 2016 +0300 @@ -35,7 +35,6 @@ #include #include #include -#include #include #endif /* MLIB_IMAGE_H */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_image_blend_proto.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_image_blend_proto.h Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1448 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -#ifndef __ORIG_MLIB_IMAGE_BLEND_PROTO_H -#define __ORIG_MLIB_IMAGE_BLEND_PROTO_H - -#include -#include -#include -#if defined ( __MEDIALIB_OLD_NAMES_ADDED ) -#include <../include/mlib_image_blend_proto.h> -#endif /* defined ( __MEDIALIB_OLD_NAMES_ADDED ) */ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#if defined ( _MSC_VER ) -#if ! defined ( __MEDIALIB_OLD_NAMES ) -#define __MEDIALIB_OLD_NAMES -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -#endif /* defined ( _MSC_VER ) */ - -/*********************************************************************** - - NOTE: f = min(ALPHAsrc2, 1 - ALPHAsrc1) - f = min(ALPHAscr2, 1 - ALPHAsrc1dst) for In-place function - ALPHA = (ALPHA, ALPHA, ALPHA, ALPHA) - -************************************************************************/ - -/* dst = 0 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_ZERO mlib_ImageBlend_ZERO_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = 0 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_ZERO_Inp mlib_ImageBlend_ZERO_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_ONE mlib_ImageBlend_ZERO_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_ONE_Inp mlib_ImageBlend_ZERO_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * src1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_DC mlib_ImageBlend_ZERO_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * src1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_DC_Inp mlib_ImageBlend_ZERO_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (1 - src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_OMDC mlib_ImageBlend_ZERO_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (1 - src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_OMDC_Inp mlib_ImageBlend_ZERO_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_SA mlib_ImageBlend_ZERO_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_SA_Inp mlib_ImageBlend_ZERO_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_OMSA mlib_ImageBlend_ZERO_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_OMSA_Inp mlib_ImageBlend_ZERO_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_DA mlib_ImageBlend_ZERO_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_DA_Inp mlib_ImageBlend_ZERO_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (1 - ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_OMDA mlib_ImageBlend_ZERO_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (1 - ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_OMDA_Inp mlib_ImageBlend_ZERO_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_SAS mlib_ImageBlend_ZERO_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ZERO_SAS_Inp mlib_ImageBlend_ZERO_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ZERO_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_ZERO mlib_ImageBlend_ONE_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_ZERO_Inp mlib_ImageBlend_ONE_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_ONE mlib_ImageBlend_ONE_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_ONE_Inp mlib_ImageBlend_ONE_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_DC mlib_ImageBlend_ONE_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_DC_Inp mlib_ImageBlend_ONE_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 + src1 * (1 - src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_OMDC mlib_ImageBlend_ONE_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 + src1dst * (1 - src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_OMDC_Inp mlib_ImageBlend_ONE_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_SA mlib_ImageBlend_ONE_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_SA_Inp mlib_ImageBlend_ONE_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_OMSA mlib_ImageBlend_ONE_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_OMSA_Inp mlib_ImageBlend_ONE_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_DA mlib_ImageBlend_ONE_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_DA_Inp mlib_ImageBlend_ONE_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * (1 - ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_OMDA mlib_ImageBlend_ONE_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * (1 - ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_OMDA_Inp mlib_ImageBlend_ONE_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_SAS mlib_ImageBlend_ONE_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_ONE_SAS_Inp mlib_ImageBlend_ONE_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_ONE_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_ZERO mlib_ImageBlend_SC_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_ZERO_Inp mlib_ImageBlend_SC_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 + 1) * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_ONE mlib_ImageBlend_SC_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst + 1) * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_ONE_Inp mlib_ImageBlend_SC_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = 2 * src1 * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_DC mlib_ImageBlend_SC_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = 2 * src1dst * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_DC_Inp mlib_ImageBlend_SC_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_OMDC mlib_ImageBlend_SC_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_OMDC_Inp mlib_ImageBlend_SC_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (src1 + ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_SA mlib_ImageBlend_SC_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (src1dst + ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_SA_Inp mlib_ImageBlend_SC_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (1 - ALPHAsrc2 + src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_OMSA mlib_ImageBlend_SC_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (1 - ALPHAsrc2 + src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_OMSA_Inp mlib_ImageBlend_SC_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (src1 + ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_DA mlib_ImageBlend_SC_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (src1dst + ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_DA_Inp mlib_ImageBlend_SC_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * (1 - ALPHAsrc1 + src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_OMDA mlib_ImageBlend_SC_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * (1 - ALPHAsrc1dst + src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_OMDA_Inp mlib_ImageBlend_SC_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src2 * ((f, f, f, 1) + src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_SAS mlib_ImageBlend_SC_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src2 * ((f, f, f, 1) + src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SC_SAS_Inp mlib_ImageBlend_SC_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SC_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_ZERO mlib_ImageBlend_OMSC_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_ZERO_Inp mlib_ImageBlend_OMSC_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * (1 - src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_ONE mlib_ImageBlend_OMSC_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * (1 - src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_ONE_Inp mlib_ImageBlend_OMSC_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_DC mlib_ImageBlend_OMSC_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_DC_Inp mlib_ImageBlend_OMSC_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 - 2 * src1 * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_OMDC mlib_ImageBlend_OMSC_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 - 2 * src1dst * src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_OMDC_Inp mlib_ImageBlend_OMSC_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * (ALPHAsrc2 - src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_SA mlib_ImageBlend_OMSC_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * (ALPHAsrc2 - src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_SA_Inp mlib_ImageBlend_OMSC_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 - src2 * (src1 + ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_OMSA mlib_ImageBlend_OMSC_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 - src2 * (src1dst + ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_OMSA_Inp mlib_ImageBlend_OMSC_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * (ALPHAsrc1 - src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_DA mlib_ImageBlend_OMSC_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * (ALPHAsrc1dst - src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_DA_Inp mlib_ImageBlend_OMSC_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 - src2 * (src1 + ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_OMDA mlib_ImageBlend_OMSC_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 - src2 * (src1dst + ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_OMDA_Inp mlib_ImageBlend_OMSC_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + src2 * ((f, f, f, 1) - src1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_SAS mlib_ImageBlend_OMSC_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + src2 * ((f, f, f, 1) - src1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSC_SAS_Inp mlib_ImageBlend_OMSC_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSC_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_ZERO mlib_ImageBlend_SA_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_ZERO_Inp mlib_ImageBlend_SA_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc2 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_ONE mlib_ImageBlend_SA_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc2 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_ONE_Inp mlib_ImageBlend_SA_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (ALPHAsrc2 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_DC mlib_ImageBlend_SA_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (ALPHAsrc2 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_DC_Inp mlib_ImageBlend_SA_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (ALPHAsrc2 - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_OMDC mlib_ImageBlend_SA_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (ALPHAsrc2 - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_OMDC_Inp mlib_ImageBlend_SA_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 + src2) * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_SA mlib_ImageBlend_SA_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst + src2) * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_SA_Inp mlib_ImageBlend_SA_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 - src2) * ALPHAsrc2 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_OMSA mlib_ImageBlend_SA_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst - src2) * ALPHAsrc2 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_OMSA_Inp mlib_ImageBlend_SA_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc2 + src2 * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_DA mlib_ImageBlend_SA_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc2 + src2 * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_DA_Inp mlib_ImageBlend_SA_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc2 + src2 * (1 - ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_OMDA mlib_ImageBlend_SA_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc2 + src2 * (1 - ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_OMDA_Inp mlib_ImageBlend_SA_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc2 + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_SAS mlib_ImageBlend_SA_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc2 + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_SA_SAS_Inp mlib_ImageBlend_SA_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_SA_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_ZERO mlib_ImageBlend_OMSA_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_ZERO_Inp mlib_ImageBlend_OMSA_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_ONE mlib_ImageBlend_OMSA_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_ONE_Inp mlib_ImageBlend_OMSA_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_DC mlib_ImageBlend_OMSA_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_DC_Inp mlib_ImageBlend_OMSA_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2 - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_OMDC mlib_ImageBlend_OMSA_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2 - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_OMDC_Inp mlib_ImageBlend_OMSA_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + (src2 - src1) * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_SA mlib_ImageBlend_OMSA_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + (src2 - src1dst) * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_SA_Inp mlib_ImageBlend_OMSA_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 + src2) * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_OMSA mlib_ImageBlend_OMSA_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst + src2) * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_OMSA_Inp mlib_ImageBlend_OMSA_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2) + src2 * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_DA mlib_ImageBlend_OMSA_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_DA_Inp mlib_ImageBlend_OMSA_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2) + src2 * (1 - ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_OMDA mlib_ImageBlend_OMSA_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 * (1 - ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_OMDA_Inp mlib_ImageBlend_OMSA_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc2) + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_SAS mlib_ImageBlend_OMSA_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc2) + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMSA_SAS_Inp mlib_ImageBlend_OMSA_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMSA_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_ZERO mlib_ImageBlend_DA_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_ZERO_Inp mlib_ImageBlend_DA_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc1 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_ONE mlib_ImageBlend_DA_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc1dst + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_ONE_Inp mlib_ImageBlend_DA_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (ALPHAsrc1 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_DC mlib_ImageBlend_DA_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (ALPHAsrc1dst + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_DC_Inp mlib_ImageBlend_DA_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (ALPHAsrc1 - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_OMDC mlib_ImageBlend_DA_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (ALPHAsrc1dst - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_OMDC_Inp mlib_ImageBlend_DA_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc1 + src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_SA mlib_ImageBlend_DA_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc1dst + src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_SA_Inp mlib_ImageBlend_DA_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc1 + src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_OMSA mlib_ImageBlend_DA_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc1dst + src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_OMSA_Inp mlib_ImageBlend_DA_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 + src2) * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_DA mlib_ImageBlend_DA_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst + src2) * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_DA_Inp mlib_ImageBlend_DA_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 - src2) * ALPHAsrc1 + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_OMDA mlib_ImageBlend_DA_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst - src2) * ALPHAsrc1dst + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_OMDA_Inp mlib_ImageBlend_DA_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * ALPHAsrc1 + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_SAS mlib_ImageBlend_DA_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * ALPHAsrc1dst + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_DA_SAS_Inp mlib_ImageBlend_DA_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_DA_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_ZERO mlib_ImageBlend_OMDA_ZERO -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_ZERO(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_ZERO_Inp mlib_ImageBlend_OMDA_ZERO_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_ZERO_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_ONE mlib_ImageBlend_OMDA_ONE -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_ONE(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_ONE_Inp mlib_ImageBlend_OMDA_ONE_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_ONE_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1 + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_DC mlib_ImageBlend_OMDA_DC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_DC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst + src2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_DC_Inp mlib_ImageBlend_OMDA_DC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_DC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1 - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_OMDC mlib_ImageBlend_OMDA_OMDC -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_OMDC(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst - src2) + src2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_OMDC_Inp mlib_ImageBlend_OMDA_OMDC_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_OMDC_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1) + src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_SA mlib_ImageBlend_OMDA_SA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_SA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 * ALPHAsrc2 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_SA_Inp mlib_ImageBlend_OMDA_SA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_SA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1) + src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_OMSA mlib_ImageBlend_OMDA_OMSA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_OMSA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 * (1 - ALPHAsrc2) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_OMSA_Inp mlib_ImageBlend_OMDA_OMSA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_OMSA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 + (src2 - src1) * ALPHAsrc1 */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_DA mlib_ImageBlend_OMDA_DA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_DA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst + (src2 - src1dst) * ALPHAsrc1dst */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_DA_Inp mlib_ImageBlend_OMDA_DA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_DA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = (src1 + src2) * (1 - ALPHAsrc1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_OMDA mlib_ImageBlend_OMDA_OMDA -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_OMDA(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = (src1dst + src2) * (1 - ALPHAsrc1dst) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_OMDA_Inp mlib_ImageBlend_OMDA_OMDA_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_OMDA_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - -/* dst = src1 * (1 - ALPHAsrc1) + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_SAS mlib_ImageBlend_OMDA_SAS -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_SAS(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_s32 cmask); - -/* src1dst = src1dst * (1 - ALPHAsrc1dst) + src2 * (f, f, f, 1) */ - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageBlend_OMDA_SAS_Inp mlib_ImageBlend_OMDA_SAS_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageBlend_OMDA_SAS_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_s32 cmask); - - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageComposite mlib_ImageComposite -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageComposite(mlib_image *dst, - const mlib_image *src1, - const mlib_image *src2, - mlib_blend bsrc1, - mlib_blend bsrc2, - mlib_s32 cmask); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageComposite_Inp mlib_ImageComposite_Inp -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageComposite_Inp(mlib_image *src1dst, - const mlib_image *src2, - mlib_blend bsrc1, - mlib_blend bsrc2, - mlib_s32 cmask); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* __ORIG_MLIB_IMAGE_BLEND_PROTO_H */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/share/native/libmlib_image/mlib_image_proto.h --- a/jdk/src/java.desktop/share/native/libmlib_image/mlib_image_proto.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/share/native/libmlib_image/mlib_image_proto.h Fri May 13 11:31:05 2016 +0300 @@ -1430,17 +1430,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageAffineIndex mlib_ImageAffineIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageAffineIndex(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *mtx, - mlib_filter filter, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageAffineTable mlib_ImageAffineTable #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageAffineTable(mlib_image *dst, @@ -1471,17 +1460,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageAffineTransformIndex mlib_ImageAffineTransformIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageAffineTransformIndex(mlib_image *dst, - const mlib_image *src, - const mlib_d64 *mtx, - mlib_filter filter, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageAffineTransform_Fp mlib_ImageAffineTransform_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageAffineTransform_Fp(mlib_image *dst, @@ -1825,19 +1803,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageRotateIndex mlib_ImageRotateIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageRotateIndex(mlib_image *dst, - const mlib_image *src, - mlib_d64 angle, - mlib_d64 xcenter, - mlib_d64 ycenter, - mlib_filter filter, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageRotate_Fp mlib_ImageRotate_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageRotate_Fp(mlib_image *dst, @@ -1887,16 +1852,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageZoomIn2XIndex mlib_ImageZoomIn2XIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageZoomIn2XIndex(mlib_image *dst, - const mlib_image *src, - mlib_filter filter, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageZoomIn2X_Fp mlib_ImageZoomIn2X_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageZoomIn2X_Fp(mlib_image *dst, @@ -1915,16 +1870,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageZoomOut2XIndex mlib_ImageZoomOut2XIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageZoomOut2XIndex(mlib_image *dst, - const mlib_image *src, - mlib_filter filter, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageZoomOut2X_Fp mlib_ImageZoomOut2X_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageZoomOut2X_Fp(mlib_image *dst, @@ -2012,18 +1957,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageZoomIndex mlib_ImageZoomIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageZoomIndex(mlib_image *dst, - const mlib_image *src, - mlib_d64 zoomx, - mlib_d64 zoomy, - mlib_filter filter, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageZoom_Fp mlib_ImageZoom_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageZoom_Fp(mlib_image *dst, @@ -2312,98 +2245,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorTrue2IndexInit mlib_ImageColorTrue2IndexInit -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorTrue2IndexInit(void **colormap, - mlib_s32 bits, - mlib_type intype, - mlib_type outtype, - mlib_s32 channels, - mlib_s32 entries, - mlib_s32 offset, - const void **table); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorTrue2Index mlib_ImageColorTrue2Index -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorTrue2Index(mlib_image *dst, - const mlib_image *src, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorTrue2IndexFree mlib_ImageColorTrue2IndexFree -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -void __mlib_ImageColorTrue2IndexFree(void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorDitherInit mlib_ImageColorDitherInit -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorDitherInit(void **colormap, - const mlib_s32 *dimensions, - mlib_type intype, - mlib_type outtype, - mlib_s32 channels, - mlib_s32 entries, - mlib_s32 offset, - void **lut); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorDitherFree mlib_ImageColorDitherFree -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -void __mlib_ImageColorDitherFree(void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorErrorDiffusion3x3 mlib_ImageColorErrorDiffusion3x3 -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorErrorDiffusion3x3(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 scale, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorErrorDiffusionMxN mlib_ImageColorErrorDiffusionMxN -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorErrorDiffusionMxN(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - mlib_s32 scale, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorOrderedDither8x8 mlib_ImageColorOrderedDither8x8 -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorOrderedDither8x8(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *dmask, - mlib_s32 scale, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageColorOrderedDitherMxN mlib_ImageColorOrderedDitherMxN -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageColorOrderedDitherMxN(mlib_image *dst, - const mlib_image *src, - const mlib_s32 **dmask, - mlib_s32 m, - mlib_s32 n, - mlib_s32 scale, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageColorReplace mlib_ImageColorReplace #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageColorReplace(mlib_image *dst, @@ -2694,17 +2535,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageConv2x2Index mlib_ImageConv2x2Index -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageConv2x2Index(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 scale, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageConv2x2_Fp mlib_ImageConv2x2_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageConv2x2_Fp(mlib_image *dst, @@ -2726,17 +2556,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageConv3x3Index mlib_ImageConv3x3Index -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageConv3x3Index(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 scale, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageConv3x3_Fp mlib_ImageConv3x3_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageConv3x3_Fp(mlib_image *dst, @@ -2758,17 +2577,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageConv4x4Index mlib_ImageConv4x4Index -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageConv4x4Index(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 scale, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageConv4x4_Fp mlib_ImageConv4x4_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageConv4x4_Fp(mlib_image *dst, @@ -2790,17 +2598,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageConv5x5Index mlib_ImageConv5x5Index -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageConv5x5Index(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 scale, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageConv5x5_Fp mlib_ImageConv5x5_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageConv5x5_Fp(mlib_image *dst, @@ -2822,17 +2619,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageConv7x7Index mlib_ImageConv7x7Index -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageConv7x7Index(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 scale, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageConv7x7_Fp mlib_ImageConv7x7_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageConv7x7_Fp(mlib_image *dst, @@ -2871,21 +2657,6 @@ #if defined ( __MEDIALIB_OLD_NAMES ) -#define __mlib_ImageConvMxNIndex mlib_ImageConvMxNIndex -#endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ -mlib_status __mlib_ImageConvMxNIndex(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kernel, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - mlib_s32 scale, - mlib_edge edge, - const void *colormap); - - -#if defined ( __MEDIALIB_OLD_NAMES ) #define __mlib_ImageConvMxN_Fp mlib_ImageConvMxN_Fp #endif /* ! defined ( __MEDIALIB_OLD_NAMES ) */ mlib_status __mlib_ImageConvMxN_Fp(mlib_image *dst, diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy.c --- a/jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - -/* - * FUNCTION - * mlib_ImageCopy - Direct copy from one image to another. - * - * SYNOPSIS - * mlib_status mlib_ImageCopy(mlib_image *dst, - * mlib_image *src); - * - * ARGUMENT - * dst pointer to output or destination image - * src pointer to input or source image - * - * RESTRICTION - * src and dst must have the same size, type and number of channels. - * They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, - * MLIB_SHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type. - * - * DESCRIPTION - * Direct copy from one image to another. - */ - -#include -#include "mlib_image.h" -#include "mlib_ImageCheck.h" - -/***************************************************************/ - -extern void mlib_v_ImageCopy_blk(mlib_u8 *sa, mlib_u8 *da, mlib_s32 size); -extern void mlib_v_ImageCopy_a1(mlib_d64 *sp, mlib_d64 *dp, mlib_s32 size); -extern void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, mlib_s32 size); -extern void mlib_ImageCopy_bit_al(mlib_u8 *sa, mlib_u8 *da, - mlib_s32 size, mlib_s32 offset); -extern void mlib_ImageCopy_bit_na(mlib_u8 *sa, mlib_u8 *da, mlib_s32 size, - mlib_s32 s_offset, mlib_s32 d_offset); - -/***************************************************************/ - -#ifdef MLIB_TEST - -mlib_status mlib_v_ImageCopy(mlib_image *dst, mlib_image *src) - -#else - -mlib_status mlib_ImageCopy(mlib_image *dst, const mlib_image *src) - -#endif -{ - mlib_u8 *sa; /* start point in source */ - mlib_u8 *da; /* start points in destination */ - mlib_s32 width; /* width in bytes of src and dst */ - mlib_s32 height; /* height in lines of src and dst */ - mlib_s32 s_offset; /* bit offset of src */ - mlib_s32 d_offset; /* bit offset of dst */ - mlib_s32 stride; /* stride in bytes in src*/ - mlib_s32 dstride; /* stride in bytes in dst */ - mlib_s32 j; /* indices for x, y */ - mlib_s32 size; - - MLIB_IMAGE_CHECK(src); - MLIB_IMAGE_CHECK(dst); - MLIB_IMAGE_TYPE_EQUAL(src, dst); - MLIB_IMAGE_CHAN_EQUAL(src, dst); - MLIB_IMAGE_SIZE_EQUAL(src, dst); - - width = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst); - height = mlib_ImageGetHeight(dst); - sa = (mlib_u8 *)mlib_ImageGetData(src); - da = (mlib_u8 *)mlib_ImageGetData(dst); - - switch (mlib_ImageGetType(dst)) { - case MLIB_BIT: - - if (!mlib_ImageIsNotOneDvector(src) && - !mlib_ImageIsNotOneDvector(dst)) { - size = height * (width >> 3); - if ((size & 0x3f) == 0 && - !mlib_ImageIsNotAligned64(src) && - !mlib_ImageIsNotAligned64(dst)) { - - mlib_v_ImageCopy_blk(sa, da, size); - return MLIB_SUCCESS; - } - if (((size & 7) == 0) && !mlib_ImageIsNotAligned8(src) && - !mlib_ImageIsNotAligned8(dst)) { - - size >>= 3; /* in octlet */ - mlib_v_ImageCopy_a1((mlib_d64 *)sa, (mlib_d64 *)da, size); - } - else { - - mlib_ImageCopy_na(sa, da, size); - } - } - else { - stride = mlib_ImageGetStride(src); /* in byte */ - dstride = mlib_ImageGetStride(dst); /* in byte */ - s_offset = mlib_ImageGetBitOffset(src); /* in bits */ - d_offset = mlib_ImageGetBitOffset(dst); /* in bits */ - - if (s_offset == d_offset) { - for (j = 0; j < height; j++) { - mlib_ImageCopy_bit_al(sa, da, width, s_offset); - sa += stride; - da += dstride; - } - } else { - for (j = 0; j < height; j++) { - mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset); - sa += stride; - da += dstride; - } - } - } - return MLIB_SUCCESS; - case MLIB_BYTE: - break; - case MLIB_SHORT: - width *= 2; - break; - case MLIB_INT: - case MLIB_FLOAT: - width *= 4; - break; - case MLIB_DOUBLE: - width *= 8; - break; - default: - return MLIB_FAILURE; - } - - if (!mlib_ImageIsNotOneDvector(src) && - !mlib_ImageIsNotOneDvector(dst)) { - size = height * width; - if ((size & 0x3f) == 0 && - !mlib_ImageIsNotAligned64(src) && - !mlib_ImageIsNotAligned64(dst)) { - - mlib_v_ImageCopy_blk(sa, da, size); - return MLIB_SUCCESS; - } - if (((size & 7) == 0) && !mlib_ImageIsNotAligned8(src) && - !mlib_ImageIsNotAligned8(dst)) { - - size >>= 3; /* in octlet */ - mlib_v_ImageCopy_a1((mlib_d64 *)sa, (mlib_d64 *)da, size); - } - else { - - mlib_ImageCopy_na(sa, da, size); - } - } - else { - stride = mlib_ImageGetStride(src); /* in byte */ - dstride = mlib_ImageGetStride(dst); /* in byte */ - - /* row loop */ - for (j = 0; j < height; j++) { - mlib_ImageCopy_na(sa, da, width); - sa += stride; - da += dstride; - } - } - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy_blk.s --- a/jdk/src/java.desktop/unix/native/libawt/awt/medialib/mlib_v_ImageCopy_blk.s Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,262 +0,0 @@ -! -! -! -! Copyright 2000 Sun Microsystems, Inc. All Rights Reserved. -! DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -! -! This code is free software; you can redistribute it and/or modify it -! under the terms of the GNU General Public License version 2 only, as -! published by the Free Software Foundation. Oracle designates this -! particular file as subject to the "Classpath" exception as provided -! by Oracle in the LICENSE file that accompanied this code. -! -! This code is distributed in the hope that it will be useful, but WITHOUT -! ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -! FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -! version 2 for more details (a copy is included in the LICENSE file that -! accompanied this code). -! -! You should have received a copy of the GNU General Public License version -! 2 along with this work; if not, write to the Free Software Foundation, -! Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -! -! Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -! or visit www.oracle.com if you need additional information or have any -! questions. -! - - -! FUNCTION -! mlib_v_ImageCopy_blk - Copy an image into another -! (with Block Load/Store) -! -! SYNOPSIS -! void mlib_v_ImageCopy_blk(void *src, -! void *dst, -! int size); -! -! ARGUMENT -! src source image data -! dst destination image data -! size image size in bytes -! -! NOTES -! src and dst must point to 64-byte aligned addresses -! size must be multiple of 64 -! -! DESCRIPTION -! dst = src -! - -#include "vis_asi.h" - -! Minimum size of stack frame according to SPARC ABI -#define MINFRAME 96 - -! ENTRY provides the standard procedure entry code -#define ENTRY(x) \ - .align 4; \ - .global x; \ -x: - -! SET_SIZE trails a function and sets the size for the ELF symbol table -#define SET_SIZE(x) \ - .size x, (.-x) - -! SPARC have four integer register groups. i-registers %i0 to %i7 -! hold input data. o-registers %o0 to %o7 hold output data. l-registers -! %l0 to %l7 hold local data. g-registers %g0 to %g7 hold global data. -! Note that %g0 is alway zero, write to it has no program-visible effect. - -! When calling an assembly function, the first 6 arguments are stored -! in i-registers from %i0 to %i5. The rest arguments are stored in stack. -! Note that %i6 is reserved for stack pointer and %i7 for return address. - -! Only the first 32 f-registers can be used as 32-bit registers. -! The last 32 f-registers can only be used as 16 64-bit registers. - -#define src %i0 -#define dst %i1 -#define sz %i2 - -!frame pointer %i6 -!return addr %i7 - -!stack pointer %o6 -!call link %o7 - -#define sa %l0 -#define da %l1 -#define se %l2 -#define ns %l3 - -#define O0 %f16 -#define O1 %f18 -#define O2 %f20 -#define O3 %f22 -#define O4 %f24 -#define O5 %f26 -#define O6 %f28 -#define O7 %f30 - -#define A0 %f32 -#define A1 %f34 -#define A2 %f36 -#define A3 %f38 -#define A4 %f40 -#define A5 %f42 -#define A6 %f44 -#define A7 %f46 - -#define B0 %f48 -#define B1 %f50 -#define B2 %f52 -#define B3 %f54 -#define B4 %f56 -#define B5 %f58 -#define B6 %f60 -#define B7 %f62 - -#define USE_BLD -#define USE_BST - -#define MEMBAR_BEFORE_BLD membar #StoreLoad -#define MEMBAR_AFTER_BLD membar #StoreLoad - -#ifdef USE_BLD -#define BLD_A0 \ - ldda [sa]ASI_BLK_P,A0; \ - cmp sa,se; \ - blu,pt %icc,1f; \ - inc 64,sa; \ - dec 64,sa; \ -1: -#else -#define BLD_A0 \ - ldd [sa + 0],A0; \ - ldd [sa + 8],A1; \ - ldd [sa + 16],A2; \ - ldd [sa + 24],A3; \ - ldd [sa + 32],A4; \ - ldd [sa + 40],A5; \ - ldd [sa + 48],A6; \ - ldd [sa + 56],A7; \ - cmp sa,se; \ - blu,pt %icc,1f; \ - inc 64,sa; \ - dec 64,sa; \ -1: -#endif - -#ifdef USE_BLD -#define BLD_B0 \ - ldda [sa]ASI_BLK_P,B0; \ - cmp sa,se; \ - blu,pt %icc,1f; \ - inc 64,sa; \ - dec 64,sa; \ -1: -#else -#define BLD_B0 \ - ldd [sa + 0],B0; \ - ldd [sa + 8],B1; \ - ldd [sa + 16],B2; \ - ldd [sa + 24],B3; \ - ldd [sa + 32],B4; \ - ldd [sa + 40],B5; \ - ldd [sa + 48],B6; \ - ldd [sa + 56],B7; \ - cmp sa,se; \ - blu,pt %icc,1f; \ - inc 64,sa; \ - dec 64,sa; \ -1: -#endif - -#ifdef USE_BST -#define BST \ - stda O0,[da]ASI_BLK_P; \ - inc 64,da; \ - deccc ns; \ - ble,pn %icc,mlib_v_ImageCopy_end; \ - nop -#else -#define BST \ - std O0,[da + 0]; \ - std O1,[da + 8]; \ - std O2,[da + 16]; \ - std O3,[da + 24]; \ - std O4,[da + 32]; \ - std O5,[da + 40]; \ - std O6,[da + 48]; \ - std O7,[da + 56]; \ - inc 64,da; \ - deccc ns; \ - ble,pn %icc,mlib_v_ImageCopy_end; \ - nop -#endif - -#define COPY_A0 \ - fmovd A0, O0; \ - fmovd A1, O1; \ - fmovd A2, O2; \ - fmovd A3, O3; \ - fmovd A4, O4; \ - fmovd A5, O5; \ - fmovd A6, O6; \ - fmovd A7, O7; - -#define COPY_B0 \ - fmovd B0, O0; \ - fmovd B1, O1; \ - fmovd B2, O2; \ - fmovd B3, O3; \ - fmovd B4, O4; \ - fmovd B5, O5; \ - fmovd B6, O6; \ - fmovd B7, O7; - - .section ".text",#alloc,#execinstr - - ENTRY(mlib_v_ImageCopy_blk) ! function name - - save %sp,-MINFRAME,%sp ! reserve space for stack - ! and adjust register window -! do some error checking - tst sz ! size > 0 - ble,pn %icc,mlib_v_ImageCopy_ret - -! calculate loop count - sra sz,6,ns ! 64 bytes per loop - - add src,sz,se ! end address of source - mov src,sa - mov dst,da - ! issue memory barrier instruction - MEMBAR_BEFORE_BLD ! to ensure all previous memory load - ! and store has completed - - BLD_A0 - BLD_B0 ! issue the 2nd block load instruction - ! to synchronize with returning data -mlib_v_ImageCopy_bgn: - - COPY_A0 ! process data returned by BLD_A0 - BLD_A0 ! block load and sync data from BLD_B0 - BST ! block store data from BLD_A0 - - COPY_B0 ! process data returned by BLD_B0 - BLD_B0 ! block load and sync data from BLD_A0 - BST ! block store data from BLD_B0 - - bg,pt %icc,mlib_v_ImageCopy_bgn - -mlib_v_ImageCopy_end: - ! issue memory barrier instruction - MEMBAR_AFTER_BLD ! to ensure all previous memory load - ! and store has completed. -mlib_v_ImageCopy_ret: - ret ! return - restore ! restore register window - - SET_SIZE(mlib_v_ImageCopy_blk) diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libawt/awt/medialib/vis_asi.h --- a/jdk/src/java.desktop/unix/native/libawt/awt/medialib/vis_asi.h Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -#ifndef VIS_ASI_H -#define VIS_ASI_H - -/* evolved from asm_asi.h in VSDK 1.0 */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* ASI definitions for VIS */ - -#define ASI_N 0x04 -#define ASI_NL 0x0C -#define ASI_AIUP 0x10 -#define ASI_AIUS 0x11 -#define ASI_AIUPL 0x18 -#define ASI_AIUSL 0x19 -#define ASI_PHYS_USE_EC_L 0x1C -#define ASI_PHYS_BYPASS_EC_WITH_EBIT_L 0x1D -#define ASI_DC_DATA 0x46 -#define ASI_DC_TAG 0x47 -#define ASI_UPA_CONTROL 0x4A -#define ASI_MONDO_SEND_CTRL 0x48 -#define ASI_MONDO_RECEIVE_CTRL 0x49 -#define ASI_AFSR 0x4C -#define ASI_AFAR 0x4D -#define ASI_EC_TAG_DATA 0x4E -#define ASI_ICACHE_DATA 0x66 -#define ASI_IC_INSTR 0x66 -#define ASI_IC_TAG 0x67 -#define ASI_IC_PRE_DECODE 0x6E -#define ASI_IC_NEXT_FIELD 0x6F -#define ASI_BLK_AIUP 0x70 -#define ASI_BLK_AIUS 0x71 -#define ASI_EC 0x76 -#define ASI_BLK_AIUPL 0x78 -#define ASI_BLK_AIUSL 0x79 -#define ASI_P 0x80 -#define ASI_S 0x81 -#define ASI_PNF 0x82 -#define ASI_SNF 0x83 -#define ASI_PL 0x88 -#define ASI_SL 0x89 -#define ASI_PNFL 0x8A -#define ASI_SNFL 0x8B -#define ASI_PST8_P 0xC0 -#define ASI_PST8_S 0xC1 -#define ASI_PST16_P 0xC2 -#define ASI_PST16_S 0xC3 -#define ASI_PST32_P 0xC4 -#define ASI_PST32_S 0xC5 -#define ASI_PST8_PL 0xC8 -#define ASI_PST8_SL 0xC9 -#define ASI_PST16_PL 0xCA -#define ASI_PST16_SL 0xCB -#define ASI_PST32_PL 0xCC -#define ASI_PST32_SL 0xCD -#define ASI_FL8_P 0xD0 -#define ASI_FL8_S 0xD1 -#define ASI_FL16_P 0xD2 -#define ASI_FL16_S 0xD3 -#define ASI_FL8_PL 0xD8 -#define ASI_FL8_SL 0xD9 -#define ASI_FL16_PL 0xDA -#define ASI_FL16_SL 0xDB -#define ASI_COMMIT_P 0xE0 -#define ASI_COMMIT_S 0xE1 -#define ASI_BLK_P 0xF0 -#define ASI_BLK_S 0xF1 -#define ASI_BLK_PL 0xF8 -#define ASI_BLK_SL 0xF9 - -#define ASI_NUCLEUS 0x04 -#define ASI_NUCLEUS_LITTLE 0x0C -#define ASI_AS_IF_USER_PRIMARY 0x10 -#define ASI_AS_IF_USER_SECONDARY 0x11 -#define ASI_PHYS_USE_EC 0x14 -#define ASI_PHYS_BYPASS_EC_WITH_EBIT 0x15 -#define ASI_AS_IF_USER_PRIMARY_LITTLE 0x18 -#define ASI_AS_IF_USER_SECONDARY_LITTLE 0x19 -#define ASI_PHYS_USE_EC_LITTLE 0x1C -#define ASI_PHYS_BYPASS_EC_WITH_EBIT_LITTLE 0x1D -#define ASI_LSU_CONTROL_REG 0x45 -#define ASI_DCACHE_DATA 0x46 -#define ASI_DCACHE_TAG 0x47 -#define ASI_INTR_DISPATCH_STATUS 0x48 -#define ASI_INTR_RECEIVE 0x49 -#define ASI_UPA_CONFIG_REG 0x4A -#define ASI_ESTATE_ERROR_EN_REG 0x4B -#define ASI_ASYNC_FAULT_STATUS 0x4C -#define ASI_ASYNC_FAULT_ADDR 0x4D -#define ASI_ECACHE_TAG_DATA 0x4E -#define ASI_OBSERVABILITY_REG 0x4F -#define ASI_IMMU 0x50 -#define ASI_IMU_TSB_BASE 0x50 -#define ASI_IMU_TAG_ACCESS 0x50 -#define ASI_IMU_SFSR 0x50 -#define ASI_IMU_TAG_TARGET 0x50 -#define ASI_IMU_TSB_POINTER_8K 0x51 -#define ASI_IMU_TSB_POINTER_64K 0x52 -#define ASI_IMU_DATAIN 0x54 -#define ASI_IMMU_DATA_IN 0x54 -#define ASI_IMU_DATA_ACCESS 0x55 -#define ASI_IMU_TAG_READ 0x56 -#define ASI_IMU_DEMAP 0x57 -#define ASI_DMMU 0x58 -#define ASI_PRIMARY_CONTEXT 0x58 -#define ASI_SECONDARY_CONTEXT 0x58 -#define ASI_DMU_TSB_BASE 0x58 -#define ASI_DMU_TAG_ACCESS 0x58 -#define ASI_DMU_TAG_TARGET 0x58 -#define ASI_DMU_SFSR 0x58 -#define ASI_DMU_SFAR 0x58 -#define ASI_DMU_VA_WATCHPOINT 0x58 -#define ASI_DMU_PA_WATCHPOINT 0x58 -#define ASI_DMU_TSB_POINTER_8K 0x59 -#define ASI_DMU_TSB_POINTER_64K 0x5A -#define ASI_DMU_TSB_POINTER_DIRECT 0x5B -#define ASI_DMU_DATAIN 0x5C -#define ASI_DMMU_DATA_IN 0x5C -#define ASI_DMU_DATA_ACCESS 0x5D -#define ASI_DMU_TAG_READ 0x5E -#define ASI_DMU_DEMAP 0x5F -#define ASI_ICACHE_INSTR 0x66 -#define ASI_ICACHE_TAG 0x67 -#define ASI_ICACHE_PRE_DECODE 0x6E -#define ASI_ICACHE_NEXT_FIELD 0x6F -#define ASI_BLOCK_AS_IF_USER_PRIMARY 0x70 -#define ASI_BLOCK_AS_IF_USER_SECONDARY 0x71 -#define ASI_EXT 0x76 -#define ASI_ECACHE 0x76 -#define ASI_ECACHE_DATA 0x76 -#define ASI_ECACHE_TAG 0x76 -#define ASI_SDB_INTR 0x77 -#define ASI_SDBH_ERR_REG 0x77 -#define ASI_SDBL_ERR_REG 0x77 -#define ASI_SDBH_CONTROL_REG 0x77 -#define ASI_SDBL_CONTROL_REG 0x77 -#define ASI_INTR_DISPATCH 0x77 -#define ASI_INTR_DATA0 0x77 -#define ASI_INTR_DATA1 0x77 -#define ASI_INTR_DATA2 0x77 -#define ASI_BLOCK_AS_IF_USER_PRIMARY_LITTLE 0x78 -#define ASI_BLOCK_AS_IF_USER_SECONDARY_LITTLE 0x79 -#define ASI_PRIMARY 0x80 -#define ASI_SECONDARY 0x81 -#define ASI_PRIMARY_NO_FAULT 0x82 -#define ASI_SECONDARY_NO_FAULT 0x83 -#define ASI_PRIMARY_LITTLE 0x88 -#define ASI_SECONDARY_LITTLE 0x89 -#define ASI_PRIMARY_NO_FAULT_LITTLE 0x8A -#define ASI_SECONDARY_NO_FAULT_LITTLE 0x8B -#define ASI_PST8_PRIMARY 0xC0 -#define ASI_PST8_SECONDARY 0xC1 -#define ASI_PST16_PRIMARY 0xC2 -#define ASI_PST16_SECONDARY 0xC3 -#define ASI_PST32_PRIMARY 0xC4 -#define ASI_PST32_SECONDARY 0xC5 -#define ASI_PST8_PRIMARY_LITTLE 0xC8 -#define ASI_PST8_SECONDARY_LITTLE 0xC9 -#define ASI_PST16_PRIMARY_LITTLE 0xCA -#define ASI_PST16_SECONDARY_LITTLE 0xCB -#define ASI_PST32_PRIMARY_LITTLE 0xCC -#define ASI_PST32_SECONDARY_LITTLE 0xCD -#define ASI_FL8_PRIMARY 0xD0 -#define ASI_FL8_SECONDARY 0xD1 -#define ASI_FL16_PRIMARY 0xD2 -#define ASI_FL16_SECONDARY 0xD3 -#define ASI_FL8_PRIMARY_LITTLE 0xD8 -#define ASI_FL8_SECONDARY_LITTLE 0xD9 -#define ASI_FL16_PRIMARY_LITTLE 0xDA -#define ASI_FL16_SECONDARY_LITTLE 0xDB -#define ASI_COMMIT_PRIMARY 0xE0 -#define ASI_COMMIT_SECONDARY 0xE1 -#define ASI_BLOCK_PRIMARY 0xF0 -#define ASI_BLOCK_SECONDARY 0xF1 -#define ASI_BLOCK_PRIMARY_LITTLE 0xF8 -#define ASI_BLOCK_SECONDARY_LITTLE 0xF9 - -#ifdef __cplusplus -} -#endif - -#endif /* VIS_ASI_H */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffineIndex_BC.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffineIndex_BC.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1443 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - -#include "vis_proto.h" -#include "mlib_image.h" -#include "mlib_ImageColormap.h" -#include "mlib_ImageAffine.h" -#include "mlib_v_ImageFilters.h" - -/***************************************************************/ -#define MLIB_LIMIT 512 -#define MLIB_SHIFT 16 - -/***************************************************************/ -#undef DECLAREVAR -#define DECLAREVAR() \ - DECLAREVAR0(); \ - mlib_s32 *warp_tbl = param -> warp_tbl; \ - mlib_s32 xSrc, ySrc; \ - mlib_s32 srcYStride = param -> srcYStride; \ - mlib_s32 filter = param -> filter; \ - mlib_s32 max_xsize = param -> max_xsize; \ - MLIB_TYPE *srcIndexPtr; \ - MLIB_TYPE *dstIndexPtr; \ - mlib_d64 *dstPixelPtr; \ - mlib_s32 i - -/***************************************************************/ -#define DECLAREVAR_U8() \ - mlib_s32 filterposx, filterposy; \ - mlib_d64 sum0, sum1, sum2, sum3; \ - mlib_f32 hi_row00, hi_row10, hi_row20, hi_row30; \ - mlib_f32 hi_row01, hi_row11, hi_row21, hi_row31; \ - mlib_f32 lo_row00, lo_row10, lo_row20, lo_row30; \ - mlib_f32 lo_row01, lo_row11, lo_row21, lo_row31; \ - mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3, yFilter; \ - mlib_d64 v00, v10, v20, v30; \ - mlib_d64 v01, v11, v21, v31; \ - mlib_d64 v02, v12, v22, v32; \ - mlib_d64 v03, v13, v23, v33; \ - mlib_d64 d0, d1, d2, d3; \ - mlib_d64 d00, d10, d20, d30; \ - mlib_d64 d01, d11, d21, d31; \ - mlib_s32 cols; \ - mlib_d64 res, *xPtr - -/***************************************************************/ -#define DECLAREVAR_S16() \ - mlib_s32 filterposx, filterposy; \ - mlib_d64 sum0, sum1, sum2, sum3; \ - mlib_d64 row00, row10, row20, row30; \ - mlib_d64 row01, row11, row21, row31; \ - mlib_d64 row02, row12, row22, row32; \ - mlib_d64 row03, row13, row23, row33; \ - mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3; \ - mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; \ - mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13; \ - mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33; \ - mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31; \ - mlib_d64 d0, d1, d2, d3; \ - mlib_d64 *yPtr, *xPtr; \ - mlib_s32 cols; \ - mlib_d64 res; \ - mlib_f32 f_x01000100 = vis_to_float(0x01000100) - -/***************************************************************/ -#undef CLIP -#define CLIP() \ - dstData += dstYStride; \ - xLeft = leftEdges[j]; \ - xRight = rightEdges[j]; \ - X = xStarts[j]; \ - Y = yStarts[j]; \ - PREPARE_DELTAS \ - if (xLeft > xRight) \ - continue; \ - dstIndexPtr = (MLIB_TYPE *)dstData + xLeft; \ - dstPixelPtr = dstRowPtr - -/***************************************************************/ -#define FADD_4BC_U8() \ - d0 = vis_fpadd16(d00, d10); \ - d1 = vis_fpadd16(d20, d30); \ - d0 = vis_fpadd16(d0, d1); \ - d2 = vis_fpadd16(d01, d11); \ - d3 = vis_fpadd16(d21, d31); \ - d2 = vis_fpadd16(d2, d3); \ - res = vis_fpack16_pair(d0, d2) - -/***************************************************************/ -#define LOAD_BC_U8_4CH_1PIXEL(mlib_filters_u8, mlib_filters_u8_4) \ - filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ - yFilter = *((mlib_d64 *) ((mlib_u8 *)mlib_filters_u8 + filterposy)); \ - filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ - xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx)); \ - xFilter0 = xPtr[0]; \ - xFilter1 = xPtr[1]; \ - xFilter2 = xPtr[2]; \ - xFilter3 = xPtr[3]; \ - X += dX; \ - Y += dY; \ - hi_row00 = flut[srcIndexPtr[0]]; \ - lo_row00 = flut[srcIndexPtr[1]]; \ - hi_row01 = flut[srcIndexPtr[2]]; \ - lo_row01 = flut[srcIndexPtr[3]]; \ - srcIndexPtr += srcYStride; \ - hi_row10 = flut[srcIndexPtr[0]]; \ - lo_row10 = flut[srcIndexPtr[1]]; \ - hi_row11 = flut[srcIndexPtr[2]]; \ - lo_row11 = flut[srcIndexPtr[3]]; \ - srcIndexPtr += srcYStride; \ - hi_row20 = flut[srcIndexPtr[0]]; \ - lo_row20 = flut[srcIndexPtr[1]]; \ - hi_row21 = flut[srcIndexPtr[2]]; \ - lo_row21 = flut[srcIndexPtr[3]]; \ - srcIndexPtr += srcYStride; \ - hi_row30 = flut[srcIndexPtr[0]]; \ - lo_row30 = flut[srcIndexPtr[1]]; \ - hi_row31 = flut[srcIndexPtr[2]]; \ - lo_row31 = flut[srcIndexPtr[3]] - -/***************************************************************/ -#define NEXT_PIXEL_4BC() \ - xSrc = (X >> MLIB_SHIFT)-1; \ - ySrc = (Y >> MLIB_SHIFT)-1; \ - srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc - -/***************************************************************/ -#define RESULT_4BC_U8_1PIXEL(ind) \ - v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter)); \ - v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter)); \ - v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter)); \ - v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter)); \ - v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter)); \ - v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter)); \ - sum0 = vis_fpadd16(v00, v10); \ - v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter)); \ - sum1 = vis_fpadd16(v01, v11); \ - v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter)); \ - sum2 = vis_fpadd16(v02, v12); \ - v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter)); \ - sum3 = vis_fpadd16(v03, v13); \ - v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter)); \ - sum0 = vis_fpadd16(sum0, v20); \ - v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter)); \ - sum1 = vis_fpadd16(sum1, v21); \ - v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter)); \ - sum2 = vis_fpadd16(sum2, v22); \ - v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter)); \ - sum3 = vis_fpadd16(sum3, v23); \ - v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter)); \ - sum0 = vis_fpadd16(sum0, v30); \ - v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter)); \ - sum1 = vis_fpadd16(sum1, v31); \ - v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter)); \ - sum2 = vis_fpadd16(sum2, v32); \ - v00 = vis_fmul8sux16(sum0, xFilter0); \ - sum3 = vis_fpadd16(sum3, v33); \ - v01 = vis_fmul8ulx16(sum0, xFilter0); \ - v10 = vis_fmul8sux16(sum1, xFilter1); \ - d0##ind = vis_fpadd16(v00, v01); \ - v11 = vis_fmul8ulx16(sum1, xFilter1); \ - v20 = vis_fmul8sux16(sum2, xFilter2); \ - d1##ind = vis_fpadd16(v10, v11); \ - v21 = vis_fmul8ulx16(sum2, xFilter2); \ - v30 = vis_fmul8sux16(sum3, xFilter3); \ - d2##ind = vis_fpadd16(v20, v21); \ - v31 = vis_fmul8ulx16(sum3, xFilter3); \ - d3##ind = vis_fpadd16(v30, v31) - -/***************************************************************/ -#define BC_U8_4CH(ind, mlib_filters_u8, mlib_filters_u8_4) \ - v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter)); \ - v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter)); \ - v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter)); \ - v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter)); \ - hi_row00 = flut[srcIndexPtr[0]]; \ - filterposy = (Y >> FILTER_SHIFT); \ - v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter)); \ - lo_row00 = flut[srcIndexPtr[1]]; \ - v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter)); \ - sum0 = vis_fpadd16(v00, v10); \ - hi_row01 = flut[srcIndexPtr[2]]; \ - v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter)); \ - lo_row01 = flut[srcIndexPtr[3]]; \ - filterposx = (X >> FILTER_SHIFT); \ - v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter)); \ - srcIndexPtr += srcYStride; \ - hi_row10 = flut[srcIndexPtr[0]]; \ - v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter)); \ - sum1 = vis_fpadd16(v01, v11); \ - lo_row10 = flut[srcIndexPtr[1]]; \ - X += dX; \ - hi_row11 = flut[srcIndexPtr[2]]; \ - v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter)); \ - sum2 = vis_fpadd16(v02, v12); \ - lo_row11 = flut[srcIndexPtr[3]]; \ - v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter)); \ - srcIndexPtr += srcYStride; \ - hi_row20 = flut[srcIndexPtr[0]]; \ - v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter)); \ - sum3 = vis_fpadd16(v03, v13); \ - Y += dY; \ - xSrc = (X >> MLIB_SHIFT)-1; \ - v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter)); \ - sum0 = vis_fpadd16(sum0, v20); \ - lo_row20 = flut[srcIndexPtr[1]]; \ - ySrc = (Y >> MLIB_SHIFT)-1; \ - hi_row21 = flut[srcIndexPtr[2]]; \ - v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter)); \ - sum1 = vis_fpadd16(sum1, v21); \ - filterposy &= FILTER_MASK; \ - lo_row21 = flut[srcIndexPtr[3]]; \ - v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter)); \ - srcIndexPtr += srcYStride; \ - filterposx &= FILTER_MASK; \ - v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter)); \ - sum2 = vis_fpadd16(sum2, v22); \ - hi_row30 = flut[srcIndexPtr[0]]; \ - sum3 = vis_fpadd16(sum3, v23); \ - sum0 = vis_fpadd16(sum0, v30); \ - lo_row30 = flut[srcIndexPtr[1]]; \ - sum1 = vis_fpadd16(sum1, v31); \ - v00 = vis_fmul8sux16(sum0, xFilter0); \ - hi_row31 = flut[srcIndexPtr[2]]; \ - sum2 = vis_fpadd16(sum2, v32); \ - v01 = vis_fmul8ulx16(sum0, xFilter0); \ - sum3 = vis_fpadd16(sum3, v33); \ - lo_row31 = flut[srcIndexPtr[3]]; \ - v10 = vis_fmul8sux16(sum1, xFilter1); \ - d0##ind = vis_fpadd16(v00, v01); \ - yFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_u8 + filterposy)); \ - v11 = vis_fmul8ulx16(sum1, xFilter1); \ - xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx)); \ - xFilter0 = xPtr[0]; \ - v20 = vis_fmul8sux16(sum2, xFilter2); \ - d1##ind = vis_fpadd16(v10, v11); \ - xFilter1 = xPtr[1]; \ - v21 = vis_fmul8ulx16(sum2, xFilter2); \ - xFilter2 = xPtr[2]; \ - v30 = vis_fmul8sux16(sum3, xFilter3); \ - d2##ind = vis_fpadd16(v20, v21); \ - xFilter3 = xPtr[3]; \ - v31 = vis_fmul8ulx16(sum3, xFilter3); \ - srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc; \ - d3##ind = vis_fpadd16(v30, v31) - -/***************************************************************/ -#define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \ - row00 = flut[srcIndexPtr[0]]; \ - row01 = flut[srcIndexPtr[1]]; \ - row02 = flut[srcIndexPtr[2]]; \ - row03 = flut[srcIndexPtr[3]]; \ - srcIndexPtr += srcYStride; \ - row10 = flut[srcIndexPtr[0]]; \ - row11 = flut[srcIndexPtr[1]]; \ - row12 = flut[srcIndexPtr[2]]; \ - row13 = flut[srcIndexPtr[3]]; \ - srcIndexPtr += srcYStride; \ - row20 = flut[srcIndexPtr[0]]; \ - row21 = flut[srcIndexPtr[1]]; \ - row22 = flut[srcIndexPtr[2]]; \ - row23 = flut[srcIndexPtr[3]]; \ - srcIndexPtr += srcYStride; \ - row30 = flut[srcIndexPtr[0]]; \ - row31 = flut[srcIndexPtr[1]]; \ - row32 = flut[srcIndexPtr[2]]; \ - row33 = flut[srcIndexPtr[3]]; \ - filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ - yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ - yFilter0 = yPtr[0]; \ - yFilter1 = yPtr[1]; \ - yFilter2 = yPtr[2]; \ - yFilter3 = yPtr[3]; \ - filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ - xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ - xFilter0 = xPtr[0]; \ - xFilter1 = xPtr[1]; \ - xFilter2 = xPtr[2]; \ - xFilter3 = xPtr[3]; \ - X += dX; \ - Y += dY - -/***************************************************************/ -#define RESULT_4BC_S16_1PIXEL() \ - u00 = vis_fmul8sux16(row00, yFilter0); \ - u01 = vis_fmul8ulx16(row00, yFilter0); \ - u10 = vis_fmul8sux16(row01, yFilter0); \ - u11 = vis_fmul8ulx16(row01, yFilter0); \ - v00 = vis_fpadd16(u00, u01); \ - u20 = vis_fmul8sux16(row02, yFilter0); \ - v01 = vis_fpadd16(u10, u11); \ - u21 = vis_fmul8ulx16(row02, yFilter0); \ - u30 = vis_fmul8sux16(row03, yFilter0); \ - u31 = vis_fmul8ulx16(row03, yFilter0); \ - v02 = vis_fpadd16(u20, u21); \ - u00 = vis_fmul8sux16(row10, yFilter1); \ - u01 = vis_fmul8ulx16(row10, yFilter1); \ - v03 = vis_fpadd16(u30, u31); \ - u10 = vis_fmul8sux16(row11, yFilter1); \ - u11 = vis_fmul8ulx16(row11, yFilter1); \ - v10 = vis_fpadd16(u00, u01); \ - u20 = vis_fmul8sux16(row12, yFilter1); \ - v11 = vis_fpadd16(u10, u11); \ - u21 = vis_fmul8ulx16(row12, yFilter1); \ - u30 = vis_fmul8sux16(row13, yFilter1); \ - u31 = vis_fmul8ulx16(row13, yFilter1); \ - u00 = vis_fmul8sux16(row20, yFilter2); \ - v12 = vis_fpadd16(u20, u21); \ - u01 = vis_fmul8ulx16(row20, yFilter2); \ - v13 = vis_fpadd16(u30, u31); \ - u10 = vis_fmul8sux16(row21, yFilter2); \ - u11 = vis_fmul8ulx16(row21, yFilter2); \ - v20 = vis_fpadd16(u00, u01); \ - u20 = vis_fmul8sux16(row22, yFilter2); \ - sum0 = vis_fpadd16(v00, v10); \ - u21 = vis_fmul8ulx16(row22, yFilter2); \ - u30 = vis_fmul8sux16(row23, yFilter2); \ - u31 = vis_fmul8ulx16(row23, yFilter2); \ - u00 = vis_fmul8sux16(row30, yFilter3); \ - u01 = vis_fmul8ulx16(row30, yFilter3); \ - v21 = vis_fpadd16(u10, u11); \ - sum1 = vis_fpadd16(v01, v11); \ - u10 = vis_fmul8sux16(row31, yFilter3); \ - sum2 = vis_fpadd16(v02, v12); \ - sum3 = vis_fpadd16(v03, v13); \ - v22 = vis_fpadd16(u20, u21); \ - u11 = vis_fmul8ulx16(row31, yFilter3); \ - sum0 = vis_fpadd16(sum0, v20); \ - u20 = vis_fmul8sux16(row32, yFilter3); \ - u21 = vis_fmul8ulx16(row32, yFilter3); \ - v23 = vis_fpadd16(u30, u31); \ - v30 = vis_fpadd16(u00, u01); \ - sum1 = vis_fpadd16(sum1, v21); \ - u30 = vis_fmul8sux16(row33, yFilter3); \ - u31 = vis_fmul8ulx16(row33, yFilter3); \ - v31 = vis_fpadd16(u10, u11); \ - sum2 = vis_fpadd16(sum2, v22); \ - sum3 = vis_fpadd16(sum3, v23); \ - v32 = vis_fpadd16(u20, u21); \ - sum0 = vis_fpadd16(sum0, v30); \ - v33 = vis_fpadd16(u30, u31); \ - v00 = vis_fmul8sux16(sum0, xFilter0); \ - sum1 = vis_fpadd16(sum1, v31); \ - sum2 = vis_fpadd16(sum2, v32); \ - v01 = vis_fmul8ulx16(sum0, xFilter0); \ - v10 = vis_fmul8sux16(sum1, xFilter1); \ - sum3 = vis_fpadd16(sum3, v33); \ - v11 = vis_fmul8ulx16(sum1, xFilter1); \ - d0 = vis_fpadd16(v00, v01); \ - v20 = vis_fmul8sux16(sum2, xFilter2); \ - v21 = vis_fmul8ulx16(sum2, xFilter2); \ - d1 = vis_fpadd16(v10, v11); \ - v30 = vis_fmul8sux16(sum3, xFilter3); \ - v31 = vis_fmul8ulx16(sum3, xFilter3); \ - d2 = vis_fpadd16(v20, v21); \ - d3 = vis_fpadd16(v30, v31); \ - d0 = vis_fpadd16(d0, d1); \ - d2 = vis_fpadd16(d2, d3); \ - d0 = vis_fpadd16(d0, d2); \ - d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ - d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ - res = vis_fpackfix_pair(d2, d3) - -/***************************************************************/ -#define BC_S16_4CH(mlib_filters_s16_4) \ - u00 = vis_fmul8sux16(row00, yFilter0); \ - u01 = vis_fmul8ulx16(row00, yFilter0); \ - u10 = vis_fmul8sux16(row01, yFilter0); \ - u11 = vis_fmul8ulx16(row01, yFilter0); \ - v00 = vis_fpadd16(u00, u01); \ - u20 = vis_fmul8sux16(row02, yFilter0); \ - v01 = vis_fpadd16(u10, u11); \ - u21 = vis_fmul8ulx16(row02, yFilter0); \ - u30 = vis_fmul8sux16(row03, yFilter0); \ - u31 = vis_fmul8ulx16(row03, yFilter0); \ - v02 = vis_fpadd16(u20, u21); \ - row00 = flut[srcIndexPtr[0]]; \ - u00 = vis_fmul8sux16(row10, yFilter1); \ - u01 = vis_fmul8ulx16(row10, yFilter1); \ - filterposy = (Y >> FILTER_SHIFT); \ - v03 = vis_fpadd16(u30, u31); \ - row01 = flut[srcIndexPtr[1]]; \ - u10 = vis_fmul8sux16(row11, yFilter1); \ - u11 = vis_fmul8ulx16(row11, yFilter1); \ - v10 = vis_fpadd16(u00, u01); \ - row02 = flut[srcIndexPtr[2]]; \ - u20 = vis_fmul8sux16(row12, yFilter1); \ - v11 = vis_fpadd16(u10, u11); \ - u21 = vis_fmul8ulx16(row12, yFilter1); \ - u30 = vis_fmul8sux16(row13, yFilter1); \ - row03 = flut[srcIndexPtr[3]]; \ - u31 = vis_fmul8ulx16(row13, yFilter1); \ - u00 = vis_fmul8sux16(row20, yFilter2); \ - filterposx = (X >> FILTER_SHIFT); \ - srcIndexPtr += srcYStride; \ - v12 = vis_fpadd16(u20, u21); \ - u01 = vis_fmul8ulx16(row20, yFilter2); \ - v13 = vis_fpadd16(u30, u31); \ - row10 = flut[srcIndexPtr[0]]; \ - u10 = vis_fmul8sux16(row21, yFilter2); \ - X += dX; \ - u11 = vis_fmul8ulx16(row21, yFilter2); \ - v20 = vis_fpadd16(u00, u01); \ - row11 = flut[srcIndexPtr[1]]; \ - u20 = vis_fmul8sux16(row22, yFilter2); \ - sum0 = vis_fpadd16(v00, v10); \ - u21 = vis_fmul8ulx16(row22, yFilter2); \ - row12 = flut[srcIndexPtr[2]]; \ - u30 = vis_fmul8sux16(row23, yFilter2); \ - u31 = vis_fmul8ulx16(row23, yFilter2); \ - row13 = flut[srcIndexPtr[3]]; \ - u00 = vis_fmul8sux16(row30, yFilter3); \ - srcIndexPtr += srcYStride; \ - u01 = vis_fmul8ulx16(row30, yFilter3); \ - v21 = vis_fpadd16(u10, u11); \ - Y += dY; \ - xSrc = (X >> MLIB_SHIFT)-1; \ - sum1 = vis_fpadd16(v01, v11); \ - row20 = flut[srcIndexPtr[0]]; \ - u10 = vis_fmul8sux16(row31, yFilter3); \ - sum2 = vis_fpadd16(v02, v12); \ - sum3 = vis_fpadd16(v03, v13); \ - ySrc = (Y >> MLIB_SHIFT)-1; \ - row21 = flut[srcIndexPtr[1]]; \ - v22 = vis_fpadd16(u20, u21); \ - u11 = vis_fmul8ulx16(row31, yFilter3); \ - sum0 = vis_fpadd16(sum0, v20); \ - u20 = vis_fmul8sux16(row32, yFilter3); \ - row22 = flut[srcIndexPtr[2]]; \ - u21 = vis_fmul8ulx16(row32, yFilter3); \ - v23 = vis_fpadd16(u30, u31); \ - v30 = vis_fpadd16(u00, u01); \ - filterposy &= FILTER_MASK; \ - sum1 = vis_fpadd16(sum1, v21); \ - u30 = vis_fmul8sux16(row33, yFilter3); \ - row23 = flut[srcIndexPtr[3]]; \ - u31 = vis_fmul8ulx16(row33, yFilter3); \ - srcIndexPtr += srcYStride; \ - filterposx &= FILTER_MASK; \ - v31 = vis_fpadd16(u10, u11); \ - row30 = flut[srcIndexPtr[0]]; \ - sum2 = vis_fpadd16(sum2, v22); \ - sum3 = vis_fpadd16(sum3, v23); \ - row31 = flut[srcIndexPtr[1]]; \ - v32 = vis_fpadd16(u20, u21); \ - sum0 = vis_fpadd16(sum0, v30); \ - row32 = flut[srcIndexPtr[2]]; \ - v33 = vis_fpadd16(u30, u31); \ - row33 = flut[srcIndexPtr[3]]; \ - v00 = vis_fmul8sux16(sum0, xFilter0); \ - yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ - sum1 = vis_fpadd16(sum1, v31); \ - yFilter0 = yPtr[0]; \ - sum2 = vis_fpadd16(sum2, v32); \ - v01 = vis_fmul8ulx16(sum0, xFilter0); \ - yFilter1 = yPtr[1]; \ - v10 = vis_fmul8sux16(sum1, xFilter1); \ - sum3 = vis_fpadd16(sum3, v33); \ - yFilter2 = yPtr[2]; \ - v11 = vis_fmul8ulx16(sum1, xFilter1); \ - d0 = vis_fpadd16(v00, v01); \ - yFilter3 = yPtr[3]; \ - xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ - v20 = vis_fmul8sux16(sum2, xFilter2); \ - xFilter0 = xPtr[0]; \ - v21 = vis_fmul8ulx16(sum2, xFilter2); \ - d1 = vis_fpadd16(v10, v11); \ - xFilter1 = xPtr[1]; \ - v30 = vis_fmul8sux16(sum3, xFilter3); \ - v31 = vis_fmul8ulx16(sum3, xFilter3); \ - d2 = vis_fpadd16(v20, v21); \ - xFilter2 = xPtr[2]; \ - d3 = vis_fpadd16(v30, v31); \ - xFilter3 = xPtr[3]; \ - srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc - -/***************************************************************/ -#define FADD_4BC_S16() \ - d0 = vis_fpadd16(d0, d1); \ - d2 = vis_fpadd16(d2, d3); \ - d0 = vis_fpadd16(d0, d2); \ - d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ - d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ - res = vis_fpackfix_pair(d2, d3) - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_u8 - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 5 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_U8_U8_3CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_U8(); - mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT/2]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_u8 = mlib_filters_u8_bc; - mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; - } else { - mlib_filters_table_u8 = mlib_filters_u8_bc2; - mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; - } - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - vis_write_gsr(3 << 3); - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - cols = xRight - xLeft + 1; - - i = 0; - - if (i <= cols - 6) { - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - -#pragma pipeloop(0) - for (; i <= cols-8; i += 2) { - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 6; - } - - if (i <= cols-4) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 4; - } - - if (i <= cols-2) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 2; - } - - if (i < cols) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d1); - res = vis_fpack16_pair(d0, d0); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4((mlib_u8 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_U8_S16_3CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_S16(); - mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_s16_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; - } else { - mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; - } - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - vis_write_gsr(10 << 3); - - cols = xRight - xLeft + 1; - i = 0; - - if (i <= cols - 4) { - - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - - NEXT_PIXEL_4BC(); - - BC_S16_4CH(mlib_filters_table_s16_4); - FADD_4BC_S16(); - - BC_S16_4CH(mlib_filters_table_s16_4); - -#pragma pipeloop(0) - - for (; i < cols-4; i++) { - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - BC_S16_4CH(mlib_filters_table_s16_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - *dstPixelPtr++ = res; - - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - i += 4; - } - -#pragma pipeloop(0) - for (; i < cols; i++) { - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((mlib_s16 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 5 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_U8_U8_4CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_U8(); - mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT/2]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_u8 = mlib_filters_u8_bc; - mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; - } else { - mlib_filters_table_u8 = mlib_filters_u8_bc2; - mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; - } - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - vis_write_gsr(3 << 3); - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - cols = xRight - xLeft + 1; - - i = 0; - - if (i <= cols - 6) { - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - -#pragma pipeloop(0) - for (; i <= cols-8; i += 2) { - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 6; - } - - if (i <= cols-4) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 4; - } - - if (i <= cols-2) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 2; - } - - if (i < cols) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d1); - res = vis_fpack16_pair(d0, d0); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_U8_U8_4((mlib_u8 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_U8_S16_4CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_S16(); - mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_s16_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; - } else { - mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; - } - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - vis_write_gsr(10 << 3); - - cols = xRight - xLeft + 1; - i = 0; - - if (i <= cols - 4) { - - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - - NEXT_PIXEL_4BC(); - - BC_S16_4CH(mlib_filters_table_s16_4); - FADD_4BC_S16(); - - BC_S16_4CH(mlib_filters_table_s16_4); - -#pragma pipeloop(0) - - for (; i < cols-4; i++) { - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - BC_S16_4CH(mlib_filters_table_s16_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - *dstPixelPtr++ = res; - - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - i += 4; - } - -#pragma pipeloop(0) - for (; i < cols; i++) { - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_S16_U8_4((mlib_s16 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef MLIB_TYPE -#define MLIB_TYPE mlib_s16 - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 5 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_S16_U8_3CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_U8(); - mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT/2]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_u8 = mlib_filters_u8_bc; - mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; - } else { - mlib_filters_table_u8 = mlib_filters_u8_bc2; - mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; - } - - srcYStride >>= 1; - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - vis_write_gsr(3 << 3); - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - cols = xRight - xLeft + 1; - - i = 0; - - if (i <= cols - 6) { - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - -#pragma pipeloop(0) - for (; i <= cols-8; i += 2) { - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 6; - } - - if (i <= cols-4) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 4; - } - - if (i <= cols-2) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 2; - } - - if (i < cols) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d1); - res = vis_fpack16_pair(d0, d0); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4((mlib_u8 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_S16_S16_3CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_S16(); - mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_s16_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; - } else { - mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; - } - - srcYStride >>= 1; - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - vis_write_gsr(10 << 3); - - cols = xRight - xLeft + 1; - i = 0; - - if (i <= cols - 4) { - - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - - NEXT_PIXEL_4BC(); - - BC_S16_4CH(mlib_filters_table_s16_4); - FADD_4BC_S16(); - - BC_S16_4CH(mlib_filters_table_s16_4); - -#pragma pipeloop(0) - - for (; i < cols-4; i++) { - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - BC_S16_4CH(mlib_filters_table_s16_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - *dstPixelPtr++ = res; - - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - i += 4; - } - -#pragma pipeloop(0) - for (; i < cols; i++) { - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((mlib_s16 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 5 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 8) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_S16_U8_4CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_U8(); - mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT/2]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_u8 = mlib_filters_u8_bc; - mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; - } else { - mlib_filters_table_u8 = mlib_filters_u8_bc2; - mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; - } - - srcYStride >>= 1; - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - vis_write_gsr(3 << 3); - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - cols = xRight - xLeft + 1; - - i = 0; - - if (i <= cols - 6) { - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - -#pragma pipeloop(0) - for (; i <= cols-8; i += 2) { - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 6; - } - - if (i <= cols-4) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - - NEXT_PIXEL_4BC(); - - BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); - BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); - FADD_4BC_U8(); - *dstPixelPtr++ = res; - - RESULT_4BC_U8_1PIXEL(0); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 4; - } - - if (i <= cols-2) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(1); - FADD_4BC_U8(); - - *dstPixelPtr++ = res; - i += 2; - } - - if (i < cols) { - NEXT_PIXEL_4BC(); - LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); - RESULT_4BC_U8_1PIXEL(0); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d1); - res = vis_fpack16_pair(d0, d0); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_U8_S16_4((mlib_u8 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef FILTER_SHIFT -#define FILTER_SHIFT 4 -#undef FILTER_MASK -#define FILTER_MASK (((1 << 9) - 1) << 3) - -/***************************************************************/ -mlib_status mlib_ImageAffineIndex_S16_S16_4CH_BC(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - DECLAREVAR_S16(); - mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - - mlib_ImageGetLutOffset(colormap); - mlib_d64 dstRowData[MLIB_LIMIT]; - mlib_d64 *dstRowPtr = dstRowData; - const mlib_s16 *mlib_filters_table_s16_4; - - if (filter == MLIB_BICUBIC) { - mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; - } else { - mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; - } - - srcYStride >>= 1; - - if (max_xsize > MLIB_LIMIT) { - dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); - - if (dstRowPtr == NULL) return MLIB_FAILURE; - } - - for (j = yStart; j <= yFinish; j++) { - - CLIP(); - - vis_write_gsr(10 << 3); - - cols = xRight - xLeft + 1; - i = 0; - - if (i <= cols - 4) { - - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - - NEXT_PIXEL_4BC(); - - BC_S16_4CH(mlib_filters_table_s16_4); - FADD_4BC_S16(); - - BC_S16_4CH(mlib_filters_table_s16_4); - -#pragma pipeloop(0) - - for (; i < cols-4; i++) { - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - BC_S16_4CH(mlib_filters_table_s16_4); - } - - *dstPixelPtr++ = res; - - FADD_4BC_S16(); - *dstPixelPtr++ = res; - - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - i += 4; - } - -#pragma pipeloop(0) - for (; i < cols; i++) { - NEXT_PIXEL_4BC(); - LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); - RESULT_4BC_S16_1PIXEL(); - *dstPixelPtr++ = res; - } - - mlib_ImageColorTrue2IndexLine_S16_S16_4((mlib_s16 *)dstRowPtr, - dstIndexPtr, - xRight - xLeft + 1, - colormap); - } - - if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); - - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL.c Fri May 13 11:31:05 2016 +0300 @@ -33,7 +33,6 @@ #include "vis_proto.h" #include "mlib_image.h" -#include "mlib_ImageColormap.h" #include "mlib_ImageCopy.h" #include "mlib_ImageAffine.h" #include "mlib_v_ImageFilters.h" @@ -719,134 +718,3 @@ } /***************************************************************/ -#define LUT(x) plut[x] - -mlib_status FUN_NAME(u8_i)(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - mlib_s32 nchan = mlib_ImageGetLutChannels(colormap); - mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap); - mlib_f32 *plut = (mlib_f32*)mlib_ImageGetLutNormalTable(colormap) - lut_off; - mlib_s32 max_xsize = param -> max_xsize; - mlib_f32 buff[BUF_SIZE], *pbuff = buff; - - if (max_xsize > BUF_SIZE) { - pbuff = mlib_malloc(max_xsize*sizeof(mlib_f32)); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - vis_write_gsr(3 << 3); - - for (j = yStart; j <= yFinish; j++) { - mlib_f32 s0, s1, s2, s3; - DTYPE *sp; - - NEW_LINE(1); - -#pragma pipeloop(0) - for (i = 0; i < size; i++) { - GET_FILTER_XY(); - - sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); - s0 = LUT(sp[0]); - s1 = LUT(sp[1]); - s2 = LUT(sp[srcYStride]); - s3 = LUT(sp[srcYStride + 1]); - - PROCESS_4CH(s0, s1, s2, s3); - - pbuff[i] = vis_fpack16(dd); - X += dX; - Y += dY; - } - - if (nchan == 3) { - mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4((void*)pbuff, (void*)dl, size, colormap); - } else { - mlib_ImageColorTrue2IndexLine_U8_U8_4((void*)pbuff, (void*)dl, size, colormap); - } - } - - if (pbuff != buff) { - mlib_free(pbuff); - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef DTYPE -#define DTYPE mlib_s16 - -mlib_status FUN_NAME(s16_i)(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - mlib_s32 nchan = mlib_ImageGetLutChannels(colormap); - mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap); - mlib_f32 *plut = (mlib_f32*)mlib_ImageGetLutNormalTable(colormap) - lut_off; - mlib_s32 max_xsize = param -> max_xsize; - mlib_f32 buff[BUF_SIZE], *pbuff = buff; - - srcYStride /= sizeof(DTYPE); - - if (max_xsize > BUF_SIZE) { - pbuff = mlib_malloc(max_xsize*sizeof(mlib_f32)); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - vis_write_gsr(3 << 3); - - for (j = yStart; j <= yFinish; j++) { - mlib_f32 s0, s1, s2, s3; - DTYPE *sp; - - NEW_LINE(1); - -#pragma pipeloop(0) - for (i = 0; i < size; i++) { - GET_FILTER_XY(); - - sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); - s0 = LUT(sp[0]); - s1 = LUT(sp[1]); - s2 = LUT(sp[srcYStride]); - s3 = LUT(sp[srcYStride + 1]); - - PROCESS_4CH(s0, s1, s2, s3); - - pbuff[i] = vis_fpack16(dd); - X += dX; - Y += dY; - } - - if (nchan == 3) { - mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4((void*)pbuff, (void*)dl, size, colormap); - } else { - mlib_ImageColorTrue2IndexLine_U8_S16_4((void*)pbuff, (void*)dl, size, colormap); - } - } - - if (pbuff != buff) { - mlib_free(pbuff); - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -const type_affine_i_fun mlib_AffineFunArr_bl_i[] = { - mlib_ImageAffine_u8_u8_i_bl, - mlib_ImageAffine_u8_u8_i_bl, - mlib_ImageAffine_u8_s16_i_bl, - mlib_ImageAffine_u8_s16_i_bl, - mlib_ImageAffine_s16_u8_i_bl, - mlib_ImageAffine_s16_u8_i_bl, - mlib_ImageAffine_s16_s16_i_bl, - mlib_ImageAffine_s16_s16_i_bl -}; - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_S16.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_S16.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_S16.c Fri May 13 11:31:05 2016 +0300 @@ -33,7 +33,6 @@ #include "vis_proto.h" #include "mlib_image.h" -#include "mlib_ImageColormap.h" #include "mlib_ImageCopy.h" #include "mlib_ImageAffine.h" #include "mlib_v_ImageFilters.h" @@ -716,128 +715,3 @@ } /***************************************************************/ -#define LUT(x) plut[x] - -mlib_status FUN_NAME(s16_i)(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - mlib_s32 nchan = mlib_ImageGetLutChannels(colormap); - mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap); - mlib_d64 *plut = (mlib_d64*)mlib_ImageGetLutNormalTable(colormap) - lut_off; - mlib_s32 max_xsize = param -> max_xsize; - mlib_d64 buff[BUF_SIZE], *pbuff = buff; - - srcYStride /= sizeof(DTYPE); - - if (max_xsize > BUF_SIZE) { - pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ - dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ - dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); - dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); - - for (j = yStart; j <= yFinish; j++) { - DTYPE *sp; - - NEW_LINE(1); - - deltax = DOUBLE_4U16(X, X, X, X); - deltay = DOUBLE_4U16(Y, Y, Y, Y); - -#pragma pipeloop(0) - for (i = 0; i < size; i++) { - sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); - s0 = LUT(sp[0]); - s1 = LUT(sp[1]); - s2 = LUT(sp[srcYStride]); - s3 = LUT(sp[srcYStride + 1]); - - BL_SUM(); - - pbuff[i] = dd; - X += dX; - Y += dY; - } - - if (nchan == 3) { - mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((void*)pbuff, (void*)dl, size, colormap); - } else { - mlib_ImageColorTrue2IndexLine_S16_S16_4((void*)pbuff, (void*)dl, size, colormap); - } - } - - if (pbuff != buff) { - mlib_free(pbuff); - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef DTYPE -#define DTYPE mlib_u8 - -mlib_status FUN_NAME(u8_i)(mlib_affine_param *param, - const void *colormap) -{ - DECLAREVAR(); - mlib_s32 nchan = mlib_ImageGetLutChannels(colormap); - mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap); - mlib_d64 *plut = (mlib_d64*)mlib_ImageGetLutNormalTable(colormap) - lut_off; - mlib_s32 max_xsize = param -> max_xsize; - mlib_d64 buff[BUF_SIZE], *pbuff = buff; - - if (max_xsize > BUF_SIZE) { - pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); - - if (pbuff == NULL) return MLIB_FAILURE; - } - - dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ - dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ - dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); - dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); - - for (j = yStart; j <= yFinish; j++) { - DTYPE *sp; - - NEW_LINE(1); - - deltax = DOUBLE_4U16(X, X, X, X); - deltay = DOUBLE_4U16(Y, Y, Y, Y); - -#pragma pipeloop(0) - for (i = 0; i < size; i++) { - sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); - s0 = LUT(sp[0]); - s1 = LUT(sp[1]); - s2 = LUT(sp[srcYStride]); - s3 = LUT(sp[srcYStride + 1]); - - BL_SUM(); - - pbuff[i] = dd; - X += dX; - Y += dY; - } - - if (nchan == 3) { - mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((void*)pbuff, (void*)dl, size, colormap); - } else { - mlib_ImageColorTrue2IndexLine_S16_U8_4((void*)pbuff, (void*)dl, size, colormap); - } - } - - if (pbuff != buff) { - mlib_free(pbuff); - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_U16.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_U16.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageAffine_BL_U16.c Fri May 13 11:31:05 2016 +0300 @@ -33,7 +33,6 @@ #include "vis_proto.h" #include "mlib_image.h" -#include "mlib_ImageColormap.h" #include "mlib_ImageCopy.h" #include "mlib_ImageAffine.h" #include "mlib_v_ImageFilters.h" diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,825 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -/* - * FUNCTIONS - * mlib_ImageChannelExtract - Copy the selected channels of the source - * image into the destination image - * - * SYNOPSIS - * mlib_status mlib_ImageChannelExtract(mlib_image *dst, - * mlib_image *src, - * mlib_s32 cmask); - * ARGUMENT - * dst Pointer to destination image. - * src Pointer to source image. - * cmask Source channel selection mask. - * The least significant bit (LSB) is corresponding to the - * last channel in the source image data. - * The bits with value 1 stand for the channels selected. - * If more than N channels are selected, the leftmost N - * channels are extracted, where N is the number of channels - * in the destination image. - * - * RESTRICTION - * The src and dst must have the same width, height and data type. - * The src and dst can have 1, 2, 3 or 4 channels. - * The src and dst can be either MLIB_BYTE, MLIB_SHORT, MLIB_INT, - * MLIB_FLOAT or MLIB_DOUBLE. - * - * DESCRIPTION - * Copy the selected channels of the source image into the - * destination image - */ - -#include -#include "mlib_image.h" -#include "mlib_ImageCheck.h" - -/***************************************************************/ -/* functions defined in mlib_ImageChannelExtract_1.c */ - -void -mlib_v_ImageChannelExtract_U8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16(mlib_u16 *src, mlib_s32 slb, - mlib_u16 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S32(mlib_s32 *src, mlib_s32 slb, - mlib_s32 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_D64(mlib_d64 *src, mlib_s32 slb, - mlib_d64 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_U8_2_1(mlib_u8 *sl, mlib_s32 slb, - mlib_u8 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height); - -void mlib_v_ImageChannelExtract_U8_3_2(mlib_u8 *sl, mlib_s32 slb, - mlib_u8 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_U8_4_2(mlib_u8 *sl, mlib_s32 slb, - mlib_u8 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_32_2_1(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height); - -void mlib_v_ImageChannelExtract_32_3_1(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height); - -void mlib_v_ImageChannelExtract_32_3_2(mlib_f32 *sp, mlib_s32 slb, - mlib_f32 *dp, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 deltac1); - -void mlib_v_ImageChannelExtract_32_4_1(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height); - -void mlib_v_ImageChannelExtract_32_4_2(mlib_f32 *sp, mlib_s32 slb, - mlib_f32 *dp, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 deltac1); - -void mlib_v_ImageChannelExtract_32_4_3(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 mask_off); - -/***************************************************************/ - -void -mlib_v_ImageChannelExtract_U8_21_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_21_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_21_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_21(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_31_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_31_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_31_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_31(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_41_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_41_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_41_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_U8_41(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_11_A8D1X4(mlib_s16 *src, mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_S16_21_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_21_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_21_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_21(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_31_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_31_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_31_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_31(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_41_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_41_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_41_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelExtract_S16_41(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); - -/***************************************************************/ -/* functions defined in mlib_ImageChannelExtract_43.c */ - -void -mlib_v_ImageChannelExtract_U8_43R_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_U8_43R_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_U8_43R_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_U8_43R(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_S16_43R_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_S16_43R_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_S16_43R_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_S16_43R(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_U8_43L_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_U8_43L_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_U8_43L_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_U8_43L(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_S16_43L_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_S16_43L_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelExtract_S16_43L_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelExtract_S16_43L(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); - -/***************************************************************/ - -#ifdef MLIB_TEST -mlib_status -mlib_v_ImageChannelExtract(mlib_image *dst, - mlib_image *src, - mlib_s32 cmask) -#else -mlib_status -mlib_ImageChannelExtract(mlib_image *dst, - mlib_image *src, - mlib_s32 cmask) -#endif -{ - const mlib_s32 X8 = 0x7; - const mlib_s32 X4 = 0x3; - const mlib_s32 X2 = 0x1; - const mlib_s32 A8D1 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_ONEDVECTOR; - const mlib_s32 A8D2X8 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH8X; - const mlib_s32 A8D2X4 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH4X; - const mlib_s32 A8D2X2 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH2X; - void *sp; /* pointer for pixel in src */ - void *dp; /* pointer for pixel in dst */ - mlib_s32 ncmask = 0; /* normalized channel mask */ - mlib_s32 channels; /* number of channels for src */ - mlib_s32 channeld; /* number of channels for dst */ - mlib_s32 width, height; /* for src and dst */ - mlib_s32 strides; /* strides in bytes for src */ - mlib_s32 strided; /* strides in bytes for dst */ - mlib_s32 flags; - mlib_s32 flagd; - mlib_s32 dsize; - int delta0 = 0; /* offset of first selected channel */ - int count1 = 0; /* number of channels in first group */ - int i, bit1count = 0; - - MLIB_IMAGE_CHECK(src); - MLIB_IMAGE_CHECK(dst); - MLIB_IMAGE_TYPE_EQUAL(src, dst); - MLIB_IMAGE_SIZE_EQUAL(src, dst); - - channels = mlib_ImageGetChannels(src); - channeld = mlib_ImageGetChannels(dst); - width = mlib_ImageGetWidth(src); - height = mlib_ImageGetHeight(src); - strides = mlib_ImageGetStride(src); - strided = mlib_ImageGetStride(dst); - sp = mlib_ImageGetData(src); - dp = mlib_ImageGetData(dst); - flags = mlib_ImageGetFlags(src); - flagd = mlib_ImageGetFlags(dst); - dsize = width * height; - - /* normalize the cmask, and count the number of bit with value 1 */ - for (i = (channels - 1); i >= 0; i--) { - if (((cmask & (1 << i)) != 0) && (bit1count < channeld)) { - ncmask += (1 << i); - bit1count++; - } - } - - /* do not support the cases in which the number of selected channels is - * less than the nubmber of channels in the destination image */ - if (bit1count < channeld) { - return MLIB_FAILURE; - } - - if (channels == channeld) { -#ifdef MLIB_TEST - mlib_v_ImageCopy(dst, src); -#else - mlib_ImageCopy(dst, src); -#endif - return MLIB_SUCCESS; - } - - switch (mlib_ImageGetType(src)) { - case MLIB_BYTE: - if (channeld == 1) { - switch (channels) { - case 2: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelExtract_U8_21_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelExtract_U8_21_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_U8_21_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelExtract_U8_21((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - return MLIB_SUCCESS; - - case 3: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelExtract_U8_31_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelExtract_U8_31_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_U8_31_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelExtract_U8_31((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - return MLIB_SUCCESS; - - case 4: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelExtract_U8_41_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelExtract_U8_41_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_U8_41_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelExtract_U8_41((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - return MLIB_SUCCESS; - - default: - return MLIB_FAILURE; - } - } - else if ((channels == 4) && (channeld == 3) && (ncmask == 7)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelExtract_U8_43R_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelExtract_U8_43R_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_U8_43R_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else { - mlib_v_ImageChannelExtract_U8_43R((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - return MLIB_SUCCESS; - } - else if ((channels == 4) && (channeld == 3) && (ncmask == 14)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelExtract_U8_43L_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelExtract_U8_43L_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_U8_43L_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else { - mlib_v_ImageChannelExtract_U8_43L((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - return MLIB_SUCCESS; - } - break; - - case MLIB_SHORT: - if (channeld == 1) { - switch (channels) { - case 2: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelExtract_S16_21_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelExtract_S16_21_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_S16_21_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelExtract_S16_21((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - return MLIB_SUCCESS; - - case 3: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelExtract_S16_31_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelExtract_S16_31_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_S16_31_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelExtract_S16_31((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - return MLIB_SUCCESS; - - case 4: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelExtract_S16_41_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelExtract_S16_41_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_S16_41_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelExtract_S16_41((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - return MLIB_SUCCESS; - default: - return MLIB_FAILURE; - } - } - else if ((channels == 4) && (channeld == 3) && (ncmask == 7)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelExtract_S16_43R_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelExtract_S16_43R_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_S16_43R_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else { - mlib_v_ImageChannelExtract_S16_43R((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - return MLIB_SUCCESS; - } - else if ((channels == 4) && (channeld == 3) && (ncmask == 14)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelExtract_S16_43L_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelExtract_S16_43L_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelExtract_S16_43L_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else { - mlib_v_ImageChannelExtract_S16_43L((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - return MLIB_SUCCESS; - } - break; - - } - -/***************************************************************/ - /* From C version */ - - for (i = (channels - 1); i >= 0; i--) { - if (!(ncmask & (1 << i))) delta0++; - else break; - } - for (; i >= 0; i--) { - if (ncmask & (1 << i)) count1++; - else break; - } - - switch (mlib_ImageGetType(src)) { - case MLIB_BYTE: - { - mlib_u8 *sl = (mlib_u8 *)sp + delta0; - mlib_u8 *dl = (mlib_u8 *)dp; - - switch (channels*10 + channeld) { - case 32: - mlib_v_ImageChannelExtract_U8_3_2(sl, strides, dl, strided, width, height, count1); - return MLIB_SUCCESS; - - case 42: - if (ncmask == 0xA || ncmask == 0x5) { /* mask 1010 or 0101 */ - mlib_v_ImageChannelExtract_U8_2_1(sl, strides, dl, strided, 2*width, height); - return MLIB_SUCCESS; - } - mlib_v_ImageChannelExtract_U8_4_2(sl, strides, dl, strided, width, height, count1); - return MLIB_SUCCESS; - - case 43: - mlib_v_ImageChannelExtract_U8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - channels, channeld, - width, height, - ncmask); - return MLIB_SUCCESS; - - default: return MLIB_FAILURE; - } - } - - case MLIB_SHORT: - mlib_v_ImageChannelExtract_S16((mlib_u16 *)sp, strides, - (mlib_u16 *)dp, strided, - channels, channeld, - width, height, - ncmask); - break; - - case MLIB_INT: - case MLIB_FLOAT: - { - mlib_f32 *sl = (mlib_f32 *)sp + delta0; - mlib_f32 *dl = (mlib_f32 *)dp; - strides /= 4; - strided /= 4; - - switch (channels*10 + channeld) { - case 21: - mlib_v_ImageChannelExtract_32_2_1(sl, strides, dl, strided, width, height); - return MLIB_SUCCESS; - - case 31: - mlib_v_ImageChannelExtract_32_3_1(sl, strides, dl, strided, width, height); - return MLIB_SUCCESS; - - case 32: - mlib_v_ImageChannelExtract_32_3_2(sl, strides, dl, strided, width, height, count1); - return MLIB_SUCCESS; - - case 41: - mlib_v_ImageChannelExtract_32_4_1(sl, strides, dl, strided, width, height); - return MLIB_SUCCESS; - - case 42: - if (ncmask == 0xA || ncmask == 0x5) { /* mask 1010 or 0101 */ - mlib_v_ImageChannelExtract_32_2_1(sl, strides, dl, strided, 2*width, height); - } else { - mlib_v_ImageChannelExtract_32_4_2(sl, strides, dl, strided, width, height, count1); - } - return MLIB_SUCCESS; - - case 43: - mlib_v_ImageChannelExtract_32_4_3(sl, strides, dl, strided, width, height, count1); - return MLIB_SUCCESS; - - default: - return MLIB_FAILURE; - } - } - case MLIB_DOUBLE: - mlib_v_ImageChannelExtract_D64((mlib_d64 *)sp, strides, - (mlib_d64 *)dp, strided, - channels, channeld, - width, height, - ncmask); - break; - - case MLIB_BIT: - default: - return MLIB_FAILURE; /* MLIB_BIT is not supported here */ - } - - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.h --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract.h Fri May 13 11:31:05 2016 +0300 @@ -32,348 +32,29 @@ extern "C" { #endif /* __cplusplus */ -void mlib_v_ImageChannelExtract_U8_21_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_U8_21_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - void mlib_v_ImageChannelExtract_U8_21_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, mlib_s32 cmask); -void mlib_v_ImageChannelExtract_U8_21(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_U8_31_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_U8_31_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - void mlib_v_ImageChannelExtract_U8_31_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, mlib_s32 cmask); -void mlib_v_ImageChannelExtract_U8_31(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_U8_41_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_U8_41_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - void mlib_v_ImageChannelExtract_U8_41_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, mlib_s32 cmask); -void mlib_v_ImageChannelExtract_U8_41(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_21_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_21_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_21_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_21(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_31_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_31_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_31_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_31(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_41_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_41_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_41_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16_41(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize); -void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src, mlib_s16 *dst, mlib_s32 dsize); -void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelExtract_U8_2_1(const mlib_u8 *sl, - mlib_s32 slb, - mlib_u8 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height); - -void mlib_v_ImageChannelExtract_U8_3_2(const mlib_u8 *sl, - mlib_s32 slb, - mlib_u8 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_U8_4_2(const mlib_u8 *sl, - mlib_s32 slb, - mlib_u8 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_32_2_1(const mlib_f32 *sp, - mlib_s32 slb, - mlib_f32 *dp, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height); - -void mlib_v_ImageChannelExtract_32_3_1(const mlib_f32 *sl, - mlib_s32 slb, - mlib_f32 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height); - -void mlib_v_ImageChannelExtract_32_3_2(const mlib_f32 *sl, - mlib_s32 slb, - mlib_f32 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_32_4_1(const mlib_f32 *sp, - mlib_s32 slb, - mlib_f32 *dp, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height); - -void mlib_v_ImageChannelExtract_32_4_2(const mlib_f32 *sl, - mlib_s32 slb, - mlib_f32 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_32_4_3(const mlib_f32 *sl, - mlib_s32 slb, - mlib_f32 *dl, - mlib_s32 dlb, - mlib_s32 width, - mlib_s32 height, - mlib_s32 count1); - -void mlib_v_ImageChannelExtract_U8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_S16(const mlib_u16 *src, - mlib_s32 slb, - mlib_u16 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - -void mlib_v_ImageChannelExtract_D64(const mlib_d64 *src, - mlib_s32 slb, - mlib_d64 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - #ifdef __cplusplus } #endif /* __cplusplus */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_1.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_1.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_1.c Fri May 13 11:31:05 2016 +0300 @@ -29,30 +29,9 @@ * FILENAME: mlib_ImageChannelExtract_1.c * * FUNCTIONS - * mlib_v_ImageChannelExtract_U8_21_A8D1X8 - * mlib_v_ImageChannelExtract_U8_21_A8D2X8 * mlib_v_ImageChannelExtract_U8_21_D1 - * mlib_v_ImageChannelExtract_U8_21 - * mlib_v_ImageChannelExtract_U8_31_A8D1X8 - * mlib_v_ImageChannelExtract_U8_31_A8D2X8 * mlib_v_ImageChannelExtract_U8_31_D1 - * mlib_v_ImageChannelExtract_U8_31 - * mlib_v_ImageChannelExtract_U8_41_A8D1X8 - * mlib_v_ImageChannelExtract_U8_41_A8D2X8 * mlib_v_ImageChannelExtract_U8_41_D1 - * mlib_v_ImageChannelExtract_U8_41 - * mlib_v_ImageChannelExtract_S16_21_A8D1X4 - * mlib_v_ImageChannelExtract_S16_21_A8D2X4 - * mlib_v_ImageChannelExtract_S16_21_D1 - * mlib_v_ImageChannelExtract_S16_21 - * mlib_v_ImageChannelExtract_S16_31_A8D1X4 - * mlib_v_ImageChannelExtract_S16_31_A8D2X4 - * mlib_v_ImageChannelExtract_S16_31_D1 - * mlib_v_ImageChannelExtract_S16_31 - * mlib_v_ImageChannelExtract_S16_41_A8D1X4 - * mlib_v_ImageChannelExtract_S16_41_A8D2X4 - * mlib_v_ImageChannelExtract_S16_41_D1 - * mlib_v_ImageChannelExtract_S16_41 * * ARGUMENT * src pointer to source image data @@ -95,100 +74,6 @@ /***************************************************************/ /* extract one channel from a 2-channel image. - * both source and destination image data are 8-byte aligned. - * xsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_U8_21_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1; - mlib_d64 sda, sdb, sdc, sdd; - mlib_d64 dd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - if (cmask == 2) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_U8_21L(sd0, sd1, dd); - *dp++ = dd; - } - } - else { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_U8_21R(sd0, sd1, dd); - *dp++ = dd; - } - } -} - -/***************************************************************/ -/* extract one channel from a 2-channel image. - * both source and destination image data are 8-byte aligned. - * xsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_U8_21_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0, sd1; - mlib_d64 sda, sdb, sdc, sdd; - mlib_d64 dd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - if (cmask == 2) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_U8_21L(sd0, sd1, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_U8_21R(sd0, sd1, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ -/* extract one channel from a 2-channel image. */ void mlib_v_ImageChannelExtract_U8_21_D1(const mlib_u8 *src, @@ -415,32 +300,6 @@ } /***************************************************************/ -/* extract one channel from a 2-channel image. - */ - -void mlib_v_ImageChannelExtract_U8_21(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_U8_21_D1(sa, da, xsize, cmask); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ #define CHANNELEXTRACT_U8_31L(sd0, sd1, sd2, dd) \ sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \ sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \ @@ -468,119 +327,6 @@ dd = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde)) /***************************************************************/ -void mlib_v_ImageChannelExtract_U8_31_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2; - mlib_d64 sda, sdb, sdc, sdd, sde; - mlib_d64 dd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - if (cmask == 4) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_U8_31L(sd0, sd1, sd2, dd); - *dp++ = dd; - } - } - else if (cmask == 2) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_U8_31M(sd0, sd1, sd2, dd); - *dp++ = dd; - } - } - else { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_U8_31R(sd0, sd1, sd2, dd); - *dp++ = dd; - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_U8_31_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0, sd1, sd2; - mlib_d64 sda, sdb, sdc, sdd, sde; - mlib_d64 dd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - if (cmask == 4) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_U8_31L(sd0, sd1, sd2, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (cmask == 2) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_U8_31M(sd0, sd1, sd2, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_U8_31R(sd0, sd1, sd2, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ void mlib_v_ImageChannelExtract_U8_31_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, @@ -932,29 +678,6 @@ } /***************************************************************/ -void mlib_v_ImageChannelExtract_U8_31(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_U8_31_D1(sa, da, xsize, cmask); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ #define CHANNELEXTRACT_U8_41L(sd0, sd1, sd2, sd3, dd) \ sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2)); \ sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2)); \ @@ -995,152 +718,6 @@ dd = vis_fpmerge(vis_read_lo(sde), vis_read_lo(sdf)) /***************************************************************/ -void mlib_v_ImageChannelExtract_U8_41_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2, sd3; - mlib_d64 sda, sdb, sdc, sdd, sde, sdf; - mlib_d64 dd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - if (cmask == 8) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41L(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } - else if (cmask == 4) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41ML(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } - else if (cmask == 2) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41MR(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } - else { -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41R(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_U8_41_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0, sd1, sd2, sd3; - mlib_d64 sda, sdb, sdc, sdd, sde, sdf; - mlib_d64 dd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - if (cmask == 8) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41L(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (cmask == 4) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41ML(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (cmask == 2) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41MR(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_U8_41R(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ void mlib_v_ImageChannelExtract_U8_41_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, @@ -1632,1560 +1209,3 @@ } /***************************************************************/ -void mlib_v_ImageChannelExtract_U8_41(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_U8_41_D1(sa, da, xsize, cmask); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ -#define CHANNELEXTRACT_S16_21L(sd0, sd1, dd) \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd1)); \ - sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd1)); \ - sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -#define CHANNELEXTRACT_S16_21R(sd0, sd1, dd) \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd1)); \ - sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd1)); \ - sdc = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -/* extract one channel from a 2-channel image. - * both source and destination image data are 8-byte aligned. - * dsize is multiple of 4. - */ - -void mlib_v_ImageChannelExtract_S16_21_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - if (cmask == 2) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21L(sd0, sd1, dd); - *dp++ = dd; - } - } - else { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21R(sd0, sd1, dd); - *dp++ = dd; - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_21_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0, sd1; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - if (cmask == 2) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21L(sd0, sd1, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21R(sd0, sd1, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_21_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ - mlib_d64 sda, sdb, sdc; - mlib_d64 dd0, dd1; - mlib_s32 soff; /* offset of address in src */ - mlib_s32 doff; /* offset of address in dst */ - mlib_s32 off; /* offset of dst over src */ - mlib_s32 emask; /* edge mask */ - mlib_s32 i, n; - - sa = (void *)src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *) ((mlib_addr) da & (~7)); - doff = ((mlib_addr) da & 7); - dend = da + dsize - 1; - dend2 = dend - 3; - - /* calculate the src's offset over dst */ - if (cmask == 2) { - off = (soff / 4) * 2 - doff; - } - else { - off = ((soff + 3) / 4) * 2 - doff; - } - - if (((cmask == 2) && (soff % 4 == 0)) || ((cmask == 1) && (soff % 4 != 0))) { /* extract even words */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_21L(sd0, sd1, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21L(sd0, sd1, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21L(sd0, sd1, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 16 bytes */ - sd2 = *sp++; - sd3 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_21L(sd2, sd3, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 32 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_21L(sd0, sd1, dd0); - CHANNELEXTRACT_S16_21L(sd2, sd3, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_21L(sd2, sd3, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_21L(sd2, sd3, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } - else { /* extract odd words */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes, don't care the garbage at the start point */ - sd0 = *sp++; - sd1 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_21R(sd0, sd1, dd0); - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21R(sd0, sd1, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - CHANNELEXTRACT_S16_21R(sd0, sd1, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 16 bytes */ - sd2 = *sp++; - sd3 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_21R(sd2, sd3, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 32 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_21R(sd0, sd1, dd0); - CHANNELEXTRACT_S16_21R(sd2, sd3, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_21R(sd2, sd3, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_21R(sd2, sd3, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_21(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_S16_21_D1(sa, da, xsize, cmask); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -#define CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd) \ - /* extract the left channel */ \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \ - sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \ - sdc = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -#define CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd) \ - /* extract the middle channel */ \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \ - sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2)); \ - sdc = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -#define CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd) \ - /* extract the right channel */ \ - sda = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \ - sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2)); \ - sdc = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_31_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - if (cmask == 4) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd); - *dp++ = dd; - } - } - else if (cmask == 2) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd); - *dp++ = dd; - } - } - else { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd); - *dp++ = dd; - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_31_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0, sd1, sd2; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - if (cmask == 4) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (cmask == 2) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_31_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 sd0, sd1, sd2; /* 8-byte source data */ - mlib_d64 sd3, sd4, sd5; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd0, dd1; - mlib_s32 soff; /* offset of address in src */ - mlib_s32 doff; /* offset of address in dst */ - mlib_s32 off; /* offset of src over dst */ - mlib_s32 emask; /* edge mask */ - mlib_s32 i, n; - - sa = (void *)src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *) ((mlib_addr) da & (~7)); - doff = ((mlib_addr) da & 7); - dend = da + dsize - 1; - dend2 = dend - 3; - - /* calculate the src's offset over dst */ - if (cmask == 4) { - off = (soff / 6) * 2 - doff; - } - else if (cmask == 2) { - off = ((soff + 2) / 6) * 2 - doff; - } - else { - off = ((soff + 4) / 6) * 2 - doff; - } - - if (((cmask == 4) && (soff % 6 == 0)) || - ((cmask == 2) && (soff % 6 == 4)) || - ((cmask == 1) && (soff % 6 == 2))) { /* extract left channel */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 48 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_31L(sd0, sd1, sd2, dd0); - CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - CHANNELEXTRACT_S16_31L(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } - else if (((cmask == 4) && (soff % 6 == 2)) || - ((cmask == 2) && (soff % 6 == 0)) || - ((cmask == 1) && (soff % 6 == 4))) { - /* extract middle channel */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 48 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_31M(sd0, sd1, sd2, dd0); - CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - CHANNELEXTRACT_S16_31M(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } - else { /* extract right channel */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 48 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_31R(sd0, sd1, sd2, dd0); - CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - CHANNELEXTRACT_S16_31R(sd3, sd4, sd5, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_31(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_S16_31_D1(sa, da, xsize, cmask); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -#define CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd) \ - /* extract the left channel */ \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2)); \ - sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_hi(sd3)); \ - sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -#define CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd) \ - /* extract the middle left channel */ \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2)); \ - sdb = vis_fpmerge(vis_read_hi(sd1), vis_read_hi(sd3)); \ - sdc = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -#define CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd) \ - /* extract the middle right channel */ \ - sda = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2)); \ - sdb = vis_fpmerge(vis_read_lo(sd1), vis_read_lo(sd3)); \ - sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -#define CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd) \ - /* extract the right channel */ \ - sda = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2)); \ - sdb = vis_fpmerge(vis_read_lo(sd1), vis_read_lo(sd3)); \ - sdc = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sdb)); \ - dd = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)) - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_41_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2, sd3; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - if (cmask == 8) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } - else if (cmask == 4) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } - else if (cmask == 2) { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } - else { -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_41_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0, sd1, sd2, sd3; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - if (cmask == 8) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (cmask == 4) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (cmask == 2) { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else { - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd); - *dp++ = dd; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_41_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ - mlib_d64 sd4, sd5, sd6, sd7; - mlib_d64 sda, sdb, sdc; - mlib_d64 dd0, dd1; - mlib_s32 soff; /* offset of address in src */ - mlib_s32 doff; /* offset of address in dst */ - mlib_s32 off; /* offset of src over dst */ - mlib_s32 emask; /* edge mask */ - mlib_s32 i, n; - - sa = (void *)src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *) ((mlib_addr) da & (~7)); - doff = ((mlib_addr) da & 7); - dend = da + dsize - 1; - dend2 = dend - 3; - - /* calculate the src's offset over dst */ - if (cmask == 8) { - off = (soff / 8) * 2 - doff; - } - else if (cmask == 4) { - off = ((soff + 2) / 8) * 2 - doff; - } - else if (cmask == 2) { - off = ((soff + 4) / 8) * 2 - doff; - } - else { - off = ((soff + 6) / 8) * 2 - doff; - } - - if (((cmask == 8) && (soff == 0)) || - ((cmask == 4) && (soff == 6)) || - ((cmask == 2) && (soff == 4)) || - ((cmask == 1) && (soff == 2))) { /* extract left channel */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 48 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41L(sd0, sd1, sd2, sd3, dd0); - CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41L(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } - else if (((cmask == 8) && (soff == 2)) || - ((cmask == 4) && (soff == 0)) || - ((cmask == 2) && (soff == 6)) || - ((cmask == 1) && (soff == 4))) { /* extract middle left channel */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 48 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41ML(sd0, sd1, sd2, sd3, dd0); - CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41ML(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } - else if (((cmask == 8) && (soff == 4)) || - ((cmask == 4) && (soff == 2)) || - ((cmask == 2) && (soff == 0)) || - ((cmask == 1) && (soff == 6))) { /* extract middle right channel */ - - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - else { - /* load 48 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41MR(sd0, sd1, sd2, sd3, dd0); - CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41MR(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } - else { /* extract right channel */ - if (off == 0) { /* src and dst have same alignment */ - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 16 bytes */ - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - - /* extract, including some garbage at the start point */ - CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd0); - - /* store 8 bytes result */ - vis_pst_16(dd0, dp++, emask); - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd0); - *dp++ = dd0; - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - sd0 = *sp++; - sd1 = *sp++; - sd2 = *sp++; - sd3 = *sp++; - CHANNELEXTRACT_S16_41R(sd0, sd1, sd2, sd3, dd0); - vis_pst_16(dd0, dp++, emask); - } - } - else { - vis_alignaddr((void *)0, off); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - if (off < 0) { - /* load 24 bytes */ - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - - /* extract and store 8 bytes */ - CHANNELEXTRACT_S16_41R(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd1, dd1), dp++, emask); - } - - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 8 + 1; - - /* 8-pixel column loop, emask not needed */ -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41R(sd4, sd5, sd6, sd7, dd1); - *dp++ = vis_faligndata(dd0, dd1); - } - } - - /* end point handling */ - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - dd0 = dd1; - sd4 = *sp++; - sd5 = *sp++; - sd6 = *sp++; - sd7 = *sp++; - CHANNELEXTRACT_S16_41R(sd4, sd5, sd6, sd7, dd1); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_41(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_S16_41_D1(sa, da, xsize, cmask); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_43.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_43.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_43.c Fri May 13 11:31:05 2016 +0300 @@ -29,22 +29,8 @@ * FILENAME: mlib_v_ImageChannelExtract_43.c * * FUNCTIONS - * mlib_v_ImageChannelExtract_U8_43R_A8D1X8 - * mlib_v_ImageChannelExtract_U8_43R_A8D2X8 - * mlib_v_ImageChannelExtract_U8_43R_D1 - * mlib_v_ImageChannelExtract_U8_43R - * mlib_v_ImageChannelExtract_S16_43R_A8D1X4 - * mlib_v_ImageChannelExtract_S16_43R_A8D2X4 - * mlib_v_ImageChannelExtract_S16_43R_D1 - * mlib_v_ImageChannelExtract_S16_43R - * mlib_v_ImageChannelExtract_U8_43L_A8D1X8 - * mlib_v_ImageChannelExtract_U8_43L_A8D2X8 * mlib_v_ImageChannelExtract_U8_43L_D1 - * mlib_v_ImageChannelExtract_U8_43L - * mlib_v_ImageChannelExtract_S16_43L_A8D1X4 - * mlib_v_ImageChannelExtract_S16_43L_A8D2X4 * mlib_v_ImageChannelExtract_S16_43L_D1 - * mlib_v_ImageChannelExtract_S16_43L * * SYNOPSIS * @@ -74,705 +60,6 @@ #include "mlib_v_ImageChannelExtract.h" /***************************************************************/ -#define EXTRACT_U8_43R_old /* shift right */ \ - dd2 = vis_faligndata(sd3, dd2); /* r7-------------- */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* g7r7------------ */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* b7g7r7---------- */ \ - sd3 = vis_faligndata(sd3, sd3); \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* r6b7g7r7-------- */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* g6r6b7g7r7------ */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* b6g6r6b7g7r7---- */ \ - \ - dd2 = vis_faligndata(sd2, dd2); /* r5b6g6r6b7g7r7-- */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd2 = vis_faligndata(sd2, dd2); /* g5r5b6g6r6b7g7r7 */ \ - \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(sd2, dd1); /* b5-------------- */ \ - sd2 = vis_faligndata(sd2, sd2); \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(sd2, dd1); /* r4b5------------ */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(sd2, dd1); /* g4r4b5---------- */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(sd2, dd1); /* b4g4r4b5-------- */ \ - \ - dd1 = vis_faligndata(sd1, dd1); /* r3b4g4r4b5------ */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(sd1, dd1); /* g3r3b4g4r4b5---- */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(sd1, dd1); /* b3g3r3b4g4r4b5-- */ \ - sd1 = vis_faligndata(sd1, sd1); \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(sd1, dd1); /* r2b3g3r3b4g4r4b5 */ \ - \ - sd1 = vis_faligndata(sd1, sd1); \ - dd0 = vis_faligndata(sd1, dd0); /* g2-------------- */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd0 = vis_faligndata(sd1, dd0); /* b2g2------------ */ \ - \ - dd0 = vis_faligndata(sd0, dd0); /* r1b2g2---------- */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* g1r1b2g2-------- */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* b1g1r1b2g2------ */ \ - sd0 = vis_faligndata(sd0, sd0); \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* r0b1g1r1b2g2---- */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* g0r0b1g1r1b2g2-- */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* b0g0r0b1g1r1b2g2 */ - -/***************************************************************/ -#define EXTRACT_U8_43R /* shift right */ \ - vis_alignaddr((void *)0, 5); \ - dd2 = vis_faligndata(sd3, dd2); /* b7g7r7---------- */ \ - sda = vis_freg_pair(vis_read_hi(sd3), vis_read_hi(sd3)); \ - dd2 = vis_faligndata(sda, dd2); /* b6g6r6b7g7r7---- */ \ - \ - vis_alignaddr((void *)0, 6); \ - dd2 = vis_faligndata(sd2, dd2); /* g5r5b6g6r6b7g7r7 */ \ - \ - vis_alignaddr((void *)0, 5); \ - dd1 = vis_faligndata(sd2, dd1); /* b5g5r5---------- */ \ - sda = vis_freg_pair(vis_read_hi(sd2), vis_read_hi(sd2)); \ - dd1 = vis_faligndata(sda, dd1); /* b4g4r4b5g5r5---- */ \ - dd1 = vis_faligndata(sd1, dd1); /* b3g3r3b4g4r4b5g5 */ \ - sda = vis_freg_pair(vis_read_hi(sd1), vis_read_hi(sd1)); \ - vis_alignaddr((void *)0, 7); \ - dd1 = vis_faligndata(sda, dd1); /* r2b3g3r3b4g4r4b5 */ \ - \ - vis_alignaddr((void *)0, 5); \ - dd0 = vis_faligndata(sda, dd0); /* b2g2r2---------- */ \ - dd0 = vis_faligndata(sd0, dd0); /* b1g1r1b2g2r2---- */ \ - sda = vis_freg_pair(vis_read_hi(sd0), vis_read_hi(sd0)); \ - dd0 = vis_faligndata(sda, dd0); /* b0g0r0b1g1r1b2g2 */ - -/***************************************************************/ -#define LOAD_EXTRACT_U8_43R_STORE \ - sd0 = *sp++; /* --b0g0r0--b1g1r1 */ \ - sd1 = *sp++; /* --b2g2r2--b3g3r3 */ \ - sd2 = *sp++; /* --b4g4r4--b5g5r5 */ \ - sd3 = *sp++; /* --b6g6r6--b7g7r7 */ \ - EXTRACT_U8_43R; \ - *dp++ = dd0; /* b0g0r0b1g1r1b2g2 */ \ - *dp++ = dd1; /* r2b3g3r3b4g4r4b5 */ \ - *dp++ = dd2; /* g5r5b6g6r6b7g7r7 */ - -/***************************************************************/ -#define LOAD_EXTRACT_U8_43R \ - vis_alignaddr((void *)soff, 0); \ - s0 = s4; \ - s1 = sp[1]; \ - s2 = sp[2]; \ - s3 = sp[3]; \ - s4 = sp[4]; \ - sd0 = vis_faligndata(s0, s1); \ - sd1 = vis_faligndata(s1, s2); \ - sd2 = vis_faligndata(s2, s3); \ - sd3 = vis_faligndata(s3, s4); \ - sp += 4; \ - dd2old = dd2; \ - EXTRACT_U8_43R - -/***************************************************************/ -/* - * Both source and destination image data are 1-d vectors and - * 8-byte aligned. And dsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_d64 sda; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 7); *//* only for _old */ - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - LOAD_EXTRACT_U8_43R_STORE; - } -} - -/***************************************************************/ -/* - * Either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. And slb and dlb are multiple of 8. - * The xsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_d64 sda; - mlib_s32 i, j; /* indices for x, y */ - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 7); *//* only for _old */ - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 8-byte column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - LOAD_EXTRACT_U8_43R_STORE; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -/* - * Either source or destination data are not 8-byte aligned. - * And dsize is in pixels. - */ - -void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_u8 *sa, *da; - mlib_u8 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ - mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_d64 dd2old; /* the last datum of the last step */ - mlib_d64 sda; - mlib_s32 soff; /* offset of address in src */ - mlib_s32 doff; /* offset of address in dst */ - mlib_s32 emask; /* edge mask */ - mlib_s32 i, n; - - sa = (void *)src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *) ((mlib_addr) da & (~7)); - dend = da + dsize * 3 - 1; - dend2 = dend - 23; - doff = 8 - ((mlib_addr) da & 7); - - /* generate edge mask for the start point */ - emask = vis_edge8(da, dend); - - /* load 32 byte, convert, store 24 bytes */ - s4 = sp[0]; /* initial value */ - LOAD_EXTRACT_U8_43R; - - if (dsize >= 8) { - if (doff == 8) { - vis_pst_8(dd0, dp++, emask); - *dp++ = dd1; - *dp++ = dd2; - } - else { - vis_alignaddr((void *)doff, 0); - vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); - *dp++ = vis_faligndata(dd0, dd1); - *dp++ = vis_faligndata(dd1, dd2); - } - } - else { /* for very small size */ - if (doff == 8) { - vis_pst_8(dd0, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd1, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd2, dp++, emask); - } - } - } - else { - vis_alignaddr((void *)doff, 0); - vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask); - } - } - } - } - } - - /* no edge handling is needed in the loop */ - if (doff == 8) { - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_EXTRACT_U8_43R; - *dp++ = dd0; - *dp++ = dd1; - *dp++ = dd2; - } - } - } - else { - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_EXTRACT_U8_43R; - vis_alignaddr((void *)doff, 0); - *dp++ = vis_faligndata(dd2old, dd0); - *dp++ = vis_faligndata(dd0, dd1); - *dp++ = vis_faligndata(dd1, dd2); - } - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_EXTRACT_U8_43R; - emask = vis_edge8(dp, dend); - if (doff == 8) { - vis_pst_8(dd0, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd1, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd2, dp++, emask); - } - } - } - else { - vis_alignaddr((void *)doff, 0); - vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); - } - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_U8_43R_D1(sa, da, xsize); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ -#define EXTRACT_S16_43R_old /* shift right */ \ - \ - dd2 = vis_faligndata(sd3, dd2); /* r3------ */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* g3r3---- */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(sd3, dd2); /* b3g3r3-- */ \ - \ - dd2 = vis_faligndata(sd2, dd2); /* r2b3g3r3 */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(sd2, dd1); /* g2------ */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(sd2, dd1); /* b2g2---- */ \ - \ - dd1 = vis_faligndata(sd1, dd1); /* r1b2g2-- */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(sd1, dd1); /* g1r1b2g2 */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd0 = vis_faligndata(sd1, dd0); /* b1------ */ \ - \ - dd0 = vis_faligndata(sd0, dd0); /* r0b1---- */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* g0r0b1-- */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(sd0, dd0); /* b0g0r0b1 */ - -/***************************************************************/ -#define EXTRACT_S16_43R /* shift right */ \ - \ - vis_alignaddr((void *)0, 2); \ - dd2 = vis_faligndata(sd3, dd2); /* b3g3r3-- */ \ - \ - vis_alignaddr((void *)0, 6); \ - dd2 = vis_faligndata(sd2, dd2); /* r2b3g3r3 */ \ - vis_alignaddr((void *)0, 2); \ - dd1 = vis_faligndata(sd2, dd1); /* b2g2r2-- */ \ - \ - vis_alignaddr((void *)0, 4); \ - dd1 = vis_faligndata(sd1, dd1); /* g1r1b2g2 */ \ - vis_alignaddr((void *)0, 2); \ - dd0 = vis_faligndata(sd1, dd0); /* b1g1r1-- */ \ - dd0 = vis_faligndata(sd0, dd0); /* b0g0r0b1 */ - -/***************************************************************/ -#define LOAD_EXTRACT_S16_43R_STORE \ - \ - sd0 = *sp++; /* --b0g0r0 */ \ - sd1 = *sp++; /* --b1g1r1 */ \ - sd2 = *sp++; /* --b2g2r2 */ \ - sd3 = *sp++; /* --b3g3r3 */ \ - \ - EXTRACT_S16_43R; \ - \ - *dp++ = dd0; /* b0g0r0b1 */ \ - *dp++ = dd1; /* g1r1b2g2 */ \ - *dp++ = dd2; /* r2b3g3r3 */ - -/***************************************************************/ -#define LOAD_EXTRACT_S16_43R \ - \ - vis_alignaddr((void *)soff, 0); \ - s0 = s4; \ - s1 = sp[1]; \ - s2 = sp[2]; \ - s3 = sp[3]; \ - s4 = sp[4]; \ - sd0 = vis_faligndata(s0, s1); \ - sd1 = vis_faligndata(s1, s2); \ - sd2 = vis_faligndata(s2, s3); \ - sd3 = vis_faligndata(s3, s4); \ - sp += 4; \ - dd2old = dd2; \ - EXTRACT_S16_43R - -/***************************************************************/ -/* - * Both source and destination image data are 1-d vectors and - * 8-byte aligned. And size is in 4-pixels. - */ - -void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 6); *//* only for _old */ - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_EXTRACT_S16_43R_STORE; - } -} - -/***************************************************************/ -/* - * Either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. The xsize is multiple of 8. - * slb and dlb are multiple of 8. - */ - -void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_s32 i, j; /* indices for x, y */ - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 6); *//* only for _old */ - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 4-pixel column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_EXTRACT_S16_43R_STORE; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -/* - * Either source or destination data are not 8-byte aligned. - * And dsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_s16 *sa, *da; /* pointer for pixel */ - mlib_s16 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ - mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_d64 dd2old; /* the last datum of the last step */ - mlib_s32 soff; /* offset of address in src */ - mlib_s32 doff; /* offset of address in dst */ - mlib_s32 emask; /* edge mask */ - mlib_s32 i, n; - - sa = (void *)src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *) ((mlib_addr) da & (~7)); - dend = da + dsize * 3 - 1; - dend2 = dend - 11; - doff = 8 - ((mlib_addr) da & 7); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 32 byte, convert, store 24 bytes */ - s4 = sp[0]; /* initial value */ - LOAD_EXTRACT_S16_43R; - - if (dsize >= 4) { - if (doff == 8) { - vis_pst_16(dd0, dp++, emask); - *dp++ = dd1; - *dp++ = dd2; - } - else { - vis_alignaddr((void *)doff, 0); - vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask); - *dp++ = vis_faligndata(dd0, dd1); - *dp++ = vis_faligndata(dd1, dd2); - } - } - else { /* for very small size */ - if (doff == 8) { - vis_pst_16(dd0, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd1, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd2, dp++, emask); - } - } - } - else { - vis_alignaddr((void *)doff, 0); - vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); - } - } - } - } - - /* no edge handling is needed in the loop */ - if (doff == 8) { - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_EXTRACT_S16_43R; - *dp++ = dd0; - *dp++ = dd1; - *dp++ = dd2; - } - } - } - else { - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_EXTRACT_S16_43R; - vis_alignaddr((void *)doff, 0); - *dp++ = vis_faligndata(dd2old, dd0); - *dp++ = vis_faligndata(dd0, dd1); - *dp++ = vis_faligndata(dd1, dd2); - } - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_EXTRACT_S16_43R; - emask = vis_edge16(dp, dend); - if (doff == 8) { - vis_pst_16(dd0, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd1, dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd2, dp++, emask); - } - } - } - else { - vis_alignaddr((void *)doff, 0); - vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); - } - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_S16_43R_D1(sa, da, xsize); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -#define EXTRACT_U8_43L_old /* shift left */ \ - \ - dd0 = vis_faligndata(dd0, sd0); /* --------------r0 */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(dd0, sd0); /* ------------r0g0 */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(dd0, sd0); /* ----------r0g0b0 */ \ - sd0 = vis_faligndata(sd0, sd0); \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(dd0, sd0); /* --------r0g0b0r1 */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(dd0, sd0); /* ------r0g0b0r1g1 */ \ - sd0 = vis_faligndata(sd0, sd0); \ - dd0 = vis_faligndata(dd0, sd0); /* ----r0g0b0r1g1b1 */ \ - \ - dd0 = vis_faligndata(dd0, sd1); /* --r0g0b0r1g1b1r2 */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1g1b1r2g2 */ \ - \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(dd1, sd1); /* --------------b2 */ \ - sd1 = vis_faligndata(sd1, sd1); \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(dd1, sd1); /* ------------b2r3 */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(dd1, sd1); /* ----------b2r3g3 */ \ - sd1 = vis_faligndata(sd1, sd1); \ - dd1 = vis_faligndata(dd1, sd1); /* --------b2r3g3b3 */ \ - \ - dd1 = vis_faligndata(dd1, sd2); /* ------b2r3g3b3r4 */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(dd1, sd2); /* ----b2r3g3b3r4g4 */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(dd1, sd2); /* --b2r3g3b3r4g4b4 */ \ - sd2 = vis_faligndata(sd2, sd2); \ - sd2 = vis_faligndata(sd2, sd2); \ - dd1 = vis_faligndata(dd1, sd2); /* b2r3g3b3r4g4b4r5 */ \ - \ - sd2 = vis_faligndata(sd2, sd2); \ - dd2 = vis_faligndata(dd2, sd2); /* --------------g5 */ \ - sd2 = vis_faligndata(sd2, sd2); \ - dd2 = vis_faligndata(dd2, sd2); /* ------------g5b5 */ \ - \ - dd2 = vis_faligndata(dd2, sd3); /* ----------g5b5r6 */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(dd2, sd3); /* --------g5b5r6g6 */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(dd2, sd3); /* ------g5b5r6g6b6 */ \ - sd3 = vis_faligndata(sd3, sd3); \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(dd2, sd3); /* ----g5b5r6g6b6r7 */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(dd2, sd3); /* --g5b5r6g6b6r7g7 */ \ - sd3 = vis_faligndata(sd3, sd3); \ - dd2 = vis_faligndata(dd2, sd3); /* g5b5r6g6b6r7g7b7 */ - -/***************************************************************/ #define EXTRACT_U8_43L /* shift left */ \ \ vis_alignaddr((void *)0, 3); \ @@ -801,20 +88,6 @@ dd2 = vis_faligndata(dd2, sda); /* g5b5r6g6b6r7g7b7 */ /***************************************************************/ -#define LOAD_EXTRACT_U8_43L_STORE \ - \ - sd0 = *sp++; /* r0g0b0--r1g1b1-- */ \ - sd1 = *sp++; /* r2g2b2--r3g3b3-- */ \ - sd2 = *sp++; /* r4g4b4--r5g5b5-- */ \ - sd3 = *sp++; /* r6g6b6--r7g7b7-- */ \ - \ - EXTRACT_U8_43L; \ - \ - *dp++ = dd0; /* r0g0b0r1g1b1r2g2 */ \ - *dp++ = dd1; /* b2r3g3b3r4g4b4r5 */ \ - *dp++ = dd2; /* g5b5r6g6b6r7g7b7 */ - -/***************************************************************/ #define LOAD_EXTRACT_U8_43L \ \ vis_alignaddr((void *)soff, 0); \ @@ -835,74 +108,6 @@ /***************************************************************/ /* - * Both source and destination image data are 1-d vectors and - * 8-byte aligned. And dsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_d64 sda; - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 1); *//* for _old only */ - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - LOAD_EXTRACT_U8_43L_STORE; - } -} - -/***************************************************************/ -/* - * Either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. And slb and dlb are multiple of 8. - * The xsize is multiple of 8. - */ - -void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_d64 sda; - mlib_s32 i, j; /* indices for x, y */ - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 1); *//* for _old only */ - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 8-byte column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - LOAD_EXTRACT_U8_43L_STORE; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -/* * Either source or destination data are not 8-byte aligned. * And ssize is multiple of 8. */ @@ -1045,28 +250,6 @@ } /***************************************************************/ -void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_U8_43L_D1(sa, da, xsize); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ #define EXTRACT_S16_43L /* shift left */ \ vis_alignaddr((void *)0, 6); \ dd0 = vis_faligndata(dd0, sd0); /* --r0g0b0 */ \ @@ -1083,20 +266,6 @@ dd2 = vis_faligndata(dd2, sd3); /* b2r3g3b3 */ /***************************************************************/ -#define LOAD_EXTRACT_S16_43L_STORE \ - \ - sd0 = *sp++; /* r0g0b0-- */ \ - sd1 = *sp++; /* r1g1b1-- */ \ - sd2 = *sp++; /* r2g2b2-- */ \ - sd3 = *sp++; /* r3g3b3-- */ \ - \ - EXTRACT_S16_43L; \ - \ - *dp++ = dd0; /* r0g0b0r1 */ \ - *dp++ = dd1; /* g1b1r2g2 */ \ - *dp++ = dd2; /* b2r3g3b3 */ - -/***************************************************************/ #define LOAD_EXTRACT_S16_43L \ \ vis_alignaddr((void *)soff, 0); \ @@ -1115,72 +284,6 @@ /***************************************************************/ /* - * Both source and destination image data are 1-d vectors and - * 8-byte aligned. And dsize is multiple of 4. - */ - -void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_s32 i; - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 2); *//* only for _old */ - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_EXTRACT_S16_43L_STORE; - } -} - -/***************************************************************/ -/* - * Either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. The xsize is multiple of 4. - * And slb and dlb are multiple of 8. - */ - -void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2, sd3; /* source data */ - mlib_d64 dd0, dd1, dd2; /* dst data */ - mlib_s32 i, j; /* indices for x, y */ - - /* set GSR.offset for vis_faligndata() */ -/* vis_alignaddr((void *)0, 2); *//* only for _old */ - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 4-pixel column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_EXTRACT_S16_43L_STORE; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -/* * Either source or destination data are not 8-byte aligned. * And size is in pixels. */ @@ -1318,25 +421,3 @@ } /***************************************************************/ -void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelExtract_S16_43L_D1(sa, da, xsize); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_f.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelExtract_f.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,784 +0,0 @@ -/* - * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - -#include -#include "mlib_image.h" -#include "mlib_ImageCheck.h" - -typedef union { - double d64; - struct { - float f0; - float f1; - } f32s; -} d64_2_f32; - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_U8_2_1(mlib_u8 *sl, mlib_s32 slb, - mlib_u8 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height) -{ - mlib_u8 *sp = sl; - mlib_u8 *dp = dl; - int i, j; - - for (j = 0; j < height; j++) { - mlib_u8 *dend = dl + width; - mlib_u32 *sp2; - while (((mlib_addr)sp & 7) > 1) { - *dp++ = *sp; - sp += 2; - if (dp >= dend) break; - } - if ((mlib_addr)sp & 7) { - sp2 = (mlib_u32 *)(sp - 1); -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - mlib_u32 s0; - s0 = *sp2++; - dp[0] = s0 >> 16; - dp[1] = s0; - } - if (dp < dend) { - dp[0] = sp2[0] >> 16; - } - } else { - sp2 = (mlib_u32 *)sp; -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - mlib_u32 s0; - s0 = *sp2++; - dp[0] = s0 >> 24; - dp[1] = s0 >> 8; - } - if (dp < dend) { - dp[0] = sp2[0] >> 24; - } - } - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_U8_3_2(mlib_u8 *sl, mlib_s32 slb, - mlib_u8 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1) -{ - mlib_u8 *sp = sl; - mlib_u8 *dp = dl; - mlib_u32 *sp2; - mlib_u16 *dp2; - mlib_u16 *d2end; - mlib_u32 s0, s1, s2, s3; - int i, j, off, count_off; - - for (j = 0; j < height; j++) { - mlib_u8 *dend = dl + 2*width; - - if (count1 == 1) { - if (dp < dend) *dp++ = sp[0]; - sp += 2; - } - - if ((mlib_addr)dp & 1) { -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - dp[0] = sp[0]; - dp[1] = sp[1]; - sp += 3; - } - if (dp < dend) { - dp[0] = sp[0]; - } - sp = sl += slb; - dp = dl += dlb; - continue; - } - - dp2 = (mlib_u16*)dp; - d2end = (mlib_u16*)((mlib_addr)dend &~ 1); - off = (mlib_addr)sp & 3; - sp2 = (mlib_u32 *)(sp - off); - - switch (off) { - - case 0: -#pragma pipeloop(0) - for (; dp2 <= (d2end-4); dp2 += 4) { - s0 = sp2[0]; - s1 = sp2[1]; - s2 = sp2[2]; - dp2[0] = s0 >> 16; - dp2[1] = (s0 << 8) | (s1 >> 24); - dp2[2] = s1; - dp2[3] = s2 >> 8; - sp2 += 3; - } - break; - - case 1: -#pragma pipeloop(0) - for (; dp2 <= (d2end-4); dp2 += 4) { - s0 = sp2[0]; - s1 = sp2[1]; - s2 = sp2[2]; - dp2[0] = s0 >> 8; - dp2[1] = s1 >> 16; - dp2[2] = (s1 << 8) | (s2 >> 24); - dp2[3] = s2; - sp2 += 3; - } - break; - - case 2: -#pragma pipeloop(0) - s3 = sp2[0]; - for (; dp2 <= (d2end-4); dp2 += 4) { - s0 = s3; - s1 = sp2[1]; - s2 = sp2[2]; - s3 = sp2[3]; - dp2[0] = s0; - dp2[1] = s1 >> 8; - dp2[2] = s2 >> 16; - dp2[3] = (s2 << 8) | (s3 >> 24); - sp2 += 3; - } - break; - - case 3: -#pragma pipeloop(0) - s3 = sp2[0]; - for (; dp2 <= (d2end-4); dp2 += 4) { - s0 = s3; - s1 = sp2[1]; - s2 = sp2[2]; - s3 = sp2[3]; - dp2[0] = (s0 << 8) | (s1 >> 24); - dp2[1] = s1; - dp2[2] = s2 >> 8; - dp2[3] = s3 >> 16; - sp2 += 3; - } - } - - sp = (mlib_u8 *)sp2 + off; - dp = (mlib_u8 *)dp2; - while (dp < dend) { - *dp++ = sp[0]; - if (dp < dend) *dp++ = sp[1]; - sp += 3; - } - - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_U8_4_2(mlib_u8 *sl, mlib_s32 slb, - mlib_u8 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1) -{ - mlib_u8 *sp = sl; - mlib_u8 *dp = dl; - mlib_u32 *sp2; - mlib_u16 *dp2; - mlib_u16 *d2end; - mlib_u32 s0, s1, s2, s3; - int i, j, off, count_off; - - for (j = 0; j < height; j++) { - mlib_u8 *dend = dl + 2*width; - - if (count1 == 1) { - if (dp < dend) *dp++ = sp[0]; - sp += 3; - } - - off = (mlib_addr)sp & 3; - - if (((mlib_addr)dp & 1) || (off == 3)) { -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - dp[0] = sp[0]; - dp[1] = sp[1]; - sp += 4; - } - if (dp < dend) { - dp[0] = sp[0]; - } - sp = sl += slb; - dp = dl += dlb; - continue; - } - - dp2 = (mlib_u16*)dp; - d2end = (mlib_u16*)((mlib_addr)dend &~ 1); - sp2 = (mlib_u32 *)(sp - off); - - switch (off) { - - case 0: -#pragma pipeloop(0) - for (; dp2 < d2end; dp2++) { - s0 = sp2[0]; - dp2[0] = s0 >> 16; - sp2++; - } - break; - - case 1: -#pragma pipeloop(0) - for (; dp2 < d2end; dp2++) { - s0 = sp2[0]; - dp2[0] = s0 >> 8; - sp2++; - } - break; - - case 2: -#pragma pipeloop(0) - for (; dp2 < d2end; dp2++) { - s0 = sp2[0]; - dp2[0] = s0; - sp2++; - } - break; - } - - sp = (mlib_u8 *)sp2 + off; - dp = (mlib_u8 *)dp2; - if (dp < dend) { - *dp++ = sp[0]; - } - - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_32_2_1(mlib_f32 *sp, mlib_s32 slb, - mlib_f32 *dp, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height) -{ - mlib_d64 *sp2; - int i, j, off; - - for (j = 0; j < height; j++) { - - if (((mlib_addr)sp & 7) == 0) { - sp2 = (mlib_d64 *)sp; -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - d64_2_f32 d; - d.d64 = sp2[i]; - dp[i] = d.f32s.f0; - } - } else { - sp2 = (mlib_d64 *)(sp - 1); -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - d64_2_f32 d; - d.d64 = sp2[i]; - dp[i] = d.f32s.f1; - } - } - - sp += slb; - dp += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_32_3_1(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height) -{ - mlib_f32 *sp = sl; - mlib_f32 *dp = dl; - mlib_d64 *sp2; - d64_2_f32 d0; - int i, j, off; - - for (j = 0; j < height; j++) { - mlib_f32 *dend = dl + width; - - if ((mlib_addr)sp & 7) { - dp[0] = sp[0]; - sp += 3; - dp ++; - } - - sp2 = (mlib_d64 *)sp; -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - d64_2_f32 d0, d1; - d0.d64 = sp2[0]; - d1.d64 = sp2[1]; - dp[0] = d0.f32s.f0; - dp[1] = d1.f32s.f1; - sp2 += 3; - } - - if (dp < dend) { - d0.d64 = sp2[0]; - dp[0] = d0.f32s.f0; - } - - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_32_3_2(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1) -{ - mlib_f32 *sp = sl; - mlib_f32 *dp = dl; - mlib_d64 *sp2; - d64_2_f32 d0; - int i, j, off; - - for (j = 0; j < height; j++) { - mlib_f32 *dend = dl + 2*width; - - if (count1 == 1) { - if (dp < dend) *dp++ = sp[0]; - sp += 2; - } - - if ((mlib_addr)sp & 7) { - if (dp < dend) *dp++ = sp[0]; - if (dp < dend) *dp++ = sp[1]; - sp += 3; - } - - sp2 = (mlib_d64 *)sp; -#pragma pipeloop(0) - for (; dp <= (dend-4); dp += 4) { - d64_2_f32 d0, d1, d2; - d0.d64 = sp2[0]; - d1.d64 = sp2[1]; - d2.d64 = sp2[2]; - dp[0] = d0.f32s.f0; - dp[1] = d0.f32s.f1; - dp[2] = d1.f32s.f1; - dp[3] = d2.f32s.f0; - sp2 += 3; - } - - if (dp < dend) { - sp = (mlib_f32 *)sp2; - *dp++ = sp[0]; - if (dp < dend) *dp++ = sp[1]; - if (dp < dend) *dp++ = sp[3]; - } - - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_32_4_1(mlib_f32 *sp, mlib_s32 slb, - mlib_f32 *dp, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height) -{ - mlib_d64 *sp2; - int i, j, off; - - for (j = 0; j < height; j++) { - - if (((mlib_addr)sp & 7) == 0) { - sp2 = (mlib_d64 *)sp; -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - d64_2_f32 d; - d.d64 = sp2[2*i]; - dp[i] = d.f32s.f0; - } - } else { - sp2 = (mlib_d64 *)(sp - 1); -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - d64_2_f32 d; - d.d64 = sp2[2*i]; - dp[i] = d.f32s.f1; - } - } - - sp += slb; - dp += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_32_4_2(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1) -{ - mlib_f32 *sp = sl; - mlib_f32 *dp = dl; - mlib_d64 *sp2; - int i, j, off; - d64_2_f32 d0, d1; - - for (j = 0; j < height; j++) { - mlib_f32 *dend = dl + 2*width; - - if (count1 == 1) { - dp[0] = sp[0]; - sp += 3; - dp ++; - } - - if (((mlib_addr)sp & 7) == 0) { - sp2 = (mlib_d64 *)sp; -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - d64_2_f32 d; - d.d64 = sp2[0]; - dp[0] = d.f32s.f0; - dp[1] = d.f32s.f1; - sp2 += 2; - } - if (dp < dend) { - d0.d64 = sp2[0]; - dp[0] = d0.f32s.f0; - } - } else { - sp2 = (mlib_d64 *)(sp - 1); -#pragma pipeloop(0) - for (; dp <= (dend-2); dp += 2) { - d64_2_f32 d0, d1; - d0.d64 = sp2[0]; - d1.d64 = sp2[1]; - dp[0] = d0.f32s.f1; - dp[1] = d1.f32s.f0; - sp2 += 2; - } - if (dp < dend) { - d0.d64 = sp2[0]; - dp[0] = d0.f32s.f1; - } - } - - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ - -void mlib_v_ImageChannelExtract_32_4_3(mlib_f32 *sl, mlib_s32 slb, - mlib_f32 *dl, mlib_s32 dlb, - mlib_s32 width, mlib_s32 height, - mlib_s32 count1) -{ - mlib_f32 *sp = sl; - mlib_f32 *dp = dl; - mlib_d64 *sp2; - int i, j, k; - d64_2_f32 d0, d1; - - for (j = 0; j < height; j++) { - mlib_f32 *dend = dl + 3*width; - - for (k = 0; k < count1; k++) { - if (dp < dend) *dp++ = *sp++; - } - sp++; - - if (((mlib_addr)sp & 7) == 0) { - sp2 = (mlib_d64 *)sp; -#pragma pipeloop(0) - for (; dp <= (dend-3); dp += 3) { - d64_2_f32 d0, d1; - d0.d64 = sp2[0]; - d1.d64 = sp2[1]; - dp[0] = d0.f32s.f0; - dp[1] = d0.f32s.f1; - dp[2] = d1.f32s.f0; - sp2 += 2; - } - if (dp < dend) { - d0.d64 = sp2[0]; - *dp++ = d0.f32s.f0; - if (dp < dend) *dp++ = d0.f32s.f1; - } - } else { - sp2 = (mlib_d64 *)(sp - 1); -#pragma pipeloop(0) - for (; dp <= (dend-3); dp += 3) { - d64_2_f32 d0, d1; - d0.d64 = sp2[0]; - d1.d64 = sp2[1]; - dp[0] = d0.f32s.f1; - dp[1] = d1.f32s.f0; - dp[2] = d1.f32s.f1; - sp2 += 2; - } - if (dp < dend) { - d0.d64 = sp2[0]; - d1.d64 = sp2[1]; - *dp++ = d0.f32s.f1; - if (dp < dend) *dp++ = d1.f32s.f0; - } - } - - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ -/* general channel extraction: slower due to the inner loop */ - -void mlib_v_ImageChannelExtract_U8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask) -{ - mlib_u8 *sp; /* pointer for pixel in src */ - mlib_u8 *sl; /* pointer for line in src */ - mlib_u8 *dp; /* pointer for pixel in dst */ - mlib_u8 *dl; /* pointer for line in dst */ - int i, j, k; /* indices for x, y, channel */ - int deltac[5] = { 0, 1, 1, 1, 1 }; - int inc0, inc1, inc2, inc3; - mlib_u8 s0, s1, s2, s3; - - deltac[channeld] = 1; - for (i = (channels - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channeld] = channels; - for (i = 1; i < channeld; i++) { - deltac[channeld] -= deltac[i]; - } - - sp = sl = src + deltac[0]; - dp = dl = dst; - -/* Only THREE CHANNEL CASE could be executed here!!! */ - - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { - for (i = 0; i < width; i++) { -#pragma pipeloop(0) - s0 = sp[0]; s1 = sp[inc0]; s2 = sp[inc1]; - dp[0] = s0; - dp[1] = s1; - dp[2] = s2; - sp += inc2; - dp += 3; - } - sp = sl += slb; - dp = dl += dlb; - } -} - -/***************************************************************/ -/* general channel extraction: slower due to the inner loop */ - -void mlib_v_ImageChannelExtract_S16(mlib_u16 *src, mlib_s32 slb, - mlib_u16 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask) -{ - mlib_u16 *sp; /* pointer for pixel in src */ - mlib_u16 *sl; /* pointer for line in src */ - mlib_u16 *dp; /* pointer for pixel in dst */ - mlib_u16 *dl; /* pointer for line in dst */ - int i, j, k; /* indices for x, y, channel */ - int deltac[5] = { 0, 1, 1, 1, 1 }; - int inc0, inc1, inc2, inc3; - mlib_u16 s0, s1, s2, s3; - - slb >>= 1; - dlb >>= 1; - - deltac[channeld] = 1; - for (i = (channels - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channeld] = channels; - for (i = 1; i < channeld; i++) { - deltac[channeld] -= deltac[i]; - } - - sp = sl = src + deltac[0]; - dp = dl = dst; - - if (channeld == 2) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; s1 = sp[inc0]; - dp[0] = s0; - dp[1] = s1; - sp += inc1; - dp += 2; - } - sp = sl = sl + slb; - dp = dl = dl + dlb; - } - } else - - if (channeld == 3) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { - for (i = 0; i < width; i++) { -#pragma pipeloop(0) - s0 = sp[0]; s1 = sp[inc0]; s2 = sp[inc1]; - dp[0] = s0; - dp[1] = s1; - dp[2] = s2; - sp += inc2; - dp += 3; - } - sp = sl = sl + slb; - dp = dl = dl + dlb; - } - }} - -/***************************************************************/ -/* general channel extraction: slower due to the inner loop */ - -void mlib_v_ImageChannelExtract_D64(mlib_d64 *src, mlib_s32 slb, - mlib_d64 *dst, mlib_s32 dlb, - mlib_s32 channels, mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask) -{ - mlib_d64 *sp; /* pointer for pixel in src */ - mlib_d64 *sl; /* pointer for line in src */ - mlib_d64 *dp; /* pointer for pixel in dst */ - mlib_d64 *dl; /* pointer for line in dst */ - int i, j, k; /* indices for x, y, channel */ - int deltac[5] = { 0, 1, 1, 1, 1 }; - int inc0, inc1, inc2, inc3; - mlib_d64 s0, s1, s2, s3; - - deltac[channeld] = 1; - for (i = (channels - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channeld] = channels; - for (i = 1; i < channeld; i++) { - deltac[channeld] -= deltac[i]; - } - - sp = sl = src + deltac[0]; - dp = dl = dst; - - if (channeld == 1) { - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - dp[i] = s0; - sp += channels; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } - } else - - if (channeld == 2) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; s1 = sp[inc0]; - dp[0] = s0; - dp[1] = s1; - sp += inc1; - dp += 2; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } - } else - - if (channeld == 3) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { - for (i = 0; i < width; i++) { -#pragma pipeloop(0) - s0 = sp[0]; s1 = sp[inc0]; s2 = sp[inc1]; - dp[0] = s0; - dp[1] = s1; - dp[2] = s2; - sp += inc2; - dp += 3; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } - } -} diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,715 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - - -/* - * FUNCTIONS - * mlib_ImageChannelInsert - Copy the source image into the selected - * channels of the destination image - * - * SYNOPSIS - * mlib_status mlib_ImageChannelInsert(mlib_image *dst, - * mlib_image *src, - * mlib_s32 cmask); - * - * ARGUMENT - * dst Pointer to destination image. - * src Pointer to source image. - * cmask Destination channel selection mask. - * The least significant bit (LSB) is corresponding to the - * last channel in the destination image data. - * The bits with value 1 stand for the channels selected. - * If more than N channels are selected, the leftmost N - * channels are inserted, where N is the number of channels - * in the source image. - * - * RESTRICTION - * The src and dst must have the same width, height and data type. - * The src and dst can have 1, 2, 3 or 4 channels. - * The src and dst can be either MLIB_BYTE, MLIB_SHORT, MLIB_INT, - * MLIB_FLOAT or MLIB_DOUBLE. - * - * DESCRIPTION - * Copy the source image into the selected channels of the destination - * image - */ - -#include -#include "mlib_image.h" -#include "mlib_ImageCheck.h" - -/***************************************************************/ -/* functions defined in mlib_v_ImageChannelInsert_1.c */ - -void -mlib_v_ImageChannelInsert_U8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_D64(mlib_d64 *src, mlib_s32 slb, - mlib_d64 *dst, mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S32(mlib_s32 *src, mlib_s32 slb, - mlib_s32 *dst, mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, mlib_s32 height, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_12_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_12_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_12_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_12(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_13_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_13_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_13_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_13(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_14_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_14_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_14_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_U8_14(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_12_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_12_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_12_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_12(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_13_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_13_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_13_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_13(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_14_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_14_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_14_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); -void -mlib_v_ImageChannelInsert_S16_14(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize, - mlib_s32 cmask); - -/***************************************************************/ -/* functions defined in mlib_v_ImageChannelInsert_34.c */ - -void -mlib_v_ImageChannelInsert_U8_34R_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_U8_34R_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_U8_34R_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_U8_34R(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_S16_34R_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_S16_34R_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_S16_34R_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_S16_34R(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_U8_34L_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_U8_34L_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_U8_34L_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_U8_34L(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_S16_34L_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_S16_34L_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); -void -mlib_v_ImageChannelInsert_S16_34L_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); -void -mlib_v_ImageChannelInsert_S16_34L(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize); - - -/***************************************************************/ - -#ifdef MLIB_TEST -mlib_status -mlib_v_ImageChannelInsert(mlib_image *dst, - mlib_image *src, - mlib_s32 cmask) -#else -mlib_status -mlib_ImageChannelInsert(mlib_image *dst, - mlib_image *src, - mlib_s32 cmask) -#endif -{ - const mlib_s32 X8 = 0x7; - const mlib_s32 X4 = 0x3; - const mlib_s32 X2 = 0x1; - const mlib_s32 A8D1 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_ONEDVECTOR; - const mlib_s32 A8D2X8 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH8X; - const mlib_s32 A8D2X4 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH4X; - const mlib_s32 A8D2X2 = MLIB_IMAGE_ALIGNED8 | MLIB_IMAGE_STRIDE8X | MLIB_IMAGE_WIDTH2X; - - void *sp; /* pointer for pixel in src */ - void *dp; /* pointer for pixel in dst */ - mlib_s32 ncmask = 0; /* normalized channel mask */ - mlib_s32 channels; /* number of channels for src */ - mlib_s32 channeld; /* number of channels for dst */ - mlib_s32 width, height;/* for src and dst */ - mlib_s32 strides; /* strides in bytes for src */ - mlib_s32 strided; /* strides in bytes for dst */ - mlib_s32 flags; - mlib_s32 flagd; - mlib_s32 dsize; - int i, bit1count = 0; - - MLIB_IMAGE_CHECK(src); - MLIB_IMAGE_CHECK(dst); - MLIB_IMAGE_TYPE_EQUAL(src,dst); - MLIB_IMAGE_SIZE_EQUAL(src,dst); - - channels = mlib_ImageGetChannels(src); - channeld = mlib_ImageGetChannels(dst); - width = mlib_ImageGetWidth(src); - height = mlib_ImageGetHeight(src); - strides = mlib_ImageGetStride(src); - strided = mlib_ImageGetStride(dst); - sp = mlib_ImageGetData(src); - dp = mlib_ImageGetData(dst); - flags = mlib_ImageGetFlags(src); - flagd = mlib_ImageGetFlags(dst); - dsize = width * height; - - /* normalize the cmask, and count the number of bit with value 1 */ - for (i = (channeld - 1); i >= 0; i--) { - if (((cmask & (1 << i)) != 0) && (bit1count < channels)) { - ncmask += (1 << i); - bit1count++; - } - } - - /* do not support the cases in which the number of selected channels is - * less than the nubmber of channels in the source image */ - if (bit1count < channels) { - return MLIB_FAILURE; - } - - if (((channels == 1) && (channeld == 1)) || - ((channels == 2) && (channeld == 2)) || - ((channels == 3) && (channeld == 3)) || - ((channels == 4) && (channeld == 4))) { - return mlib_ImageCopy(dst, src); - } - - switch (mlib_ImageGetType(src)) { - case MLIB_BYTE: - if (channels == 1) { - switch (channeld) { - case 2: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelInsert_U8_12_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelInsert_U8_12_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_U8_12_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelInsert_U8_12((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - break; - - case 3: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelInsert_U8_13_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelInsert_U8_13_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_U8_13_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelInsert_U8_13((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - break; - - case 4: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelInsert_U8_14_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelInsert_U8_14_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_U8_14_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelInsert_U8_14((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height, - ncmask); - } - break; - - default: - return MLIB_FAILURE; - } - } - else { - if ((channels == 3) && (channeld == 4) && (ncmask == 7)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelInsert_U8_34R_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelInsert_U8_34R_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_U8_34R_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else { - mlib_v_ImageChannelInsert_U8_34R((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - } - else if ((channels == 3) && (channeld == 4) && (ncmask == 14)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X8) == 0)) { - mlib_v_ImageChannelInsert_U8_34L_A8D1X8((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else if (((flags & A8D2X8) == 0) && - ((flagd & A8D2X8) == 0)) { - mlib_v_ImageChannelInsert_U8_34L_A8D2X8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_U8_34L_D1((mlib_u8 *)sp, - (mlib_u8 *)dp, - dsize); - } - else mlib_v_ImageChannelInsert_U8_34L((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - width, height); - } - else { - - mlib_v_ImageChannelInsert_U8((mlib_u8 *)sp, strides, - (mlib_u8 *)dp, strided, - channels, channeld, - width, height, - ncmask); - } - } - break; - - case MLIB_SHORT: - if (channels == 1) { - switch (channeld) { - case 2: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelInsert_S16_12_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelInsert_S16_12_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_S16_12_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelInsert_S16_12((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - break; - - case 3: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelInsert_S16_13_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelInsert_S16_13_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_S16_13_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelInsert_S16_13((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - break; - - case 4: - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelInsert_S16_14_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelInsert_S16_14_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_S16_14_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize, - ncmask); - } - else { - mlib_v_ImageChannelInsert_S16_14((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height, - ncmask); - } - break; - default: - return MLIB_FAILURE; - } - } - else if ((channels == 3) && (channeld == 4) && (ncmask == 7)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelInsert_S16_34R_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelInsert_S16_34R_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_S16_34R_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else { - mlib_v_ImageChannelInsert_S16_34R((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - } - else if ((channels == 3) && (channeld == 4) && (ncmask == 14)) { - if (((flags & A8D1) == 0) && - ((flagd & A8D1) == 0) && - ((dsize & X4) == 0)) { - mlib_v_ImageChannelInsert_S16_34L_A8D1X4((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else if (((flags & A8D2X4) == 0) && - ((flagd & A8D2X4) == 0)) { - mlib_v_ImageChannelInsert_S16_34L_A8D2X4((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - else if (((flags & MLIB_IMAGE_ONEDVECTOR) == 0) && - ((flagd & MLIB_IMAGE_ONEDVECTOR) == 0)) { - mlib_v_ImageChannelInsert_S16_34L_D1((mlib_s16 *)sp, - (mlib_s16 *)dp, - dsize); - } - else { - mlib_v_ImageChannelInsert_S16_34L((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - width, height); - } - } - else { - mlib_v_ImageChannelInsert_S16((mlib_s16 *)sp, strides, - (mlib_s16 *)dp, strided, - channels, channeld, - width, height, - ncmask); - } - break; - - case MLIB_INT: - mlib_v_ImageChannelInsert_S32((mlib_s32 *)sp, strides, - (mlib_s32 *)dp, strided, - channels, channeld, - width, height, - ncmask); - break; - - case MLIB_FLOAT: - mlib_v_ImageChannelInsert_S32((mlib_s32 *)sp, strides, - (mlib_s32 *)dp, strided, - channels, channeld, - width, height, - ncmask); - break; - - - case MLIB_DOUBLE: - mlib_v_ImageChannelInsert_D64((mlib_d64 *)sp, strides, - (mlib_d64 *)dp, strided, - channels, channeld, - width, height, - ncmask); - break; - - - case MLIB_BIT: - default: - return MLIB_FAILURE; /* MLIB_BIT is not supported here */ - } - - return MLIB_SUCCESS; -} -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.h --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert.h Fri May 13 11:31:05 2016 +0300 @@ -32,290 +32,21 @@ extern "C" { #endif /* __cplusplus */ -void mlib_v_ImageChannelInsert_U8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_D64(const mlib_d64 *src, - mlib_s32 slb, - mlib_d64 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S32(const mlib_s32 *src, - mlib_s32 slb, - mlib_s32 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_12_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_12_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - void mlib_v_ImageChannelInsert_U8_12_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, mlib_s32 cmask); -void mlib_v_ImageChannelInsert_U8_12(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_13_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_13_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - void mlib_v_ImageChannelInsert_U8_13_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, mlib_s32 cmask); -void mlib_v_ImageChannelInsert_U8_13(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_14_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_14_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - void mlib_v_ImageChannelInsert_U8_14_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, mlib_s32 cmask); -void mlib_v_ImageChannelInsert_U8_14(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_12_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_12_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_12_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_12(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_13_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_13_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_13_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_13(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_14_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_14_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_14_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_S16_14(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask); - -void mlib_v_ImageChannelInsert_U8_34R_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_U8_34R_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_U8_34R_D1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_U8_34R(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_S16_34R_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_S16_34R_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_S16_34R_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_S16_34R(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_U8_34L_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_U8_34L_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_U8_34L_D1(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_U8_34L(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_S16_34L_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_S16_34L_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - -void mlib_v_ImageChannelInsert_S16_34L_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize); - -void mlib_v_ImageChannelInsert_S16_34L(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize); - #ifdef __cplusplus } #endif /* __cplusplus */ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c Fri May 13 11:31:05 2016 +0300 @@ -27,34 +27,9 @@ /* * FUNCTIONS - * mlib_v_ImageChannelInsert_U8 - * mlib_v_ImageChannelInsert_U8_12_A8D1X8 - * mlib_v_ImageChannelInsert_U8_12_A8D2X8 * mlib_v_ImageChannelInsert_U8_12_D1 - * mlib_v_ImageChannelInsert_U8_12 - * mlib_v_ImageChannelInsert_U8_13_A8D1X8 - * mlib_v_ImageChannelInsert_U8_13_A8D2X8 * mlib_v_ImageChannelInsert_U8_13_D1 - * mlib_v_ImageChannelInsert_U8_13 - * mlib_v_ImageChannelInsert_U8_14_A8D1X8 - * mlib_v_ImageChannelInsert_U8_14_A8D2X8 * mlib_v_ImageChannelInsert_U8_14_D1 - * mlib_v_ImageChannelInsert_U8_14 - * mlib_v_ImageChannelInsert_S16 - * mlib_v_ImageChannelInsert_S16_12_A8D1X4 - * mlib_v_ImageChannelInsert_S16_12_A8D2X4 - * mlib_v_ImageChannelInsert_S16_12_D1 - * mlib_v_ImageChannelInsert_S16_12 - * mlib_v_ImageChannelInsert_S16_13_A8D1X4 - * mlib_v_ImageChannelInsert_S16_13_A8D2X4 - * mlib_v_ImageChannelInsert_S16_13_D1 - * mlib_v_ImageChannelInsert_S16_13 - * mlib_v_ImageChannelInsert_S16_14_A8D1X4 - * mlib_v_ImageChannelInsert_S16_14_A8D2X4 - * mlib_v_ImageChannelInsert_S16_14_D1 - * mlib_v_ImageChannelInsert_S16_14 - * mlib_v_ImageChannelInsert_S32 - * mlib_v_ImageChannelInsert_D64 * * ARGUMENT * src pointer to source image data @@ -80,424 +55,12 @@ #include "mlib_v_ImageChannelInsert.h" /***************************************************************/ -/* general channel insertion: slower due to the inner loop */ -void mlib_v_ImageChannelInsert_U8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask) -{ - mlib_u8 *sp; /* pointer for pixel in src */ - mlib_u8 *sl; /* pointer for line in src */ - mlib_u8 *dp; /* pointer for pixel in dst */ - mlib_u8 *dl; /* pointer for line in dst */ - mlib_s32 i, j, k; /* indices for x, y, channel */ - mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 }; - mlib_s32 inc0, inc1, inc2; - mlib_u8 s0, s1, s2; - - deltac[channels] = 1; - for (i = (channeld - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channels] = channeld; - for (i = 1; i < channels; i++) { - deltac[channels] -= deltac[i]; - } - - sp = sl = (void *)src; - dp = dl = dst + deltac[0]; - - if (channels == 2) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - dp[0] = s0; - dp[inc0] = s1; - dp += inc1; - sp += 2; - } - - sp = sl += slb; - dp = dl += dlb; - } - } - else if (channels == 3) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - s2 = sp[2]; - dp[0] = s0; - dp[inc0] = s1; - dp[inc1] = s2; - dp += inc2; - sp += 3; - } - - sp = sl += slb; - dp = dl += dlb; - } - } -} - -/***************************************************************/ -/* general channel insertion: slower due to the inner loop */ -void mlib_v_ImageChannelInsert_D64(const mlib_d64 *src, - mlib_s32 slb, - mlib_d64 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask) -{ - mlib_d64 *sp; /* pointer for pixel in src */ - mlib_d64 *sl; /* pointer for line in src */ - mlib_d64 *dp; /* pointer for pixel in dst */ - mlib_d64 *dl; /* pointer for line in dst */ - mlib_s32 i, j, k; /* indices for x, y, channel */ - mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 }; - mlib_s32 inc0, inc1, inc2; - mlib_d64 s0, s1, s2; - - deltac[channels] = 1; - for (i = (channeld - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channels] = channeld; - for (i = 1; i < channels; i++) { - deltac[channels] -= deltac[i]; - } - - sp = sl = (void *)src; - dp = dl = dst + deltac[0]; - - if (channels == 1) { - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - dp[0] = s0; - dp += channeld; - sp++; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (channels == 2) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - dp[0] = s0; - dp[inc0] = s1; - dp += inc1; - sp += 2; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } - else if (channels == 3) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - s2 = sp[2]; - dp[0] = s0; - dp[inc0] = s1; - dp[inc1] = s2; - dp += inc2; - sp += 3; - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ -/* general channel insertion: slower due to the inner loop */ -void mlib_v_ImageChannelInsert_S16(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask) -{ - mlib_s16 *sp; /* pointer for pixel in src */ - mlib_s16 *sl; /* pointer for line in src */ - mlib_s16 *dp; /* pointer for pixel in dst */ - mlib_s16 *dl; /* pointer for line in dst */ - mlib_s32 i, j, k; /* indices for x, y, channel */ - mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 }; - mlib_s32 inc0, inc1, inc2; - mlib_s16 s0, s1, s2; - - deltac[channels] = 1; - for (i = (channeld - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channels] = channeld; - for (i = 1; i < channels; i++) { - deltac[channels] -= deltac[i]; - } - - sp = sl = (void *)src; - dp = dl = dst + deltac[0]; - - if (channels == 2) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - dp[0] = s0; - dp[inc0] = s1; - dp += inc1; - sp += 2; - } - - sp = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } - } - else if (channels == 3) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - s2 = sp[2]; - dp[0] = s0; - dp[inc0] = s1; - dp[inc1] = s2; - dp += inc2; - sp += 3; - } - - sp = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ -/* general channel insertion: slower due to the inner loop */ - -void mlib_v_ImageChannelInsert_S32(const mlib_s32 *src, - mlib_s32 slb, - mlib_s32 *dst, - mlib_s32 dlb, - mlib_s32 channels, - mlib_s32 channeld, - mlib_s32 width, - mlib_s32 height, - mlib_s32 cmask) -{ - mlib_s32 *sp; /* pointer for pixel in src */ - mlib_s32 *sl; /* pointer for line in src */ - mlib_s32 *dp; /* pointer for pixel in dst */ - mlib_s32 *dl; /* pointer for line in dst */ - mlib_s32 i, j, k; /* indices for x, y, channel */ - mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 }; - mlib_s32 inc0, inc1, inc2; - mlib_s32 s0, s1, s2; - - deltac[channels] = 1; - for (i = (channeld - 1), k = 0; i >= 0; i--) { - if ((cmask & (1 << i)) == 0) - deltac[k]++; - else - k++; - } - - deltac[channels] = channeld; - for (i = 1; i < channels; i++) { - deltac[channels] -= deltac[i]; - } - - sp = sl = (void *)src; - dp = dl = dst + deltac[0]; - - if (channels == 1) { - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - dp[0] = s0; - dp += channeld; - sp++; - } - - sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb); - } - } - else if (channels == 2) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - dp[0] = s0; - dp[inc0] = s1; - dp += inc1; - sp += 2; - } - - sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb); - } - } - else if (channels == 3) { - inc0 = deltac[1]; - inc1 = deltac[2] + inc0; - inc2 = deltac[3] + inc1; - for (j = 0; j < height; j++) { -#pragma pipeloop(0) - for (i = 0; i < width; i++) { - s0 = sp[0]; - s1 = sp[1]; - s2 = sp[2]; - dp[0] = s0; - dp[inc0] = s1; - dp[inc1] = s2; - dp += inc2; - sp += 3; - } - - sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb); - } - } -} - -/***************************************************************/ #define INSERT_U8_12(sd0, dd0, dd1) /* channel duplicate */ \ dd0 = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0)); \ dd1 = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0)) /***************************************************************/ /* insert one channel to a 2-channel image. - * both source and destination image data are 8-byte aligned. - * dsize is multiple of 8. - */ - -void mlib_v_ImageChannelInsert_U8_12_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0; - mlib_d64 dd0, dd1; - mlib_s32 bmask; - mlib_s32 i; - - bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6); - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - INSERT_U8_12(sd0, dd0, dd1); - vis_pst_8(dd0, dp++, bmask); - vis_pst_8(dd1, dp++, bmask); - } -} - -/***************************************************************/ -/* insert one channel to a 2-channel image. - * both source and destination image data are 8-byte aligned. - * xsize is multiple of 8. - */ - -void mlib_v_ImageChannelInsert_U8_12_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0; - mlib_d64 dd0, dd1; - mlib_s32 bmask; - mlib_s32 i, j; - - bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6); - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - INSERT_U8_12(sd0, dd0, dd1); - vis_pst_8(dd0, dp++, bmask); - vis_pst_8(dd1, dp++, bmask); - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -/* insert one channel to a 2-channel image. */ void mlib_v_ImageChannelInsert_U8_12_D1(const mlib_u8 *src, @@ -724,56 +287,6 @@ } /***************************************************************/ -/* insert one channel to a 2-channel image. - */ - -void mlib_v_ImageChannelInsert_U8_12(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_U8_12_D1(sa, da, xsize, cmask); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ -#define INSERT_U8_13(sd0, dd0, dd1, dd2) \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd0)); \ - sdb = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sda)); \ - sdc = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb)); \ - sdd = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb)); \ - dd0 = vis_fpmerge(vis_read_hi(sdc), vis_read_hi(sdd)); \ - sde = vis_fpmerge(vis_read_lo(sdc), vis_read_lo(sdd)); \ - dd1 = vis_freg_pair(vis_read_lo(dd0), vis_read_hi(sde)); \ - dd2 = vis_freg_pair(vis_read_lo(sde), vis_read_lo(sde)) - -/***************************************************************/ -#define LOAD_INSERT_STORE_U8_A8(channeld) \ - sd = *sp++; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld - -/***************************************************************/ #define LOAD_INSERT_STORE_U8(channeld) \ vis_alignaddr((void *)0, off); \ sd0 = sd1; \ @@ -790,58 +303,6 @@ vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld /***************************************************************/ -void mlib_v_ImageChannelInsert_U8_13_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_u8 *da; - mlib_d64 *sp; - mlib_d64 sd; - mlib_s32 i; - - vis_alignaddr((void *)0, 1); /* for 1-byte left shift */ - - sp = (mlib_d64 *) src; - da = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */ - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - LOAD_INSERT_STORE_U8_A8(3); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_U8_13_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *da, *dl; - mlib_d64 *sp, *sl; - mlib_d64 sd; - mlib_s32 i, j; - - vis_alignaddr((void *)0, 1); - - sp = sl = (mlib_d64 *) src; - da = dl = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */ - - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - LOAD_INSERT_STORE_U8_A8(3); - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ void mlib_v_ImageChannelInsert_U8_13_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, @@ -908,30 +369,6 @@ } /***************************************************************/ -void mlib_v_ImageChannelInsert_U8_13(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_U8_13_D1(sa, da, xsize, cmask); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ #define INSERT_U8_14(sd0, dd0, dd1, dd2, dd3) \ sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0)); \ sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0)); \ @@ -941,73 +378,6 @@ dd3 = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb)) /***************************************************************/ -void mlib_v_ImageChannelInsert_U8_14_A8D1X8(const mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0; - mlib_d64 sda, sdb; - mlib_d64 dd0, dd1, dd2, dd3; - mlib_s32 bmask; - mlib_s32 i; - - bmask = cmask | (cmask << 4); - - sp = (mlib_d64 *) src; - dp = (mlib_d64 *) dst; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - sd0 = *sp++; - INSERT_U8_14(sd0, dd0, dd1, dd2, dd3); - vis_pst_8(dd0, dp++, bmask); - vis_pst_8(dd1, dp++, bmask); - vis_pst_8(dd2, dp++, bmask); - vis_pst_8(dd3, dp++, bmask); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_U8_14_A8D2X8(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_d64 *sp, *dp; - mlib_d64 *sl, *dl; - mlib_d64 sd0; - mlib_d64 sda, sdb; - mlib_d64 dd0, dd1, dd2, dd3; - mlib_s32 bmask; - mlib_s32 i, j; - - bmask = cmask | (cmask << 4); - - sp = sl = (mlib_d64 *) src; - dp = dl = (mlib_d64 *) dst; - - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - sd0 = *sp++; - INSERT_U8_14(sd0, dd0, dd1, dd2, dd3); - vis_pst_8(dd0, dp++, bmask); - vis_pst_8(dd1, dp++, bmask); - vis_pst_8(dd2, dp++, bmask); - vis_pst_8(dd3, dp++, bmask); - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ void mlib_v_ImageChannelInsert_U8_14_D1(const mlib_u8 *src, mlib_u8 *dst, mlib_s32 dsize, @@ -1188,445 +558,5 @@ } } -/***************************************************************/ -void mlib_v_ImageChannelInsert_U8_14(const mlib_u8 *src, - mlib_s32 slb, - mlib_u8 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_U8_14_D1(sa, da, xsize, cmask); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ -#define LOAD_INSERT_STORE_S16_1X_A8(channeld) \ - sd = *sp++; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld - -/***************************************************************/ -#define LOAD_INSERT_STORE_S16_1X(channeld) \ - vis_alignaddr((void *)0, off); \ - sd0 = sd1; \ - sd1 = *sp++; \ - sd = vis_faligndata(sd0, sd1); \ - vis_alignaddr((void *)0, 2); \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \ - vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_12_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *da; - mlib_d64 *sp; - mlib_d64 sd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - da = dst + (2 - cmask); /* 2,1 -> 0,1 */ - - vis_alignaddr((void *)0, 2); - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X_A8(2); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_12_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *da, *dl; - mlib_d64 *sp, *sl; - mlib_d64 sd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - da = dl = dst + (2 - cmask); /* 2,1 -> 0,1 */ - - vis_alignaddr((void *)0, 2); - - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X_A8(2); - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_12_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *dend; /* end point in destination */ - mlib_d64 *sp; /* 8-byte aligned start points in src */ - mlib_d64 sd0, sd1, sd; /* 8-byte registers for source data */ - mlib_s32 off; /* offset of address alignment in src */ - mlib_s32 i; - - sa = (void *)src; - da = dst + (2 - cmask); /* 2,1 -> 0,1 */ - - /* prepare the src address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - off = (mlib_addr) sa & 7; - - dend = da + dsize * 2 - 1; - - sd1 = *sp++; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X(2); - } - - /* right end handling */ - if ((mlib_addr) da <= (mlib_addr) dend) { - - vis_alignaddr((void *)0, off); - sd0 = sd1; - sd1 = *sp++; - sd = vis_faligndata(sd0, sd1); - - vis_alignaddr((void *)0, 2); - vis_st_u16(sd = vis_faligndata(sd, sd), da); - da += 2; - if ((mlib_addr) da <= (mlib_addr) dend) { - vis_st_u16(sd = vis_faligndata(sd, sd), da); - da += 2; - if ((mlib_addr) da <= (mlib_addr) dend) { - vis_st_u16(sd = vis_faligndata(sd, sd), da); - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_12(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_S16_12_D1(sa, da, xsize, cmask); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_13_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *da; - mlib_d64 *sp; - mlib_d64 sd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - da = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */ - - vis_alignaddr((void *)0, 2); - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X_A8(3); - } -} /***************************************************************/ -void mlib_v_ImageChannelInsert_S16_13_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *da, *dl; - mlib_d64 *sp, *sl; - mlib_d64 sd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - da = dl = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */ - - vis_alignaddr((void *)0, 2); - - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X_A8(3); - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_13_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *dend; /* end point in destination */ - mlib_d64 *sp; /* 8-byte aligned start points in src */ - mlib_d64 sd0, sd1, sd; /* 8-byte registers for source data */ - mlib_s32 off; /* offset of address alignment in src */ - mlib_s32 i; - - sa = (void *)src; - da = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */ - - /* prepare the src address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - off = (mlib_addr) sa & 7; - - dend = da + dsize * 3 - 1; - - sd1 = *sp++; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X(3); - } - - /* right end handling */ - if ((mlib_addr) da <= (mlib_addr) dend) { - - vis_alignaddr((void *)0, off); - sd0 = sd1; - sd1 = *sp++; - sd = vis_faligndata(sd0, sd1); - - vis_alignaddr((void *)0, 2); - vis_st_u16(sd = vis_faligndata(sd, sd), da); - da += 3; - if ((mlib_addr) da <= (mlib_addr) dend) { - vis_st_u16(sd = vis_faligndata(sd, sd), da); - da += 3; - if ((mlib_addr) da <= (mlib_addr) dend) { - vis_st_u16(sd = vis_faligndata(sd, sd), da); - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_13(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_S16_13_D1(sa, da, xsize, cmask); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -#define INSERT_S16_14(sp, dp, bmask) /* channel duplicate */ \ - /* obsolete: it is slower than the vis_st_u16() version*/ \ - sd0 = *sp++; \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0)); \ - sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0)); \ - sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sda)); \ - sdd = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sda)); \ - sde = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb)); \ - sdf = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb)); \ - dd0 = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)); \ - dd1 = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sdd)); \ - dd2 = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sde)); \ - dd3 = vis_fpmerge(vis_read_hi(sdf), vis_read_lo(sdf)); \ - vis_pst_16(dd0, dp++, bmask); \ - vis_pst_16(dd1, dp++, bmask); \ - vis_pst_16(dd2, dp++, bmask); \ - vis_pst_16(dd3, dp++, bmask) - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_14_A8D1X4(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *da; - mlib_d64 *sp; - mlib_d64 sd; - mlib_s32 i; - - sp = (mlib_d64 *) src; - da = dst + (6 / cmask + 1) / 2; /* 8,4,2,1 -> 0,1,2,3 */ - - vis_alignaddr((void *)0, 2); - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X_A8(4); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_14_A8D2X4(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *da, *dl; - mlib_d64 *sp, *sl; - mlib_d64 sd; - mlib_s32 i, j; - - sp = sl = (mlib_d64 *) src; - da = dl = dst + (6 / cmask + 1) / 2; /* 8,4,2,1 -> 0,1,2,3 */ - - vis_alignaddr((void *)0, 2); - - for (j = 0; j < ysize; j++) { -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X_A8(4); - } - - sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_14_D1(const mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *dend; /* end point in destination */ - mlib_d64 *sp; /* 8-byte aligned start points in src */ - mlib_d64 sd0, sd1, sd; /* 8-byte registers for source data */ - mlib_s32 off; /* offset of address alignment in src */ - mlib_s32 i; - - sa = (void *)src; - da = dst + (6 / cmask + 1) / 2; /* 8,4,2,1 -> 0,1,2,3 */ - - /* prepare the src address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - off = (mlib_addr) sa & 7; - - dend = da + dsize * 4 - 1; - - sd1 = *sp++; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_1X(4); - } - - /* right end handling */ - if ((mlib_addr) da <= (mlib_addr) dend) { - - vis_alignaddr((void *)0, off); - sd0 = sd1; - sd1 = *sp++; - sd = vis_faligndata(sd0, sd1); - - vis_alignaddr((void *)0, 2); - vis_st_u16(sd = vis_faligndata(sd, sd), da); - da += 4; - if ((mlib_addr) da <= (mlib_addr) dend) { - vis_st_u16(sd = vis_faligndata(sd, sd), da); - da += 4; - if ((mlib_addr) da <= (mlib_addr) dend) { - vis_st_u16(sd = vis_faligndata(sd, sd), da); - } - } - } -} - -/***************************************************************/ -void mlib_v_ImageChannelInsert_S16_14(const mlib_s16 *src, - mlib_s32 slb, - mlib_s16 *dst, - mlib_s32 dlb, - mlib_s32 xsize, - mlib_s32 ysize, - mlib_s32 cmask) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - mlib_s32 j; - - sa = sl = (void *)src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_S16_14_D1(sa, da, xsize, cmask); - sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); - da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); - } -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_34.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_34.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1225 +0,0 @@ -/* - * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - -/* - * FILENAME: mlib_v_ImageChannelInsert_34.c - * - * FUNCTIONS - * mlib_v_ImageChannelInsert_U8_34R_A8D1X8 - * mlib_v_ImageChannelInsert_U8_34R_A8D2X8 - * mlib_v_ImageChannelInsert_U8_34R_D1 - * mlib_v_ImageChannelInsert_U8_34R - * mlib_v_ImageChannelInsert_S16_34R_A8D1X4 - * mlib_v_ImageChannelInsert_S16_34R_A8D2X4 - * mlib_v_ImageChannelInsert_S16_34R_D1 - * mlib_v_ImageChannelInsert_S16_34R - * mlib_v_ImageChannelInsert_U8_34L_A8D1X8 - * mlib_v_ImageChannelInsert_U8_34L_A8D2X8 - * mlib_v_ImageChannelInsert_U8_34L_D1 - * mlib_v_ImageChannelInsert_U8_34L - * mlib_v_ImageChannelInsert_S16_34L_A8D1X4 - * mlib_v_ImageChannelInsert_S16_34L_A8D2X4 - * mlib_v_ImageChannelInsert_S16_34L_D1 - * mlib_v_ImageChannelInsert_S16_34L - * - * SYNOPSIS - * - * ARGUMENT - * src pointer to source image data - * dst pointer to destination image data - * slb source image line stride in bytes - * dlb destination image line stride in bytes - * dsize image data size in pixels - * xsize image width in pixels - * ysize image height in lines - * cmask channel mask - * - * DESCRIPTION - * Insert a 3-channel image into the right or left 3 channels of - * a 4-channel image low level functions. - * - * BGR => ABGR (34R), or RGB => RGBA (34L) - * - * NOTE - * These functions are separated from mlib_v_ImageChannelInsert.c - * for loop unrolling and structure clarity. - */ - -#include -#include "vis_proto.h" -#include "mlib_image.h" - -/***************************************************************/ -#define INSERT_U8_34R \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \ - sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \ - sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2)); \ - sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb)); \ - sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc)); \ - sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc)); \ - sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde)); \ - sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf)); \ - sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf)); \ - sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi)); \ - sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi)); \ - sdl = vis_fpmerge(vis_read_hi(sdh), vis_read_hi(sdh)); \ - sdm = vis_fpmerge(vis_read_lo(sdh), vis_read_lo(sdh)); \ - dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj)); \ - dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj)); \ - dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk)); \ - dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk)); - -/***************************************************************/ -#define LOAD_INSERT_STORE_U8_34R_A8 \ - sd0 = *sp++; /* b0g0r0b1g1r1b2g2 */ \ - sd1 = *sp++; /* r2b3g3r3b4g4r4b5 */ \ - sd2 = *sp++; /* g5r5b6g6r6b7g7r7 */ \ - INSERT_U8_34R \ - vis_pst_8(dd0, dp++, bmask); \ - vis_pst_8(dd1, dp++, bmask); \ - vis_pst_8(dd2, dp++, bmask); \ - vis_pst_8(dd3, dp++, bmask); - -/***************************************************************/ -#define LOAD_INSERT_U8_34R \ - vis_alignaddr((void *)soff, 0); \ - s0 = s3; \ - s1 = sp[1]; \ - s2 = sp[2]; \ - s3 = sp[3]; \ - sd0 = vis_faligndata(s0, s1); \ - sd1 = vis_faligndata(s1, s2); \ - sd2 = vis_faligndata(s2, s3); \ - sp += 3; \ - dd4 = dd3; \ - INSERT_U8_34R - -/***************************************************************/ -/* - * Both source and destination image data are 1-d vectors and - * 8-byte aligned. And dsize is multiple of 8. - */ - -void -mlib_v_ImageChannelInsert_U8_34R_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 sda, sdb, sdc, sdd; /* intermediate variables */ - mlib_d64 sde, sdf, sdg, sdh; - mlib_d64 sdi, sdj, sdk, sdl; - mlib_d64 sdm; - int bmask = 0x77; - int i; - - sp = (mlib_d64 *)src; - dp = (mlib_d64 *)dst; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - LOAD_INSERT_STORE_U8_34R_A8; - } -} - -/***************************************************************/ -/* - * Either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. And slb and dlb are multiple of 8. - * The xsize is multiple of 8. - */ - -void -mlib_v_ImageChannelInsert_U8_34R_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 sda, sdb, sdc, sdd; /* intermediate variables */ - mlib_d64 sde, sdf, sdg, sdh; - mlib_d64 sdi, sdj, sdk, sdl; - mlib_d64 sdm; - int bmask = 0x77; - int i, j; /* indices for x, y */ - - sp = sl = (mlib_d64 *)src; - dp = dl = (mlib_d64 *)dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 8-byte column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - LOAD_INSERT_STORE_U8_34R_A8; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } -} - -/***************************************************************/ -/* - * either source or destination data are not 8-byte aligned. - */ - -void -mlib_v_ImageChannelInsert_U8_34R_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_u8 *sa, *da; - mlib_u8 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 s0, s1, s2, s3; /* 8-byte source raw data */ - mlib_d64 sd0, sd1, sd2; /* 8-byte source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 dd4; /* the last datum of the last step */ - mlib_d64 sda, sdb, sdc, sdd; /* intermediate variables */ - mlib_d64 sde, sdf, sdg, sdh; - mlib_d64 sdi, sdj, sdk, sdl; - mlib_d64 sdm; - int soff; /* offset of address in src */ - int doff; /* offset of address in dst */ - int emask; /* edge mask */ - int bmask; /* channel mask */ - int i, n; - - sa = src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *)((mlib_addr) da & (~7)); - dend = da + dsize * 4 - 1; - dend2 = dend - 31; - doff = ((mlib_addr) da & 7); - - /* set band mask for vis_pst_8 to store the bytes needed */ - bmask = 0xff & (0x7777 >> doff) ; - - /* generate edge mask for the start point */ - emask = vis_edge8(da, dend); - - /* load 24 bytes, convert to 32 bytes */ - s3 = sp[0]; /* initial value */ - LOAD_INSERT_U8_34R; - - if (doff == 0) { /* dst is 8-byte aligned */ - - if (dsize >= 8 ) { - vis_pst_8(dd0, dp++, emask & bmask); - vis_pst_8(dd1, dp++, bmask); - vis_pst_8(dd2, dp++, bmask); - vis_pst_8(dd3, dp++, bmask); - } - else { /* for very small size */ - vis_pst_8(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd2, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd3, dp++, emask & bmask); - } - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_U8_34R; - vis_pst_8(dd0, dp++, bmask); - vis_pst_8(dd1, dp++, bmask); - vis_pst_8(dd2, dp++, bmask); - vis_pst_8(dd3, dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_U8_34R; - emask = vis_edge8(dp, dend); - vis_pst_8(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd2, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd3, dp++, emask & bmask); - } - } - } - } - } - else { /* (doff != 0) */ - vis_alignaddr((void *)0, -doff); - - if (dsize >= 8 ) { - vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask); - } - else { /* for very small size */ - vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd3, dd3), dp++, emask & bmask); - } - } - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_U8_34R; - vis_alignaddr((void *)0, -doff); - vis_pst_8(vis_faligndata(dd4, dd0), dp++, bmask); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_U8_34R; - vis_alignaddr((void *)0, -doff); - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd4, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask); - } - } - } - } - } -} - -/***************************************************************/ - -void -mlib_v_ImageChannelInsert_U8_34R(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - int j; - - sa = sl = src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_U8_34R_D1(sa, da, xsize); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ -#define INSERT_S16_34R \ - vis_alignaddr((void *)0, 6); \ - dd0 = vis_faligndata(sd0, sd0); /* b1b0g0r0 */ \ - vis_alignaddr((void *)0, 4); \ - dd1 = vis_faligndata(sd0, sd1); /* r0b1gbr1 */ \ - vis_alignaddr((void *)0, 2); \ - dd2 = vis_faligndata(sd1, sd2); /* r1b2g2r2 */ \ - dd3 = sd2; /* r2b3g3r3 */ - -/***************************************************************/ -#define LOAD_INSERT_STORE_S16_34R_A8 \ - sd0 = *sp++; /* b0g0r0b1 */ \ - sd1 = *sp++; /* g1r1b2g2 */ \ - sd2 = *sp++; /* r2b3g3r3 */ \ - INSERT_S16_34R \ - vis_pst_16(dd0, dp++, bmask); \ - vis_pst_16(dd1, dp++, bmask); \ - vis_pst_16(dd2, dp++, bmask); \ - vis_pst_16(dd3, dp++, bmask); - -/***************************************************************/ -#define LOAD_INSERT_S16_34R \ - vis_alignaddr((void *)soff, 0); \ - s0 = s3; \ - s1 = sp[1]; \ - s2 = sp[2]; \ - s3 = sp[3]; \ - sd0 = vis_faligndata(s0, s1); \ - sd1 = vis_faligndata(s1, s2); \ - sd2 = vis_faligndata(s2, s3); \ - sp += 3; \ - dd4 = dd3; \ - INSERT_S16_34R - -/***************************************************************/ -/* - * both source and destination image data are 1-d vectors and - * 8-byte aligned. dsize is multiple of 4. - */ - -void -mlib_v_ImageChannelInsert_S16_34R_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - int bmask = 0x07; /* channel mask */ - int i; - - sp = (mlib_d64 *)src; - dp = (mlib_d64 *)dst; - - /* set GSR.offset for vis_faligndata() */ - /* vis_alignaddr((void *)0, 2); */ /* only for _old */ - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_34R_A8; - } -} - -/***************************************************************/ -/* - * either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. xsize is multiple of 4. - */ - -void -mlib_v_ImageChannelInsert_S16_34R_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - int bmask = 0x07; /* channel mask */ - int i, j; /* indices for x, y */ - - sp = sl = (mlib_d64 *)src; - dp = dl = (mlib_d64 *)dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 4-pixel column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_INSERT_STORE_S16_34R_A8; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } -} - -/***************************************************************/ -/* - * either source or destination data are not 8-byte aligned. - */ - -void -mlib_v_ImageChannelInsert_S16_34R_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_s16 *sa, *da; /* pointer for pixel */ - mlib_s16 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 s0, s1, s2, s3; /* 8-byte source raw data */ - mlib_d64 sd0, sd1, sd2; /* 8-byte source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 dd4; /* the last datum of the last step */ - int soff; /* offset of address in src */ - int doff; /* offset of address in dst */ - int emask; /* edge mask */ - int bmask; /* channel mask */ - int i, n; - - sa = src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *)((mlib_addr) da & (~7)); - dend = da + dsize * 4 - 1; - dend2 = dend - 15; - doff = ((mlib_addr) da & 7); - - /* set channel mask for vis_pst_16 to store the words needed */ - bmask = 0xff & (0x77 >> (doff / 2)); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 24 byte, convert, store 32 bytes */ - s3 = sp[0]; /* initial value */ - LOAD_INSERT_S16_34R; - - if (doff == 0) { /* dst is 8-byte aligned */ - - if (dsize >= 4 ) { - vis_pst_16(dd0, dp++, emask & bmask); - vis_pst_16(dd1, dp++, bmask); - vis_pst_16(dd2, dp++, bmask); - vis_pst_16(dd3, dp++, bmask); - } - else { /* for very small size */ - vis_pst_16(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd2, dp++, emask & bmask); - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_S16_34R; - vis_pst_16(dd0, dp++, bmask); - vis_pst_16(dd1, dp++, bmask); - vis_pst_16(dd2, dp++, bmask); - vis_pst_16(dd3, dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_S16_34R; - emask = vis_edge16(dp, dend); - vis_pst_16(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd2, dp++, emask & bmask); - } - } - } - } - else { /* (doff != 0) */ - vis_alignaddr((void *)0, -doff); - - if (dsize >= 4 ) { - vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask); - } - else { /* for very small size */ - vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask); - } - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_S16_34R; - vis_alignaddr((void *)0, -doff); - vis_pst_16(vis_faligndata(dd4, dd0), dp++, bmask); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_S16_34R; - vis_alignaddr((void *)0, -doff); - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd4, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask); - } - } - } - } - } -} - -/***************************************************************/ - -void -mlib_v_ImageChannelInsert_S16_34R(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - int j; - - sa = sl = src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_S16_34R_D1(sa, da, xsize); - sa = sl = (mlib_s16 *)((mlib_u8 *)sl + slb); - da = dl = (mlib_s16 *)((mlib_u8 *)dl + dlb); - } -} - -/***************************************************************/ -#define INSERT_U8_34L \ - sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \ - sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \ - sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2)); \ - sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb)); \ - sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc)); \ - sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc)); \ - sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde)); \ - sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf)); \ - sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf)); \ - sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi)); \ - sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi)); \ - sdl = vis_fpmerge(vis_read_hi(sdh), vis_read_hi(sdh)); \ - sdm = vis_fpmerge(vis_read_lo(sdh), vis_read_lo(sdh)); \ - dd0 = vis_fpmerge(vis_read_hi(sdj), vis_read_hi(sdl)); \ - dd1 = vis_fpmerge(vis_read_lo(sdj), vis_read_lo(sdl)); \ - dd2 = vis_fpmerge(vis_read_hi(sdk), vis_read_hi(sdm)); \ - dd3 = vis_fpmerge(vis_read_lo(sdk), vis_read_lo(sdm)); - -/***************************************************************/ -#define LOAD_INSERT_STORE_U8_34L_A8 \ - sd0 = *sp++; /* b0g0r0b1g1r1b2g2 */ \ - sd1 = *sp++; /* r2b3g3r3b4g4r4b5 */ \ - sd2 = *sp++; /* g5r5b6g6r6b7g7r7 */ \ - INSERT_U8_34L \ - vis_pst_8(dd0, dp++, bmask); \ - vis_pst_8(dd1, dp++, bmask); \ - vis_pst_8(dd2, dp++, bmask); \ - vis_pst_8(dd3, dp++, bmask); - -/***************************************************************/ -#define LOAD_INSERT_U8_34L \ - vis_alignaddr((void *)soff, 0); \ - s0 = s3; \ - s1 = sp[1]; \ - s2 = sp[2]; \ - s3 = sp[3]; \ - sd0 = vis_faligndata(s0, s1); \ - sd1 = vis_faligndata(s1, s2); \ - sd2 = vis_faligndata(s2, s3); \ - sp += 3; \ - dd4 = dd3; \ - INSERT_U8_34L - -/***************************************************************/ -/* - * Both source and destination image data are 1-d vectors and - * 8-byte aligned. And dsize is multiple of 8. - */ -void -mlib_v_ImageChannelInsert_U8_34L_A8D1X8(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 sda, sdb, sdc, sdd; /* intermediate variables */ - mlib_d64 sde, sdf, sdg, sdh; - mlib_d64 sdi, sdj, sdk, sdl; - mlib_d64 sdm; - int bmask = 0xee; - int i; - - sp = (mlib_d64 *)src; - dp = (mlib_d64 *)dst; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 8; i++) { - LOAD_INSERT_STORE_U8_34L_A8; - } -} - -/***************************************************************/ -/* - * Either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. And slb and dlb are multiple of 8. - * The xsize is multiple of 8. - */ -void -mlib_v_ImageChannelInsert_U8_34L_A8D2X8(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 sda, sdb, sdc, sdd; /* intermediate variables */ - mlib_d64 sde, sdf, sdg, sdh; - mlib_d64 sdi, sdj, sdk, sdl; - mlib_d64 sdm; - int bmask = 0xee; - int i, j; /* indices for x, y */ - - sp = sl = (mlib_d64 *)src; - dp = dl = (mlib_d64 *)dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 8-byte column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - LOAD_INSERT_STORE_U8_34L_A8; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } -} - -/***************************************************************/ -/* - * either source or destination data are not 8-byte aligned. - */ -void -mlib_v_ImageChannelInsert_U8_34L_D1(mlib_u8 *src, - mlib_u8 *dst, - mlib_s32 dsize) -{ - mlib_u8 *sa, *da; - mlib_u8 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 s0, s1, s2, s3; /* 8-byte source raw data */ - mlib_d64 sd0, sd1, sd2; /* 8-byte source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 dd4; /* the last datum of the last step */ - mlib_d64 sda, sdb, sdc, sdd; /* intermediate variables */ - mlib_d64 sde, sdf, sdg, sdh; - mlib_d64 sdi, sdj, sdk, sdl; - mlib_d64 sdm; - int soff; /* offset of address in src */ - int doff; /* offset of address in dst */ - int emask; /* edge mask */ - int bmask; /* channel mask */ - int i, n; - - sa = src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *)((mlib_addr) da & (~7)); - dend = da + dsize * 4 - 1; - dend2 = dend - 31; - doff = ((mlib_addr) da & 7); - - /* set band mask for vis_pst_8 to store the bytes needed */ - bmask = 0xff & (0xeeee >> doff) ; - - /* generate edge mask for the start point */ - emask = vis_edge8(da, dend); - - /* load 24 bytes, convert to 32 bytes */ - s3 = sp[0]; /* initial value */ - LOAD_INSERT_U8_34L; - - if (doff == 0) { /* dst is 8-byte aligned */ - - if (dsize >= 8 ) { - vis_pst_8(dd0, dp++, emask & bmask); - vis_pst_8(dd1, dp++, bmask); - vis_pst_8(dd2, dp++, bmask); - vis_pst_8(dd3, dp++, bmask); - } - else { /* for very small size */ - vis_pst_8(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd2, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd3, dp++, emask & bmask); - } - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_U8_34L; - vis_pst_8(dd0, dp++, bmask); - vis_pst_8(dd1, dp++, bmask); - vis_pst_8(dd2, dp++, bmask); - vis_pst_8(dd3, dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_U8_34L; - emask = vis_edge8(dp, dend); - vis_pst_8(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd2, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(dd3, dp++, emask & bmask); - } - } - } - } - } - else { /* (doff != 0) */ - vis_alignaddr((void *)0, -doff); - - if (dsize >= 8 ) { - vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask); - } - else { /* for very small size */ - vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd3, dd3), dp++, emask & bmask); - } - } - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_U8_34L; - vis_alignaddr((void *)0, -doff); - vis_pst_8(vis_faligndata(dd4, dd0), dp++, bmask); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_U8_34L; - vis_alignaddr((void *)0, -doff); - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd4, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge8(dp, dend); - vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask); - } - } - } - } - } -} - -/***************************************************************/ -void -mlib_v_ImageChannelInsert_U8_34L(mlib_u8 *src, mlib_s32 slb, - mlib_u8 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_u8 *sa, *da; - mlib_u8 *sl, *dl; - int j; - - sa = sl = src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_U8_34L_D1(sa, da, xsize); - sa = sl += slb; - da = dl += dlb; - } -} - -/***************************************************************/ -#define INSERT_S16_34L \ - dd0 = sd0; /* b0g0r0b1 */ \ - vis_alignaddr((void *)0, 6); \ - dd1 = vis_faligndata(sd0, sd1); /* b1gbr1b2 */ \ - vis_alignaddr((void *)0, 4); \ - dd2 = vis_faligndata(sd1, sd2); /* b2g2r2b3 */ \ - vis_alignaddr((void *)0, 2); \ - dd3 = vis_faligndata(sd2, sd2); /* b3g3r3r2 */ - -/***************************************************************/ -#define LOAD_INSERT_STORE_S16_34L_A8 \ - sd0 = *sp++; /* b0g0r0b1 */ \ - sd1 = *sp++; /* g1r1b2g2 */ \ - sd2 = *sp++; /* r2b3g3r3 */ \ - INSERT_S16_34L \ - vis_pst_16(dd0, dp++, bmask); \ - vis_pst_16(dd1, dp++, bmask); \ - vis_pst_16(dd2, dp++, bmask); \ - vis_pst_16(dd3, dp++, bmask); - -/***************************************************************/ -#define LOAD_INSERT_S16_34L \ - vis_alignaddr((void *)soff, 0); \ - s0 = s3; \ - s1 = sp[1]; \ - s2 = sp[2]; \ - s3 = sp[3]; \ - sd0 = vis_faligndata(s0, s1); \ - sd1 = vis_faligndata(s1, s2); \ - sd2 = vis_faligndata(s2, s3); \ - sp += 3; \ - dd4 = dd3; \ - INSERT_S16_34L - -/***************************************************************/ -/* - * both source and destination image data are 1-d vectors and - * 8-byte aligned. dsize is multiple of 4. - */ - -void -mlib_v_ImageChannelInsert_S16_34L_A8D1X4(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - int bmask = 0x0e; /* channel mask */ - int i; - - sp = (mlib_d64 *)src; - dp = (mlib_d64 *)dst; - -#pragma pipeloop(0) - for (i = 0; i < dsize / 4; i++) { - LOAD_INSERT_STORE_S16_34L_A8; - } -} - -/***************************************************************/ -/* - * either source or destination image data are not 1-d vectors, but - * they are 8-byte aligned. xsize is multiple of 4. - */ - -void -mlib_v_ImageChannelInsert_S16_34L_A8D2X4(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ - mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ - mlib_d64 sd0, sd1, sd2; /* source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - int bmask = 0x0e; /* channel mask */ - int i, j; /* indices for x, y */ - - sp = sl = (mlib_d64 *)src; - dp = dl = (mlib_d64 *)dst; - - /* row loop */ - for (j = 0; j < ysize; j++) { - /* 4-pixel column loop */ -#pragma pipeloop(0) - for (i = 0; i < xsize / 4; i++) { - LOAD_INSERT_STORE_S16_34L_A8; - } - sp = sl = (mlib_d64 *)((mlib_u8 *)sl + slb); - dp = dl = (mlib_d64 *)((mlib_u8 *)dl + dlb); - } -} - -/***************************************************************/ -/* - * either source or destination data are not 8-byte aligned. - */ - -void -mlib_v_ImageChannelInsert_S16_34L_D1(mlib_s16 *src, - mlib_s16 *dst, - mlib_s32 dsize) -{ - mlib_s16 *sa, *da; /* pointer for pixel */ - mlib_s16 *dend, *dend2; /* end points in dst */ - mlib_d64 *dp; /* 8-byte aligned start points in dst */ - mlib_d64 *sp; /* 8-byte aligned start point in src */ - mlib_d64 s0, s1, s2, s3; /* 8-byte source raw data */ - mlib_d64 sd0, sd1, sd2; /* 8-byte source data */ - mlib_d64 dd0, dd1, dd2, dd3; /* dst data */ - mlib_d64 dd4; /* the last datum of the last step */ - int soff; /* offset of address in src */ - int doff; /* offset of address in dst */ - int emask; /* edge mask */ - int bmask; /* channel mask */ - int i, n; - - sa = src; - da = dst; - - /* prepare the source address */ - sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); - soff = ((mlib_addr) sa & 7); - - /* prepare the destination addresses */ - dp = (mlib_d64 *)((mlib_addr) da & (~7)); - dend = da + dsize * 4 - 1; - dend2 = dend - 15; - doff = ((mlib_addr) da & 7); - - /* set channel mask for vis_pst_16 to store the words needed */ - bmask = 0xff & (0xee >> (doff / 2)); - - /* generate edge mask for the start point */ - emask = vis_edge16(da, dend); - - /* load 24 byte, convert, store 32 bytes */ - s3 = sp[0]; /* initial value */ - LOAD_INSERT_S16_34L; - - if (doff == 0) { /* dst is 8-byte aligned */ - - if (dsize >= 4 ) { - vis_pst_16(dd0, dp++, emask & bmask); - vis_pst_16(dd1, dp++, bmask); - vis_pst_16(dd2, dp++, bmask); - vis_pst_16(dd3, dp++, bmask); - } - else { /* for very small size */ - vis_pst_16(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd2, dp++, emask & bmask); - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_S16_34L; - vis_pst_16(dd0, dp++, bmask); - vis_pst_16(dd1, dp++, bmask); - vis_pst_16(dd2, dp++, bmask); - vis_pst_16(dd3, dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_S16_34L; - emask = vis_edge16(dp, dend); - vis_pst_16(dd0, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd1, dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(dd2, dp++, emask & bmask); - } - } - } - } - else { /* (doff != 0) */ - vis_alignaddr((void *)0, -doff); - - if (dsize >= 4 ) { - vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask); - } - else { /* for very small size */ - vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask); - } - } - } - } - - /* no edge handling is needed in the loop */ - if ((mlib_addr) dp <= (mlib_addr) dend2) { - n = ((mlib_u8 *)dend2 - (mlib_u8 *)dp) / 32 + 1; -#pragma pipeloop(0) - for (i = 0; i < n; i++) { - LOAD_INSERT_S16_34L; - vis_alignaddr((void *)0, -doff); - vis_pst_16(vis_faligndata(dd4, dd0), dp++, bmask); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, bmask); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, bmask); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, bmask); - } - } - - if ((mlib_addr) dp <= (mlib_addr) dend) { - LOAD_INSERT_S16_34L; - vis_alignaddr((void *)0, -doff); - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd4, dd0), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask & bmask); - if ((mlib_addr) dp <= (mlib_addr) dend) { - emask = vis_edge16(dp, dend); - vis_pst_16(vis_faligndata(dd2, dd3), dp++, emask & bmask); - } - } - } - } - } -} - -/***************************************************************/ - -void -mlib_v_ImageChannelInsert_S16_34L(mlib_s16 *src, mlib_s32 slb, - mlib_s16 *dst, mlib_s32 dlb, - mlib_s32 xsize, mlib_s32 ysize) -{ - mlib_s16 *sa, *da; - mlib_s16 *sl, *dl; - int j; - - sa = sl = src; - da = dl = dst; - -#pragma pipeloop(0) - for (j = 0; j < ysize; j++) { - mlib_v_ImageChannelInsert_S16_34L_D1(sa, da, xsize); - sa = sl = (mlib_s16 *)((mlib_u8 *)sl + slb); - da = dl = (mlib_s16 *)((mlib_u8 *)dl + dlb); - } -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv.h --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv.h Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv.h Fri May 13 11:31:05 2016 +0300 @@ -34,18 +34,6 @@ #if defined ( VIS ) && VIS == 0x200 -mlib_status mlib_conv2x2_8nw_f(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - -mlib_status mlib_conv3x3_8nw_f(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale, - mlib_s32 cmask); - mlib_status mlib_convMxN_8nw_f(mlib_image *dst, const mlib_image *src, mlib_s32 m, @@ -58,16 +46,6 @@ #else -mlib_status mlib_conv2x2_8nw_f(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale); - -mlib_status mlib_conv3x3_8nw_f(mlib_image *dst, - const mlib_image *src, - const mlib_s32 *kern, - mlib_s32 scale); - mlib_status mlib_convMxN_8nw_f(mlib_image *dst, const mlib_image *src, mlib_s32 m, diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_16nw.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_16nw.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1673 +0,0 @@ -/* - * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - -/* - * FUNCTION - * Internal functions for mlib_ImageConv* on U8 type - * and MLIB_EDGE_DST_NO_WRITE mask - * - */ - -/***************************************************************/ - -#include -#include -#include -#include - -/* - This defines switches between functions in - files: mlib_v_ImageConv_8nw.c, - mlib_v_ImageConvIndex3_8_16nw.c, - mlib_v_ImageConvIndex4_8_16nw.c, - mlib_v_ImageConvIndex3_8_16nw.c, - mlib_v_ImageConvIndex4_8_16nw.c -*/ - -#define CONV_INDEX - -#define DTYPE mlib_s16 -#define LTYPE mlib_u8 - -/***************************************************************/ - -#ifdef CONV_INDEX - -#define CONV_FUNC(KERN) \ - mlib_conv##KERN##_Index3_8_16nw(mlib_image *dst, \ - mlib_image *src, \ - mlib_s32 *kern, \ - mlib_s32 scale, \ - void *colormap) - -#else - -#define CONV_FUNC(KERN) \ - mlib_conv##KERN##_8nw_f(mlib_image *dst, \ - mlib_image *src, \ - mlib_s32 *kern, \ - mlib_s32 scale) - -#endif - -/***************************************************************/ - -#ifdef CONV_INDEX - -#define NCHAN 3 - -#else - -#define NCHAN nchan - -#endif - -/***************************************************************/ - -#define DEF_VARS \ - DTYPE *sl, *sp, *dl; \ - mlib_s32 hgt = mlib_ImageGetHeight(src); \ - mlib_s32 wid = mlib_ImageGetWidth(src); \ - mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(DTYPE); \ - mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(DTYPE); \ - DTYPE *adr_src = (DTYPE *)mlib_ImageGetData(src); \ - DTYPE *adr_dst = (DTYPE *)mlib_ImageGetData(dst); \ - mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0; \ - mlib_d64 *pbuff, *dp; \ - mlib_f32 *karr = (mlib_f32 *)kern; \ - mlib_s32 gsr_scale = (31 - scale) << 3; \ - mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]); \ - mlib_s32 i, j, l - -/***************************************************************/ - -#ifdef CONV_INDEX - -#define DEF_EXTRA_VARS \ - int offset = mlib_ImageGetLutOffset(colormap); \ - LTYPE **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap); \ - LTYPE *ltbl0 = lut_table[0] - offset; \ - LTYPE *ltbl1 = lut_table[1] - offset; \ - LTYPE *ltbl2 = lut_table[2] - offset; \ - LTYPE *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2 - -#else - -#define DEF_EXTRA_VARS \ - mlib_s32 nchan = mlib_ImageGetChannels(dst) - -#endif - -/***************************************************************/ - -#if NCHAN == 3 - -#define LOAD_SRC() { \ - mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ - mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ - mlib_d64 t0, t1, t2; \ - \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ - \ - buffn[i] = t0; \ - buffn[i + 1] = t1; \ - buffn[i + 2] = t2; \ - \ - sp += 8; \ - } - -#else - -#define LOAD_SRC() { \ - mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ - mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ - mlib_d64 t0, t1, t2; \ - \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ - \ - buffn[i] = t0; \ - buffn[i + 1] = t1; \ - buffn[i + 2] = t2; \ - \ - sp += 6; \ - } - -#endif - -/***************************************************************/ - -static mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008, - 0x00040004, 0x00020002, 0x00010001, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; - -/***************************************************************/ - -void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, int size); - -/***************************************************************/ - -#define KSIZE 2 - -mlib_status CONV_FUNC(2x2) -{ - mlib_d64 *buffs[2*(KSIZE + 1)]; - mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s0, s1; - mlib_d64 d0, d1, d00, d01, d10, d11; - DEF_VARS; - DEF_EXTRA_VARS; - - sl = adr_src; - dl = adr_dst; - - ssize = NCHAN*wid; - dsize = (ssize + 7)/8; - esize = dsize + 4; - pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64)); - if (pbuff == NULL) return MLIB_FAILURE; - - for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize; - for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i]; - buffd = buffs[KSIZE] + esize; - buffe = buffd + 2*esize; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - xsize = ssize - NCHAN*(KSIZE - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l*sll; - -#ifndef CONV_INDEX - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); - -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } -#endif /* CONV_INDEX */ - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind; - mlib_f32 *pk = karr, k0, k1; - sp = sl + KSIZE*sll; - - buff0 = buffc[0]; - buff1 = buffc[1]; - buffn = buffc[KSIZE]; - -#ifndef CONV_INDEX - if ((((mlib_addr)(sl )) & 7) == 0) buff0 = (mlib_d64*)sl; - if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll); - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#endif - - k0 = pk[1]; - k1 = pk[3]; - vis_write_gsr(gsr_scale + NCHAN); - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d01, d11); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - k0 = pk[0]; - k1 = pk[2]; -#ifndef CONV_INDEX - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s0 = buff0[i]; - s1 = buff1[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - dp[i] = vis_fpack16_pair(d0, d1); - } - - if (emask) { - s0 = buff0[i]; - s1 = buff1[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - - d0 = vis_fpack16_pair(d0, d1); - vis_pst_8(d0, dp + i, emask); - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - -#else - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - d02 = vis_fpadd16(d02, d12); - d2 = vis_fpadd16(d2, drnd); - d2 = vis_fpadd16(d2, d02); - d03 = vis_fpadd16(d03, d13); - d3 = vis_fpadd16(d3, drnd); - d3 = vis_fpadd16(d3, d03); - d04 = vis_fpadd16(d04, d14); - d4 = vis_fpadd16(d4, drnd); - d4 = vis_fpadd16(d4, d04); - d05 = vis_fpadd16(d05, d15); - d5 = vis_fpadd16(d5, drnd); - d5 = vis_fpadd16(d5, d05); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - LOAD_SRC(); - } - - mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - if (buff_ind >= (KSIZE + 1)) buff_ind = 0; - } - - mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ - -#undef KSIZE -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3) -{ - mlib_d64 *buffs[2*(KSIZE + 1)]; - mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2; - mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21; - mlib_s32 ik, ik_last, off, doff; - DEF_VARS; - DEF_EXTRA_VARS; - - sl = adr_src; -#ifdef CONV_INDEX - dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1); -#else - dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN); -#endif - - ssize = NCHAN*wid; - dsize = (ssize + 7)/8; - esize = dsize + 4; - pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64)); - if (pbuff == NULL) return MLIB_FAILURE; - - for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize; - for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i]; - buffd = buffs[KSIZE] + esize; - buffe = buffd + 2*esize; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - xsize = ssize - NCHAN*(KSIZE - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l*sll; - -#ifndef CONV_INDEX - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } -#endif /* CONV_INDEX */ - } - - /* init buffer */ -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2; - mlib_f32 *pk = karr, k0, k1, k2; - sp = sl + KSIZE*sll; - - pbuff0 = buffc[0]; - pbuff1 = buffc[1]; - pbuff2 = buffc[2]; - buffn = buffc[KSIZE]; - -#ifndef CONV_INDEX - if ((((mlib_addr)(sl )) & 7) == 0) pbuff0 = (mlib_d64*)sl; - if ((((mlib_addr)(sl + sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll); - if ((((mlib_addr)(sl + 2*sll)) & 7) == 0) pbuff2 = (mlib_d64*)(sl + 2*sll); - - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#endif - -#ifdef CONV_INDEX - ik_last = 0; -#else - ik_last = (KSIZE - 1); -#endif - - for (ik = 0; ik < KSIZE; ik++) { - k0 = pk[ik]; - k1 = pk[ik + KSIZE]; - k2 = pk[ik + 2*KSIZE]; - - off = ik*NCHAN; - doff = off/8; - off &= 7; - buff0 = pbuff0 + doff; - buff1 = pbuff1 + doff; - buff2 = pbuff2 + doff; - vis_write_gsr(gsr_scale + off); - - if (ik == ik_last) continue; - /*if (!ik_last) { - if ((off & 3) || (ik == (KSIZE - 1))) { - ik_last = ik; - continue; - } - }*/ - - if (off == 0) { -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else if (off == 4) { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - } - } - - k0 = pk[ik_last]; - k1 = pk[ik_last + KSIZE]; - k2 = pk[ik_last + 2*KSIZE]; - - off = ik_last*NCHAN; - doff = off/8; - off &= 7; - buff0 = pbuff0 + doff; - buff1 = pbuff1 + doff; - buff2 = pbuff2 + doff; - vis_write_gsr(gsr_scale + off); - -#ifndef CONV_INDEX - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - -#else - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d20, d21, d22, d23, d24, d25; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - mlib_d64 s20 = buff2[i]; - mlib_d64 s21 = buff2[i + 1]; - mlib_d64 s22 = buff2[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - d20 = vis_fmul8x16au(vis_read_hi(s20), k2); - d21 = vis_fmul8x16au(vis_read_lo(s20), k2); - d22 = vis_fmul8x16au(vis_read_hi(s21), k2); - d23 = vis_fmul8x16au(vis_read_lo(s21), k2); - d24 = vis_fmul8x16au(vis_read_hi(s22), k2); - d25 = vis_fmul8x16au(vis_read_lo(s22), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d2 = vis_fpadd16(d2, d22); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d3 = vis_fpadd16(d3, d23); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d4 = vis_fpadd16(d4, d24); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - d5 = vis_fpadd16(d5, d25); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - buffd[2*i + 2] = drnd; - buffd[2*i + 3] = drnd; - buffd[2*i + 4] = drnd; - buffd[2*i + 5] = drnd; - - LOAD_SRC(); - } - - mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - if (buff_ind >= (KSIZE + 1)) buff_ind = 0; - } - - mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ - -#undef KSIZE -#define MAX_N 11 - -#ifdef CONV_INDEX - -mlib_status mlib_convMxN_Index3_8_16nw(mlib_image *dst, - mlib_image *src, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - mlib_s32 *kern, - mlib_s32 scale, - void *colormap) - -#else - -mlib_status mlib_convMxN_8nw_f(mlib_image *dst, - mlib_image *src, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - mlib_s32 *kern, - mlib_s32 scale) - -#endif -{ - mlib_d64 *buffs_local[3*(MAX_N + 1)], **buffs = buffs_local, **buff; - mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3; - mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31; - mlib_d64 dd, d0, d1; - mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff; - DEF_VARS; - DEF_EXTRA_VARS; - - if (n > MAX_N) { - buffs = mlib_malloc(3*(n + 1)*sizeof(mlib_d64*)); - if (buffs == NULL) return MLIB_FAILURE; - } - - buff = buffs + 2*(n + 1); - - sl = adr_src; -#ifdef CONV_INDEX - dl = adr_dst + dn*dll + dm; -#else - dl = adr_dst + dn*dll + dm*NCHAN; -#endif - - ssize = NCHAN*wid; - dsize = (ssize + 7)/8; - esize = dsize + 4; - pbuff = mlib_malloc((n + 4)*esize*sizeof(mlib_d64)); - if (pbuff == NULL) { - if (buffs != buffs_local) mlib_free(buffs); - return MLIB_FAILURE; - } - - for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize; - for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i]; - buffd = buffs[n] + esize; - buffe = buffd + 2*esize; - - wid -= (m - 1); - hgt -= (n - 1); - xsize = ssize - NCHAN*(m - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < n; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l*sll; - -#ifndef CONV_INDEX - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } -#endif /* CONV_INDEX */ - } - - /* init buffer */ -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind; - mlib_f32 *pk = karr, k0, k1, k2, k3; - sp = sl + n*sll; - - for (l = 0; l < n; l++) { - buff[l] = buffc[l]; - } - buffn = buffc[n]; - -#ifndef CONV_INDEX - for (l = 0; l < n; l++) { - if ((((mlib_addr)(sl + l*sll)) & 7) == 0) buff[l] = (mlib_d64*)(sl + l*sll); - } - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#endif - -#ifdef CONV_INDEX - ik_last = 0; -#else - ik_last = (m - 1); -#endif - - for (jk = 0; jk < n; jk += jk_size) { - jk_size = n - jk; -#ifdef CONV_INDEX - if (jk_size >= 5) jk_size = 3; - if (jk_size == 4) jk_size = 2; -#else - if (jk_size >= 6) jk_size = 4; - if (jk_size == 5) jk_size = 3; -#endif - coff = 0; - - if (jk_size == 2) { - - for (ik = 0; ik < m; ik++, coff += NCHAN) { - if (!jk && ik == ik_last) continue; - - k0 = pk[ik]; - k1 = pk[ik + m]; - - doff = coff/8; - buff0 = buff[jk ] + doff; - buff1 = buff[jk + 1] + doff; - - off = coff & 7; - vis_write_gsr(gsr_scale + off); - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } - - pk += 2*m; - - } else if (jk_size == 3) { - - for (ik = 0; ik < m; ik++, coff += NCHAN) { - if (!jk && ik == ik_last) continue; - - k0 = pk[ik]; - k1 = pk[ik + m]; - k2 = pk[ik + 2*m]; - - doff = coff/8; - buff0 = buff[jk ] + doff; - buff1 = buff[jk + 1] + doff; - buff2 = buff[jk + 2] + doff; - - off = coff & 7; - vis_write_gsr(gsr_scale + off); - - if (off == 0) { -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d20, d0); - d0 = vis_fpadd16(d00, d0); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d21, d1); - d1 = vis_fpadd16(d01, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else if (off == 4) { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d20, d0); - d0 = vis_fpadd16(d00, d0); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d21, d1); - d1 = vis_fpadd16(d01, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d20, d0); - d0 = vis_fpadd16(d00, d0); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d21, d1); - d1 = vis_fpadd16(d01, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - } - } - - pk += 3*m; - - } else { /* jk_size == 4 */ - - for (ik = 0; ik < m; ik++, coff += NCHAN) { - if (!jk && ik == ik_last) continue; - - k0 = pk[ik]; - k1 = pk[ik + m]; - k2 = pk[ik + 2*m]; - k3 = pk[ik + 3*m]; - - doff = coff/8; - buff0 = buff[jk ] + doff; - buff1 = buff[jk + 1] + doff; - buff2 = buff[jk + 2] + doff; - buff3 = buff[jk + 3] + doff; - - off = coff & 7; - vis_write_gsr(gsr_scale + off); - - if (off == 0) { - -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - s3 = buff3[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d00 = vis_fpadd16(d00, d10); - d20 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d20); - d01 = vis_fpadd16(d01, d11); - d21 = vis_fpadd16(d21, d31); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d21); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else if (off == 4) { - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; - s31 = buff3[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - d30 = vis_fmul8x16au(vis_read_lo(s30), k3); - d31 = vis_fmul8x16au(vis_read_hi(s31), k3); - - d00 = vis_fpadd16(d00, d10); - d20 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d20); - d01 = vis_fpadd16(d01, d11); - d21 = vis_fpadd16(d21, d31); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d21); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else { - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; - s31 = buff3[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - s3 = vis_faligndata(s30, s31); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d00 = vis_fpadd16(d00, d10); - d20 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d20); - d01 = vis_fpadd16(d01, d11); - d21 = vis_fpadd16(d21, d31); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d21); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - } - } - - pk += 4*m; - } - } - - /***************************************** - ***************************************** - ** Final iteration ** - ***************************************** - *****************************************/ - - jk_size = n; -#ifdef CONV_INDEX - if (jk_size >= 5) jk_size = 3; - if (jk_size == 4) jk_size = 2; -#else - if (jk_size >= 6) jk_size = 4; - if (jk_size == 5) jk_size = 3; -#endif - - k0 = karr[ik_last]; - k1 = karr[ik_last + m]; - k2 = karr[ik_last + 2*m]; - k3 = karr[ik_last + 3*m]; - - off = ik_last*NCHAN; - doff = off/8; - off &= 7; - buff0 = buff[0] + doff; - buff1 = buff[1] + doff; - buff2 = buff[2] + doff; - buff3 = buff[3] + doff; - vis_write_gsr(gsr_scale + off); - -#ifndef CONV_INDEX - if (jk_size == 2) { - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - - } else if (jk_size == 3) { - - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - - } else /* if (jk_size == 4) */ { - - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; - s31 = buff3[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - s3 = vis_faligndata(s30, s31); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d0 = vis_fpadd16(d0, d30); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d1 = vis_fpadd16(d1, d31); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - s3 = vis_faligndata(s30, s31); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d0 = vis_fpadd16(d0, d30); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d1 = vis_fpadd16(d1, d31); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - } - -#else /* CONV_INDEX */ - - if (jk_size == 2) { - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - buffd[2*i + 2] = drnd; - buffd[2*i + 3] = drnd; - buffd[2*i + 4] = drnd; - buffd[2*i + 5] = drnd; - - LOAD_SRC(); - } - - } else /* if (jk_size == 3) */ { - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d20, d21, d22, d23, d24, d25; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - mlib_d64 s20 = buff2[i]; - mlib_d64 s21 = buff2[i + 1]; - mlib_d64 s22 = buff2[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - d20 = vis_fmul8x16au(vis_read_hi(s20), k2); - d21 = vis_fmul8x16au(vis_read_lo(s20), k2); - d22 = vis_fmul8x16au(vis_read_hi(s21), k2); - d23 = vis_fmul8x16au(vis_read_lo(s21), k2); - d24 = vis_fmul8x16au(vis_read_hi(s22), k2); - d25 = vis_fmul8x16au(vis_read_lo(s22), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d2 = vis_fpadd16(d2, d22); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d3 = vis_fpadd16(d3, d23); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d4 = vis_fpadd16(d4, d24); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - d5 = vis_fpadd16(d5, d25); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - buffd[2*i + 2] = drnd; - buffd[2*i + 3] = drnd; - buffd[2*i + 4] = drnd; - buffd[2*i + 5] = drnd; - - LOAD_SRC(); - } - } -#endif /* CONV_INDEX */ - -#ifdef CONV_INDEX - mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - if (buff_ind >= (n + 1)) buff_ind = 0; - } - - mlib_free(pbuff); - if (buffs != buffs_local) mlib_free(buffs); - - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_8nw.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConvIndex3_8_8nw.c Thu May 12 11:03:07 2016 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1673 +0,0 @@ -/* - * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - - -/* - * FUNCTION - * Internal functions for mlib_ImageConv* on U8 type - * and MLIB_EDGE_DST_NO_WRITE mask - * - */ - -/***************************************************************/ - -#include -#include -#include -#include - -/* - This defines switches between functions in - files: mlib_v_ImageConv_8nw.c, - mlib_v_ImageConvIndex3_8_8nw.c, - mlib_v_ImageConvIndex4_8_8nw.c, - mlib_v_ImageConvIndex3_8_16nw.c, - mlib_v_ImageConvIndex4_8_16nw.c -*/ - -#define CONV_INDEX - -#define DTYPE mlib_u8 -#define LTYPE mlib_u8 - -/***************************************************************/ - -#ifdef CONV_INDEX - -#define CONV_FUNC(KERN) \ - mlib_conv##KERN##_Index3_8_8nw(mlib_image *dst, \ - mlib_image *src, \ - mlib_s32 *kern, \ - mlib_s32 scale, \ - void *colormap) - -#else - -#define CONV_FUNC(KERN) \ - mlib_conv##KERN##_8nw_f(mlib_image *dst, \ - mlib_image *src, \ - mlib_s32 *kern, \ - mlib_s32 scale) - -#endif - -/***************************************************************/ - -#ifdef CONV_INDEX - -#define NCHAN 3 - -#else - -#define NCHAN nchan - -#endif - -/***************************************************************/ - -#define DEF_VARS \ - DTYPE *sl, *sp, *dl; \ - mlib_s32 hgt = mlib_ImageGetHeight(src); \ - mlib_s32 wid = mlib_ImageGetWidth(src); \ - mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(DTYPE); \ - mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(DTYPE); \ - DTYPE *adr_src = (DTYPE *)mlib_ImageGetData(src); \ - DTYPE *adr_dst = (DTYPE *)mlib_ImageGetData(dst); \ - mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0; \ - mlib_d64 *pbuff, *dp; \ - mlib_f32 *karr = (mlib_f32 *)kern; \ - mlib_s32 gsr_scale = (31 - scale) << 3; \ - mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]); \ - mlib_s32 i, j, l - -/***************************************************************/ - -#ifdef CONV_INDEX - -#define DEF_EXTRA_VARS \ - int offset = mlib_ImageGetLutOffset(colormap); \ - LTYPE **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap); \ - LTYPE *ltbl0 = lut_table[0] - offset; \ - LTYPE *ltbl1 = lut_table[1] - offset; \ - LTYPE *ltbl2 = lut_table[2] - offset; \ - LTYPE *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2 - -#else - -#define DEF_EXTRA_VARS \ - mlib_s32 nchan = mlib_ImageGetChannels(dst) - -#endif - -/***************************************************************/ - -#if NCHAN == 3 - -#define LOAD_SRC() { \ - mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ - mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ - mlib_d64 t0, t1, t2; \ - \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ - \ - buffn[i] = t0; \ - buffn[i + 1] = t1; \ - buffn[i + 2] = t2; \ - \ - sp += 8; \ - } - -#else - -#define LOAD_SRC() { \ - mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ - mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ - mlib_d64 t0, t1, t2; \ - \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ - \ - buffn[i] = t0; \ - buffn[i + 1] = t1; \ - buffn[i + 2] = t2; \ - \ - sp += 6; \ - } - -#endif - -/***************************************************************/ - -static mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008, - 0x00040004, 0x00020002, 0x00010001, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; - -/***************************************************************/ - -void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, int size); - -/***************************************************************/ - -#define KSIZE 2 - -mlib_status CONV_FUNC(2x2) -{ - mlib_d64 *buffs[2*(KSIZE + 1)]; - mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s0, s1; - mlib_d64 d0, d1, d00, d01, d10, d11; - DEF_VARS; - DEF_EXTRA_VARS; - - sl = adr_src; - dl = adr_dst; - - ssize = NCHAN*wid; - dsize = (ssize + 7)/8; - esize = dsize + 4; - pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64)); - if (pbuff == NULL) return MLIB_FAILURE; - - for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize; - for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i]; - buffd = buffs[KSIZE] + esize; - buffe = buffd + 2*esize; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - xsize = ssize - NCHAN*(KSIZE - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l*sll; - -#ifndef CONV_INDEX - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); - -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } -#endif /* CONV_INDEX */ - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind; - mlib_f32 *pk = karr, k0, k1; - sp = sl + KSIZE*sll; - - buff0 = buffc[0]; - buff1 = buffc[1]; - buffn = buffc[KSIZE]; - -#ifndef CONV_INDEX - if ((((mlib_addr)(sl )) & 7) == 0) buff0 = (mlib_d64*)sl; - if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll); - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#endif - - k0 = pk[1]; - k1 = pk[3]; - vis_write_gsr(gsr_scale + NCHAN); - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d01, d11); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - k0 = pk[0]; - k1 = pk[2]; -#ifndef CONV_INDEX - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s0 = buff0[i]; - s1 = buff1[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - dp[i] = vis_fpack16_pair(d0, d1); - } - - if (emask) { - s0 = buff0[i]; - s1 = buff1[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - - d0 = vis_fpack16_pair(d0, d1); - vis_pst_8(d0, dp + i, emask); - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - -#else - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - d02 = vis_fpadd16(d02, d12); - d2 = vis_fpadd16(d2, drnd); - d2 = vis_fpadd16(d2, d02); - d03 = vis_fpadd16(d03, d13); - d3 = vis_fpadd16(d3, drnd); - d3 = vis_fpadd16(d3, d03); - d04 = vis_fpadd16(d04, d14); - d4 = vis_fpadd16(d4, drnd); - d4 = vis_fpadd16(d4, d04); - d05 = vis_fpadd16(d05, d15); - d5 = vis_fpadd16(d5, drnd); - d5 = vis_fpadd16(d5, d05); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - LOAD_SRC(); - } - - mlib_ImageColorTrue2IndexLine_U8_U8_3((void*)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - if (buff_ind >= (KSIZE + 1)) buff_ind = 0; - } - - mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ - -#undef KSIZE -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3) -{ - mlib_d64 *buffs[2*(KSIZE + 1)]; - mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2; - mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21; - mlib_s32 ik, ik_last, off, doff; - DEF_VARS; - DEF_EXTRA_VARS; - - sl = adr_src; -#ifdef CONV_INDEX - dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1); -#else - dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN); -#endif - - ssize = NCHAN*wid; - dsize = (ssize + 7)/8; - esize = dsize + 4; - pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64)); - if (pbuff == NULL) return MLIB_FAILURE; - - for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize; - for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i]; - buffd = buffs[KSIZE] + esize; - buffe = buffd + 2*esize; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - xsize = ssize - NCHAN*(KSIZE - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l*sll; - -#ifndef CONV_INDEX - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } -#endif /* CONV_INDEX */ - } - - /* init buffer */ -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2; - mlib_f32 *pk = karr, k0, k1, k2; - sp = sl + KSIZE*sll; - - pbuff0 = buffc[0]; - pbuff1 = buffc[1]; - pbuff2 = buffc[2]; - buffn = buffc[KSIZE]; - -#ifndef CONV_INDEX - if ((((mlib_addr)(sl )) & 7) == 0) pbuff0 = (mlib_d64*)sl; - if ((((mlib_addr)(sl + sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll); - if ((((mlib_addr)(sl + 2*sll)) & 7) == 0) pbuff2 = (mlib_d64*)(sl + 2*sll); - - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#endif - -#ifdef CONV_INDEX - ik_last = 0; -#else - ik_last = (KSIZE - 1); -#endif - - for (ik = 0; ik < KSIZE; ik++) { - k0 = pk[ik]; - k1 = pk[ik + KSIZE]; - k2 = pk[ik + 2*KSIZE]; - - off = ik*NCHAN; - doff = off/8; - off &= 7; - buff0 = pbuff0 + doff; - buff1 = pbuff1 + doff; - buff2 = pbuff2 + doff; - vis_write_gsr(gsr_scale + off); - - if (ik == ik_last) continue; - /*if (!ik_last) { - if ((off & 3) || (ik == (KSIZE - 1))) { - ik_last = ik; - continue; - } - }*/ - - if (off == 0) { -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else if (off == 4) { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - } - } - - k0 = pk[ik_last]; - k1 = pk[ik_last + KSIZE]; - k2 = pk[ik_last + 2*KSIZE]; - - off = ik_last*NCHAN; - doff = off/8; - off &= 7; - buff0 = pbuff0 + doff; - buff1 = pbuff1 + doff; - buff2 = pbuff2 + doff; - vis_write_gsr(gsr_scale + off); - -#ifndef CONV_INDEX - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - -#else - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d20, d21, d22, d23, d24, d25; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - mlib_d64 s20 = buff2[i]; - mlib_d64 s21 = buff2[i + 1]; - mlib_d64 s22 = buff2[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - d20 = vis_fmul8x16au(vis_read_hi(s20), k2); - d21 = vis_fmul8x16au(vis_read_lo(s20), k2); - d22 = vis_fmul8x16au(vis_read_hi(s21), k2); - d23 = vis_fmul8x16au(vis_read_lo(s21), k2); - d24 = vis_fmul8x16au(vis_read_hi(s22), k2); - d25 = vis_fmul8x16au(vis_read_lo(s22), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d2 = vis_fpadd16(d2, d22); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d3 = vis_fpadd16(d3, d23); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d4 = vis_fpadd16(d4, d24); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - d5 = vis_fpadd16(d5, d25); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - buffd[2*i + 2] = drnd; - buffd[2*i + 3] = drnd; - buffd[2*i + 4] = drnd; - buffd[2*i + 5] = drnd; - - LOAD_SRC(); - } - - mlib_ImageColorTrue2IndexLine_U8_U8_3((void*)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - if (buff_ind >= (KSIZE + 1)) buff_ind = 0; - } - - mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ - -#undef KSIZE -#define MAX_N 11 - -#ifdef CONV_INDEX - -mlib_status mlib_convMxN_Index3_8_8nw(mlib_image *dst, - mlib_image *src, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - mlib_s32 *kern, - mlib_s32 scale, - void *colormap) - -#else - -mlib_status mlib_convMxN_8nw_f(mlib_image *dst, - mlib_image *src, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - mlib_s32 *kern, - mlib_s32 scale) - -#endif -{ - mlib_d64 *buffs_local[3*(MAX_N + 1)], **buffs = buffs_local, **buff; - mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3; - mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31; - mlib_d64 dd, d0, d1; - mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff; - DEF_VARS; - DEF_EXTRA_VARS; - - if (n > MAX_N) { - buffs = mlib_malloc(3*(n + 1)*sizeof(mlib_d64*)); - if (buffs == NULL) return MLIB_FAILURE; - } - - buff = buffs + 2*(n + 1); - - sl = adr_src; -#ifdef CONV_INDEX - dl = adr_dst + dn*dll + dm; -#else - dl = adr_dst + dn*dll + dm*NCHAN; -#endif - - ssize = NCHAN*wid; - dsize = (ssize + 7)/8; - esize = dsize + 4; - pbuff = mlib_malloc((n + 4)*esize*sizeof(mlib_d64)); - if (pbuff == NULL) { - if (buffs != buffs_local) mlib_free(buffs); - return MLIB_FAILURE; - } - - for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize; - for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i]; - buffd = buffs[n] + esize; - buffe = buffd + 2*esize; - - wid -= (m - 1); - hgt -= (n - 1); - xsize = ssize - NCHAN*(m - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < n; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l*sll; - -#ifndef CONV_INDEX - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } -#endif /* CONV_INDEX */ - } - - /* init buffer */ -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind; - mlib_f32 *pk = karr, k0, k1, k2, k3; - sp = sl + n*sll; - - for (l = 0; l < n; l++) { - buff[l] = buffc[l]; - } - buffn = buffc[n]; - -#ifndef CONV_INDEX - for (l = 0; l < n; l++) { - if ((((mlib_addr)(sl + l*sll)) & 7) == 0) buff[l] = (mlib_d64*)(sl + l*sll); - } - if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); -#endif - -#ifdef CONV_INDEX - ik_last = 0; -#else - ik_last = (m - 1); -#endif - - for (jk = 0; jk < n; jk += jk_size) { - jk_size = n - jk; -#ifdef CONV_INDEX - if (jk_size >= 5) jk_size = 3; - if (jk_size == 4) jk_size = 2; -#else - if (jk_size >= 6) jk_size = 4; - if (jk_size == 5) jk_size = 3; -#endif - coff = 0; - - if (jk_size == 2) { - - for (ik = 0; ik < m; ik++, coff += NCHAN) { - if (!jk && ik == ik_last) continue; - - k0 = pk[ik]; - k1 = pk[ik + m]; - - doff = coff/8; - buff0 = buff[jk ] + doff; - buff1 = buff[jk + 1] + doff; - - off = coff & 7; - vis_write_gsr(gsr_scale + off); - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } - - pk += 2*m; - - } else if (jk_size == 3) { - - for (ik = 0; ik < m; ik++, coff += NCHAN) { - if (!jk && ik == ik_last) continue; - - k0 = pk[ik]; - k1 = pk[ik + m]; - k2 = pk[ik + 2*m]; - - doff = coff/8; - buff0 = buff[jk ] + doff; - buff1 = buff[jk + 1] + doff; - buff2 = buff[jk + 2] + doff; - - off = coff & 7; - vis_write_gsr(gsr_scale + off); - - if (off == 0) { -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d20, d0); - d0 = vis_fpadd16(d00, d0); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d21, d1); - d1 = vis_fpadd16(d01, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else if (off == 4) { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d20, d0); - d0 = vis_fpadd16(d00, d0); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d21, d1); - d1 = vis_fpadd16(d01, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d20, d0); - d0 = vis_fpadd16(d00, d0); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d21, d1); - d1 = vis_fpadd16(d01, d1); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - } - } - - pk += 3*m; - - } else { /* jk_size == 4 */ - - for (ik = 0; ik < m; ik++, coff += NCHAN) { - if (!jk && ik == ik_last) continue; - - k0 = pk[ik]; - k1 = pk[ik + m]; - k2 = pk[ik + 2*m]; - k3 = pk[ik + 3*m]; - - doff = coff/8; - buff0 = buff[jk ] + doff; - buff1 = buff[jk + 1] + doff; - buff2 = buff[jk + 2] + doff; - buff3 = buff[jk + 3] + doff; - - off = coff & 7; - vis_write_gsr(gsr_scale + off); - - if (off == 0) { - -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - s3 = buff3[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d00 = vis_fpadd16(d00, d10); - d20 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d20); - d01 = vis_fpadd16(d01, d11); - d21 = vis_fpadd16(d21, d31); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d21); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else if (off == 4) { - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; - s31 = buff3[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - d30 = vis_fmul8x16au(vis_read_lo(s30), k3); - d31 = vis_fmul8x16au(vis_read_hi(s31), k3); - - d00 = vis_fpadd16(d00, d10); - d20 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d20); - d01 = vis_fpadd16(d01, d11); - d21 = vis_fpadd16(d21, d31); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d21); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - - } else { - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; - s31 = buff3[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7)/8; i++) { - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - s3 = vis_faligndata(s30, s31); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d00 = vis_fpadd16(d00, d10); - d20 = vis_fpadd16(d20, d30); - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d20); - d01 = vis_fpadd16(d01, d11); - d21 = vis_fpadd16(d21, d31); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d21); - buffd[2*i] = d0; - buffd[2*i + 1] = d1; - } - } - } - - pk += 4*m; - } - } - - /***************************************** - ***************************************** - ** Final iteration ** - ***************************************** - *****************************************/ - - jk_size = n; -#ifdef CONV_INDEX - if (jk_size >= 5) jk_size = 3; - if (jk_size == 4) jk_size = 2; -#else - if (jk_size >= 6) jk_size = 4; - if (jk_size == 5) jk_size = 3; -#endif - - k0 = karr[ik_last]; - k1 = karr[ik_last + m]; - k2 = karr[ik_last + 2*m]; - k3 = karr[ik_last + 3*m]; - - off = ik_last*NCHAN; - doff = off/8; - off &= 7; - buff0 = buff[0] + doff; - buff1 = buff[1] + doff; - buff2 = buff[2] + doff; - buff3 = buff[3] + doff; - vis_write_gsr(gsr_scale + off); - -#ifndef CONV_INDEX - if (jk_size == 2) { - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - - } else if (jk_size == 3) { - - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - - } else /* if (jk_size == 4) */ { - - dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; - s31 = buff3[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize/8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - s3 = vis_faligndata(s30, s31); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d0 = vis_fpadd16(d0, d30); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d1 = vis_fpadd16(d1, d31); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s30 = s31; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s31 = buff3[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - s3 = vis_faligndata(s30, s31); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - d30 = vis_fmul8x16au(vis_read_hi(s3), k3); - d31 = vis_fmul8x16au(vis_read_lo(s3), k3); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d0 = vis_fpadd16(d0, d30); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d1 = vis_fpadd16(d1, d31); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - } - - if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); - } - -#else /* CONV_INDEX */ - - if (jk_size == 2) { - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - buffd[2*i + 2] = drnd; - buffd[2*i + 3] = drnd; - buffd[2*i + 4] = drnd; - buffd[2*i + 5] = drnd; - - LOAD_SRC(); - } - - } else /* if (jk_size == 3) */ { - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d20, d21, d22, d23, d24, d25; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - mlib_d64 s20 = buff2[i]; - mlib_d64 s21 = buff2[i + 1]; - mlib_d64 s22 = buff2[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - d20 = vis_fmul8x16au(vis_read_hi(s20), k2); - d21 = vis_fmul8x16au(vis_read_lo(s20), k2); - d22 = vis_fmul8x16au(vis_read_hi(s21), k2); - d23 = vis_fmul8x16au(vis_read_lo(s21), k2); - d24 = vis_fmul8x16au(vis_read_hi(s22), k2); - d25 = vis_fmul8x16au(vis_read_lo(s22), k2); - - d0 = buffd[2*i]; - d1 = buffd[2*i + 1]; - d2 = buffd[2*i + 2]; - d3 = buffd[2*i + 3]; - d4 = buffd[2*i + 4]; - d5 = buffd[2*i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d2 = vis_fpadd16(d2, d22); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d3 = vis_fpadd16(d3, d23); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d4 = vis_fpadd16(d4, d24); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - d5 = vis_fpadd16(d5, d25); - - buffe[i ] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2*i ] = drnd; - buffd[2*i + 1] = drnd; - buffd[2*i + 2] = drnd; - buffd[2*i + 3] = drnd; - buffd[2*i + 4] = drnd; - buffd[2*i + 5] = drnd; - - LOAD_SRC(); - } - } -#endif /* CONV_INDEX */ - -#ifdef CONV_INDEX - mlib_ImageColorTrue2IndexLine_U8_U8_3((void*)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - if (buff_ind >= (n + 1)) buff_ind = 0; - } - - mlib_free(pbuff); - if (buffs != buffs_local) mlib_free(buffs); - - return MLIB_SUCCESS; -} - -/***************************************************************/ diff -r 3e22d8fd4912 -r acea5f7d354b jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv_8nw.c --- a/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv_8nw.c Thu May 12 11:03:07 2016 -0700 +++ b/jdk/src/java.desktop/unix/native/libmlib_image/mlib_v_ImageConv_8nw.c Fri May 13 11:31:05 2016 +0300 @@ -34,58 +34,16 @@ #include "vis_proto.h" #include "mlib_image.h" #include "mlib_ImageCheck.h" -#include "mlib_ImageColormap.h" #include "mlib_ImageCopy.h" #include "mlib_ImageConv.h" #include "mlib_v_ImageConv.h" -/* - This defines switches between functions in - files: mlib_v_ImageConvIndex3_8_8nw.c, - mlib_v_ImageConvIndex4_8_8nw.c, - mlib_v_ImageConvIndex3_8_16nw.c, - mlib_v_ImageConvIndex4_8_16nw.c -*/ - -/*#define CONV_INDEX*/ - /***************************************************************/ #define DTYPE mlib_u8 -#define LTYPE mlib_u8 /***************************************************************/ -#ifdef CONV_INDEX - -#define CONV_FUNC(KERN) \ - mlib_conv##KERN##_Index3_8_8nw(mlib_image *dst, \ - const mlib_image *src, \ - const mlib_s32 *kern, \ - mlib_s32 scale, \ - const void *colormap) - -#else - -#define CONV_FUNC(KERN) \ - mlib_conv##KERN##_8nw_f(mlib_image *dst, \ - const mlib_image *src, \ - const mlib_s32 *kern, \ - mlib_s32 scale) - -#endif /* CONV_INDEX */ - -#define ColorTrue2IndexLine mlib_ImageColorTrue2IndexLine_U8_U8_3 - -/***************************************************************/ -#ifdef CONV_INDEX - -#define NCHAN 3 - -#else - #define NCHAN nchan -#endif /* CONV_INDEX */ - /***************************************************************/ #define DEF_VARS \ DTYPE *sl, *sp, *dl; \ @@ -103,104 +61,9 @@ mlib_s32 i, j, l /***************************************************************/ -#ifdef CONV_INDEX - -#define DEF_EXTRA_VARS \ - mlib_s32 offset = mlib_ImageGetLutOffset(colormap); \ - LTYPE **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap); \ - LTYPE *ltbl0 = lut_table[0] - offset; \ - LTYPE *ltbl1 = lut_table[1] - offset; \ - LTYPE *ltbl2 = lut_table[2] - offset; \ - LTYPE *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2 - -#else - #define DEF_EXTRA_VARS \ mlib_s32 nchan = mlib_ImageGetChannels(dst) -#endif /* CONV_INDEX */ - -/***************************************************************/ -#if NCHAN == 3 - -#define LOAD_SRC() { \ - mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ - mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ - mlib_d64 t0, t1, t2; \ - \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ - \ - buffn[i] = t0; \ - buffn[i + 1] = t1; \ - buffn[i + 2] = t2; \ - \ - sp += 8; \ - } - -#else - -#define LOAD_SRC() { \ - mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ - mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ - mlib_d64 t0, t1, t2; \ - \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2); \ - t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1); \ - t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ - t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ - \ - buffn[i] = t0; \ - buffn[i + 1] = t1; \ - buffn[i + 2] = t2; \ - \ - sp += 6; \ - } - -#endif /* NCHAN == 3 */ - /***************************************************************/ static const mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008, @@ -210,651 +73,8 @@ }; /***************************************************************/ -#define KSIZE 2 - -mlib_status CONV_FUNC(2x2) -{ - mlib_d64 *buffs[2 * (KSIZE + 1)]; - mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s0, s1; - mlib_d64 d0, d1, d00, d01, d10, d11; - DEF_VARS; - DEF_EXTRA_VARS; - - sl = adr_src; - dl = adr_dst; - - ssize = NCHAN * wid; - dsize = (ssize + 7) / 8; - esize = dsize + 4; - pbuff = mlib_malloc((KSIZE + 4) * esize * sizeof(mlib_d64)); - - if (pbuff == NULL) - return MLIB_FAILURE; - - for (i = 0; i < (KSIZE + 1); i++) - buffs[i] = pbuff + i * esize; - for (i = 0; i < (KSIZE + 1); i++) - buffs[(KSIZE + 1) + i] = buffs[i]; - buffd = buffs[KSIZE] + esize; - buffe = buffd + 2 * esize; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - xsize = ssize - NCHAN * (KSIZE - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l * sll; - -#ifndef CONV_INDEX - - if ((mlib_addr) sp & 7) - mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize); - -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } - -#endif /* CONV_INDEX */ - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind; - mlib_f32 *pk = karr, k0, k1; - sp = sl + KSIZE * sll; - - buff0 = buffc[0]; - buff1 = buffc[1]; - buffn = buffc[KSIZE]; - -#ifndef CONV_INDEX - - if ((((mlib_addr) (sl)) & 7) == 0) - buff0 = (mlib_d64 *) sl; - - if ((((mlib_addr) (sl + sll)) & 7) == 0) - buff1 = (mlib_d64 *) (sl + sll); - - if ((mlib_addr) sp & 7) - mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize); -#endif /* CONV_INDEX */ - - k0 = pk[1]; - k1 = pk[3]; - vis_write_gsr(gsr_scale + NCHAN); - - s01 = buff0[0]; - s11 = buff1[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7) / 8; i++) { - s00 = s01; - s10 = s11; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = vis_fpadd16(d00, d10); - d1 = vis_fpadd16(d01, d11); - buffd[2 * i] = d0; - buffd[2 * i + 1] = d1; - } - - k0 = pk[0]; - k1 = pk[2]; -#ifndef CONV_INDEX - dp = ((mlib_addr) dl & 7) ? buffe : (mlib_d64 *) dl; - -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - s0 = buff0[i]; - s1 = buff1[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - dp[i] = vis_fpack16_pair(d0, d1); - } - - if (emask) { - s0 = buff0[i]; - s1 = buff1[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - - d0 = vis_fpack16_pair(d0, d1); - vis_pst_8(d0, dp + i, emask); - } - - if ((mlib_u8 *) dp != dl) - mlib_ImageCopy_na((void *)buffe, dl, xsize); - -#else - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d2 = buffd[2 * i + 2]; - d3 = buffd[2 * i + 3]; - d4 = buffd[2 * i + 4]; - d5 = buffd[2 * i + 5]; - d00 = vis_fpadd16(d00, d10); - d0 = vis_fpadd16(d0, drnd); - d0 = vis_fpadd16(d0, d00); - d01 = vis_fpadd16(d01, d11); - d1 = vis_fpadd16(d1, drnd); - d1 = vis_fpadd16(d1, d01); - d02 = vis_fpadd16(d02, d12); - d2 = vis_fpadd16(d2, drnd); - d2 = vis_fpadd16(d2, d02); - d03 = vis_fpadd16(d03, d13); - d3 = vis_fpadd16(d3, drnd); - d3 = vis_fpadd16(d3, d03); - d04 = vis_fpadd16(d04, d14); - d4 = vis_fpadd16(d4, drnd); - d4 = vis_fpadd16(d4, d04); - d05 = vis_fpadd16(d05, d15); - d5 = vis_fpadd16(d5, drnd); - d5 = vis_fpadd16(d5, d05); - - buffe[i] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - LOAD_SRC(); - } - - ColorTrue2IndexLine((void *)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= (KSIZE + 1)) - buff_ind = 0; - } - - mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE -#define KSIZE 3 - -mlib_status CONV_FUNC(3x3) -{ - mlib_d64 *buffs[2 * (KSIZE + 1)]; - mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe; - mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2; - mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21; - mlib_s32 ik, ik_last, off, doff; - DEF_VARS; - DEF_EXTRA_VARS; - - sl = adr_src; -#ifdef CONV_INDEX - dl = adr_dst + ((KSIZE - 1) / 2) * (dll + 1); -#else - dl = adr_dst + ((KSIZE - 1) / 2) * (dll + NCHAN); -#endif /* CONV_INDEX */ - - ssize = NCHAN * wid; - dsize = (ssize + 7) / 8; - esize = dsize + 4; - pbuff = mlib_malloc((KSIZE + 4) * esize * sizeof(mlib_d64)); - - if (pbuff == NULL) - return MLIB_FAILURE; - - for (i = 0; i < (KSIZE + 1); i++) - buffs[i] = pbuff + i * esize; - for (i = 0; i < (KSIZE + 1); i++) - buffs[(KSIZE + 1) + i] = buffs[i]; - buffd = buffs[KSIZE] + esize; - buffe = buffd + 2 * esize; - - wid -= (KSIZE - 1); - hgt -= (KSIZE - 1); - xsize = ssize - NCHAN * (KSIZE - 1); - emask = (0xFF00 >> (xsize & 7)) & 0xFF; - - vis_write_gsr(gsr_scale + 7); - - for (l = 0; l < KSIZE; l++) { - mlib_d64 *buffn = buffs[l]; - sp = sl + l * sll; - -#ifndef CONV_INDEX - - if ((mlib_addr) sp & 7) - mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize); -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } - -#endif /* CONV_INDEX */ - } - - /* init buffer */ -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7) / 8; i++) { - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - } - - for (j = 0; j < hgt; j++) { - mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2; - mlib_f32 *pk = karr, k0, k1, k2; - sp = sl + KSIZE * sll; - - pbuff0 = buffc[0]; - pbuff1 = buffc[1]; - pbuff2 = buffc[2]; - buffn = buffc[KSIZE]; - -#ifndef CONV_INDEX - - if ((((mlib_addr) (sl)) & 7) == 0) - pbuff0 = (mlib_d64 *) sl; - - if ((((mlib_addr) (sl + sll)) & 7) == 0) - pbuff1 = (mlib_d64 *) (sl + sll); - - if ((((mlib_addr) (sl + 2 * sll)) & 7) == 0) - pbuff2 = (mlib_d64 *) (sl + 2 * sll); - - if ((mlib_addr) sp & 7) - mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize); -#endif /* CONV_INDEX */ - -#ifdef CONV_INDEX - ik_last = 0; -#else - ik_last = (KSIZE - 1); -#endif /* CONV_INDEX */ - - for (ik = 0; ik < KSIZE; ik++) { - k0 = pk[ik]; - k1 = pk[ik + KSIZE]; - k2 = pk[ik + 2 * KSIZE]; - - off = ik * NCHAN; - doff = off / 8; - off &= 7; - buff0 = pbuff0 + doff; - buff1 = pbuff1 + doff; - buff2 = pbuff2 + doff; - vis_write_gsr(gsr_scale + off); - - if (ik == ik_last) - continue; - /*if (!ik_last) { - * if ((off & 3) || (ik == (KSIZE - 1))) { - * ik_last = ik; - * continue; - * } - * } */ - - if (off == 0) { -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7) / 8; i++) { - s0 = buff0[i]; - s1 = buff1[i]; - s2 = buff2[i]; - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2 * i] = d0; - buffd[2 * i + 1] = d1; - } - } - else if (off == 4) { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7) / 8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - - d00 = vis_fmul8x16au(vis_read_lo(s00), k0); - d01 = vis_fmul8x16au(vis_read_hi(s01), k0); - d10 = vis_fmul8x16au(vis_read_lo(s10), k1); - d11 = vis_fmul8x16au(vis_read_hi(s11), k1); - d20 = vis_fmul8x16au(vis_read_lo(s20), k2); - d21 = vis_fmul8x16au(vis_read_hi(s21), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2 * i] = d0; - buffd[2 * i + 1] = d1; - } - } - else { - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < (xsize + 7) / 8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d0 = vis_fpadd16(d00, d0); - d0 = vis_fpadd16(d10, d0); - d0 = vis_fpadd16(d20, d0); - d1 = vis_fpadd16(d01, d1); - d1 = vis_fpadd16(d11, d1); - d1 = vis_fpadd16(d21, d1); - buffd[2 * i] = d0; - buffd[2 * i + 1] = d1; - } - } - } - - k0 = pk[ik_last]; - k1 = pk[ik_last + KSIZE]; - k2 = pk[ik_last + 2 * KSIZE]; - - off = ik_last * NCHAN; - doff = off / 8; - off &= 7; - buff0 = pbuff0 + doff; - buff1 = pbuff1 + doff; - buff2 = pbuff2 + doff; - vis_write_gsr(gsr_scale + off); - -#ifndef CONV_INDEX - dp = ((mlib_addr) dl & 7) ? buffe : (mlib_d64 *) dl; - - s01 = buff0[0]; - s11 = buff1[0]; - s21 = buff2[0]; -#pragma pipeloop(0) - for (i = 0; i < xsize / 8; i++) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - dp[i] = dd; - - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - } - - if (emask) { - s00 = s01; - s10 = s11; - s20 = s21; - s01 = buff0[i + 1]; - s11 = buff1[i + 1]; - s21 = buff2[i + 1]; - s0 = vis_faligndata(s00, s01); - s1 = vis_faligndata(s10, s11); - s2 = vis_faligndata(s20, s21); - - d00 = vis_fmul8x16au(vis_read_hi(s0), k0); - d01 = vis_fmul8x16au(vis_read_lo(s0), k0); - d10 = vis_fmul8x16au(vis_read_hi(s1), k1); - d11 = vis_fmul8x16au(vis_read_lo(s1), k1); - d20 = vis_fmul8x16au(vis_read_hi(s2), k2); - d21 = vis_fmul8x16au(vis_read_lo(s2), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - - dd = vis_fpack16_pair(d0, d1); - vis_pst_8(dd, dp + i, emask); - - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - } - - if ((mlib_u8 *) dp != dl) - mlib_ImageCopy_na((void *)buffe, dl, xsize); - -#else - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d20, d21, d22, d23, d24, d25; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - mlib_d64 s20 = buff2[i]; - mlib_d64 s21 = buff2[i + 1]; - mlib_d64 s22 = buff2[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - d20 = vis_fmul8x16au(vis_read_hi(s20), k2); - d21 = vis_fmul8x16au(vis_read_lo(s20), k2); - d22 = vis_fmul8x16au(vis_read_hi(s21), k2); - d23 = vis_fmul8x16au(vis_read_lo(s21), k2); - d24 = vis_fmul8x16au(vis_read_hi(s22), k2); - d25 = vis_fmul8x16au(vis_read_lo(s22), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d2 = buffd[2 * i + 2]; - d3 = buffd[2 * i + 3]; - d4 = buffd[2 * i + 4]; - d5 = buffd[2 * i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d2 = vis_fpadd16(d2, d22); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d3 = vis_fpadd16(d3, d23); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d4 = vis_fpadd16(d4, d24); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - d5 = vis_fpadd16(d5, d25); - - buffe[i] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - buffd[2 * i + 2] = drnd; - buffd[2 * i + 3] = drnd; - buffd[2 * i + 4] = drnd; - buffd[2 * i + 5] = drnd; - - LOAD_SRC(); - } - - ColorTrue2IndexLine((void *)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - - sl += sll; - dl += dll; - - buff_ind++; - - if (buff_ind >= (KSIZE + 1)) - buff_ind = 0; - } - - mlib_free(pbuff); - - return MLIB_SUCCESS; -} - -/***************************************************************/ -#undef KSIZE #define MAX_N 11 -#ifdef CONV_INDEX - -mlib_status mlib_convMxN_Index3_8_8nw(mlib_image *dst, - const mlib_image *src, - mlib_s32 m, - mlib_s32 n, - mlib_s32 dm, - mlib_s32 dn, - const mlib_s32 *kern, - mlib_s32 scale, - const void *colormap) -#else - mlib_status mlib_convMxN_8nw_f(mlib_image *dst, const mlib_image *src, mlib_s32 m, @@ -863,7 +83,6 @@ mlib_s32 dn, const mlib_s32 *kern, mlib_s32 scale) -#endif /* CONV_INDEX */ { mlib_d64 *buffs_local[3 * (MAX_N + 1)], **buffs = buffs_local, **buff; mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe; @@ -884,11 +103,7 @@ buff = buffs + 2 * (n + 1); sl = adr_src; -#ifdef CONV_INDEX - dl = adr_dst + dn * dll + dm; -#else dl = adr_dst + dn * dll + dm * NCHAN; -#endif /* CONV_INDEX */ ssize = NCHAN * wid; dsize = (ssize + 7) / 8; @@ -919,17 +134,8 @@ mlib_d64 *buffn = buffs[l]; sp = sl + l * sll; -#ifndef CONV_INDEX - if ((mlib_addr) sp & 7) mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize); -#else -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - LOAD_SRC(); - } - -#endif /* CONV_INDEX */ } /* init buffer */ @@ -950,7 +156,6 @@ buffn = buffc[n]; -#ifndef CONV_INDEX for (l = 0; l < n; l++) { if ((((mlib_addr) (sl + l * sll)) & 7) == 0) buff[l] = (mlib_d64 *) (sl + l * sll); @@ -958,31 +163,18 @@ if ((mlib_addr) sp & 7) mlib_ImageCopy_na((void *)sp, (void *)buffn, ssize); -#endif /* CONV_INDEX */ -#ifdef CONV_INDEX - ik_last = 0; -#else ik_last = (m - 1); -#endif /* CONV_INDEX */ for (jk = 0; jk < n; jk += jk_size) { jk_size = n - jk; -#ifdef CONV_INDEX - - if (jk_size >= 5) - jk_size = 3; - - if (jk_size == 4) - jk_size = 2; -#else if (jk_size >= 6) jk_size = 4; if (jk_size == 5) jk_size = 3; -#endif /* CONV_INDEX */ + coff = 0; if (jk_size == 1) { @@ -1335,21 +527,12 @@ *****************************************/ jk_size = n; -#ifdef CONV_INDEX - - if (jk_size >= 5) - jk_size = 3; - - if (jk_size == 4) - jk_size = 2; -#else if (jk_size >= 6) jk_size = 4; if (jk_size == 5) jk_size = 3; -#endif /* CONV_INDEX */ k0 = karr[ik_last]; k1 = karr[ik_last + m]; @@ -1365,8 +548,6 @@ buff3 = buff[3] + doff; vis_write_gsr(gsr_scale + off); -#ifndef CONV_INDEX - if (jk_size == 1) { dp = ((mlib_addr) dl & 7) ? buffe : (mlib_d64 *) dl; @@ -1652,200 +833,6 @@ mlib_ImageCopy_na((void *)buffe, dl, xsize); } -#else /* CONV_INDEX */ - - if (jk_size == 1) { - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d2 = buffd[2 * i + 2]; - d3 = buffd[2 * i + 3]; - d4 = buffd[2 * i + 4]; - d5 = buffd[2 * i + 5]; - d0 = vis_fpadd16(d0, d00); - d1 = vis_fpadd16(d1, d01); - d2 = vis_fpadd16(d2, d02); - d3 = vis_fpadd16(d3, d03); - d4 = vis_fpadd16(d4, d04); - d5 = vis_fpadd16(d5, d05); - - buffe[i] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - buffd[2 * i + 2] = drnd; - buffd[2 * i + 3] = drnd; - buffd[2 * i + 4] = drnd; - buffd[2 * i + 5] = drnd; - - LOAD_SRC(); - } - } - else if (jk_size == 2) { - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d2 = buffd[2 * i + 2]; - d3 = buffd[2 * i + 3]; - d4 = buffd[2 * i + 4]; - d5 = buffd[2 * i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - - buffe[i] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - buffd[2 * i + 2] = drnd; - buffd[2 * i + 3] = drnd; - buffd[2 * i + 4] = drnd; - buffd[2 * i + 5] = drnd; - - LOAD_SRC(); - } - } - else { /* if (jk_size == 3) */ - - vis_write_gsr(gsr_scale + 7); - -#pragma pipeloop(0) - for (i = 0; i < dsize; i += 3) { - mlib_d64 d00, d01, d02, d03, d04, d05; - mlib_d64 d10, d11, d12, d13, d14, d15; - mlib_d64 d20, d21, d22, d23, d24, d25; - mlib_d64 d0, d1, d2, d3, d4, d5; - mlib_d64 s00 = buff0[i]; - mlib_d64 s01 = buff0[i + 1]; - mlib_d64 s02 = buff0[i + 2]; - mlib_d64 s10 = buff1[i]; - mlib_d64 s11 = buff1[i + 1]; - mlib_d64 s12 = buff1[i + 2]; - mlib_d64 s20 = buff2[i]; - mlib_d64 s21 = buff2[i + 1]; - mlib_d64 s22 = buff2[i + 2]; - - d00 = vis_fmul8x16au(vis_read_hi(s00), k0); - d01 = vis_fmul8x16au(vis_read_lo(s00), k0); - d02 = vis_fmul8x16au(vis_read_hi(s01), k0); - d03 = vis_fmul8x16au(vis_read_lo(s01), k0); - d04 = vis_fmul8x16au(vis_read_hi(s02), k0); - d05 = vis_fmul8x16au(vis_read_lo(s02), k0); - d10 = vis_fmul8x16au(vis_read_hi(s10), k1); - d11 = vis_fmul8x16au(vis_read_lo(s10), k1); - d12 = vis_fmul8x16au(vis_read_hi(s11), k1); - d13 = vis_fmul8x16au(vis_read_lo(s11), k1); - d14 = vis_fmul8x16au(vis_read_hi(s12), k1); - d15 = vis_fmul8x16au(vis_read_lo(s12), k1); - d20 = vis_fmul8x16au(vis_read_hi(s20), k2); - d21 = vis_fmul8x16au(vis_read_lo(s20), k2); - d22 = vis_fmul8x16au(vis_read_hi(s21), k2); - d23 = vis_fmul8x16au(vis_read_lo(s21), k2); - d24 = vis_fmul8x16au(vis_read_hi(s22), k2); - d25 = vis_fmul8x16au(vis_read_lo(s22), k2); - - d0 = buffd[2 * i]; - d1 = buffd[2 * i + 1]; - d2 = buffd[2 * i + 2]; - d3 = buffd[2 * i + 3]; - d4 = buffd[2 * i + 4]; - d5 = buffd[2 * i + 5]; - d0 = vis_fpadd16(d0, d00); - d0 = vis_fpadd16(d0, d10); - d0 = vis_fpadd16(d0, d20); - d1 = vis_fpadd16(d1, d01); - d1 = vis_fpadd16(d1, d11); - d1 = vis_fpadd16(d1, d21); - d2 = vis_fpadd16(d2, d02); - d2 = vis_fpadd16(d2, d12); - d2 = vis_fpadd16(d2, d22); - d3 = vis_fpadd16(d3, d03); - d3 = vis_fpadd16(d3, d13); - d3 = vis_fpadd16(d3, d23); - d4 = vis_fpadd16(d4, d04); - d4 = vis_fpadd16(d4, d14); - d4 = vis_fpadd16(d4, d24); - d5 = vis_fpadd16(d5, d05); - d5 = vis_fpadd16(d5, d15); - d5 = vis_fpadd16(d5, d25); - - buffe[i] = vis_fpack16_pair(d0, d1); - buffe[i + 1] = vis_fpack16_pair(d2, d3); - buffe[i + 2] = vis_fpack16_pair(d4, d5); - - buffd[2 * i] = drnd; - buffd[2 * i + 1] = drnd; - buffd[2 * i + 2] = drnd; - buffd[2 * i + 3] = drnd; - buffd[2 * i + 4] = drnd; - buffd[2 * i + 5] = drnd; - - LOAD_SRC(); - } - } - -#endif /* CONV_INDEX */ - -#ifdef CONV_INDEX - ColorTrue2IndexLine((void *)buffe, dl, wid, colormap); -#endif /* CONV_INDEX */ - sl += sll; dl += dll;