jdk-sandbox: jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageConvIndex3_8

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
90ce3da70b43 Initial load duke parents: diff changeset	2	* Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved.
90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load duke parents: diff changeset	7	* published by the Free Software Foundation. Sun designates this
90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load duke parents: diff changeset	9	* by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
90ce3da70b43 Initial load duke parents: diff changeset	21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load duke parents: diff changeset	22	* CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load duke parents: diff changeset	23	* have any questions.
90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25
90ce3da70b43 Initial load duke parents: diff changeset	26
90ce3da70b43 Initial load duke parents: diff changeset	27
90ce3da70b43 Initial load duke parents: diff changeset	28	/*
90ce3da70b43 Initial load duke parents: diff changeset	29	* FUNCTION
90ce3da70b43 Initial load duke parents: diff changeset	30	* Internal functions for mlib_ImageConv* on U8 type
90ce3da70b43 Initial load duke parents: diff changeset	31	* and MLIB_EDGE_DST_NO_WRITE mask
90ce3da70b43 Initial load duke parents: diff changeset	32	*
90ce3da70b43 Initial load duke parents: diff changeset	33	*/
90ce3da70b43 Initial load duke parents: diff changeset	34
90ce3da70b43 Initial load duke parents: diff changeset	35	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	36
90ce3da70b43 Initial load duke parents: diff changeset	37	#include <vis_proto.h>
90ce3da70b43 Initial load duke parents: diff changeset	38	#include <mlib_image.h>
90ce3da70b43 Initial load duke parents: diff changeset	39	#include <mlib_ImageCheck.h>
90ce3da70b43 Initial load duke parents: diff changeset	40	#include <mlib_ImageColormap.h>
90ce3da70b43 Initial load duke parents: diff changeset	41
/*
  This defines the switches that select between the functions in
  files: mlib_v_ImageConv_8nw.c,
         mlib_v_ImageConvIndex3_8_16nw.c,
         mlib_v_ImageConvIndex4_8_16nw.c
*/
90ce3da70b43 Initial load duke parents: diff changeset	50
/*
 * Build-time configuration for this translation unit.
 *
 * CONV_INDEX - when defined, the colour-indexed variant is compiled:
 *              CONV_FUNC gains a `colormap` argument, NCHAN is fixed at 3
 *              and the source rows are expanded through lookup tables.
 * DTYPE      - element type of the image rows addressed by sl/sp/dl.
 * LTYPE      - element type of one lookup-table entry.
 */
#define CONV_INDEX

#define DTYPE mlib_s16
#define LTYPE mlib_u8
90ce3da70b43 Initial load duke parents: diff changeset	55
90ce3da70b43 Initial load duke parents: diff changeset	56	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	57
/*
 * CONV_FUNC(KERN) expands to the name and parameter list of the convolution
 * entry point for a KERN-sized kernel (e.g. CONV_FUNC(2x2)).  The return type
 * is written by the user of the macro.
 *
 *   dst, src  - destination and source images
 *   kern      - kernel coefficients
 *   scale     - scaling exponent applied to the result
 *   colormap  - (CONV_INDEX builds only) colormap handle that supplies the
 *               lookup tables
 */
#ifdef CONV_INDEX

#define CONV_FUNC(KERN)                                 \
  mlib_conv##KERN##_Index3_8_16nw(mlib_image *dst,      \
                                  mlib_image *src,      \
                                  mlib_s32   *kern,     \
                                  mlib_s32   scale,     \
                                  void       *colormap)

#else

#define CONV_FUNC(KERN)                         \
  mlib_conv##KERN##_8nw_f(mlib_image *dst,      \
                          mlib_image *src,      \
                          mlib_s32   *kern,     \
                          mlib_s32   scale)

#endif
90ce3da70b43 Initial load duke parents: diff changeset	76
90ce3da70b43 Initial load duke parents: diff changeset	77	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	78
/*
 * NCHAN - number of channels per pixel.
 * The indexed build always uses 3; otherwise it refers to the local
 * variable `nchan` declared by DEF_EXTRA_VARS.
 */
#ifdef CONV_INDEX

#define NCHAN 3

#else

#define NCHAN nchan

#endif
90ce3da70b43 Initial load duke parents: diff changeset	88
90ce3da70b43 Initial load duke parents: diff changeset	89	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	90
/*
 * DEF_VARS - declares the locals shared by every convolution function.
 * Expects `src`, `dst`, `kern` and `scale` to be in scope (the CONV_FUNC
 * parameters).  No trailing semicolon: the user writes `DEF_VARS;`.
 *
 *   sl, sp, dl        - row pointers into source / destination
 *   hgt, wid          - source image height and width
 *   sll, dll          - source / destination strides in DTYPE elements
 *   adr_src, adr_dst  - image data base addresses
 *   ssize .. emask    - per-function size and edge-mask scratch values
 *   buff_ind          - index into the row-buffer ring, starts at 0
 *   pbuff, dp         - scratch buffer base and output pointer
 *   karr              - kernel coefficients viewed as mlib_f32
 *   gsr_scale         - (31 - scale) << 3, later added to an alignment value
 *                       and passed to vis_write_gsr()
 *   drnd              - rounding constant taken from mlib_round_8[31 - scale]
 *                       (NOTE(review): the table has 16 entries, so this
 *                       assumes 16 <= scale <= 31 - confirm against callers)
 */
#define DEF_VARS                                                \
  DTYPE    *sl, *sp, *dl;                                       \
  mlib_s32 hgt = mlib_ImageGetHeight(src);                      \
  mlib_s32 wid = mlib_ImageGetWidth(src);                       \
  mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(DTYPE);      \
  mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(DTYPE);      \
  DTYPE    *adr_src = (DTYPE *)mlib_ImageGetData(src);          \
  DTYPE    *adr_dst = (DTYPE *)mlib_ImageGetData(dst);          \
  mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0;     \
  mlib_d64 *pbuff, *dp;                                         \
  mlib_f32 *karr = (mlib_f32 *)kern;                            \
  mlib_s32 gsr_scale = (31 - scale) << 3;                       \
  mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]);  \
  mlib_s32 i, j, l
90ce3da70b43 Initial load duke parents: diff changeset	105
90ce3da70b43 Initial load duke parents: diff changeset	106	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	107
/*
 * DEF_EXTRA_VARS - variant-specific locals, used right after DEF_VARS.
 *
 * Indexed build: fetches the colormap's offset and its array of per-channel
 * lookup tables, then pre-biases each table pointer by -offset so it can be
 * indexed directly with a raw source value.  ltbl3 falls back to ltbl2 when
 * there are only three channels.
 *
 * Non-indexed build: reads the channel count from the destination image.
 */
#ifdef CONV_INDEX

#define DEF_EXTRA_VARS                                                  \
  int   offset = mlib_ImageGetLutOffset(colormap);                      \
  LTYPE **lut_table = (LTYPE **)mlib_ImageGetLutData(colormap);         \
  LTYPE *ltbl0 = lut_table[0] - offset;                                 \
  LTYPE *ltbl1 = lut_table[1] - offset;                                 \
  LTYPE *ltbl2 = lut_table[2] - offset;                                 \
  LTYPE *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2

#else

#define DEF_EXTRA_VARS                          \
  mlib_s32 nchan = mlib_ImageGetChannels(dst)

#endif
90ce3da70b43 Initial load duke parents: diff changeset	124
90ce3da70b43 Initial load duke parents: diff changeset	125	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	126
/*
 * LOAD_SRC() - expand a run of indexed source pixels into 24 bytes of
 * channel data (three mlib_d64 words) and store them at buffn[i..i+2].
 *
 * Expects in scope: sp (source index pointer), ltbl0..ltbl2 (and ltbl3 in
 * the 4-channel form), buffn and i.  Advances sp past the consumed indices.
 *
 *   NCHAN == 3 : consumes 8 indices, 3 table bytes each.
 *   otherwise  : consumes 6 indices, 4 table bytes each.
 *
 * Each of t0/t1/t2 receives exactly eight vis_faligndata() steps, last byte
 * first.  Presumably the caller has set the GSR alignment offset to 7
 * (see the vis_write_gsr(gsr_scale + 7) calls) so each step shifts one new
 * byte in and all eight bytes end up replaced - TODO confirm against the
 * VIS manual.  The accumulators are zero-initialised so the first step never
 * reads an indeterminate value.
 */
#if NCHAN == 3

#define LOAD_SRC() {                                            \
    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
    mlib_d64 t0 = 0, t1 = 0, t2 = 0;                            \
                                                                \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
                                                                \
    buffn[i] = t0;                                              \
    buffn[i + 1] = t1;                                          \
    buffn[i + 2] = t2;                                          \
                                                                \
    sp += 8;                                                    \
  }

#else

#define LOAD_SRC() {                                            \
    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2];                \
    mlib_s32 s3 = sp[3], s4 = sp[4], s5 = sp[5];                \
    mlib_d64 t0 = 0, t1 = 0, t2 = 0;                            \
                                                                \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
                                                                \
    buffn[i] = t0;                                              \
    buffn[i + 1] = t1;                                          \
    buffn[i + 2] = t2;                                          \
                                                                \
    sp += 6;                                                    \
  }

#endif
90ce3da70b43 Initial load duke parents: diff changeset	206
90ce3da70b43 Initial load duke parents: diff changeset	207	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	208
90ce3da70b43 Initial load duke parents: diff changeset	209	static mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008,
90ce3da70b43 Initial load duke parents: diff changeset	210	0x00040004, 0x00020002, 0x00010001, 0x00000000,
90ce3da70b43 Initial load duke parents: diff changeset	211	0x00000000, 0x00000000, 0x00000000, 0x00000000,
90ce3da70b43 Initial load duke parents: diff changeset	212	0x00000000, 0x00000000, 0x00000000, 0x00000000 };
90ce3da70b43 Initial load duke parents: diff changeset	213
90ce3da70b43 Initial load duke parents: diff changeset	214	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	215
90ce3da70b43 Initial load duke parents: diff changeset	216	void mlib_ImageCopy_na(mlib_u8 sa, mlib_u8 da, int size);
90ce3da70b43 Initial load duke parents: diff changeset	217
90ce3da70b43 Initial load duke parents: diff changeset	218	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	219
/* Kernel side length used by the 2x2 convolution function that follows. */
#define KSIZE 2
90ce3da70b43 Initial load duke parents: diff changeset	221
90ce3da70b43 Initial load duke parents: diff changeset	222	mlib_status CONV_FUNC(2x2)
90ce3da70b43 Initial load duke parents: diff changeset	223	{
90ce3da70b43 Initial load duke parents: diff changeset	224	mlib_d64 buffs[2(KSIZE + 1)];
90ce3da70b43 Initial load duke parents: diff changeset	225	mlib_d64 buff0, buff1, buffn, buffd, *buffe;
90ce3da70b43 Initial load duke parents: diff changeset	226	mlib_d64 s00, s01, s10, s11, s0, s1;
90ce3da70b43 Initial load duke parents: diff changeset	227	mlib_d64 d0, d1, d00, d01, d10, d11;
90ce3da70b43 Initial load duke parents: diff changeset	228	DEF_VARS;
90ce3da70b43 Initial load duke parents: diff changeset	229	DEF_EXTRA_VARS;
90ce3da70b43 Initial load duke parents: diff changeset	230
90ce3da70b43 Initial load duke parents: diff changeset	231	sl = adr_src;
90ce3da70b43 Initial load duke parents: diff changeset	232	dl = adr_dst;
90ce3da70b43 Initial load duke parents: diff changeset	233
90ce3da70b43 Initial load duke parents: diff changeset	234	ssize = NCHAN*wid;
90ce3da70b43 Initial load duke parents: diff changeset	235	dsize = (ssize + 7)/8;
90ce3da70b43 Initial load duke parents: diff changeset	236	esize = dsize + 4;
90ce3da70b43 Initial load duke parents: diff changeset	237	pbuff = mlib_malloc((KSIZE + 4)esizesizeof(mlib_d64));
90ce3da70b43 Initial load duke parents: diff changeset	238	if (pbuff == NULL) return MLIB_FAILURE;
90ce3da70b43 Initial load duke parents: diff changeset	239
90ce3da70b43 Initial load duke parents: diff changeset	240	for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
90ce3da70b43 Initial load duke parents: diff changeset	241	for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
90ce3da70b43 Initial load duke parents: diff changeset	242	buffd = buffs[KSIZE] + esize;
90ce3da70b43 Initial load duke parents: diff changeset	243	buffe = buffd + 2*esize;
90ce3da70b43 Initial load duke parents: diff changeset	244
90ce3da70b43 Initial load duke parents: diff changeset	245	wid -= (KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	246	hgt -= (KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	247	xsize = ssize - NCHAN*(KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	248	emask = (0xFF00 >> (xsize & 7)) & 0xFF;
90ce3da70b43 Initial load duke parents: diff changeset	249
90ce3da70b43 Initial load duke parents: diff changeset	250	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	251
90ce3da70b43 Initial load duke parents: diff changeset	252	for (l = 0; l < KSIZE; l++) {
90ce3da70b43 Initial load duke parents: diff changeset	253	mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load duke parents: diff changeset	254	sp = sl + l*sll;
90ce3da70b43 Initial load duke parents: diff changeset	255
90ce3da70b43 Initial load duke parents: diff changeset	256	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	257	if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void)sp, (void)buffn, ssize);
90ce3da70b43 Initial load duke parents: diff changeset	258
90ce3da70b43 Initial load duke parents: diff changeset	259	#else
90ce3da70b43 Initial load duke parents: diff changeset	260	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	261	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	262	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	263	}
90ce3da70b43 Initial load duke parents: diff changeset	264	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	265	}
90ce3da70b43 Initial load duke parents: diff changeset	266
90ce3da70b43 Initial load duke parents: diff changeset	267	for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load duke parents: diff changeset	268	mlib_d64 **buffc = buffs + buff_ind;
90ce3da70b43 Initial load duke parents: diff changeset	269	mlib_f32 *pk = karr, k0, k1;
90ce3da70b43 Initial load duke parents: diff changeset	270	sp = sl + KSIZE*sll;
90ce3da70b43 Initial load duke parents: diff changeset	271
90ce3da70b43 Initial load duke parents: diff changeset	272	buff0 = buffc[0];
90ce3da70b43 Initial load duke parents: diff changeset	273	buff1 = buffc[1];
90ce3da70b43 Initial load duke parents: diff changeset	274	buffn = buffc[KSIZE];
90ce3da70b43 Initial load duke parents: diff changeset	275
90ce3da70b43 Initial load duke parents: diff changeset	276	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	277	if ((((mlib_addr)(sl )) & 7) == 0) buff0 = (mlib_d64*)sl;
90ce3da70b43 Initial load duke parents: diff changeset	278	if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll);
90ce3da70b43 Initial load duke parents: diff changeset	279	if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void)sp, (void)buffn, ssize);
90ce3da70b43 Initial load duke parents: diff changeset	280	#endif
90ce3da70b43 Initial load duke parents: diff changeset	281
90ce3da70b43 Initial load duke parents: diff changeset	282	k0 = pk[1];
90ce3da70b43 Initial load duke parents: diff changeset	283	k1 = pk[3];
90ce3da70b43 Initial load duke parents: diff changeset	284	vis_write_gsr(gsr_scale + NCHAN);
90ce3da70b43 Initial load duke parents: diff changeset	285
90ce3da70b43 Initial load duke parents: diff changeset	286	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	287	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	288	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	289	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	290	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	291	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	292	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	293	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	294	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	295	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	296
90ce3da70b43 Initial load duke parents: diff changeset	297	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	298	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	299	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	300	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	301
90ce3da70b43 Initial load duke parents: diff changeset	302	d0 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	303	d1 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	304	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	305	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	306	}
90ce3da70b43 Initial load duke parents: diff changeset	307
90ce3da70b43 Initial load duke parents: diff changeset	308	k0 = pk[0];
90ce3da70b43 Initial load duke parents: diff changeset	309	k1 = pk[2];
90ce3da70b43 Initial load duke parents: diff changeset	310	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	311	dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load duke parents: diff changeset	312
90ce3da70b43 Initial load duke parents: diff changeset	313	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	314	for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	315	s0 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	316	s1 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	317
90ce3da70b43 Initial load duke parents: diff changeset	318	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	319	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	320	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	321	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	322
90ce3da70b43 Initial load duke parents: diff changeset	323	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	324	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	325	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	326	d0 = vis_fpadd16(d0, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	327	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	328	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	329	d1 = vis_fpadd16(d1, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	330	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	331	dp[i] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	332	}
90ce3da70b43 Initial load duke parents: diff changeset	333
90ce3da70b43 Initial load duke parents: diff changeset	334	if (emask) {
90ce3da70b43 Initial load duke parents: diff changeset	335	s0 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	336	s1 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	337
90ce3da70b43 Initial load duke parents: diff changeset	338	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	339	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	340	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	341	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	342
90ce3da70b43 Initial load duke parents: diff changeset	343	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	344	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	345	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	346	d0 = vis_fpadd16(d0, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	347	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	348	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	349	d1 = vis_fpadd16(d1, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	350	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	351
90ce3da70b43 Initial load duke parents: diff changeset	352	d0 = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	353	vis_pst_8(d0, dp + i, emask);
90ce3da70b43 Initial load duke parents: diff changeset	354	}
90ce3da70b43 Initial load duke parents: diff changeset	355
90ce3da70b43 Initial load duke parents: diff changeset	356	if ((mlib_u8)dp != dl) mlib_ImageCopy_na((void)buffe, dl, xsize);
90ce3da70b43 Initial load duke parents: diff changeset	357
90ce3da70b43 Initial load duke parents: diff changeset	358	#else
90ce3da70b43 Initial load duke parents: diff changeset	359	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	360
90ce3da70b43 Initial load duke parents: diff changeset	361	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	362	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	363	mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load duke parents: diff changeset	364	mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load duke parents: diff changeset	365	mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load duke parents: diff changeset	366	mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	367	mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	368	mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	369	mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	370	mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	371	mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	372
90ce3da70b43 Initial load duke parents: diff changeset	373	d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	374	d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	375	d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	376	d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	377	d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	378	d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	379	d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	380	d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	381	d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	382	d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	383	d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	384	d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	385
90ce3da70b43 Initial load duke parents: diff changeset	386	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	387	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	388	d2 = buffd[2*i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	389	d3 = buffd[2*i + 3];
90ce3da70b43 Initial load duke parents: diff changeset	390	d4 = buffd[2*i + 4];
90ce3da70b43 Initial load duke parents: diff changeset	391	d5 = buffd[2*i + 5];
90ce3da70b43 Initial load duke parents: diff changeset	392	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	393	d0 = vis_fpadd16(d0, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	394	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	395	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	396	d1 = vis_fpadd16(d1, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	397	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	398	d02 = vis_fpadd16(d02, d12);
90ce3da70b43 Initial load duke parents: diff changeset	399	d2 = vis_fpadd16(d2, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	400	d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load duke parents: diff changeset	401	d03 = vis_fpadd16(d03, d13);
90ce3da70b43 Initial load duke parents: diff changeset	402	d3 = vis_fpadd16(d3, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	403	d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load duke parents: diff changeset	404	d04 = vis_fpadd16(d04, d14);
90ce3da70b43 Initial load duke parents: diff changeset	405	d4 = vis_fpadd16(d4, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	406	d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load duke parents: diff changeset	407	d05 = vis_fpadd16(d05, d15);
90ce3da70b43 Initial load duke parents: diff changeset	408	d5 = vis_fpadd16(d5, drnd);
90ce3da70b43 Initial load duke parents: diff changeset	409	d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load duke parents: diff changeset	410
90ce3da70b43 Initial load duke parents: diff changeset	411	buffe[i ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	412	buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load duke parents: diff changeset	413	buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load duke parents: diff changeset	414
90ce3da70b43 Initial load duke parents: diff changeset	415	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	416	}
90ce3da70b43 Initial load duke parents: diff changeset	417
90ce3da70b43 Initial load duke parents: diff changeset	418	mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
90ce3da70b43 Initial load duke parents: diff changeset	419	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	420
90ce3da70b43 Initial load duke parents: diff changeset	421	sl += sll;
90ce3da70b43 Initial load duke parents: diff changeset	422	dl += dll;
90ce3da70b43 Initial load duke parents: diff changeset	423
90ce3da70b43 Initial load duke parents: diff changeset	424	buff_ind++;
90ce3da70b43 Initial load duke parents: diff changeset	425	if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
90ce3da70b43 Initial load duke parents: diff changeset	426	}
90ce3da70b43 Initial load duke parents: diff changeset	427
90ce3da70b43 Initial load duke parents: diff changeset	428	mlib_free(pbuff);
90ce3da70b43 Initial load duke parents: diff changeset	429
90ce3da70b43 Initial load duke parents: diff changeset	430	return MLIB_SUCCESS;
90ce3da70b43 Initial load duke parents: diff changeset	431	}
90ce3da70b43 Initial load duke parents: diff changeset	432
90ce3da70b43 Initial load duke parents: diff changeset	433	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	434
/*
 * Re-point KSIZE at the kernel dimension for the CONV_FUNC(3x3) definition
 * that follows; the #undef discards the value the preceding function used.
 * Within the 3x3 function KSIZE controls: how many source rows are preloaded
 * before the main loop, the bound of the per-kernel-column loop (ik), the
 * (KSIZE - 1) shrink applied to wid/hgt, the (KSIZE - 1)/2 offset of the
 * first destination pixel, and the (KSIZE + 1) count of row buffers.
 */
90ce3da70b43 Initial load duke parents: diff changeset	435	#undef KSIZE
90ce3da70b43 Initial load duke parents: diff changeset	436	#define KSIZE 3
90ce3da70b43 Initial load duke parents: diff changeset	437
90ce3da70b43 Initial load duke parents: diff changeset	438	mlib_status CONV_FUNC(3x3)
90ce3da70b43 Initial load duke parents: diff changeset	439	{
90ce3da70b43 Initial load duke parents: diff changeset	440	mlib_d64 buffs[2(KSIZE + 1)];
90ce3da70b43 Initial load duke parents: diff changeset	441	mlib_d64 buff0, buff1, buff2, buffn, buffd, buffe;
90ce3da70b43 Initial load duke parents: diff changeset	442	mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2;
90ce3da70b43 Initial load duke parents: diff changeset	443	mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21;
90ce3da70b43 Initial load duke parents: diff changeset	444	mlib_s32 ik, ik_last, off, doff;
90ce3da70b43 Initial load duke parents: diff changeset	445	DEF_VARS;
90ce3da70b43 Initial load duke parents: diff changeset	446	DEF_EXTRA_VARS;
90ce3da70b43 Initial load duke parents: diff changeset	447
90ce3da70b43 Initial load duke parents: diff changeset	448	sl = adr_src;
90ce3da70b43 Initial load duke parents: diff changeset	449	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	450	dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1);
90ce3da70b43 Initial load duke parents: diff changeset	451	#else
90ce3da70b43 Initial load duke parents: diff changeset	452	dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN);
90ce3da70b43 Initial load duke parents: diff changeset	453	#endif
90ce3da70b43 Initial load duke parents: diff changeset	454
90ce3da70b43 Initial load duke parents: diff changeset	455	ssize = NCHAN*wid;
90ce3da70b43 Initial load duke parents: diff changeset	456	dsize = (ssize + 7)/8;
90ce3da70b43 Initial load duke parents: diff changeset	457	esize = dsize + 4;
90ce3da70b43 Initial load duke parents: diff changeset	458	pbuff = mlib_malloc((KSIZE + 4)esizesizeof(mlib_d64));
90ce3da70b43 Initial load duke parents: diff changeset	459	if (pbuff == NULL) return MLIB_FAILURE;
90ce3da70b43 Initial load duke parents: diff changeset	460
90ce3da70b43 Initial load duke parents: diff changeset	461	for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
90ce3da70b43 Initial load duke parents: diff changeset	462	for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
90ce3da70b43 Initial load duke parents: diff changeset	463	buffd = buffs[KSIZE] + esize;
90ce3da70b43 Initial load duke parents: diff changeset	464	buffe = buffd + 2*esize;
90ce3da70b43 Initial load duke parents: diff changeset	465
90ce3da70b43 Initial load duke parents: diff changeset	466	wid -= (KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	467	hgt -= (KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	468	xsize = ssize - NCHAN*(KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	469	emask = (0xFF00 >> (xsize & 7)) & 0xFF;
90ce3da70b43 Initial load duke parents: diff changeset	470
90ce3da70b43 Initial load duke parents: diff changeset	471	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	472
90ce3da70b43 Initial load duke parents: diff changeset	473	for (l = 0; l < KSIZE; l++) {
90ce3da70b43 Initial load duke parents: diff changeset	474	mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load duke parents: diff changeset	475	sp = sl + l*sll;
90ce3da70b43 Initial load duke parents: diff changeset	476
90ce3da70b43 Initial load duke parents: diff changeset	477	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	478	if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void)sp, (void)buffn, ssize);
90ce3da70b43 Initial load duke parents: diff changeset	479	#else
90ce3da70b43 Initial load duke parents: diff changeset	480	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	481	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	482	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	483	}
90ce3da70b43 Initial load duke parents: diff changeset	484	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	485	}
90ce3da70b43 Initial load duke parents: diff changeset	486
90ce3da70b43 Initial load duke parents: diff changeset	487	/* init buffer */
90ce3da70b43 Initial load duke parents: diff changeset	488	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	489	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	490	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	491	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	492	}
90ce3da70b43 Initial load duke parents: diff changeset	493
90ce3da70b43 Initial load duke parents: diff changeset	494	for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load duke parents: diff changeset	495	mlib_d64 *buffc = buffs + buff_ind, pbuff0, pbuff1, pbuff2;
90ce3da70b43 Initial load duke parents: diff changeset	496	mlib_f32 *pk = karr, k0, k1, k2;
90ce3da70b43 Initial load duke parents: diff changeset	497	sp = sl + KSIZE*sll;
90ce3da70b43 Initial load duke parents: diff changeset	498
90ce3da70b43 Initial load duke parents: diff changeset	499	pbuff0 = buffc[0];
90ce3da70b43 Initial load duke parents: diff changeset	500	pbuff1 = buffc[1];
90ce3da70b43 Initial load duke parents: diff changeset	501	pbuff2 = buffc[2];
90ce3da70b43 Initial load duke parents: diff changeset	502	buffn = buffc[KSIZE];
90ce3da70b43 Initial load duke parents: diff changeset	503
90ce3da70b43 Initial load duke parents: diff changeset	504	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	505	if ((((mlib_addr)(sl )) & 7) == 0) pbuff0 = (mlib_d64*)sl;
90ce3da70b43 Initial load duke parents: diff changeset	506	if ((((mlib_addr)(sl + sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll);
90ce3da70b43 Initial load duke parents: diff changeset	507	if ((((mlib_addr)(sl + 2sll)) & 7) == 0) pbuff2 = (mlib_d64)(sl + 2*sll);
90ce3da70b43 Initial load duke parents: diff changeset	508
90ce3da70b43 Initial load duke parents: diff changeset	509	if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void)sp, (void)buffn, ssize);
90ce3da70b43 Initial load duke parents: diff changeset	510	#endif
90ce3da70b43 Initial load duke parents: diff changeset	511
90ce3da70b43 Initial load duke parents: diff changeset	512	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	513	ik_last = 0;
90ce3da70b43 Initial load duke parents: diff changeset	514	#else
90ce3da70b43 Initial load duke parents: diff changeset	515	ik_last = (KSIZE - 1);
90ce3da70b43 Initial load duke parents: diff changeset	516	#endif
90ce3da70b43 Initial load duke parents: diff changeset	517
90ce3da70b43 Initial load duke parents: diff changeset	518	for (ik = 0; ik < KSIZE; ik++) {
90ce3da70b43 Initial load duke parents: diff changeset	519	k0 = pk[ik];
90ce3da70b43 Initial load duke parents: diff changeset	520	k1 = pk[ik + KSIZE];
90ce3da70b43 Initial load duke parents: diff changeset	521	k2 = pk[ik + 2*KSIZE];
90ce3da70b43 Initial load duke parents: diff changeset	522
90ce3da70b43 Initial load duke parents: diff changeset	523	off = ik*NCHAN;
90ce3da70b43 Initial load duke parents: diff changeset	524	doff = off/8;
90ce3da70b43 Initial load duke parents: diff changeset	525	off &= 7;
90ce3da70b43 Initial load duke parents: diff changeset	526	buff0 = pbuff0 + doff;
90ce3da70b43 Initial load duke parents: diff changeset	527	buff1 = pbuff1 + doff;
90ce3da70b43 Initial load duke parents: diff changeset	528	buff2 = pbuff2 + doff;
90ce3da70b43 Initial load duke parents: diff changeset	529	vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load duke parents: diff changeset	530
90ce3da70b43 Initial load duke parents: diff changeset	531	if (ik == ik_last) continue;
90ce3da70b43 Initial load duke parents: diff changeset	532	/*if (!ik_last) {
90ce3da70b43 Initial load duke parents: diff changeset	533	if ((off & 3) \|\| (ik == (KSIZE - 1))) {
90ce3da70b43 Initial load duke parents: diff changeset	534	ik_last = ik;
90ce3da70b43 Initial load duke parents: diff changeset	535	continue;
90ce3da70b43 Initial load duke parents: diff changeset	536	}
90ce3da70b43 Initial load duke parents: diff changeset	537	}*/
90ce3da70b43 Initial load duke parents: diff changeset	538
90ce3da70b43 Initial load duke parents: diff changeset	539	if (off == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	540	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	541	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	542	s0 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	543	s1 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	544	s2 = buff2[i];
90ce3da70b43 Initial load duke parents: diff changeset	545
90ce3da70b43 Initial load duke parents: diff changeset	546	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	547	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	548	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	549	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	550	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	551	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	552
90ce3da70b43 Initial load duke parents: diff changeset	553	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	554	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	555	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	556	d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load duke parents: diff changeset	557	d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load duke parents: diff changeset	558	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	559	d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load duke parents: diff changeset	560	d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load duke parents: diff changeset	561	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	562	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	563	}
90ce3da70b43 Initial load duke parents: diff changeset	564
90ce3da70b43 Initial load duke parents: diff changeset	565	} else if (off == 4) {
90ce3da70b43 Initial load duke parents: diff changeset	566	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	567	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	568	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	569	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	570	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	571	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	572	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	573	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	574	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	575	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	576	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	577
90ce3da70b43 Initial load duke parents: diff changeset	578	d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	579	d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	580	d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	581	d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	582	d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	583	d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	584
90ce3da70b43 Initial load duke parents: diff changeset	585	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	586	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	587	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	588	d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load duke parents: diff changeset	589	d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load duke parents: diff changeset	590	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	591	d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load duke parents: diff changeset	592	d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load duke parents: diff changeset	593	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	594	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	595	}
90ce3da70b43 Initial load duke parents: diff changeset	596
90ce3da70b43 Initial load duke parents: diff changeset	597	} else {
90ce3da70b43 Initial load duke parents: diff changeset	598	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	599	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	600	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	601	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	602	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	603	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	604	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	605	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	606	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	607	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	608	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	609	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	610	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	611	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	612
90ce3da70b43 Initial load duke parents: diff changeset	613	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	614	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	615	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	616	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	617	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	618	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	619
90ce3da70b43 Initial load duke parents: diff changeset	620	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	621	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	622	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	623	d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load duke parents: diff changeset	624	d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load duke parents: diff changeset	625	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	626	d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load duke parents: diff changeset	627	d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load duke parents: diff changeset	628	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	629	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	630	}
90ce3da70b43 Initial load duke parents: diff changeset	631	}
90ce3da70b43 Initial load duke parents: diff changeset	632	}
90ce3da70b43 Initial load duke parents: diff changeset	633
90ce3da70b43 Initial load duke parents: diff changeset	634	k0 = pk[ik_last];
90ce3da70b43 Initial load duke parents: diff changeset	635	k1 = pk[ik_last + KSIZE];
90ce3da70b43 Initial load duke parents: diff changeset	636	k2 = pk[ik_last + 2*KSIZE];
90ce3da70b43 Initial load duke parents: diff changeset	637
90ce3da70b43 Initial load duke parents: diff changeset	638	off = ik_last*NCHAN;
90ce3da70b43 Initial load duke parents: diff changeset	639	doff = off/8;
90ce3da70b43 Initial load duke parents: diff changeset	640	off &= 7;
90ce3da70b43 Initial load duke parents: diff changeset	641	buff0 = pbuff0 + doff;
90ce3da70b43 Initial load duke parents: diff changeset	642	buff1 = pbuff1 + doff;
90ce3da70b43 Initial load duke parents: diff changeset	643	buff2 = pbuff2 + doff;
90ce3da70b43 Initial load duke parents: diff changeset	644	vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load duke parents: diff changeset	645
90ce3da70b43 Initial load duke parents: diff changeset	646	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	647	dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load duke parents: diff changeset	648
90ce3da70b43 Initial load duke parents: diff changeset	649	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	650	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	651	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	652	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	653	for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	654	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	655	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	656	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	657	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	658	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	659	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	660	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	661	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	662	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	663
90ce3da70b43 Initial load duke parents: diff changeset	664	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	665	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	666	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	667	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	668	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	669	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	670
90ce3da70b43 Initial load duke parents: diff changeset	671	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	672	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	673	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	674	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	675	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	676	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	677	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	678	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	679
90ce3da70b43 Initial load duke parents: diff changeset	680	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	681	dp[i] = dd;
90ce3da70b43 Initial load duke parents: diff changeset	682
90ce3da70b43 Initial load duke parents: diff changeset	683	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	684	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	685	}
90ce3da70b43 Initial load duke parents: diff changeset	686
90ce3da70b43 Initial load duke parents: diff changeset	687	if (emask) {
90ce3da70b43 Initial load duke parents: diff changeset	688	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	689	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	690	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	691	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	692	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	693	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	694	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	695	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	696	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	697
90ce3da70b43 Initial load duke parents: diff changeset	698	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	699	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	700	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	701	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	702	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	703	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	704
90ce3da70b43 Initial load duke parents: diff changeset	705	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	706	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	707	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	708	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	709	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	710	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	711	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	712	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	713
90ce3da70b43 Initial load duke parents: diff changeset	714	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	715	vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load duke parents: diff changeset	716
90ce3da70b43 Initial load duke parents: diff changeset	717	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	718	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	719	}
90ce3da70b43 Initial load duke parents: diff changeset	720
90ce3da70b43 Initial load duke parents: diff changeset	721	if ((mlib_u8)dp != dl) mlib_ImageCopy_na((void)buffe, dl, xsize);
90ce3da70b43 Initial load duke parents: diff changeset	722
90ce3da70b43 Initial load duke parents: diff changeset	723	#else
90ce3da70b43 Initial load duke parents: diff changeset	724	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	725
90ce3da70b43 Initial load duke parents: diff changeset	726	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	727	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	728	mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load duke parents: diff changeset	729	mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load duke parents: diff changeset	730	mlib_d64 d20, d21, d22, d23, d24, d25;
90ce3da70b43 Initial load duke parents: diff changeset	731	mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load duke parents: diff changeset	732	mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	733	mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	734	mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	735	mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	736	mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	737	mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	738	mlib_d64 s20 = buff2[i];
90ce3da70b43 Initial load duke parents: diff changeset	739	mlib_d64 s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	740	mlib_d64 s22 = buff2[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	741
90ce3da70b43 Initial load duke parents: diff changeset	742	d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	743	d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	744	d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	745	d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	746	d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	747	d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	748	d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	749	d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	750	d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	751	d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	752	d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	753	d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	754	d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	755	d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	756	d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	757	d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	758	d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
90ce3da70b43 Initial load duke parents: diff changeset	759	d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
90ce3da70b43 Initial load duke parents: diff changeset	760
90ce3da70b43 Initial load duke parents: diff changeset	761	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	762	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	763	d2 = buffd[2*i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	764	d3 = buffd[2*i + 3];
90ce3da70b43 Initial load duke parents: diff changeset	765	d4 = buffd[2*i + 4];
90ce3da70b43 Initial load duke parents: diff changeset	766	d5 = buffd[2*i + 5];
90ce3da70b43 Initial load duke parents: diff changeset	767	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	768	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	769	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	770	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	771	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	772	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	773	d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load duke parents: diff changeset	774	d2 = vis_fpadd16(d2, d12);
90ce3da70b43 Initial load duke parents: diff changeset	775	d2 = vis_fpadd16(d2, d22);
90ce3da70b43 Initial load duke parents: diff changeset	776	d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load duke parents: diff changeset	777	d3 = vis_fpadd16(d3, d13);
90ce3da70b43 Initial load duke parents: diff changeset	778	d3 = vis_fpadd16(d3, d23);
90ce3da70b43 Initial load duke parents: diff changeset	779	d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load duke parents: diff changeset	780	d4 = vis_fpadd16(d4, d14);
90ce3da70b43 Initial load duke parents: diff changeset	781	d4 = vis_fpadd16(d4, d24);
90ce3da70b43 Initial load duke parents: diff changeset	782	d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load duke parents: diff changeset	783	d5 = vis_fpadd16(d5, d15);
90ce3da70b43 Initial load duke parents: diff changeset	784	d5 = vis_fpadd16(d5, d25);
90ce3da70b43 Initial load duke parents: diff changeset	785
90ce3da70b43 Initial load duke parents: diff changeset	786	buffe[i ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	787	buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load duke parents: diff changeset	788	buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load duke parents: diff changeset	789
90ce3da70b43 Initial load duke parents: diff changeset	790	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	791	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	792	buffd[2*i + 2] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	793	buffd[2*i + 3] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	794	buffd[2*i + 4] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	795	buffd[2*i + 5] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	796
90ce3da70b43 Initial load duke parents: diff changeset	797	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	798	}
90ce3da70b43 Initial load duke parents: diff changeset	799
90ce3da70b43 Initial load duke parents: diff changeset	800	mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
90ce3da70b43 Initial load duke parents: diff changeset	801	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	802
90ce3da70b43 Initial load duke parents: diff changeset	803	sl += sll;
90ce3da70b43 Initial load duke parents: diff changeset	804	dl += dll;
90ce3da70b43 Initial load duke parents: diff changeset	805
90ce3da70b43 Initial load duke parents: diff changeset	806	buff_ind++;
90ce3da70b43 Initial load duke parents: diff changeset	807	if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
90ce3da70b43 Initial load duke parents: diff changeset	808	}
90ce3da70b43 Initial load duke parents: diff changeset	809
90ce3da70b43 Initial load duke parents: diff changeset	810	mlib_free(pbuff);
90ce3da70b43 Initial load duke parents: diff changeset	811
90ce3da70b43 Initial load duke parents: diff changeset	812	return MLIB_SUCCESS;
90ce3da70b43 Initial load duke parents: diff changeset	813	}
90ce3da70b43 Initial load duke parents: diff changeset	814
90ce3da70b43 Initial load duke parents: diff changeset	815	/***************************************************************/
90ce3da70b43 Initial load duke parents: diff changeset	816
/* The preceding routine wraps its row-buffer index at KSIZE + 1; the MxN
 * routine below uses its runtime parameter n for that purpose, so the
 * macro is dropped here. */
90ce3da70b43 Initial load duke parents: diff changeset	817	#undef KSIZE
/* Largest kernel height n for which the MxN routine below keeps its table
 * of row-buffer pointers in the on-stack buffs_local array (sized from
 * MAX_N + 1); when n > MAX_N the table is obtained from mlib_malloc. */
90ce3da70b43 Initial load duke parents: diff changeset	818	#define MAX_N 11
90ce3da70b43 Initial load duke parents: diff changeset	819
90ce3da70b43 Initial load duke parents: diff changeset	820	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	821
90ce3da70b43 Initial load duke parents: diff changeset	822	mlib_status mlib_convMxN_Index3_8_16nw(mlib_image *dst,
90ce3da70b43 Initial load duke parents: diff changeset	823	mlib_image *src,
90ce3da70b43 Initial load duke parents: diff changeset	824	mlib_s32 m,
90ce3da70b43 Initial load duke parents: diff changeset	825	mlib_s32 n,
90ce3da70b43 Initial load duke parents: diff changeset	826	mlib_s32 dm,
90ce3da70b43 Initial load duke parents: diff changeset	827	mlib_s32 dn,
90ce3da70b43 Initial load duke parents: diff changeset	828	mlib_s32 *kern,
90ce3da70b43 Initial load duke parents: diff changeset	829	mlib_s32 scale,
90ce3da70b43 Initial load duke parents: diff changeset	830	void *colormap)
90ce3da70b43 Initial load duke parents: diff changeset	831
90ce3da70b43 Initial load duke parents: diff changeset	832	#else
90ce3da70b43 Initial load duke parents: diff changeset	833
90ce3da70b43 Initial load duke parents: diff changeset	834	mlib_status mlib_convMxN_8nw_f(mlib_image *dst,
90ce3da70b43 Initial load duke parents: diff changeset	835	mlib_image *src,
90ce3da70b43 Initial load duke parents: diff changeset	836	mlib_s32 m,
90ce3da70b43 Initial load duke parents: diff changeset	837	mlib_s32 n,
90ce3da70b43 Initial load duke parents: diff changeset	838	mlib_s32 dm,
90ce3da70b43 Initial load duke parents: diff changeset	839	mlib_s32 dn,
90ce3da70b43 Initial load duke parents: diff changeset	840	mlib_s32 *kern,
90ce3da70b43 Initial load duke parents: diff changeset	841	mlib_s32 scale)
90ce3da70b43 Initial load duke parents: diff changeset	842
90ce3da70b43 Initial load duke parents: diff changeset	843	#endif
90ce3da70b43 Initial load duke parents: diff changeset	844	{
90ce3da70b43 Initial load duke parents: diff changeset	845	mlib_d64 buffs_local[3(MAX_N + 1)], buffs = buffs_local, buff;
90ce3da70b43 Initial load duke parents: diff changeset	846	mlib_d64 buff0, buff1, buff2, buff3, buffn, buffd, *buffe;
90ce3da70b43 Initial load duke parents: diff changeset	847	mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3;
90ce3da70b43 Initial load duke parents: diff changeset	848	mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31;
90ce3da70b43 Initial load duke parents: diff changeset	849	mlib_d64 dd, d0, d1;
90ce3da70b43 Initial load duke parents: diff changeset	850	mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff;
90ce3da70b43 Initial load duke parents: diff changeset	851	DEF_VARS;
90ce3da70b43 Initial load duke parents: diff changeset	852	DEF_EXTRA_VARS;
90ce3da70b43 Initial load duke parents: diff changeset	853
90ce3da70b43 Initial load duke parents: diff changeset	854	if (n > MAX_N) {
90ce3da70b43 Initial load duke parents: diff changeset	855	buffs = mlib_malloc(3(n + 1)sizeof(mlib_d64*));
90ce3da70b43 Initial load duke parents: diff changeset	856	if (buffs == NULL) return MLIB_FAILURE;
90ce3da70b43 Initial load duke parents: diff changeset	857	}
90ce3da70b43 Initial load duke parents: diff changeset	858
90ce3da70b43 Initial load duke parents: diff changeset	859	buff = buffs + 2*(n + 1);
90ce3da70b43 Initial load duke parents: diff changeset	860
90ce3da70b43 Initial load duke parents: diff changeset	861	sl = adr_src;
90ce3da70b43 Initial load duke parents: diff changeset	862	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	863	dl = adr_dst + dn*dll + dm;
90ce3da70b43 Initial load duke parents: diff changeset	864	#else
90ce3da70b43 Initial load duke parents: diff changeset	865	dl = adr_dst + dndll + dmNCHAN;
90ce3da70b43 Initial load duke parents: diff changeset	866	#endif
90ce3da70b43 Initial load duke parents: diff changeset	867
90ce3da70b43 Initial load duke parents: diff changeset	868	ssize = NCHAN*wid;
90ce3da70b43 Initial load duke parents: diff changeset	869	dsize = (ssize + 7)/8;
90ce3da70b43 Initial load duke parents: diff changeset	870	esize = dsize + 4;
90ce3da70b43 Initial load duke parents: diff changeset	871	pbuff = mlib_malloc((n + 4)esizesizeof(mlib_d64));
90ce3da70b43 Initial load duke parents: diff changeset	872	if (pbuff == NULL) {
90ce3da70b43 Initial load duke parents: diff changeset	873	if (buffs != buffs_local) mlib_free(buffs);
90ce3da70b43 Initial load duke parents: diff changeset	874	return MLIB_FAILURE;
90ce3da70b43 Initial load duke parents: diff changeset	875	}
90ce3da70b43 Initial load duke parents: diff changeset	876
90ce3da70b43 Initial load duke parents: diff changeset	877	for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize;
90ce3da70b43 Initial load duke parents: diff changeset	878	for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i];
90ce3da70b43 Initial load duke parents: diff changeset	879	buffd = buffs[n] + esize;
90ce3da70b43 Initial load duke parents: diff changeset	880	buffe = buffd + 2*esize;
90ce3da70b43 Initial load duke parents: diff changeset	881
90ce3da70b43 Initial load duke parents: diff changeset	882	wid -= (m - 1);
90ce3da70b43 Initial load duke parents: diff changeset	883	hgt -= (n - 1);
90ce3da70b43 Initial load duke parents: diff changeset	884	xsize = ssize - NCHAN*(m - 1);
90ce3da70b43 Initial load duke parents: diff changeset	885	emask = (0xFF00 >> (xsize & 7)) & 0xFF;
90ce3da70b43 Initial load duke parents: diff changeset	886
90ce3da70b43 Initial load duke parents: diff changeset	887	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	888
90ce3da70b43 Initial load duke parents: diff changeset	889	for (l = 0; l < n; l++) {
90ce3da70b43 Initial load duke parents: diff changeset	890	mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load duke parents: diff changeset	891	sp = sl + l*sll;
90ce3da70b43 Initial load duke parents: diff changeset	892
90ce3da70b43 Initial load duke parents: diff changeset	893	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	894	if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void)sp, (void)buffn, ssize);
90ce3da70b43 Initial load duke parents: diff changeset	895	#else
90ce3da70b43 Initial load duke parents: diff changeset	896	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	897	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	898	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	899	}
90ce3da70b43 Initial load duke parents: diff changeset	900	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	901	}
90ce3da70b43 Initial load duke parents: diff changeset	902
90ce3da70b43 Initial load duke parents: diff changeset	903	/* init buffer */
90ce3da70b43 Initial load duke parents: diff changeset	904	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	905	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	906	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	907	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	908	}
90ce3da70b43 Initial load duke parents: diff changeset	909
90ce3da70b43 Initial load duke parents: diff changeset	910	for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load duke parents: diff changeset	911	mlib_d64 **buffc = buffs + buff_ind;
90ce3da70b43 Initial load duke parents: diff changeset	912	mlib_f32 *pk = karr, k0, k1, k2, k3;
90ce3da70b43 Initial load duke parents: diff changeset	913	sp = sl + n*sll;
90ce3da70b43 Initial load duke parents: diff changeset	914
90ce3da70b43 Initial load duke parents: diff changeset	915	for (l = 0; l < n; l++) {
90ce3da70b43 Initial load duke parents: diff changeset	916	buff[l] = buffc[l];
90ce3da70b43 Initial load duke parents: diff changeset	917	}
90ce3da70b43 Initial load duke parents: diff changeset	918	buffn = buffc[n];
90ce3da70b43 Initial load duke parents: diff changeset	919
90ce3da70b43 Initial load duke parents: diff changeset	920	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	921	for (l = 0; l < n; l++) {
90ce3da70b43 Initial load duke parents: diff changeset	922	if ((((mlib_addr)(sl + lsll)) & 7) == 0) buff[l] = (mlib_d64)(sl + l*sll);
90ce3da70b43 Initial load duke parents: diff changeset	923	}
90ce3da70b43 Initial load duke parents: diff changeset	924	if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void)sp, (void)buffn, ssize);
90ce3da70b43 Initial load duke parents: diff changeset	925	#endif
90ce3da70b43 Initial load duke parents: diff changeset	926
90ce3da70b43 Initial load duke parents: diff changeset	927	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	928	ik_last = 0;
90ce3da70b43 Initial load duke parents: diff changeset	929	#else
90ce3da70b43 Initial load duke parents: diff changeset	930	ik_last = (m - 1);
90ce3da70b43 Initial load duke parents: diff changeset	931	#endif
90ce3da70b43 Initial load duke parents: diff changeset	932
90ce3da70b43 Initial load duke parents: diff changeset	933	for (jk = 0; jk < n; jk += jk_size) {
90ce3da70b43 Initial load duke parents: diff changeset	934	jk_size = n - jk;
90ce3da70b43 Initial load duke parents: diff changeset	935	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	936	if (jk_size >= 5) jk_size = 3;
90ce3da70b43 Initial load duke parents: diff changeset	937	if (jk_size == 4) jk_size = 2;
90ce3da70b43 Initial load duke parents: diff changeset	938	#else
90ce3da70b43 Initial load duke parents: diff changeset	939	if (jk_size >= 6) jk_size = 4;
90ce3da70b43 Initial load duke parents: diff changeset	940	if (jk_size == 5) jk_size = 3;
90ce3da70b43 Initial load duke parents: diff changeset	941	#endif
90ce3da70b43 Initial load duke parents: diff changeset	942	coff = 0;
90ce3da70b43 Initial load duke parents: diff changeset	943
90ce3da70b43 Initial load duke parents: diff changeset	944	if (jk_size == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	945
90ce3da70b43 Initial load duke parents: diff changeset	946	for (ik = 0; ik < m; ik++, coff += NCHAN) {
90ce3da70b43 Initial load duke parents: diff changeset	947	if (!jk && ik == ik_last) continue;
90ce3da70b43 Initial load duke parents: diff changeset	948
90ce3da70b43 Initial load duke parents: diff changeset	949	k0 = pk[ik];
90ce3da70b43 Initial load duke parents: diff changeset	950	k1 = pk[ik + m];
90ce3da70b43 Initial load duke parents: diff changeset	951
90ce3da70b43 Initial load duke parents: diff changeset	952	doff = coff/8;
90ce3da70b43 Initial load duke parents: diff changeset	953	buff0 = buff[jk ] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	954	buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	955
90ce3da70b43 Initial load duke parents: diff changeset	956	off = coff & 7;
90ce3da70b43 Initial load duke parents: diff changeset	957	vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load duke parents: diff changeset	958
90ce3da70b43 Initial load duke parents: diff changeset	959	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	960	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	961	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	962	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	963	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	964	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	965	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	966	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	967	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	968	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	969
90ce3da70b43 Initial load duke parents: diff changeset	970	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	971	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	972	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	973	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	974
90ce3da70b43 Initial load duke parents: diff changeset	975	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	976	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	977	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	978	d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load duke parents: diff changeset	979	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	980	d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load duke parents: diff changeset	981	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	982	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	983	}
90ce3da70b43 Initial load duke parents: diff changeset	984
90ce3da70b43 Initial load duke parents: diff changeset	985	}
90ce3da70b43 Initial load duke parents: diff changeset	986
90ce3da70b43 Initial load duke parents: diff changeset	987	pk += 2*m;
90ce3da70b43 Initial load duke parents: diff changeset	988
90ce3da70b43 Initial load duke parents: diff changeset	989	} else if (jk_size == 3) {
90ce3da70b43 Initial load duke parents: diff changeset	990
90ce3da70b43 Initial load duke parents: diff changeset	991	for (ik = 0; ik < m; ik++, coff += NCHAN) {
90ce3da70b43 Initial load duke parents: diff changeset	992	if (!jk && ik == ik_last) continue;
90ce3da70b43 Initial load duke parents: diff changeset	993
90ce3da70b43 Initial load duke parents: diff changeset	994	k0 = pk[ik];
90ce3da70b43 Initial load duke parents: diff changeset	995	k1 = pk[ik + m];
90ce3da70b43 Initial load duke parents: diff changeset	996	k2 = pk[ik + 2*m];
90ce3da70b43 Initial load duke parents: diff changeset	997
90ce3da70b43 Initial load duke parents: diff changeset	998	doff = coff/8;
90ce3da70b43 Initial load duke parents: diff changeset	999	buff0 = buff[jk ] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1000	buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1001	buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1002
90ce3da70b43 Initial load duke parents: diff changeset	1003	off = coff & 7;
90ce3da70b43 Initial load duke parents: diff changeset	1004	vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load duke parents: diff changeset	1005
90ce3da70b43 Initial load duke parents: diff changeset	1006	if (off == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	1007	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1008	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1009	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1010	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1011
90ce3da70b43 Initial load duke parents: diff changeset	1012	s0 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	1013	s1 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	1014	s2 = buff2[i];
90ce3da70b43 Initial load duke parents: diff changeset	1015
90ce3da70b43 Initial load duke parents: diff changeset	1016	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1017	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1018	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1019	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1020	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1021	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1022
90ce3da70b43 Initial load duke parents: diff changeset	1023	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1024	d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load duke parents: diff changeset	1025	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	1026	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1027	d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1028	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1029	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	1030	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	1031	}
90ce3da70b43 Initial load duke parents: diff changeset	1032
90ce3da70b43 Initial load duke parents: diff changeset	1033	} else if (off == 4) {
90ce3da70b43 Initial load duke parents: diff changeset	1034	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1035	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1036	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	1037	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1038	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1039	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1040	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1041
90ce3da70b43 Initial load duke parents: diff changeset	1042	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1043	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1044	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1045	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1046	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1047	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1048
90ce3da70b43 Initial load duke parents: diff changeset	1049	d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1050	d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1051	d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1052	d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1053	d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1054	d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1055
90ce3da70b43 Initial load duke parents: diff changeset	1056	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1057	d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load duke parents: diff changeset	1058	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	1059	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1060	d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1061	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1062	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	1063	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	1064	}
90ce3da70b43 Initial load duke parents: diff changeset	1065
90ce3da70b43 Initial load duke parents: diff changeset	1066	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1067	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1068	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1069	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	1070	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1071	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1072	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1073	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1074
90ce3da70b43 Initial load duke parents: diff changeset	1075	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1076	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1077	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1078	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1079	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1080	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1081	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1082	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1083	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	1084
90ce3da70b43 Initial load duke parents: diff changeset	1085	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1086	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1087	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1088	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1089	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1090	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1091
90ce3da70b43 Initial load duke parents: diff changeset	1092	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1093	d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load duke parents: diff changeset	1094	d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load duke parents: diff changeset	1095	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1096	d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1097	d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1098	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	1099	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	1100	}
90ce3da70b43 Initial load duke parents: diff changeset	1101	}
90ce3da70b43 Initial load duke parents: diff changeset	1102	}
90ce3da70b43 Initial load duke parents: diff changeset	1103
90ce3da70b43 Initial load duke parents: diff changeset	1104	pk += 3*m;
90ce3da70b43 Initial load duke parents: diff changeset	1105
90ce3da70b43 Initial load duke parents: diff changeset	1106	} else { /* jk_size == 4 */
90ce3da70b43 Initial load duke parents: diff changeset	1107
90ce3da70b43 Initial load duke parents: diff changeset	1108	for (ik = 0; ik < m; ik++, coff += NCHAN) {
90ce3da70b43 Initial load duke parents: diff changeset	1109	if (!jk && ik == ik_last) continue;
90ce3da70b43 Initial load duke parents: diff changeset	1110
90ce3da70b43 Initial load duke parents: diff changeset	1111	k0 = pk[ik];
90ce3da70b43 Initial load duke parents: diff changeset	1112	k1 = pk[ik + m];
90ce3da70b43 Initial load duke parents: diff changeset	1113	k2 = pk[ik + 2*m];
90ce3da70b43 Initial load duke parents: diff changeset	1114	k3 = pk[ik + 3*m];
90ce3da70b43 Initial load duke parents: diff changeset	1115
90ce3da70b43 Initial load duke parents: diff changeset	1116	doff = coff/8;
90ce3da70b43 Initial load duke parents: diff changeset	1117	buff0 = buff[jk ] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1118	buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1119	buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1120	buff3 = buff[jk + 3] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1121
90ce3da70b43 Initial load duke parents: diff changeset	1122	off = coff & 7;
90ce3da70b43 Initial load duke parents: diff changeset	1123	vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load duke parents: diff changeset	1124
90ce3da70b43 Initial load duke parents: diff changeset	1125	if (off == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	1126
90ce3da70b43 Initial load duke parents: diff changeset	1127	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1128	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1129	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1130	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1131
90ce3da70b43 Initial load duke parents: diff changeset	1132	s0 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	1133	s1 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	1134	s2 = buff2[i];
90ce3da70b43 Initial load duke parents: diff changeset	1135	s3 = buff3[i];
90ce3da70b43 Initial load duke parents: diff changeset	1136
90ce3da70b43 Initial load duke parents: diff changeset	1137	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1138	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1139	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1140	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1141	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1142	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1143	d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1144	d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1145
90ce3da70b43 Initial load duke parents: diff changeset	1146	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1147	d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load duke parents: diff changeset	1148	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1149	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1150	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1151	d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load duke parents: diff changeset	1152	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1153	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1154	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	1155	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	1156	}
90ce3da70b43 Initial load duke parents: diff changeset	1157
90ce3da70b43 Initial load duke parents: diff changeset	1158	} else if (off == 4) {
90ce3da70b43 Initial load duke parents: diff changeset	1159
90ce3da70b43 Initial load duke parents: diff changeset	1160	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1161	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1162	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	1163	s31 = buff3[0];
90ce3da70b43 Initial load duke parents: diff changeset	1164	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1165	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1166	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1167	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1168
90ce3da70b43 Initial load duke parents: diff changeset	1169	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1170	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1171	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1172	s30 = s31;
90ce3da70b43 Initial load duke parents: diff changeset	1173	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1174	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1175	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1176	s31 = buff3[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1177
90ce3da70b43 Initial load duke parents: diff changeset	1178	d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1179	d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1180	d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1181	d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1182	d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1183	d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1184	d30 = vis_fmul8x16au(vis_read_lo(s30), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1185	d31 = vis_fmul8x16au(vis_read_hi(s31), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1186
90ce3da70b43 Initial load duke parents: diff changeset	1187	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1188	d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load duke parents: diff changeset	1189	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1190	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1191	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1192	d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load duke parents: diff changeset	1193	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1194	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1195	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	1196	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	1197	}
90ce3da70b43 Initial load duke parents: diff changeset	1198
90ce3da70b43 Initial load duke parents: diff changeset	1199	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1200
90ce3da70b43 Initial load duke parents: diff changeset	1201	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1202	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1203	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	1204	s31 = buff3[0];
90ce3da70b43 Initial load duke parents: diff changeset	1205	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1206	for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1207	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1208	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1209
90ce3da70b43 Initial load duke parents: diff changeset	1210	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1211	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1212	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1213	s30 = s31;
90ce3da70b43 Initial load duke parents: diff changeset	1214	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1215	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1216	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1217	s31 = buff3[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1218	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1219	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1220	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	1221	s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load duke parents: diff changeset	1222
90ce3da70b43 Initial load duke parents: diff changeset	1223	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1224	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1225	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1226	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1227	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1228	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1229	d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1230	d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1231
90ce3da70b43 Initial load duke parents: diff changeset	1232	d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1233	d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load duke parents: diff changeset	1234	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1235	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1236	d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1237	d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load duke parents: diff changeset	1238	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1239	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1240	buffd[2*i] = d0;
90ce3da70b43 Initial load duke parents: diff changeset	1241	buffd[2*i + 1] = d1;
90ce3da70b43 Initial load duke parents: diff changeset	1242	}
90ce3da70b43 Initial load duke parents: diff changeset	1243	}
90ce3da70b43 Initial load duke parents: diff changeset	1244	}
90ce3da70b43 Initial load duke parents: diff changeset	1245
90ce3da70b43 Initial load duke parents: diff changeset	1246	pk += 4*m;
90ce3da70b43 Initial load duke parents: diff changeset	1247	}
90ce3da70b43 Initial load duke parents: diff changeset	1248	}
90ce3da70b43 Initial load duke parents: diff changeset	1249
90ce3da70b43 Initial load duke parents: diff changeset	1250	/*****************************************
90ce3da70b43 Initial load duke parents: diff changeset	1251	*****************************************
90ce3da70b43 Initial load duke parents: diff changeset	1252	Final iteration
90ce3da70b43 Initial load duke parents: diff changeset	1253	*****************************************
90ce3da70b43 Initial load duke parents: diff changeset	1254	*****************************************/
90ce3da70b43 Initial load duke parents: diff changeset	1255
90ce3da70b43 Initial load duke parents: diff changeset	1256	jk_size = n;
90ce3da70b43 Initial load duke parents: diff changeset	1257	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	1258	if (jk_size >= 5) jk_size = 3;
90ce3da70b43 Initial load duke parents: diff changeset	1259	if (jk_size == 4) jk_size = 2;
90ce3da70b43 Initial load duke parents: diff changeset	1260	#else
90ce3da70b43 Initial load duke parents: diff changeset	1261	if (jk_size >= 6) jk_size = 4;
90ce3da70b43 Initial load duke parents: diff changeset	1262	if (jk_size == 5) jk_size = 3;
90ce3da70b43 Initial load duke parents: diff changeset	1263	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1264
90ce3da70b43 Initial load duke parents: diff changeset	1265	k0 = karr[ik_last];
90ce3da70b43 Initial load duke parents: diff changeset	1266	k1 = karr[ik_last + m];
90ce3da70b43 Initial load duke parents: diff changeset	1267	k2 = karr[ik_last + 2*m];
90ce3da70b43 Initial load duke parents: diff changeset	1268	k3 = karr[ik_last + 3*m];
90ce3da70b43 Initial load duke parents: diff changeset	1269
90ce3da70b43 Initial load duke parents: diff changeset	1270	off = ik_last*NCHAN;
90ce3da70b43 Initial load duke parents: diff changeset	1271	doff = off/8;
90ce3da70b43 Initial load duke parents: diff changeset	1272	off &= 7;
90ce3da70b43 Initial load duke parents: diff changeset	1273	buff0 = buff[0] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1274	buff1 = buff[1] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1275	buff2 = buff[2] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1276	buff3 = buff[3] + doff;
90ce3da70b43 Initial load duke parents: diff changeset	1277	vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load duke parents: diff changeset	1278
90ce3da70b43 Initial load duke parents: diff changeset	1279	#ifndef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	1280	if (jk_size == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	1281	dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load duke parents: diff changeset	1282
90ce3da70b43 Initial load duke parents: diff changeset	1283	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1284	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1285	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1286	for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1287	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1288	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1289	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1290	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1291	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1292	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1293
90ce3da70b43 Initial load duke parents: diff changeset	1294	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1295	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1296	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1297	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1298
90ce3da70b43 Initial load duke parents: diff changeset	1299	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1300	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1301	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1302	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1303	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1304	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1305
90ce3da70b43 Initial load duke parents: diff changeset	1306	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1307	dp[i] = dd;
90ce3da70b43 Initial load duke parents: diff changeset	1308
90ce3da70b43 Initial load duke parents: diff changeset	1309	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1310	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1311	}
90ce3da70b43 Initial load duke parents: diff changeset	1312
90ce3da70b43 Initial load duke parents: diff changeset	1313	if (emask) {
90ce3da70b43 Initial load duke parents: diff changeset	1314	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1315	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1316	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1317	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1318	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1319	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1320
90ce3da70b43 Initial load duke parents: diff changeset	1321	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1322	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1323	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1324	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1325
90ce3da70b43 Initial load duke parents: diff changeset	1326	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1327	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1328	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1329	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1330	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1331	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1332
90ce3da70b43 Initial load duke parents: diff changeset	1333	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1334	vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load duke parents: diff changeset	1335
90ce3da70b43 Initial load duke parents: diff changeset	1336	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1337	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1338	}
90ce3da70b43 Initial load duke parents: diff changeset	1339
90ce3da70b43 Initial load duke parents: diff changeset	1340	if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load duke parents: diff changeset	1341
90ce3da70b43 Initial load duke parents: diff changeset	1342	} else if (jk_size == 3) {
90ce3da70b43 Initial load duke parents: diff changeset	1343
90ce3da70b43 Initial load duke parents: diff changeset	1344	dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load duke parents: diff changeset	1345
90ce3da70b43 Initial load duke parents: diff changeset	1346	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1347	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1348	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	1349	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1350	for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1351	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1352	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1353	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1354	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1355	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1356	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1357	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1358	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1359	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	1360
90ce3da70b43 Initial load duke parents: diff changeset	1361	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1362	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1363	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1364	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1365	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1366	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1367
90ce3da70b43 Initial load duke parents: diff changeset	1368	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1369	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1370	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1371	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1372	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1373	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1374	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1375	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1376
90ce3da70b43 Initial load duke parents: diff changeset	1377	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1378	dp[i] = dd;
90ce3da70b43 Initial load duke parents: diff changeset	1379
90ce3da70b43 Initial load duke parents: diff changeset	1380	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1381	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1382	}
90ce3da70b43 Initial load duke parents: diff changeset	1383
90ce3da70b43 Initial load duke parents: diff changeset	1384	if (emask) {
90ce3da70b43 Initial load duke parents: diff changeset	1385	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1386	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1387	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1388	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1389	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1390	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1391	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1392	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1393	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	1394
90ce3da70b43 Initial load duke parents: diff changeset	1395	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1396	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1397	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1398	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1399	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1400	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1401
90ce3da70b43 Initial load duke parents: diff changeset	1402	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1403	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1404	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1405	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1406	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1407	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1408	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1409	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1410
90ce3da70b43 Initial load duke parents: diff changeset	1411	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1412	vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load duke parents: diff changeset	1413
90ce3da70b43 Initial load duke parents: diff changeset	1414	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1415	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1416	}
90ce3da70b43 Initial load duke parents: diff changeset	1417
90ce3da70b43 Initial load duke parents: diff changeset	1418	if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load duke parents: diff changeset	1419
90ce3da70b43 Initial load duke parents: diff changeset	1420	} else /* if (jk_size == 4) */ {
90ce3da70b43 Initial load duke parents: diff changeset	1421
90ce3da70b43 Initial load duke parents: diff changeset	1422	dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load duke parents: diff changeset	1423
90ce3da70b43 Initial load duke parents: diff changeset	1424	s01 = buff0[0];
90ce3da70b43 Initial load duke parents: diff changeset	1425	s11 = buff1[0];
90ce3da70b43 Initial load duke parents: diff changeset	1426	s21 = buff2[0];
90ce3da70b43 Initial load duke parents: diff changeset	1427	s31 = buff3[0];
90ce3da70b43 Initial load duke parents: diff changeset	1428	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1429	for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	1430	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1431	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1432	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1433	s30 = s31;
90ce3da70b43 Initial load duke parents: diff changeset	1434	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1435	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1436	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1437	s31 = buff3[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1438	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1439	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1440	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	1441	s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load duke parents: diff changeset	1442
90ce3da70b43 Initial load duke parents: diff changeset	1443	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1444	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1445	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1446	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1447	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1448	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1449	d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1450	d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1451
90ce3da70b43 Initial load duke parents: diff changeset	1452	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1453	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1454	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1455	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1456	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1457	d0 = vis_fpadd16(d0, d30);
90ce3da70b43 Initial load duke parents: diff changeset	1458	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1459	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1460	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1461	d1 = vis_fpadd16(d1, d31);
90ce3da70b43 Initial load duke parents: diff changeset	1462
90ce3da70b43 Initial load duke parents: diff changeset	1463	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1464	dp[i] = dd;
90ce3da70b43 Initial load duke parents: diff changeset	1465
90ce3da70b43 Initial load duke parents: diff changeset	1466	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1467	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1468	}
90ce3da70b43 Initial load duke parents: diff changeset	1469
90ce3da70b43 Initial load duke parents: diff changeset	1470	if (emask) {
90ce3da70b43 Initial load duke parents: diff changeset	1471	s00 = s01;
90ce3da70b43 Initial load duke parents: diff changeset	1472	s10 = s11;
90ce3da70b43 Initial load duke parents: diff changeset	1473	s20 = s21;
90ce3da70b43 Initial load duke parents: diff changeset	1474	s30 = s31;
90ce3da70b43 Initial load duke parents: diff changeset	1475	s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1476	s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1477	s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1478	s31 = buff3[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1479	s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load duke parents: diff changeset	1480	s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load duke parents: diff changeset	1481	s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load duke parents: diff changeset	1482	s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load duke parents: diff changeset	1483
90ce3da70b43 Initial load duke parents: diff changeset	1484	d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1485	d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1486	d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1487	d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1488	d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1489	d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1490	d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1491	d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load duke parents: diff changeset	1492
90ce3da70b43 Initial load duke parents: diff changeset	1493	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1494	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1495	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1496	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1497	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1498	d0 = vis_fpadd16(d0, d30);
90ce3da70b43 Initial load duke parents: diff changeset	1499	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1500	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1501	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1502	d1 = vis_fpadd16(d1, d31);
90ce3da70b43 Initial load duke parents: diff changeset	1503
90ce3da70b43 Initial load duke parents: diff changeset	1504	dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1505	vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load duke parents: diff changeset	1506
90ce3da70b43 Initial load duke parents: diff changeset	1507	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1508	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1509	}
90ce3da70b43 Initial load duke parents: diff changeset	1510
90ce3da70b43 Initial load duke parents: diff changeset	1511	if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load duke parents: diff changeset	1512	}
90ce3da70b43 Initial load duke parents: diff changeset	1513
90ce3da70b43 Initial load duke parents: diff changeset	1514	#else /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	1515
90ce3da70b43 Initial load duke parents: diff changeset	1516	if (jk_size == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	1517	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	1518
90ce3da70b43 Initial load duke parents: diff changeset	1519	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1520	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	1521	mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load duke parents: diff changeset	1522	mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load duke parents: diff changeset	1523	mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load duke parents: diff changeset	1524	mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	1525	mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1526	mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1527	mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	1528	mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1529	mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1530
90ce3da70b43 Initial load duke parents: diff changeset	1531	d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1532	d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1533	d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1534	d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1535	d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1536	d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1537	d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1538	d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1539	d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1540	d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1541	d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1542	d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1543
90ce3da70b43 Initial load duke parents: diff changeset	1544	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1545	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1546	d2 = buffd[2*i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1547	d3 = buffd[2*i + 3];
90ce3da70b43 Initial load duke parents: diff changeset	1548	d4 = buffd[2*i + 4];
90ce3da70b43 Initial load duke parents: diff changeset	1549	d5 = buffd[2*i + 5];
90ce3da70b43 Initial load duke parents: diff changeset	1550	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1551	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1552	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1553	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1554	d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load duke parents: diff changeset	1555	d2 = vis_fpadd16(d2, d12);
90ce3da70b43 Initial load duke parents: diff changeset	1556	d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load duke parents: diff changeset	1557	d3 = vis_fpadd16(d3, d13);
90ce3da70b43 Initial load duke parents: diff changeset	1558	d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load duke parents: diff changeset	1559	d4 = vis_fpadd16(d4, d14);
90ce3da70b43 Initial load duke parents: diff changeset	1560	d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load duke parents: diff changeset	1561	d5 = vis_fpadd16(d5, d15);
90ce3da70b43 Initial load duke parents: diff changeset	1562
90ce3da70b43 Initial load duke parents: diff changeset	1563	buffe[i ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1564	buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load duke parents: diff changeset	1565	buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load duke parents: diff changeset	1566
90ce3da70b43 Initial load duke parents: diff changeset	1567	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1568	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1569	buffd[2*i + 2] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1570	buffd[2*i + 3] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1571	buffd[2*i + 4] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1572	buffd[2*i + 5] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1573
90ce3da70b43 Initial load duke parents: diff changeset	1574	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	1575	}
90ce3da70b43 Initial load duke parents: diff changeset	1576
90ce3da70b43 Initial load duke parents: diff changeset	1577	} else /* if (jk_size == 3) */ {
90ce3da70b43 Initial load duke parents: diff changeset	1578	vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load duke parents: diff changeset	1579
90ce3da70b43 Initial load duke parents: diff changeset	1580	#pragma pipeloop(0)
90ce3da70b43 Initial load duke parents: diff changeset	1581	for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load duke parents: diff changeset	1582	mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load duke parents: diff changeset	1583	mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load duke parents: diff changeset	1584	mlib_d64 d20, d21, d22, d23, d24, d25;
90ce3da70b43 Initial load duke parents: diff changeset	1585	mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load duke parents: diff changeset	1586	mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load duke parents: diff changeset	1587	mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1588	mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1589	mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load duke parents: diff changeset	1590	mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1591	mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1592	mlib_d64 s20 = buff2[i];
90ce3da70b43 Initial load duke parents: diff changeset	1593	mlib_d64 s21 = buff2[i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1594	mlib_d64 s22 = buff2[i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1595
90ce3da70b43 Initial load duke parents: diff changeset	1596	d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1597	d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1598	d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1599	d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1600	d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1601	d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load duke parents: diff changeset	1602	d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1603	d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1604	d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1605	d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1606	d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1607	d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load duke parents: diff changeset	1608	d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1609	d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1610	d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1611	d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1612	d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1613	d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
90ce3da70b43 Initial load duke parents: diff changeset	1614
90ce3da70b43 Initial load duke parents: diff changeset	1615	d0 = buffd[2*i];
90ce3da70b43 Initial load duke parents: diff changeset	1616	d1 = buffd[2*i + 1];
90ce3da70b43 Initial load duke parents: diff changeset	1617	d2 = buffd[2*i + 2];
90ce3da70b43 Initial load duke parents: diff changeset	1618	d3 = buffd[2*i + 3];
90ce3da70b43 Initial load duke parents: diff changeset	1619	d4 = buffd[2*i + 4];
90ce3da70b43 Initial load duke parents: diff changeset	1620	d5 = buffd[2*i + 5];
90ce3da70b43 Initial load duke parents: diff changeset	1621	d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load duke parents: diff changeset	1622	d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load duke parents: diff changeset	1623	d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load duke parents: diff changeset	1624	d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load duke parents: diff changeset	1625	d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load duke parents: diff changeset	1626	d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load duke parents: diff changeset	1627	d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load duke parents: diff changeset	1628	d2 = vis_fpadd16(d2, d12);
90ce3da70b43 Initial load duke parents: diff changeset	1629	d2 = vis_fpadd16(d2, d22);
90ce3da70b43 Initial load duke parents: diff changeset	1630	d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load duke parents: diff changeset	1631	d3 = vis_fpadd16(d3, d13);
90ce3da70b43 Initial load duke parents: diff changeset	1632	d3 = vis_fpadd16(d3, d23);
90ce3da70b43 Initial load duke parents: diff changeset	1633	d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load duke parents: diff changeset	1634	d4 = vis_fpadd16(d4, d14);
90ce3da70b43 Initial load duke parents: diff changeset	1635	d4 = vis_fpadd16(d4, d24);
90ce3da70b43 Initial load duke parents: diff changeset	1636	d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load duke parents: diff changeset	1637	d5 = vis_fpadd16(d5, d15);
90ce3da70b43 Initial load duke parents: diff changeset	1638	d5 = vis_fpadd16(d5, d25);
90ce3da70b43 Initial load duke parents: diff changeset	1639
90ce3da70b43 Initial load duke parents: diff changeset	1640	buffe[i ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load duke parents: diff changeset	1641	buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load duke parents: diff changeset	1642	buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load duke parents: diff changeset	1643
90ce3da70b43 Initial load duke parents: diff changeset	1644	buffd[2*i ] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1645	buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1646	buffd[2*i + 2] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1647	buffd[2*i + 3] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1648	buffd[2*i + 4] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1649	buffd[2*i + 5] = drnd;
90ce3da70b43 Initial load duke parents: diff changeset	1650
90ce3da70b43 Initial load duke parents: diff changeset	1651	LOAD_SRC();
90ce3da70b43 Initial load duke parents: diff changeset	1652	}
90ce3da70b43 Initial load duke parents: diff changeset	1653	}
90ce3da70b43 Initial load duke parents: diff changeset	1654	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	1655
90ce3da70b43 Initial load duke parents: diff changeset	1656	#ifdef CONV_INDEX
90ce3da70b43 Initial load duke parents: diff changeset	1657	mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
90ce3da70b43 Initial load duke parents: diff changeset	1658	#endif /* CONV_INDEX */
90ce3da70b43 Initial load duke parents: diff changeset	1659
90ce3da70b43 Initial load duke parents: diff changeset	1660	sl += sll;
90ce3da70b43 Initial load duke parents: diff changeset	1661	dl += dll;
90ce3da70b43 Initial load duke parents: diff changeset	1662
90ce3da70b43 Initial load duke parents: diff changeset	1663	buff_ind++;
90ce3da70b43 Initial load duke parents: diff changeset	1664	if (buff_ind >= (n + 1)) buff_ind = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1665	}
90ce3da70b43 Initial load duke parents: diff changeset	1666
90ce3da70b43 Initial load duke parents: diff changeset	1667	mlib_free(pbuff);
90ce3da70b43 Initial load duke parents: diff changeset	1668	if (buffs != buffs_local) mlib_free(buffs);
90ce3da70b43 Initial load duke parents: diff changeset	1669
90ce3da70b43 Initial load duke parents: diff changeset	1670	return MLIB_SUCCESS;
90ce3da70b43 Initial load duke parents: diff changeset	1671	}
90ce3da70b43 Initial load duke parents: diff changeset	1672
90ce3da70b43 Initial load duke parents: diff changeset	1673	/***************************************************************/

author	duke
	Sat, 01 Dec 2007 00:00:00 +0000
changeset 2	90ce3da70b43
child 5506	202f599c92aa
permissions	-rw-r--r--