jdk-sandbox: jdk/src/share/native/sun/awt/libpng/pngvcrd.c@90ce3da70b43 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
90ce3da70b43 Initial load duke parents: diff changeset	2	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	3	*
90ce3da70b43 Initial load duke parents: diff changeset	4	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	5	* under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load duke parents: diff changeset	6	* published by the Free Software Foundation. Sun designates this
90ce3da70b43 Initial load duke parents: diff changeset	7	* particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load duke parents: diff changeset	8	* by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load duke parents: diff changeset	9	*
90ce3da70b43 Initial load duke parents: diff changeset	10	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	13	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	14	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	15	*
90ce3da70b43 Initial load duke parents: diff changeset	16	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	17	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	18	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	19	*
90ce3da70b43 Initial load duke parents: diff changeset	20	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load duke parents: diff changeset	21	* CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load duke parents: diff changeset	22	* have any questions.
90ce3da70b43 Initial load duke parents: diff changeset	23	*/
90ce3da70b43 Initial load duke parents: diff changeset	24
90ce3da70b43 Initial load duke parents: diff changeset	25	/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
90ce3da70b43 Initial load duke parents: diff changeset	26	*
90ce3da70b43 Initial load duke parents: diff changeset	27	* This file is available under and governed by the GNU General Public
90ce3da70b43 Initial load duke parents: diff changeset	28	* License version 2 only, as published by the Free Software Foundation.
90ce3da70b43 Initial load duke parents: diff changeset	29	* However, the following notice accompanied the original version of this
90ce3da70b43 Initial load duke parents: diff changeset	30	* file and, per its terms, should not be removed:
90ce3da70b43 Initial load duke parents: diff changeset	31	*
90ce3da70b43 Initial load duke parents: diff changeset	32	* For Intel x86 CPU and Microsoft Visual C++ compiler
90ce3da70b43 Initial load duke parents: diff changeset	33	*
90ce3da70b43 Initial load duke parents: diff changeset	34	* Last changed in libpng 1.2.6 - August 15, 2004
90ce3da70b43 Initial load duke parents: diff changeset	35	* For conditions of distribution and use, see copyright notice in png.h
90ce3da70b43 Initial load duke parents: diff changeset	36	* Copyright (c) 1998-2004 Glenn Randers-Pehrson
90ce3da70b43 Initial load duke parents: diff changeset	37	* Copyright (c) 1998, Intel Corporation
90ce3da70b43 Initial load duke parents: diff changeset	38	*
90ce3da70b43 Initial load duke parents: diff changeset	39	* Contributed by Nirav Chhatrapati, Intel Corporation, 1998
90ce3da70b43 Initial load duke parents: diff changeset	40	* Interface to libpng contributed by Gilles Vollant, 1999
90ce3da70b43 Initial load duke parents: diff changeset	41	*
90ce3da70b43 Initial load duke parents: diff changeset	42	*
90ce3da70b43 Initial load duke parents: diff changeset	43	* In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
90ce3da70b43 Initial load duke parents: diff changeset	44	* a sign error in the post-MMX cleanup code for each pixel_depth resulted
90ce3da70b43 Initial load duke parents: diff changeset	45	* in bad pixels at the beginning of some rows of some images, and also
90ce3da70b43 Initial load duke parents: diff changeset	46	* (due to out-of-range memory reads and writes) caused heap corruption
90ce3da70b43 Initial load duke parents: diff changeset	47	* when compiled with MSVC 6.0. The error was fixed in version 1.0.4e.
90ce3da70b43 Initial load duke parents: diff changeset	48	*
90ce3da70b43 Initial load duke parents: diff changeset	49	* [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
90ce3da70b43 Initial load duke parents: diff changeset	50	*
90ce3da70b43 Initial load duke parents: diff changeset	51	* [runtime MMX configuration, GRR 20010102]
90ce3da70b43 Initial load duke parents: diff changeset	52	*
90ce3da70b43 Initial load duke parents: diff changeset	53	*/
90ce3da70b43 Initial load duke parents: diff changeset	54
90ce3da70b43 Initial load duke parents: diff changeset	55	#define PNG_INTERNAL
90ce3da70b43 Initial load duke parents: diff changeset	56	#include "png.h"
90ce3da70b43 Initial load duke parents: diff changeset	57
90ce3da70b43 Initial load duke parents: diff changeset	58	#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)
90ce3da70b43 Initial load duke parents: diff changeset	59
/* Cached MMX probe result. Starts at 2, meaning png_mmx_support() has not
 * run yet; png_mmx_support() overwrites it with its own result (0 or 1). */
90ce3da70b43 Initial load duke parents: diff changeset	60	static int mmx_supported=2;
90ce3da70b43 Initial load duke parents: diff changeset	61
90ce3da70b43 Initial load duke parents: diff changeset	62
/*
 * png_mmx_support - probe the CPU for MMX support.
 *
 * Written with MSVC-style _asm inline assembly for x86. The probe:
 *   1. toggles the ID flag (bit 21) of EFLAGS and reads it back; if the
 *      flag did not change, CPUID is treated as unavailable;
 *   2. executes CPUID with eax = 0 and requires the returned eax to be
 *      at least 1;
 *   3. executes CPUID with eax = 1 and tests the 0x00800000 bit (bit 23)
 *      of edx.
 *
 * Returns 1 if that edx bit is set, 0 otherwise. The same value is also
 * stored in the file-scope variable mmx_supported.
 *
 * ebx, ecx and edx are pushed at the start of the asm block and popped at
 * its end; the original EFLAGS value is reloaded before the comparison.
 */
90ce3da70b43 Initial load duke parents: diff changeset	63	int PNGAPI
90ce3da70b43 Initial load duke parents: diff changeset	64	png_mmx_support(void)
90ce3da70b43 Initial load duke parents: diff changeset	65	{
90ce3da70b43 Initial load duke parents: diff changeset	66	int mmx_supported_local = 0;
90ce3da70b43 Initial load duke parents: diff changeset	67	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	68	push ebx //CPUID will trash these
90ce3da70b43 Initial load duke parents: diff changeset	69	push ecx
90ce3da70b43 Initial load duke parents: diff changeset	70	push edx
90ce3da70b43 Initial load duke parents: diff changeset	71
90ce3da70b43 Initial load duke parents: diff changeset	72	pushfd //Save Eflag to stack
90ce3da70b43 Initial load duke parents: diff changeset	73	pop eax //Get Eflag from stack into eax
90ce3da70b43 Initial load duke parents: diff changeset	74	mov ecx, eax //Make another copy of Eflag in ecx
90ce3da70b43 Initial load duke parents: diff changeset	75	xor eax, 0x200000 //Toggle ID bit in Eflag [i.e. bit(21)]
90ce3da70b43 Initial load duke parents: diff changeset	76	push eax //Save modified Eflag back to stack
90ce3da70b43 Initial load duke parents: diff changeset	77
90ce3da70b43 Initial load duke parents: diff changeset	78	popfd //Load modified value into Eflag reg
90ce3da70b43 Initial load duke parents: diff changeset	79	pushfd //Save Eflag to stack
90ce3da70b43 Initial load duke parents: diff changeset	80	pop eax //Get Eflag from stack
90ce3da70b43 Initial load duke parents: diff changeset	81	push ecx // save original Eflag to stack
90ce3da70b43 Initial load duke parents: diff changeset	82	popfd // restore original Eflag
90ce3da70b43 Initial load duke parents: diff changeset	83	xor eax, ecx //Compare the new Eflag with the original Eflag
90ce3da70b43 Initial load duke parents: diff changeset	84	jz NOT_SUPPORTED //If the same, CPUID instruction is not supported,
90ce3da70b43 Initial load duke parents: diff changeset	85	//skip following instructions and jump to
90ce3da70b43 Initial load duke parents: diff changeset	86	//NOT_SUPPORTED label
90ce3da70b43 Initial load duke parents: diff changeset	87
90ce3da70b43 Initial load duke parents: diff changeset	88	xor eax, eax //Set eax to zero
90ce3da70b43 Initial load duke parents: diff changeset	89
90ce3da70b43 Initial load duke parents: diff changeset	90	_asm _emit 0x0f //CPUID instruction (two bytes opcode)
90ce3da70b43 Initial load duke parents: diff changeset	91	_asm _emit 0xa2
90ce3da70b43 Initial load duke parents: diff changeset	92
90ce3da70b43 Initial load duke parents: diff changeset	93	cmp eax, 1 //make sure eax returned a value of at least 1
90ce3da70b43 Initial load duke parents: diff changeset	94	jl NOT_SUPPORTED //If eax is less than 1 (signed), mmx not supported
90ce3da70b43 Initial load duke parents: diff changeset	95
90ce3da70b43 Initial load duke parents: diff changeset	96	xor eax, eax //set eax to zero
90ce3da70b43 Initial load duke parents: diff changeset	97	inc eax //Now increment eax to 1. This instruction is
90ce3da70b43 Initial load duke parents: diff changeset	98	//faster than the instruction "mov eax, 1"
90ce3da70b43 Initial load duke parents: diff changeset	99
90ce3da70b43 Initial load duke parents: diff changeset	100	_asm _emit 0x0f //CPUID instruction
90ce3da70b43 Initial load duke parents: diff changeset	101	_asm _emit 0xa2
90ce3da70b43 Initial load duke parents: diff changeset	102
90ce3da70b43 Initial load duke parents: diff changeset	103	and edx, 0x00800000 //mask out all bits but the mmx bit (bit 23, counting from 0)
90ce3da70b43 Initial load duke parents: diff changeset	104	cmp edx, 0 // 0 = mmx not supported
90ce3da70b43 Initial load duke parents: diff changeset	105	jz NOT_SUPPORTED // non-zero = Yes, mmx IS supported
90ce3da70b43 Initial load duke parents: diff changeset	106
90ce3da70b43 Initial load duke parents: diff changeset	107	mov mmx_supported_local, 1 //set return value to 1
90ce3da70b43 Initial load duke parents: diff changeset	108
90ce3da70b43 Initial load duke parents: diff changeset	109	NOT_SUPPORTED:
90ce3da70b43 Initial load duke parents: diff changeset	110	mov eax, mmx_supported_local //move return value to eax
90ce3da70b43 Initial load duke parents: diff changeset	111	pop edx //CPUID trashed these
90ce3da70b43 Initial load duke parents: diff changeset	112	pop ecx
90ce3da70b43 Initial load duke parents: diff changeset	113	pop ebx
90ce3da70b43 Initial load duke parents: diff changeset	114	}
90ce3da70b43 Initial load duke parents: diff changeset	115
90ce3da70b43 Initial load duke parents: diff changeset	116	//mmx_supported_local=0; // test code for force don't support MMX
90ce3da70b43 Initial load duke parents: diff changeset	117	//printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);
90ce3da70b43 Initial load duke parents: diff changeset	118
90ce3da70b43 Initial load duke parents: diff changeset	119	mmx_supported = mmx_supported_local; /* cache the result at file scope */
90ce3da70b43 Initial load duke parents: diff changeset	120	return mmx_supported_local;
90ce3da70b43 Initial load duke parents: diff changeset	121	}
90ce3da70b43 Initial load duke parents: diff changeset	122
90ce3da70b43 Initial load duke parents: diff changeset	123	/* Combines the row recently read in with the previous row.
90ce3da70b43 Initial load duke parents: diff changeset	124	This routine takes care of alpha and transparency if requested.
90ce3da70b43 Initial load duke parents: diff changeset	125	This routine also handles the two methods of progressive display
90ce3da70b43 Initial load duke parents: diff changeset	126	of interlaced images, depending on the mask value.
90ce3da70b43 Initial load duke parents: diff changeset	127	The mask value describes which pixels are to be combined with
90ce3da70b43 Initial load duke parents: diff changeset	128	the row. The pattern always repeats every 8 pixels, so just 8
90ce3da70b43 Initial load duke parents: diff changeset	129	bits are needed. A one indicates the pixel is to be combined; a
90ce3da70b43 Initial load duke parents: diff changeset	130	zero indicates the pixel is to be skipped. This is in addition
90ce3da70b43 Initial load duke parents: diff changeset	131	to any alpha or transparency value associated with the pixel. If
90ce3da70b43 Initial load duke parents: diff changeset	132	you want all pixels to be combined, pass 0xff (255) in mask. */
90ce3da70b43 Initial load duke parents: diff changeset	133
90ce3da70b43 Initial load duke parents: diff changeset	134	/* Use this routine for x86 platform - uses faster MMX routine if machine
90ce3da70b43 Initial load duke parents: diff changeset	135	supports MMX */
90ce3da70b43 Initial load duke parents: diff changeset	136
90ce3da70b43 Initial load duke parents: diff changeset	137	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	138	png_combine_row(png_structp png_ptr, png_bytep row, int mask)
90ce3da70b43 Initial load duke parents: diff changeset	139	{
90ce3da70b43 Initial load duke parents: diff changeset	140	#ifdef PNG_USE_LOCAL_ARRAYS
90ce3da70b43 Initial load duke parents: diff changeset	141	const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
90ce3da70b43 Initial load duke parents: diff changeset	142	#endif
90ce3da70b43 Initial load duke parents: diff changeset	143
90ce3da70b43 Initial load duke parents: diff changeset	144	png_debug(1,"in png_combine_row_asm\n");
90ce3da70b43 Initial load duke parents: diff changeset	145
90ce3da70b43 Initial load duke parents: diff changeset	146	if (mmx_supported == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	147	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	148	/* this should have happened in png_init_mmx_flags() already */
90ce3da70b43 Initial load duke parents: diff changeset	149	png_warning(png_ptr, "asm_flags may not have been initialized");
90ce3da70b43 Initial load duke parents: diff changeset	150	#endif
90ce3da70b43 Initial load duke parents: diff changeset	151	png_mmx_support();
90ce3da70b43 Initial load duke parents: diff changeset	152	}
90ce3da70b43 Initial load duke parents: diff changeset	153
90ce3da70b43 Initial load duke parents: diff changeset	154	if (mask == 0xff)
90ce3da70b43 Initial load duke parents: diff changeset	155	{
90ce3da70b43 Initial load duke parents: diff changeset	156	png_memcpy(row, png_ptr->row_buf + 1,
90ce3da70b43 Initial load duke parents: diff changeset	157	(png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
90ce3da70b43 Initial load duke parents: diff changeset	158	png_ptr->width));
90ce3da70b43 Initial load duke parents: diff changeset	159	}
90ce3da70b43 Initial load duke parents: diff changeset	160	/* GRR: add "else if (mask == 0)" case?
90ce3da70b43 Initial load duke parents: diff changeset	161	* or does png_combine_row() not even get called in that case? */
90ce3da70b43 Initial load duke parents: diff changeset	162	else
90ce3da70b43 Initial load duke parents: diff changeset	163	{
90ce3da70b43 Initial load duke parents: diff changeset	164	switch (png_ptr->row_info.pixel_depth)
90ce3da70b43 Initial load duke parents: diff changeset	165	{
90ce3da70b43 Initial load duke parents: diff changeset	166	case 1:
90ce3da70b43 Initial load duke parents: diff changeset	167	{
90ce3da70b43 Initial load duke parents: diff changeset	168	png_bytep sp;
90ce3da70b43 Initial load duke parents: diff changeset	169	png_bytep dp;
90ce3da70b43 Initial load duke parents: diff changeset	170	int s_inc, s_start, s_end;
90ce3da70b43 Initial load duke parents: diff changeset	171	int m;
90ce3da70b43 Initial load duke parents: diff changeset	172	int shift;
90ce3da70b43 Initial load duke parents: diff changeset	173	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	174
90ce3da70b43 Initial load duke parents: diff changeset	175	sp = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	176	dp = row;
90ce3da70b43 Initial load duke parents: diff changeset	177	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	178	#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	179	if (png_ptr->transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load duke parents: diff changeset	180	{
90ce3da70b43 Initial load duke parents: diff changeset	181	s_start = 0;
90ce3da70b43 Initial load duke parents: diff changeset	182	s_end = 7;
90ce3da70b43 Initial load duke parents: diff changeset	183	s_inc = 1;
90ce3da70b43 Initial load duke parents: diff changeset	184	}
90ce3da70b43 Initial load duke parents: diff changeset	185	else
90ce3da70b43 Initial load duke parents: diff changeset	186	#endif
90ce3da70b43 Initial load duke parents: diff changeset	187	{
90ce3da70b43 Initial load duke parents: diff changeset	188	s_start = 7;
90ce3da70b43 Initial load duke parents: diff changeset	189	s_end = 0;
90ce3da70b43 Initial load duke parents: diff changeset	190	s_inc = -1;
90ce3da70b43 Initial load duke parents: diff changeset	191	}
90ce3da70b43 Initial load duke parents: diff changeset	192
90ce3da70b43 Initial load duke parents: diff changeset	193	shift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	194
90ce3da70b43 Initial load duke parents: diff changeset	195	for (i = 0; i < png_ptr->width; i++)
90ce3da70b43 Initial load duke parents: diff changeset	196	{
90ce3da70b43 Initial load duke parents: diff changeset	197	if (m & mask)
90ce3da70b43 Initial load duke parents: diff changeset	198	{
90ce3da70b43 Initial load duke parents: diff changeset	199	int value;
90ce3da70b43 Initial load duke parents: diff changeset	200
90ce3da70b43 Initial load duke parents: diff changeset	201	value = (*sp >> shift) & 0x1;
90ce3da70b43 Initial load duke parents: diff changeset	202	*dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	203	*dp \|= (png_byte)(value << shift);
90ce3da70b43 Initial load duke parents: diff changeset	204	}
90ce3da70b43 Initial load duke parents: diff changeset	205
90ce3da70b43 Initial load duke parents: diff changeset	206	if (shift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	207	{
90ce3da70b43 Initial load duke parents: diff changeset	208	shift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	209	sp++;
90ce3da70b43 Initial load duke parents: diff changeset	210	dp++;
90ce3da70b43 Initial load duke parents: diff changeset	211	}
90ce3da70b43 Initial load duke parents: diff changeset	212	else
90ce3da70b43 Initial load duke parents: diff changeset	213	shift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	214
90ce3da70b43 Initial load duke parents: diff changeset	215	if (m == 1)
90ce3da70b43 Initial load duke parents: diff changeset	216	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	217	else
90ce3da70b43 Initial load duke parents: diff changeset	218	m >>= 1;
90ce3da70b43 Initial load duke parents: diff changeset	219	}
90ce3da70b43 Initial load duke parents: diff changeset	220	break;
90ce3da70b43 Initial load duke parents: diff changeset	221	}
90ce3da70b43 Initial load duke parents: diff changeset	222
90ce3da70b43 Initial load duke parents: diff changeset	223	case 2:
90ce3da70b43 Initial load duke parents: diff changeset	224	{
90ce3da70b43 Initial load duke parents: diff changeset	225	png_bytep sp;
90ce3da70b43 Initial load duke parents: diff changeset	226	png_bytep dp;
90ce3da70b43 Initial load duke parents: diff changeset	227	int s_start, s_end, s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	228	int m;
90ce3da70b43 Initial load duke parents: diff changeset	229	int shift;
90ce3da70b43 Initial load duke parents: diff changeset	230	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	231	int value;
90ce3da70b43 Initial load duke parents: diff changeset	232
90ce3da70b43 Initial load duke parents: diff changeset	233	sp = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	234	dp = row;
90ce3da70b43 Initial load duke parents: diff changeset	235	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	236	#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	237	if (png_ptr->transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load duke parents: diff changeset	238	{
90ce3da70b43 Initial load duke parents: diff changeset	239	s_start = 0;
90ce3da70b43 Initial load duke parents: diff changeset	240	s_end = 6;
90ce3da70b43 Initial load duke parents: diff changeset	241	s_inc = 2;
90ce3da70b43 Initial load duke parents: diff changeset	242	}
90ce3da70b43 Initial load duke parents: diff changeset	243	else
90ce3da70b43 Initial load duke parents: diff changeset	244	#endif
90ce3da70b43 Initial load duke parents: diff changeset	245	{
90ce3da70b43 Initial load duke parents: diff changeset	246	s_start = 6;
90ce3da70b43 Initial load duke parents: diff changeset	247	s_end = 0;
90ce3da70b43 Initial load duke parents: diff changeset	248	s_inc = -2;
90ce3da70b43 Initial load duke parents: diff changeset	249	}
90ce3da70b43 Initial load duke parents: diff changeset	250
90ce3da70b43 Initial load duke parents: diff changeset	251	shift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	252
90ce3da70b43 Initial load duke parents: diff changeset	253	for (i = 0; i < png_ptr->width; i++)
90ce3da70b43 Initial load duke parents: diff changeset	254	{
90ce3da70b43 Initial load duke parents: diff changeset	255	if (m & mask)
90ce3da70b43 Initial load duke parents: diff changeset	256	{
90ce3da70b43 Initial load duke parents: diff changeset	257	value = (*sp >> shift) & 0x3;
90ce3da70b43 Initial load duke parents: diff changeset	258	*dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	259	*dp \|= (png_byte)(value << shift);
90ce3da70b43 Initial load duke parents: diff changeset	260	}
90ce3da70b43 Initial load duke parents: diff changeset	261
90ce3da70b43 Initial load duke parents: diff changeset	262	if (shift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	263	{
90ce3da70b43 Initial load duke parents: diff changeset	264	shift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	265	sp++;
90ce3da70b43 Initial load duke parents: diff changeset	266	dp++;
90ce3da70b43 Initial load duke parents: diff changeset	267	}
90ce3da70b43 Initial load duke parents: diff changeset	268	else
90ce3da70b43 Initial load duke parents: diff changeset	269	shift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	270	if (m == 1)
90ce3da70b43 Initial load duke parents: diff changeset	271	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	272	else
90ce3da70b43 Initial load duke parents: diff changeset	273	m >>= 1;
90ce3da70b43 Initial load duke parents: diff changeset	274	}
90ce3da70b43 Initial load duke parents: diff changeset	275	break;
90ce3da70b43 Initial load duke parents: diff changeset	276	}
90ce3da70b43 Initial load duke parents: diff changeset	277
90ce3da70b43 Initial load duke parents: diff changeset	278	case 4:
90ce3da70b43 Initial load duke parents: diff changeset	279	{
90ce3da70b43 Initial load duke parents: diff changeset	280	png_bytep sp;
90ce3da70b43 Initial load duke parents: diff changeset	281	png_bytep dp;
90ce3da70b43 Initial load duke parents: diff changeset	282	int s_start, s_end, s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	283	int m;
90ce3da70b43 Initial load duke parents: diff changeset	284	int shift;
90ce3da70b43 Initial load duke parents: diff changeset	285	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	286	int value;
90ce3da70b43 Initial load duke parents: diff changeset	287
90ce3da70b43 Initial load duke parents: diff changeset	288	sp = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	289	dp = row;
90ce3da70b43 Initial load duke parents: diff changeset	290	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	291	#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	292	if (png_ptr->transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load duke parents: diff changeset	293	{
90ce3da70b43 Initial load duke parents: diff changeset	294	s_start = 0;
90ce3da70b43 Initial load duke parents: diff changeset	295	s_end = 4;
90ce3da70b43 Initial load duke parents: diff changeset	296	s_inc = 4;
90ce3da70b43 Initial load duke parents: diff changeset	297	}
90ce3da70b43 Initial load duke parents: diff changeset	298	else
90ce3da70b43 Initial load duke parents: diff changeset	299	#endif
90ce3da70b43 Initial load duke parents: diff changeset	300	{
90ce3da70b43 Initial load duke parents: diff changeset	301	s_start = 4;
90ce3da70b43 Initial load duke parents: diff changeset	302	s_end = 0;
90ce3da70b43 Initial load duke parents: diff changeset	303	s_inc = -4;
90ce3da70b43 Initial load duke parents: diff changeset	304	}
90ce3da70b43 Initial load duke parents: diff changeset	305	shift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	306
90ce3da70b43 Initial load duke parents: diff changeset	307	for (i = 0; i < png_ptr->width; i++)
90ce3da70b43 Initial load duke parents: diff changeset	308	{
90ce3da70b43 Initial load duke parents: diff changeset	309	if (m & mask)
90ce3da70b43 Initial load duke parents: diff changeset	310	{
90ce3da70b43 Initial load duke parents: diff changeset	311	value = (*sp >> shift) & 0xf;
90ce3da70b43 Initial load duke parents: diff changeset	312	*dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	313	*dp \|= (png_byte)(value << shift);
90ce3da70b43 Initial load duke parents: diff changeset	314	}
90ce3da70b43 Initial load duke parents: diff changeset	315
90ce3da70b43 Initial load duke parents: diff changeset	316	if (shift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	317	{
90ce3da70b43 Initial load duke parents: diff changeset	318	shift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	319	sp++;
90ce3da70b43 Initial load duke parents: diff changeset	320	dp++;
90ce3da70b43 Initial load duke parents: diff changeset	321	}
90ce3da70b43 Initial load duke parents: diff changeset	322	else
90ce3da70b43 Initial load duke parents: diff changeset	323	shift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	324	if (m == 1)
90ce3da70b43 Initial load duke parents: diff changeset	325	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	326	else
90ce3da70b43 Initial load duke parents: diff changeset	327	m >>= 1;
90ce3da70b43 Initial load duke parents: diff changeset	328	}
90ce3da70b43 Initial load duke parents: diff changeset	329	break;
90ce3da70b43 Initial load duke parents: diff changeset	330	}
90ce3da70b43 Initial load duke parents: diff changeset	331
90ce3da70b43 Initial load duke parents: diff changeset	332	case 8:
90ce3da70b43 Initial load duke parents: diff changeset	333	{
90ce3da70b43 Initial load duke parents: diff changeset	334	png_bytep srcptr;
90ce3da70b43 Initial load duke parents: diff changeset	335	png_bytep dstptr;
90ce3da70b43 Initial load duke parents: diff changeset	336	png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	337	int m;
90ce3da70b43 Initial load duke parents: diff changeset	338	int diff, unmask;
90ce3da70b43 Initial load duke parents: diff changeset	339
90ce3da70b43 Initial load duke parents: diff changeset	340	__int64 mask0=0x0102040810204080;
90ce3da70b43 Initial load duke parents: diff changeset	341
90ce3da70b43 Initial load duke parents: diff changeset	342	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	343	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load duke parents: diff changeset	344	/* && mmx_supported */ )
90ce3da70b43 Initial load duke parents: diff changeset	345	#else
90ce3da70b43 Initial load duke parents: diff changeset	346	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	347	#endif
90ce3da70b43 Initial load duke parents: diff changeset	348	{
90ce3da70b43 Initial load duke parents: diff changeset	349	srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	350	dstptr = row;
90ce3da70b43 Initial load duke parents: diff changeset	351	m = 0x80;
90ce3da70b43 Initial load duke parents: diff changeset	352	unmask = ~mask;
90ce3da70b43 Initial load duke parents: diff changeset	353	len = png_ptr->width &~7; //reduce to multiple of 8
90ce3da70b43 Initial load duke parents: diff changeset	354	diff = png_ptr->width & 7; //amount lost
90ce3da70b43 Initial load duke parents: diff changeset	355
90ce3da70b43 Initial load duke parents: diff changeset	356	_asm
90ce3da70b43 Initial load duke parents: diff changeset	357	{
90ce3da70b43 Initial load duke parents: diff changeset	358	movd mm7, unmask //load bit pattern
90ce3da70b43 Initial load duke parents: diff changeset	359	psubb mm6,mm6 //zero mm6
90ce3da70b43 Initial load duke parents: diff changeset	360	punpcklbw mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	361	punpcklwd mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	362	punpckldq mm7,mm7 //fill register with 8 masks
90ce3da70b43 Initial load duke parents: diff changeset	363
90ce3da70b43 Initial load duke parents: diff changeset	364	movq mm0,mask0
90ce3da70b43 Initial load duke parents: diff changeset	365
90ce3da70b43 Initial load duke parents: diff changeset	366	pand mm0,mm7 //nonzero if keep byte
90ce3da70b43 Initial load duke parents: diff changeset	367	pcmpeqb mm0,mm6 //zeros->1s, v versa
90ce3da70b43 Initial load duke parents: diff changeset	368
90ce3da70b43 Initial load duke parents: diff changeset	369	mov ecx,len //load length of line (pixels)
90ce3da70b43 Initial load duke parents: diff changeset	370	mov esi,srcptr //load source
90ce3da70b43 Initial load duke parents: diff changeset	371	mov ebx,dstptr //load dest
90ce3da70b43 Initial load duke parents: diff changeset	372	cmp ecx,0 //lcr
90ce3da70b43 Initial load duke parents: diff changeset	373	je mainloop8end
90ce3da70b43 Initial load duke parents: diff changeset	374
90ce3da70b43 Initial load duke parents: diff changeset	375	mainloop8:
90ce3da70b43 Initial load duke parents: diff changeset	376	movq mm4,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	377	pand mm4,mm0
90ce3da70b43 Initial load duke parents: diff changeset	378	movq mm6,mm0
90ce3da70b43 Initial load duke parents: diff changeset	379	pandn mm6,[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	380	por mm4,mm6
90ce3da70b43 Initial load duke parents: diff changeset	381	movq [ebx],mm4
90ce3da70b43 Initial load duke parents: diff changeset	382
90ce3da70b43 Initial load duke parents: diff changeset	383	add esi,8 //inc by 8 bytes processed
90ce3da70b43 Initial load duke parents: diff changeset	384	add ebx,8
90ce3da70b43 Initial load duke parents: diff changeset	385	sub ecx,8 //dec by 8 pixels processed
90ce3da70b43 Initial load duke parents: diff changeset	386
90ce3da70b43 Initial load duke parents: diff changeset	387	ja mainloop8
90ce3da70b43 Initial load duke parents: diff changeset	388	mainloop8end:
90ce3da70b43 Initial load duke parents: diff changeset	389
90ce3da70b43 Initial load duke parents: diff changeset	390	mov ecx,diff
90ce3da70b43 Initial load duke parents: diff changeset	391	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	392	jz end8
90ce3da70b43 Initial load duke parents: diff changeset	393
90ce3da70b43 Initial load duke parents: diff changeset	394	mov edx,mask
90ce3da70b43 Initial load duke parents: diff changeset	395	sal edx,24 //make low byte the high byte
90ce3da70b43 Initial load duke parents: diff changeset	396
90ce3da70b43 Initial load duke parents: diff changeset	397	secondloop8:
90ce3da70b43 Initial load duke parents: diff changeset	398	sal edx,1 //move high bit to CF
90ce3da70b43 Initial load duke parents: diff changeset	399	jnc skip8 //if CF = 0
90ce3da70b43 Initial load duke parents: diff changeset	400	mov al,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	401	mov [ebx],al
90ce3da70b43 Initial load duke parents: diff changeset	402	skip8:
90ce3da70b43 Initial load duke parents: diff changeset	403	inc esi
90ce3da70b43 Initial load duke parents: diff changeset	404	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	405
90ce3da70b43 Initial load duke parents: diff changeset	406	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	407	jnz secondloop8
90ce3da70b43 Initial load duke parents: diff changeset	408	end8:
90ce3da70b43 Initial load duke parents: diff changeset	409	emms
90ce3da70b43 Initial load duke parents: diff changeset	410	}
90ce3da70b43 Initial load duke parents: diff changeset	411	}
90ce3da70b43 Initial load duke parents: diff changeset	412	else /* mmx not supported - use modified C routine */
90ce3da70b43 Initial load duke parents: diff changeset	413	{
90ce3da70b43 Initial load duke parents: diff changeset	414	register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load duke parents: diff changeset	415	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	416	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	417	register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load duke parents: diff changeset	418	int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load duke parents: diff changeset	419
90ce3da70b43 Initial load duke parents: diff changeset	420	pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	421	srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load duke parents: diff changeset	422	pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	423	dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	424	initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	425	final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	426	incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	427	for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load duke parents: diff changeset	428	{
90ce3da70b43 Initial load duke parents: diff changeset	429	png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	430	srcptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	431	dstptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	432	}
90ce3da70b43 Initial load duke parents: diff changeset	433	} /* end of else */
90ce3da70b43 Initial load duke parents: diff changeset	434
90ce3da70b43 Initial load duke parents: diff changeset	435	break;
90ce3da70b43 Initial load duke parents: diff changeset	436	} // end 8 bpp
90ce3da70b43 Initial load duke parents: diff changeset	437
90ce3da70b43 Initial load duke parents: diff changeset	438	case 16:
90ce3da70b43 Initial load duke parents: diff changeset	439	{
90ce3da70b43 Initial load duke parents: diff changeset	440	png_bytep srcptr;
90ce3da70b43 Initial load duke parents: diff changeset	441	png_bytep dstptr;
90ce3da70b43 Initial load duke parents: diff changeset	442	png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	443	int unmask, diff;
90ce3da70b43 Initial load duke parents: diff changeset	444	__int64 mask1=0x0101020204040808,
90ce3da70b43 Initial load duke parents: diff changeset	445	mask0=0x1010202040408080;
90ce3da70b43 Initial load duke parents: diff changeset	446
90ce3da70b43 Initial load duke parents: diff changeset	447	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	448	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load duke parents: diff changeset	449	/* && mmx_supported */ )
90ce3da70b43 Initial load duke parents: diff changeset	450	#else
90ce3da70b43 Initial load duke parents: diff changeset	451	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	452	#endif
90ce3da70b43 Initial load duke parents: diff changeset	453	{
90ce3da70b43 Initial load duke parents: diff changeset	454	srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	455	dstptr = row;
90ce3da70b43 Initial load duke parents: diff changeset	456
90ce3da70b43 Initial load duke parents: diff changeset	457	unmask = ~mask;
90ce3da70b43 Initial load duke parents: diff changeset	458	len = (png_ptr->width)&~7;
90ce3da70b43 Initial load duke parents: diff changeset	459	diff = (png_ptr->width)&7;
90ce3da70b43 Initial load duke parents: diff changeset	460	_asm
90ce3da70b43 Initial load duke parents: diff changeset	461	{
90ce3da70b43 Initial load duke parents: diff changeset	462	movd mm7, unmask //load bit pattern
90ce3da70b43 Initial load duke parents: diff changeset	463	psubb mm6,mm6 //zero mm6
90ce3da70b43 Initial load duke parents: diff changeset	464	punpcklbw mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	465	punpcklwd mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	466	punpckldq mm7,mm7 //fill register with 8 masks
90ce3da70b43 Initial load duke parents: diff changeset	467
90ce3da70b43 Initial load duke parents: diff changeset	468	movq mm0,mask0
90ce3da70b43 Initial load duke parents: diff changeset	469	movq mm1,mask1
90ce3da70b43 Initial load duke parents: diff changeset	470
90ce3da70b43 Initial load duke parents: diff changeset	471	pand mm0,mm7
90ce3da70b43 Initial load duke parents: diff changeset	472	pand mm1,mm7
90ce3da70b43 Initial load duke parents: diff changeset	473
90ce3da70b43 Initial load duke parents: diff changeset	474	pcmpeqb mm0,mm6
90ce3da70b43 Initial load duke parents: diff changeset	475	pcmpeqb mm1,mm6
90ce3da70b43 Initial load duke parents: diff changeset	476
90ce3da70b43 Initial load duke parents: diff changeset	477	mov ecx,len //load length of line
90ce3da70b43 Initial load duke parents: diff changeset	478	mov esi,srcptr //load source
90ce3da70b43 Initial load duke parents: diff changeset	479	mov ebx,dstptr //load dest
90ce3da70b43 Initial load duke parents: diff changeset	480	cmp ecx,0 //lcr
90ce3da70b43 Initial load duke parents: diff changeset	481	jz mainloop16end
90ce3da70b43 Initial load duke parents: diff changeset	482
90ce3da70b43 Initial load duke parents: diff changeset	483	mainloop16:
90ce3da70b43 Initial load duke parents: diff changeset	484	movq mm4,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	485	pand mm4,mm0
90ce3da70b43 Initial load duke parents: diff changeset	486	movq mm6,mm0
90ce3da70b43 Initial load duke parents: diff changeset	487	movq mm7,[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	488	pandn mm6,mm7
90ce3da70b43 Initial load duke parents: diff changeset	489	por mm4,mm6
90ce3da70b43 Initial load duke parents: diff changeset	490	movq [ebx],mm4
90ce3da70b43 Initial load duke parents: diff changeset	491
90ce3da70b43 Initial load duke parents: diff changeset	492	movq mm5,[esi+8]
90ce3da70b43 Initial load duke parents: diff changeset	493	pand mm5,mm1
90ce3da70b43 Initial load duke parents: diff changeset	494	movq mm7,mm1
90ce3da70b43 Initial load duke parents: diff changeset	495	movq mm6,[ebx+8]
90ce3da70b43 Initial load duke parents: diff changeset	496	pandn mm7,mm6
90ce3da70b43 Initial load duke parents: diff changeset	497	por mm5,mm7
90ce3da70b43 Initial load duke parents: diff changeset	498	movq [ebx+8],mm5
90ce3da70b43 Initial load duke parents: diff changeset	499
90ce3da70b43 Initial load duke parents: diff changeset	500	add esi,16 //inc by 16 bytes processed
90ce3da70b43 Initial load duke parents: diff changeset	501	add ebx,16
90ce3da70b43 Initial load duke parents: diff changeset	502	sub ecx,8 //dec by 8 pixels processed
90ce3da70b43 Initial load duke parents: diff changeset	503
90ce3da70b43 Initial load duke parents: diff changeset	504	ja mainloop16
90ce3da70b43 Initial load duke parents: diff changeset	505
90ce3da70b43 Initial load duke parents: diff changeset	506	mainloop16end:
90ce3da70b43 Initial load duke parents: diff changeset	507	mov ecx,diff
90ce3da70b43 Initial load duke parents: diff changeset	508	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	509	jz end16
90ce3da70b43 Initial load duke parents: diff changeset	510
90ce3da70b43 Initial load duke parents: diff changeset	511	mov edx,mask
90ce3da70b43 Initial load duke parents: diff changeset	512	sal edx,24 //make low byte the high byte
90ce3da70b43 Initial load duke parents: diff changeset	513	secondloop16:
90ce3da70b43 Initial load duke parents: diff changeset	514	sal edx,1 //move high bit to CF
90ce3da70b43 Initial load duke parents: diff changeset	515	jnc skip16 //if CF = 0
90ce3da70b43 Initial load duke parents: diff changeset	516	mov ax,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	517	mov [ebx],ax
90ce3da70b43 Initial load duke parents: diff changeset	518	skip16:
90ce3da70b43 Initial load duke parents: diff changeset	519	add esi,2
90ce3da70b43 Initial load duke parents: diff changeset	520	add ebx,2
90ce3da70b43 Initial load duke parents: diff changeset	521
90ce3da70b43 Initial load duke parents: diff changeset	522	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	523	jnz secondloop16
90ce3da70b43 Initial load duke parents: diff changeset	524	end16:
90ce3da70b43 Initial load duke parents: diff changeset	525	emms
90ce3da70b43 Initial load duke parents: diff changeset	526	}
90ce3da70b43 Initial load duke parents: diff changeset	527	}
90ce3da70b43 Initial load duke parents: diff changeset	528	else /* mmx not supported - use modified C routine */
90ce3da70b43 Initial load duke parents: diff changeset	529	{
90ce3da70b43 Initial load duke parents: diff changeset	530	register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load duke parents: diff changeset	531	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	532	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	533	register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load duke parents: diff changeset	534	int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load duke parents: diff changeset	535
90ce3da70b43 Initial load duke parents: diff changeset	536	pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	537	srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load duke parents: diff changeset	538	pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	539	dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	540	initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	541	final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	542	incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	543	for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load duke parents: diff changeset	544	{
90ce3da70b43 Initial load duke parents: diff changeset	545	png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	546	srcptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	547	dstptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	548	}
90ce3da70b43 Initial load duke parents: diff changeset	549	} /* end of else */
90ce3da70b43 Initial load duke parents: diff changeset	550
90ce3da70b43 Initial load duke parents: diff changeset	551	break;
90ce3da70b43 Initial load duke parents: diff changeset	552	} // end 16 bpp
90ce3da70b43 Initial load duke parents: diff changeset	553
90ce3da70b43 Initial load duke parents: diff changeset	554	case 24:
90ce3da70b43 Initial load duke parents: diff changeset	555	{
90ce3da70b43 Initial load duke parents: diff changeset	556	png_bytep srcptr;
90ce3da70b43 Initial load duke parents: diff changeset	557	png_bytep dstptr;
90ce3da70b43 Initial load duke parents: diff changeset	558	png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	559	int unmask, diff;
90ce3da70b43 Initial load duke parents: diff changeset	560
90ce3da70b43 Initial load duke parents: diff changeset	561	__int64 mask2=0x0101010202020404, //24bpp
90ce3da70b43 Initial load duke parents: diff changeset	562	mask1=0x0408080810101020,
90ce3da70b43 Initial load duke parents: diff changeset	563	mask0=0x2020404040808080;
90ce3da70b43 Initial load duke parents: diff changeset	564
90ce3da70b43 Initial load duke parents: diff changeset	565	srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	566	dstptr = row;
90ce3da70b43 Initial load duke parents: diff changeset	567
90ce3da70b43 Initial load duke parents: diff changeset	568	unmask = ~mask;
90ce3da70b43 Initial load duke parents: diff changeset	569	len = (png_ptr->width)&~7;
90ce3da70b43 Initial load duke parents: diff changeset	570	diff = (png_ptr->width)&7;
90ce3da70b43 Initial load duke parents: diff changeset	571
90ce3da70b43 Initial load duke parents: diff changeset	572	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	573	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load duke parents: diff changeset	574	/* && mmx_supported */ )
90ce3da70b43 Initial load duke parents: diff changeset	575	#else
90ce3da70b43 Initial load duke parents: diff changeset	576	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	577	#endif
90ce3da70b43 Initial load duke parents: diff changeset	578	{
90ce3da70b43 Initial load duke parents: diff changeset	579	_asm
90ce3da70b43 Initial load duke parents: diff changeset	580	{
90ce3da70b43 Initial load duke parents: diff changeset	581	movd mm7, unmask //load bit pattern
90ce3da70b43 Initial load duke parents: diff changeset	582	psubb mm6,mm6 //zero mm6
90ce3da70b43 Initial load duke parents: diff changeset	583	punpcklbw mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	584	punpcklwd mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	585	punpckldq mm7,mm7 //fill register with 8 masks
90ce3da70b43 Initial load duke parents: diff changeset	586
90ce3da70b43 Initial load duke parents: diff changeset	587	movq mm0,mask0
90ce3da70b43 Initial load duke parents: diff changeset	588	movq mm1,mask1
90ce3da70b43 Initial load duke parents: diff changeset	589	movq mm2,mask2
90ce3da70b43 Initial load duke parents: diff changeset	590
90ce3da70b43 Initial load duke parents: diff changeset	591	pand mm0,mm7
90ce3da70b43 Initial load duke parents: diff changeset	592	pand mm1,mm7
90ce3da70b43 Initial load duke parents: diff changeset	593	pand mm2,mm7
90ce3da70b43 Initial load duke parents: diff changeset	594
90ce3da70b43 Initial load duke parents: diff changeset	595	pcmpeqb mm0,mm6
90ce3da70b43 Initial load duke parents: diff changeset	596	pcmpeqb mm1,mm6
90ce3da70b43 Initial load duke parents: diff changeset	597	pcmpeqb mm2,mm6
90ce3da70b43 Initial load duke parents: diff changeset	598
90ce3da70b43 Initial load duke parents: diff changeset	599	mov ecx,len //load length of line
90ce3da70b43 Initial load duke parents: diff changeset	600	mov esi,srcptr //load source
90ce3da70b43 Initial load duke parents: diff changeset	601	mov ebx,dstptr //load dest
90ce3da70b43 Initial load duke parents: diff changeset	602	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	603	jz mainloop24end
90ce3da70b43 Initial load duke parents: diff changeset	604
90ce3da70b43 Initial load duke parents: diff changeset	605	mainloop24:
90ce3da70b43 Initial load duke parents: diff changeset	606	movq mm4,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	607	pand mm4,mm0
90ce3da70b43 Initial load duke parents: diff changeset	608	movq mm6,mm0
90ce3da70b43 Initial load duke parents: diff changeset	609	movq mm7,[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	610	pandn mm6,mm7
90ce3da70b43 Initial load duke parents: diff changeset	611	por mm4,mm6
90ce3da70b43 Initial load duke parents: diff changeset	612	movq [ebx],mm4
90ce3da70b43 Initial load duke parents: diff changeset	613
90ce3da70b43 Initial load duke parents: diff changeset	614
90ce3da70b43 Initial load duke parents: diff changeset	615	movq mm5,[esi+8]
90ce3da70b43 Initial load duke parents: diff changeset	616	pand mm5,mm1
90ce3da70b43 Initial load duke parents: diff changeset	617	movq mm7,mm1
90ce3da70b43 Initial load duke parents: diff changeset	618	movq mm6,[ebx+8]
90ce3da70b43 Initial load duke parents: diff changeset	619	pandn mm7,mm6
90ce3da70b43 Initial load duke parents: diff changeset	620	por mm5,mm7
90ce3da70b43 Initial load duke parents: diff changeset	621	movq [ebx+8],mm5
90ce3da70b43 Initial load duke parents: diff changeset	622
90ce3da70b43 Initial load duke parents: diff changeset	623	movq mm6,[esi+16]
90ce3da70b43 Initial load duke parents: diff changeset	624	pand mm6,mm2
90ce3da70b43 Initial load duke parents: diff changeset	625	movq mm4,mm2
90ce3da70b43 Initial load duke parents: diff changeset	626	movq mm7,[ebx+16]
90ce3da70b43 Initial load duke parents: diff changeset	627	pandn mm4,mm7
90ce3da70b43 Initial load duke parents: diff changeset	628	por mm6,mm4
90ce3da70b43 Initial load duke parents: diff changeset	629	movq [ebx+16],mm6
90ce3da70b43 Initial load duke parents: diff changeset	630
90ce3da70b43 Initial load duke parents: diff changeset	631	add esi,24 //inc by 24 bytes processed
90ce3da70b43 Initial load duke parents: diff changeset	632	add ebx,24
90ce3da70b43 Initial load duke parents: diff changeset	633	sub ecx,8 //dec by 8 pixels processed
90ce3da70b43 Initial load duke parents: diff changeset	634
90ce3da70b43 Initial load duke parents: diff changeset	635	ja mainloop24
90ce3da70b43 Initial load duke parents: diff changeset	636
90ce3da70b43 Initial load duke parents: diff changeset	637	mainloop24end:
90ce3da70b43 Initial load duke parents: diff changeset	638	mov ecx,diff
90ce3da70b43 Initial load duke parents: diff changeset	639	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	640	jz end24
90ce3da70b43 Initial load duke parents: diff changeset	641
90ce3da70b43 Initial load duke parents: diff changeset	642	mov edx,mask
90ce3da70b43 Initial load duke parents: diff changeset	643	sal edx,24 //make low byte the high byte
90ce3da70b43 Initial load duke parents: diff changeset	644	secondloop24:
90ce3da70b43 Initial load duke parents: diff changeset	645	sal edx,1 //move high bit to CF
90ce3da70b43 Initial load duke parents: diff changeset	646	jnc skip24 //if CF = 0
90ce3da70b43 Initial load duke parents: diff changeset	647	mov ax,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	648	mov [ebx],ax
90ce3da70b43 Initial load duke parents: diff changeset	649	xor eax,eax
90ce3da70b43 Initial load duke parents: diff changeset	650	mov al,[esi+2]
90ce3da70b43 Initial load duke parents: diff changeset	651	mov [ebx+2],al
90ce3da70b43 Initial load duke parents: diff changeset	652	skip24:
90ce3da70b43 Initial load duke parents: diff changeset	653	add esi,3
90ce3da70b43 Initial load duke parents: diff changeset	654	add ebx,3
90ce3da70b43 Initial load duke parents: diff changeset	655
90ce3da70b43 Initial load duke parents: diff changeset	656	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	657	jnz secondloop24
90ce3da70b43 Initial load duke parents: diff changeset	658
90ce3da70b43 Initial load duke parents: diff changeset	659	end24:
90ce3da70b43 Initial load duke parents: diff changeset	660	emms
90ce3da70b43 Initial load duke parents: diff changeset	661	}
90ce3da70b43 Initial load duke parents: diff changeset	662	}
90ce3da70b43 Initial load duke parents: diff changeset	663	else /* mmx not supported - use modified C routine */
90ce3da70b43 Initial load duke parents: diff changeset	664	{
90ce3da70b43 Initial load duke parents: diff changeset	665	register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load duke parents: diff changeset	666	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	667	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	668	register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load duke parents: diff changeset	669	int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load duke parents: diff changeset	670
90ce3da70b43 Initial load duke parents: diff changeset	671	pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	672	srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load duke parents: diff changeset	673	pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	674	dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	675	initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	676	final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	677	incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	678	for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load duke parents: diff changeset	679	{
90ce3da70b43 Initial load duke parents: diff changeset	680	png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	681	srcptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	682	dstptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	683	}
90ce3da70b43 Initial load duke parents: diff changeset	684	} /* end of else */
90ce3da70b43 Initial load duke parents: diff changeset	685
90ce3da70b43 Initial load duke parents: diff changeset	686	break;
90ce3da70b43 Initial load duke parents: diff changeset	687	} // end 24 bpp
90ce3da70b43 Initial load duke parents: diff changeset	688
90ce3da70b43 Initial load duke parents: diff changeset	689	case 32:
90ce3da70b43 Initial load duke parents: diff changeset	690	{
90ce3da70b43 Initial load duke parents: diff changeset	691	png_bytep srcptr;
90ce3da70b43 Initial load duke parents: diff changeset	692	png_bytep dstptr;
90ce3da70b43 Initial load duke parents: diff changeset	693	png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	694	int unmask, diff;
90ce3da70b43 Initial load duke parents: diff changeset	695
90ce3da70b43 Initial load duke parents: diff changeset	696	__int64 mask3=0x0101010102020202, //32bpp
90ce3da70b43 Initial load duke parents: diff changeset	697	mask2=0x0404040408080808,
90ce3da70b43 Initial load duke parents: diff changeset	698	mask1=0x1010101020202020,
90ce3da70b43 Initial load duke parents: diff changeset	699	mask0=0x4040404080808080;
90ce3da70b43 Initial load duke parents: diff changeset	700
90ce3da70b43 Initial load duke parents: diff changeset	701	srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	702	dstptr = row;
90ce3da70b43 Initial load duke parents: diff changeset	703
90ce3da70b43 Initial load duke parents: diff changeset	704	unmask = ~mask;
90ce3da70b43 Initial load duke parents: diff changeset	705	len = (png_ptr->width)&~7;
90ce3da70b43 Initial load duke parents: diff changeset	706	diff = (png_ptr->width)&7;
90ce3da70b43 Initial load duke parents: diff changeset	707
90ce3da70b43 Initial load duke parents: diff changeset	708	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	709	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load duke parents: diff changeset	710	/* && mmx_supported */ )
90ce3da70b43 Initial load duke parents: diff changeset	711	#else
90ce3da70b43 Initial load duke parents: diff changeset	712	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	713	#endif
90ce3da70b43 Initial load duke parents: diff changeset	714	{
90ce3da70b43 Initial load duke parents: diff changeset	715	_asm
90ce3da70b43 Initial load duke parents: diff changeset	716	{
90ce3da70b43 Initial load duke parents: diff changeset	717	movd mm7, unmask //load bit pattern
90ce3da70b43 Initial load duke parents: diff changeset	718	psubb mm6,mm6 //zero mm6
90ce3da70b43 Initial load duke parents: diff changeset	719	punpcklbw mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	720	punpcklwd mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	721	punpckldq mm7,mm7 //fill register with 8 masks
90ce3da70b43 Initial load duke parents: diff changeset	722
90ce3da70b43 Initial load duke parents: diff changeset	723	movq mm0,mask0
90ce3da70b43 Initial load duke parents: diff changeset	724	movq mm1,mask1
90ce3da70b43 Initial load duke parents: diff changeset	725	movq mm2,mask2
90ce3da70b43 Initial load duke parents: diff changeset	726	movq mm3,mask3
90ce3da70b43 Initial load duke parents: diff changeset	727
90ce3da70b43 Initial load duke parents: diff changeset	728	pand mm0,mm7
90ce3da70b43 Initial load duke parents: diff changeset	729	pand mm1,mm7
90ce3da70b43 Initial load duke parents: diff changeset	730	pand mm2,mm7
90ce3da70b43 Initial load duke parents: diff changeset	731	pand mm3,mm7
90ce3da70b43 Initial load duke parents: diff changeset	732
90ce3da70b43 Initial load duke parents: diff changeset	733	pcmpeqb mm0,mm6
90ce3da70b43 Initial load duke parents: diff changeset	734	pcmpeqb mm1,mm6
90ce3da70b43 Initial load duke parents: diff changeset	735	pcmpeqb mm2,mm6
90ce3da70b43 Initial load duke parents: diff changeset	736	pcmpeqb mm3,mm6
90ce3da70b43 Initial load duke parents: diff changeset	737
90ce3da70b43 Initial load duke parents: diff changeset	738	mov ecx,len //load length of line
90ce3da70b43 Initial load duke parents: diff changeset	739	mov esi,srcptr //load source
90ce3da70b43 Initial load duke parents: diff changeset	740	mov ebx,dstptr //load dest
90ce3da70b43 Initial load duke parents: diff changeset	741
90ce3da70b43 Initial load duke parents: diff changeset	742	cmp ecx,0 //lcr
90ce3da70b43 Initial load duke parents: diff changeset	743	jz mainloop32end
90ce3da70b43 Initial load duke parents: diff changeset	744
90ce3da70b43 Initial load duke parents: diff changeset	745	mainloop32:
90ce3da70b43 Initial load duke parents: diff changeset	746	movq mm4,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	747	pand mm4,mm0
90ce3da70b43 Initial load duke parents: diff changeset	748	movq mm6,mm0
90ce3da70b43 Initial load duke parents: diff changeset	749	movq mm7,[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	750	pandn mm6,mm7
90ce3da70b43 Initial load duke parents: diff changeset	751	por mm4,mm6
90ce3da70b43 Initial load duke parents: diff changeset	752	movq [ebx],mm4
90ce3da70b43 Initial load duke parents: diff changeset	753
90ce3da70b43 Initial load duke parents: diff changeset	754	movq mm5,[esi+8]
90ce3da70b43 Initial load duke parents: diff changeset	755	pand mm5,mm1
90ce3da70b43 Initial load duke parents: diff changeset	756	movq mm7,mm1
90ce3da70b43 Initial load duke parents: diff changeset	757	movq mm6,[ebx+8]
90ce3da70b43 Initial load duke parents: diff changeset	758	pandn mm7,mm6
90ce3da70b43 Initial load duke parents: diff changeset	759	por mm5,mm7
90ce3da70b43 Initial load duke parents: diff changeset	760	movq [ebx+8],mm5
90ce3da70b43 Initial load duke parents: diff changeset	761
90ce3da70b43 Initial load duke parents: diff changeset	762	movq mm6,[esi+16]
90ce3da70b43 Initial load duke parents: diff changeset	763	pand mm6,mm2
90ce3da70b43 Initial load duke parents: diff changeset	764	movq mm4,mm2
90ce3da70b43 Initial load duke parents: diff changeset	765	movq mm7,[ebx+16]
90ce3da70b43 Initial load duke parents: diff changeset	766	pandn mm4,mm7
90ce3da70b43 Initial load duke parents: diff changeset	767	por mm6,mm4
90ce3da70b43 Initial load duke parents: diff changeset	768	movq [ebx+16],mm6
90ce3da70b43 Initial load duke parents: diff changeset	769
90ce3da70b43 Initial load duke parents: diff changeset	770	movq mm7,[esi+24]
90ce3da70b43 Initial load duke parents: diff changeset	771	pand mm7,mm3
90ce3da70b43 Initial load duke parents: diff changeset	772	movq mm5,mm3
90ce3da70b43 Initial load duke parents: diff changeset	773	movq mm4,[ebx+24]
90ce3da70b43 Initial load duke parents: diff changeset	774	pandn mm5,mm4
90ce3da70b43 Initial load duke parents: diff changeset	775	por mm7,mm5
90ce3da70b43 Initial load duke parents: diff changeset	776	movq [ebx+24],mm7
90ce3da70b43 Initial load duke parents: diff changeset	777
90ce3da70b43 Initial load duke parents: diff changeset	778	add esi,32 //inc by 32 bytes processed
90ce3da70b43 Initial load duke parents: diff changeset	779	add ebx,32
90ce3da70b43 Initial load duke parents: diff changeset	780	sub ecx,8 //dec by 8 pixels processed
90ce3da70b43 Initial load duke parents: diff changeset	781
90ce3da70b43 Initial load duke parents: diff changeset	782	ja mainloop32
90ce3da70b43 Initial load duke parents: diff changeset	783
90ce3da70b43 Initial load duke parents: diff changeset	784	mainloop32end:
90ce3da70b43 Initial load duke parents: diff changeset	785	mov ecx,diff
90ce3da70b43 Initial load duke parents: diff changeset	786	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	787	jz end32
90ce3da70b43 Initial load duke parents: diff changeset	788
90ce3da70b43 Initial load duke parents: diff changeset	789	mov edx,mask
90ce3da70b43 Initial load duke parents: diff changeset	790	sal edx,24 //make low byte the high byte
90ce3da70b43 Initial load duke parents: diff changeset	791	secondloop32:
90ce3da70b43 Initial load duke parents: diff changeset	792	sal edx,1 //move high bit to CF
90ce3da70b43 Initial load duke parents: diff changeset	793	jnc skip32 //if CF = 0
90ce3da70b43 Initial load duke parents: diff changeset	794	mov eax,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	795	mov [ebx],eax
90ce3da70b43 Initial load duke parents: diff changeset	796	skip32:
90ce3da70b43 Initial load duke parents: diff changeset	797	add esi,4
90ce3da70b43 Initial load duke parents: diff changeset	798	add ebx,4
90ce3da70b43 Initial load duke parents: diff changeset	799
90ce3da70b43 Initial load duke parents: diff changeset	800	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	801	jnz secondloop32
90ce3da70b43 Initial load duke parents: diff changeset	802
90ce3da70b43 Initial load duke parents: diff changeset	803	end32:
90ce3da70b43 Initial load duke parents: diff changeset	804	emms
90ce3da70b43 Initial load duke parents: diff changeset	805	}
90ce3da70b43 Initial load duke parents: diff changeset	806	}
90ce3da70b43 Initial load duke parents: diff changeset	807	else /* mmx _not supported - Use modified C routine */
90ce3da70b43 Initial load duke parents: diff changeset	808	{
90ce3da70b43 Initial load duke parents: diff changeset	809	register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load duke parents: diff changeset	810	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	811	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	812	register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load duke parents: diff changeset	813	int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load duke parents: diff changeset	814
90ce3da70b43 Initial load duke parents: diff changeset	815	pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	816	srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load duke parents: diff changeset	817	pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	818	dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	819	initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	820	final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	821	incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	822	for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load duke parents: diff changeset	823	{
90ce3da70b43 Initial load duke parents: diff changeset	824	png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	825	srcptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	826	dstptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	827	}
90ce3da70b43 Initial load duke parents: diff changeset	828	} /* end of else */
90ce3da70b43 Initial load duke parents: diff changeset	829
90ce3da70b43 Initial load duke parents: diff changeset	830	break;
90ce3da70b43 Initial load duke parents: diff changeset	831	} // end 32 bpp
90ce3da70b43 Initial load duke parents: diff changeset	832
90ce3da70b43 Initial load duke parents: diff changeset	833	case 48:
90ce3da70b43 Initial load duke parents: diff changeset	834	{
90ce3da70b43 Initial load duke parents: diff changeset	835	png_bytep srcptr;
90ce3da70b43 Initial load duke parents: diff changeset	836	png_bytep dstptr;
90ce3da70b43 Initial load duke parents: diff changeset	837	png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	838	int unmask, diff;
90ce3da70b43 Initial load duke parents: diff changeset	839
90ce3da70b43 Initial load duke parents: diff changeset	840	__int64 mask5=0x0101010101010202,
90ce3da70b43 Initial load duke parents: diff changeset	841	mask4=0x0202020204040404,
90ce3da70b43 Initial load duke parents: diff changeset	842	mask3=0x0404080808080808,
90ce3da70b43 Initial load duke parents: diff changeset	843	mask2=0x1010101010102020,
90ce3da70b43 Initial load duke parents: diff changeset	844	mask1=0x2020202040404040,
90ce3da70b43 Initial load duke parents: diff changeset	845	mask0=0x4040808080808080;
90ce3da70b43 Initial load duke parents: diff changeset	846
90ce3da70b43 Initial load duke parents: diff changeset	847	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	848	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load duke parents: diff changeset	849	/* && mmx_supported */ )
90ce3da70b43 Initial load duke parents: diff changeset	850	#else
90ce3da70b43 Initial load duke parents: diff changeset	851	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	852	#endif
90ce3da70b43 Initial load duke parents: diff changeset	853	{
90ce3da70b43 Initial load duke parents: diff changeset	854	srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	855	dstptr = row;
90ce3da70b43 Initial load duke parents: diff changeset	856
90ce3da70b43 Initial load duke parents: diff changeset	857	unmask = ~mask;
90ce3da70b43 Initial load duke parents: diff changeset	858	len = (png_ptr->width)&~7;
90ce3da70b43 Initial load duke parents: diff changeset	859	diff = (png_ptr->width)&7;
90ce3da70b43 Initial load duke parents: diff changeset	860	_asm
90ce3da70b43 Initial load duke parents: diff changeset	861	{
90ce3da70b43 Initial load duke parents: diff changeset	862	movd mm7, unmask //load bit pattern
90ce3da70b43 Initial load duke parents: diff changeset	863	psubb mm6,mm6 //zero mm6
90ce3da70b43 Initial load duke parents: diff changeset	864	punpcklbw mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	865	punpcklwd mm7,mm7
90ce3da70b43 Initial load duke parents: diff changeset	866	punpckldq mm7,mm7 //fill register with 8 masks
90ce3da70b43 Initial load duke parents: diff changeset	867
90ce3da70b43 Initial load duke parents: diff changeset	868	movq mm0,mask0
90ce3da70b43 Initial load duke parents: diff changeset	869	movq mm1,mask1
90ce3da70b43 Initial load duke parents: diff changeset	870	movq mm2,mask2
90ce3da70b43 Initial load duke parents: diff changeset	871	movq mm3,mask3
90ce3da70b43 Initial load duke parents: diff changeset	872	movq mm4,mask4
90ce3da70b43 Initial load duke parents: diff changeset	873	movq mm5,mask5
90ce3da70b43 Initial load duke parents: diff changeset	874
90ce3da70b43 Initial load duke parents: diff changeset	875	pand mm0,mm7
90ce3da70b43 Initial load duke parents: diff changeset	876	pand mm1,mm7
90ce3da70b43 Initial load duke parents: diff changeset	877	pand mm2,mm7
90ce3da70b43 Initial load duke parents: diff changeset	878	pand mm3,mm7
90ce3da70b43 Initial load duke parents: diff changeset	879	pand mm4,mm7
90ce3da70b43 Initial load duke parents: diff changeset	880	pand mm5,mm7
90ce3da70b43 Initial load duke parents: diff changeset	881
90ce3da70b43 Initial load duke parents: diff changeset	882	pcmpeqb mm0,mm6
90ce3da70b43 Initial load duke parents: diff changeset	883	pcmpeqb mm1,mm6
90ce3da70b43 Initial load duke parents: diff changeset	884	pcmpeqb mm2,mm6
90ce3da70b43 Initial load duke parents: diff changeset	885	pcmpeqb mm3,mm6
90ce3da70b43 Initial load duke parents: diff changeset	886	pcmpeqb mm4,mm6
90ce3da70b43 Initial load duke parents: diff changeset	887	pcmpeqb mm5,mm6
90ce3da70b43 Initial load duke parents: diff changeset	888
90ce3da70b43 Initial load duke parents: diff changeset	889	mov ecx,len //load length of line
90ce3da70b43 Initial load duke parents: diff changeset	890	mov esi,srcptr //load source
90ce3da70b43 Initial load duke parents: diff changeset	891	mov ebx,dstptr //load dest
90ce3da70b43 Initial load duke parents: diff changeset	892
90ce3da70b43 Initial load duke parents: diff changeset	893	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	894	jz mainloop48end
90ce3da70b43 Initial load duke parents: diff changeset	895
90ce3da70b43 Initial load duke parents: diff changeset	896	mainloop48:
90ce3da70b43 Initial load duke parents: diff changeset	897	movq mm7,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	898	pand mm7,mm0
90ce3da70b43 Initial load duke parents: diff changeset	899	movq mm6,mm0
90ce3da70b43 Initial load duke parents: diff changeset	900	pandn mm6,[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	901	por mm7,mm6
90ce3da70b43 Initial load duke parents: diff changeset	902	movq [ebx],mm7
90ce3da70b43 Initial load duke parents: diff changeset	903
90ce3da70b43 Initial load duke parents: diff changeset	904	movq mm6,[esi+8]
90ce3da70b43 Initial load duke parents: diff changeset	905	pand mm6,mm1
90ce3da70b43 Initial load duke parents: diff changeset	906	movq mm7,mm1
90ce3da70b43 Initial load duke parents: diff changeset	907	pandn mm7,[ebx+8]
90ce3da70b43 Initial load duke parents: diff changeset	908	por mm6,mm7
90ce3da70b43 Initial load duke parents: diff changeset	909	movq [ebx+8],mm6
90ce3da70b43 Initial load duke parents: diff changeset	910
90ce3da70b43 Initial load duke parents: diff changeset	911	movq mm6,[esi+16]
90ce3da70b43 Initial load duke parents: diff changeset	912	pand mm6,mm2
90ce3da70b43 Initial load duke parents: diff changeset	913	movq mm7,mm2
90ce3da70b43 Initial load duke parents: diff changeset	914	pandn mm7,[ebx+16]
90ce3da70b43 Initial load duke parents: diff changeset	915	por mm6,mm7
90ce3da70b43 Initial load duke parents: diff changeset	916	movq [ebx+16],mm6
90ce3da70b43 Initial load duke parents: diff changeset	917
90ce3da70b43 Initial load duke parents: diff changeset	918	movq mm7,[esi+24]
90ce3da70b43 Initial load duke parents: diff changeset	919	pand mm7,mm3
90ce3da70b43 Initial load duke parents: diff changeset	920	movq mm6,mm3
90ce3da70b43 Initial load duke parents: diff changeset	921	pandn mm6,[ebx+24]
90ce3da70b43 Initial load duke parents: diff changeset	922	por mm7,mm6
90ce3da70b43 Initial load duke parents: diff changeset	923	movq [ebx+24],mm7
90ce3da70b43 Initial load duke parents: diff changeset	924
90ce3da70b43 Initial load duke parents: diff changeset	925	movq mm6,[esi+32]
90ce3da70b43 Initial load duke parents: diff changeset	926	pand mm6,mm4
90ce3da70b43 Initial load duke parents: diff changeset	927	movq mm7,mm4
90ce3da70b43 Initial load duke parents: diff changeset	928	pandn mm7,[ebx+32]
90ce3da70b43 Initial load duke parents: diff changeset	929	por mm6,mm7
90ce3da70b43 Initial load duke parents: diff changeset	930	movq [ebx+32],mm6
90ce3da70b43 Initial load duke parents: diff changeset	931
90ce3da70b43 Initial load duke parents: diff changeset	932	movq mm7,[esi+40]
90ce3da70b43 Initial load duke parents: diff changeset	933	pand mm7,mm5
90ce3da70b43 Initial load duke parents: diff changeset	934	movq mm6,mm5
90ce3da70b43 Initial load duke parents: diff changeset	935	pandn mm6,[ebx+40]
90ce3da70b43 Initial load duke parents: diff changeset	936	por mm7,mm6
90ce3da70b43 Initial load duke parents: diff changeset	937	movq [ebx+40],mm7
90ce3da70b43 Initial load duke parents: diff changeset	938
90ce3da70b43 Initial load duke parents: diff changeset	939	add esi,48 //inc by 48 bytes processed
90ce3da70b43 Initial load duke parents: diff changeset	940	add ebx,48
90ce3da70b43 Initial load duke parents: diff changeset	941	sub ecx,8 //dec by 8 pixels processed
90ce3da70b43 Initial load duke parents: diff changeset	942
90ce3da70b43 Initial load duke parents: diff changeset	943	ja mainloop48
90ce3da70b43 Initial load duke parents: diff changeset	944	mainloop48end:
90ce3da70b43 Initial load duke parents: diff changeset	945
90ce3da70b43 Initial load duke parents: diff changeset	946	mov ecx,diff
90ce3da70b43 Initial load duke parents: diff changeset	947	cmp ecx,0
90ce3da70b43 Initial load duke parents: diff changeset	948	jz end48
90ce3da70b43 Initial load duke parents: diff changeset	949
90ce3da70b43 Initial load duke parents: diff changeset	950	mov edx,mask
90ce3da70b43 Initial load duke parents: diff changeset	951	sal edx,24 //make low byte the high byte
90ce3da70b43 Initial load duke parents: diff changeset	952
90ce3da70b43 Initial load duke parents: diff changeset	953	secondloop48:
90ce3da70b43 Initial load duke parents: diff changeset	954	sal edx,1 //move high bit to CF
90ce3da70b43 Initial load duke parents: diff changeset	955	jnc skip48 //if CF = 0
90ce3da70b43 Initial load duke parents: diff changeset	956	mov eax,[esi]
90ce3da70b43 Initial load duke parents: diff changeset	957	mov [ebx],eax
90ce3da70b43 Initial load duke parents: diff changeset	958	skip48:
90ce3da70b43 Initial load duke parents: diff changeset	959	add esi,4
90ce3da70b43 Initial load duke parents: diff changeset	960	add ebx,4
90ce3da70b43 Initial load duke parents: diff changeset	961
90ce3da70b43 Initial load duke parents: diff changeset	962	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	963	jnz secondloop48
90ce3da70b43 Initial load duke parents: diff changeset	964
90ce3da70b43 Initial load duke parents: diff changeset	965	end48:
90ce3da70b43 Initial load duke parents: diff changeset	966	emms
90ce3da70b43 Initial load duke parents: diff changeset	967	}
90ce3da70b43 Initial load duke parents: diff changeset	968	}
90ce3da70b43 Initial load duke parents: diff changeset	969	else /* mmx _not supported - Use modified C routine */
90ce3da70b43 Initial load duke parents: diff changeset	970	{
90ce3da70b43 Initial load duke parents: diff changeset	971	register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load duke parents: diff changeset	972	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	973	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	974	register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load duke parents: diff changeset	975	int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load duke parents: diff changeset	976
90ce3da70b43 Initial load duke parents: diff changeset	977	pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	978	srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load duke parents: diff changeset	979	pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	980	dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	981	initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	982	final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	983	incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	984	for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load duke parents: diff changeset	985	{
90ce3da70b43 Initial load duke parents: diff changeset	986	png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	987	srcptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	988	dstptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	989	}
90ce3da70b43 Initial load duke parents: diff changeset	990	} /* end of else */
90ce3da70b43 Initial load duke parents: diff changeset	991
90ce3da70b43 Initial load duke parents: diff changeset	992	break;
90ce3da70b43 Initial load duke parents: diff changeset	993	} // end 48 bpp
90ce3da70b43 Initial load duke parents: diff changeset	994
90ce3da70b43 Initial load duke parents: diff changeset	995	default:
90ce3da70b43 Initial load duke parents: diff changeset	996	{
90ce3da70b43 Initial load duke parents: diff changeset	997	png_bytep sptr;
90ce3da70b43 Initial load duke parents: diff changeset	998	png_bytep dp;
90ce3da70b43 Initial load duke parents: diff changeset	999	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1000	int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load duke parents: diff changeset	1001	unsigned int i;
90ce3da70b43 Initial load duke parents: diff changeset	1002	register int disp = png_pass_inc[png_ptr->pass]; // get the offset
90ce3da70b43 Initial load duke parents: diff changeset	1003	register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load duke parents: diff changeset	1004
90ce3da70b43 Initial load duke parents: diff changeset	1005	pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	1006	sptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load duke parents: diff changeset	1007	pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1008	dp = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1009	initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1010	final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1011	incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1012	for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load duke parents: diff changeset	1013	{
90ce3da70b43 Initial load duke parents: diff changeset	1014	png_memcpy(dp, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1015	sptr += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	1016	dp += incr1;
90ce3da70b43 Initial load duke parents: diff changeset	1017	}
90ce3da70b43 Initial load duke parents: diff changeset	1018	break;
90ce3da70b43 Initial load duke parents: diff changeset	1019	}
90ce3da70b43 Initial load duke parents: diff changeset	1020	} /* end switch (png_ptr->row_info.pixel_depth) */
90ce3da70b43 Initial load duke parents: diff changeset	1021	} /* end if (non-trivial mask) */
90ce3da70b43 Initial load duke parents: diff changeset	1022
90ce3da70b43 Initial load duke parents: diff changeset	1023	} /* end png_combine_row() */
90ce3da70b43 Initial load duke parents: diff changeset	1024
90ce3da70b43 Initial load duke parents: diff changeset	1025
90ce3da70b43 Initial load duke parents: diff changeset	1026	#if defined(PNG_READ_INTERLACING_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	1027
90ce3da70b43 Initial load duke parents: diff changeset	1028	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	1029	png_do_read_interlace(png_structp png_ptr)
90ce3da70b43 Initial load duke parents: diff changeset	1030	{
90ce3da70b43 Initial load duke parents: diff changeset	1031	png_row_infop row_info = &(png_ptr->row_info);
90ce3da70b43 Initial load duke parents: diff changeset	1032	png_bytep row = png_ptr->row_buf + 1;
90ce3da70b43 Initial load duke parents: diff changeset	1033	int pass = png_ptr->pass;
90ce3da70b43 Initial load duke parents: diff changeset	1034	png_uint_32 transformations = png_ptr->transformations;
90ce3da70b43 Initial load duke parents: diff changeset	1035	#ifdef PNG_USE_LOCAL_ARRAYS
90ce3da70b43 Initial load duke parents: diff changeset	1036	const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
90ce3da70b43 Initial load duke parents: diff changeset	1037	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1038
90ce3da70b43 Initial load duke parents: diff changeset	1039	png_debug(1,"in png_do_read_interlace\n");
90ce3da70b43 Initial load duke parents: diff changeset	1040
90ce3da70b43 Initial load duke parents: diff changeset	1041	if (mmx_supported == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	1042	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	1043	/* this should have happened in png_init_mmx_flags() already */
90ce3da70b43 Initial load duke parents: diff changeset	1044	png_warning(png_ptr, "asm_flags may not have been initialized");
90ce3da70b43 Initial load duke parents: diff changeset	1045	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1046	png_mmx_support();
90ce3da70b43 Initial load duke parents: diff changeset	1047	}
90ce3da70b43 Initial load duke parents: diff changeset	1048
90ce3da70b43 Initial load duke parents: diff changeset	1049	if (row != NULL && row_info != NULL)
90ce3da70b43 Initial load duke parents: diff changeset	1050	{
90ce3da70b43 Initial load duke parents: diff changeset	1051	png_uint_32 final_width;
90ce3da70b43 Initial load duke parents: diff changeset	1052
90ce3da70b43 Initial load duke parents: diff changeset	1053	final_width = row_info->width * png_pass_inc[pass];
90ce3da70b43 Initial load duke parents: diff changeset	1054
90ce3da70b43 Initial load duke parents: diff changeset	1055	switch (row_info->pixel_depth)
90ce3da70b43 Initial load duke parents: diff changeset	1056	{
90ce3da70b43 Initial load duke parents: diff changeset	1057	case 1:
90ce3da70b43 Initial load duke parents: diff changeset	1058	{
90ce3da70b43 Initial load duke parents: diff changeset	1059	png_bytep sp, dp;
90ce3da70b43 Initial load duke parents: diff changeset	1060	int sshift, dshift;
90ce3da70b43 Initial load duke parents: diff changeset	1061	int s_start, s_end, s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1062	png_byte v;
90ce3da70b43 Initial load duke parents: diff changeset	1063	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	1064	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1065
90ce3da70b43 Initial load duke parents: diff changeset	1066	sp = row + (png_size_t)((row_info->width - 1) >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	1067	dp = row + (png_size_t)((final_width - 1) >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	1068	#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	1069	if (transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load duke parents: diff changeset	1070	{
90ce3da70b43 Initial load duke parents: diff changeset	1071	sshift = (int)((row_info->width + 7) & 7);
90ce3da70b43 Initial load duke parents: diff changeset	1072	dshift = (int)((final_width + 7) & 7);
90ce3da70b43 Initial load duke parents: diff changeset	1073	s_start = 7;
90ce3da70b43 Initial load duke parents: diff changeset	1074	s_end = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1075	s_inc = -1;
90ce3da70b43 Initial load duke parents: diff changeset	1076	}
90ce3da70b43 Initial load duke parents: diff changeset	1077	else
90ce3da70b43 Initial load duke parents: diff changeset	1078	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1079	{
90ce3da70b43 Initial load duke parents: diff changeset	1080	sshift = 7 - (int)((row_info->width + 7) & 7);
90ce3da70b43 Initial load duke parents: diff changeset	1081	dshift = 7 - (int)((final_width + 7) & 7);
90ce3da70b43 Initial load duke parents: diff changeset	1082	s_start = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1083	s_end = 7;
90ce3da70b43 Initial load duke parents: diff changeset	1084	s_inc = 1;
90ce3da70b43 Initial load duke parents: diff changeset	1085	}
90ce3da70b43 Initial load duke parents: diff changeset	1086
90ce3da70b43 Initial load duke parents: diff changeset	1087	for (i = row_info->width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1088	{
90ce3da70b43 Initial load duke parents: diff changeset	1089	v = (png_byte)((*sp >> sshift) & 0x1);
90ce3da70b43 Initial load duke parents: diff changeset	1090	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1091	{
90ce3da70b43 Initial load duke parents: diff changeset	1092	*dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	1093	*dp |= (png_byte)(v << dshift);
90ce3da70b43 Initial load duke parents: diff changeset	1094	if (dshift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	1095	{
90ce3da70b43 Initial load duke parents: diff changeset	1096	dshift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	1097	dp--;
90ce3da70b43 Initial load duke parents: diff changeset	1098	}
90ce3da70b43 Initial load duke parents: diff changeset	1099	else
90ce3da70b43 Initial load duke parents: diff changeset	1100	dshift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1101	}
90ce3da70b43 Initial load duke parents: diff changeset	1102	if (sshift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	1103	{
90ce3da70b43 Initial load duke parents: diff changeset	1104	sshift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	1105	sp--;
90ce3da70b43 Initial load duke parents: diff changeset	1106	}
90ce3da70b43 Initial load duke parents: diff changeset	1107	else
90ce3da70b43 Initial load duke parents: diff changeset	1108	sshift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1109	}
90ce3da70b43 Initial load duke parents: diff changeset	1110	break;
90ce3da70b43 Initial load duke parents: diff changeset	1111	}
90ce3da70b43 Initial load duke parents: diff changeset	1112
90ce3da70b43 Initial load duke parents: diff changeset	1113	case 2:
90ce3da70b43 Initial load duke parents: diff changeset	1114	{
90ce3da70b43 Initial load duke parents: diff changeset	1115	png_bytep sp, dp;
90ce3da70b43 Initial load duke parents: diff changeset	1116	int sshift, dshift;
90ce3da70b43 Initial load duke parents: diff changeset	1117	int s_start, s_end, s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1118	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	1119
90ce3da70b43 Initial load duke parents: diff changeset	1120	sp = row + (png_size_t)((row_info->width - 1) >> 2);
90ce3da70b43 Initial load duke parents: diff changeset	1121	dp = row + (png_size_t)((final_width - 1) >> 2);
90ce3da70b43 Initial load duke parents: diff changeset	1122	#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	1123	if (transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load duke parents: diff changeset	1124	{
90ce3da70b43 Initial load duke parents: diff changeset	1125	sshift = (png_size_t)(((row_info->width + 3) & 3) << 1);
90ce3da70b43 Initial load duke parents: diff changeset	1126	dshift = (png_size_t)(((final_width + 3) & 3) << 1);
90ce3da70b43 Initial load duke parents: diff changeset	1127	s_start = 6;
90ce3da70b43 Initial load duke parents: diff changeset	1128	s_end = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1129	s_inc = -2;
90ce3da70b43 Initial load duke parents: diff changeset	1130	}
90ce3da70b43 Initial load duke parents: diff changeset	1131	else
90ce3da70b43 Initial load duke parents: diff changeset	1132	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1133	{
90ce3da70b43 Initial load duke parents: diff changeset	1134	sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1);
90ce3da70b43 Initial load duke parents: diff changeset	1135	dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1);
90ce3da70b43 Initial load duke parents: diff changeset	1136	s_start = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1137	s_end = 6;
90ce3da70b43 Initial load duke parents: diff changeset	1138	s_inc = 2;
90ce3da70b43 Initial load duke parents: diff changeset	1139	}
90ce3da70b43 Initial load duke parents: diff changeset	1140
90ce3da70b43 Initial load duke parents: diff changeset	1141	for (i = row_info->width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1142	{
90ce3da70b43 Initial load duke parents: diff changeset	1143	png_byte v;
90ce3da70b43 Initial load duke parents: diff changeset	1144	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1145
90ce3da70b43 Initial load duke parents: diff changeset	1146	v = (png_byte)((*sp >> sshift) & 0x3);
90ce3da70b43 Initial load duke parents: diff changeset	1147	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1148	{
90ce3da70b43 Initial load duke parents: diff changeset	1149	*dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	1150	*dp |= (png_byte)(v << dshift);
90ce3da70b43 Initial load duke parents: diff changeset	1151	if (dshift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	1152	{
90ce3da70b43 Initial load duke parents: diff changeset	1153	dshift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	1154	dp--;
90ce3da70b43 Initial load duke parents: diff changeset	1155	}
90ce3da70b43 Initial load duke parents: diff changeset	1156	else
90ce3da70b43 Initial load duke parents: diff changeset	1157	dshift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1158	}
90ce3da70b43 Initial load duke parents: diff changeset	1159	if (sshift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	1160	{
90ce3da70b43 Initial load duke parents: diff changeset	1161	sshift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	1162	sp--;
90ce3da70b43 Initial load duke parents: diff changeset	1163	}
90ce3da70b43 Initial load duke parents: diff changeset	1164	else
90ce3da70b43 Initial load duke parents: diff changeset	1165	sshift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1166	}
90ce3da70b43 Initial load duke parents: diff changeset	1167	break;
90ce3da70b43 Initial load duke parents: diff changeset	1168	}
90ce3da70b43 Initial load duke parents: diff changeset	1169
90ce3da70b43 Initial load duke parents: diff changeset	1170	case 4:
90ce3da70b43 Initial load duke parents: diff changeset	1171	{
90ce3da70b43 Initial load duke parents: diff changeset	1172	png_bytep sp, dp;
90ce3da70b43 Initial load duke parents: diff changeset	1173	int sshift, dshift;
90ce3da70b43 Initial load duke parents: diff changeset	1174	int s_start, s_end, s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1175	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	1176
90ce3da70b43 Initial load duke parents: diff changeset	1177	sp = row + (png_size_t)((row_info->width - 1) >> 1);
90ce3da70b43 Initial load duke parents: diff changeset	1178	dp = row + (png_size_t)((final_width - 1) >> 1);
90ce3da70b43 Initial load duke parents: diff changeset	1179	#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load duke parents: diff changeset	1180	if (transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load duke parents: diff changeset	1181	{
90ce3da70b43 Initial load duke parents: diff changeset	1182	sshift = (png_size_t)(((row_info->width + 1) & 1) << 2);
90ce3da70b43 Initial load duke parents: diff changeset	1183	dshift = (png_size_t)(((final_width + 1) & 1) << 2);
90ce3da70b43 Initial load duke parents: diff changeset	1184	s_start = 4;
90ce3da70b43 Initial load duke parents: diff changeset	1185	s_end = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1186	s_inc = -4;
90ce3da70b43 Initial load duke parents: diff changeset	1187	}
90ce3da70b43 Initial load duke parents: diff changeset	1188	else
90ce3da70b43 Initial load duke parents: diff changeset	1189	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1190	{
90ce3da70b43 Initial load duke parents: diff changeset	1191	sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2);
90ce3da70b43 Initial load duke parents: diff changeset	1192	dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2);
90ce3da70b43 Initial load duke parents: diff changeset	1193	s_start = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1194	s_end = 4;
90ce3da70b43 Initial load duke parents: diff changeset	1195	s_inc = 4;
90ce3da70b43 Initial load duke parents: diff changeset	1196	}
90ce3da70b43 Initial load duke parents: diff changeset	1197
90ce3da70b43 Initial load duke parents: diff changeset	1198	for (i = row_info->width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1199	{
90ce3da70b43 Initial load duke parents: diff changeset	1200	png_byte v;
90ce3da70b43 Initial load duke parents: diff changeset	1201	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1202
90ce3da70b43 Initial load duke parents: diff changeset	1203	v = (png_byte)((*sp >> sshift) & 0xf);
90ce3da70b43 Initial load duke parents: diff changeset	1204	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1205	{
90ce3da70b43 Initial load duke parents: diff changeset	1206	*dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	1207	*dp |= (png_byte)(v << dshift);
90ce3da70b43 Initial load duke parents: diff changeset	1208	if (dshift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	1209	{
90ce3da70b43 Initial load duke parents: diff changeset	1210	dshift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	1211	dp--;
90ce3da70b43 Initial load duke parents: diff changeset	1212	}
90ce3da70b43 Initial load duke parents: diff changeset	1213	else
90ce3da70b43 Initial load duke parents: diff changeset	1214	dshift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1215	}
90ce3da70b43 Initial load duke parents: diff changeset	1216	if (sshift == s_end)
90ce3da70b43 Initial load duke parents: diff changeset	1217	{
90ce3da70b43 Initial load duke parents: diff changeset	1218	sshift = s_start;
90ce3da70b43 Initial load duke parents: diff changeset	1219	sp--;
90ce3da70b43 Initial load duke parents: diff changeset	1220	}
90ce3da70b43 Initial load duke parents: diff changeset	1221	else
90ce3da70b43 Initial load duke parents: diff changeset	1222	sshift += s_inc;
90ce3da70b43 Initial load duke parents: diff changeset	1223	}
90ce3da70b43 Initial load duke parents: diff changeset	1224	break;
90ce3da70b43 Initial load duke parents: diff changeset	1225	}
90ce3da70b43 Initial load duke parents: diff changeset	1226
90ce3da70b43 Initial load duke parents: diff changeset	1227	default: // This is the place where the routine is modified
90ce3da70b43 Initial load duke parents: diff changeset	1228	{
90ce3da70b43 Initial load duke parents: diff changeset	1229	__int64 const4 = 0x0000000000FFFFFF;
90ce3da70b43 Initial load duke parents: diff changeset	1230	// __int64 const5 = 0x000000FFFFFF0000; // unused...
90ce3da70b43 Initial load duke parents: diff changeset	1231	__int64 const6 = 0x00000000000000FF;
90ce3da70b43 Initial load duke parents: diff changeset	1232	png_bytep sptr, dp;
90ce3da70b43 Initial load duke parents: diff changeset	1233	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	1234	png_size_t pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1235	int width = row_info->width;
90ce3da70b43 Initial load duke parents: diff changeset	1236
90ce3da70b43 Initial load duke parents: diff changeset	1237	pixel_bytes = (row_info->pixel_depth >> 3);
90ce3da70b43 Initial load duke parents: diff changeset	1238
90ce3da70b43 Initial load duke parents: diff changeset	1239	sptr = row + (width - 1) * pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1240	dp = row + (final_width - 1) * pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1241	// New code by Nirav Chhatrapati - Intel Corporation
90ce3da70b43 Initial load duke parents: diff changeset	1242	// sign fix by GRR
90ce3da70b43 Initial load duke parents: diff changeset	1243	// NOTE: there is NO MMX code for 48-bit and 64-bit images
90ce3da70b43 Initial load duke parents: diff changeset	1244
90ce3da70b43 Initial load duke parents: diff changeset	1245	// use MMX routine if machine supports it
90ce3da70b43 Initial load duke parents: diff changeset	1246	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	1247	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_INTERLACE)
90ce3da70b43 Initial load duke parents: diff changeset	1248	/* && mmx_supported */ )
90ce3da70b43 Initial load duke parents: diff changeset	1249	#else
90ce3da70b43 Initial load duke parents: diff changeset	1250	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	1251	#endif
90ce3da70b43 Initial load duke parents: diff changeset	1252	{
90ce3da70b43 Initial load duke parents: diff changeset	1253	if (pixel_bytes == 3)
90ce3da70b43 Initial load duke parents: diff changeset	1254	{
90ce3da70b43 Initial load duke parents: diff changeset	1255	if (((pass == 0) \|\| (pass == 1)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1256	{
90ce3da70b43 Initial load duke parents: diff changeset	1257	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1258	{
90ce3da70b43 Initial load duke parents: diff changeset	1259	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1260	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1261	mov ecx, width
90ce3da70b43 Initial load duke parents: diff changeset	1262	sub edi, 21 // (png_pass_inc[pass] - 1)*pixel_bytes
90ce3da70b43 Initial load duke parents: diff changeset	1263	loop_pass0:
90ce3da70b43 Initial load duke parents: diff changeset	1264	movd mm0, [esi] ; X X X X X v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1265	pand mm0, const4 ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1266	movq mm1, mm0 ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1267	psllq mm0, 16 ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1268	movq mm2, mm0 ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1269	psllq mm0, 24 ; v2 v1 v0 0 0 0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1270	psrlq mm1, 8 ; 0 0 0 0 0 0 v2 v1
90ce3da70b43 Initial load duke parents: diff changeset	1271	por mm0, mm2 ; v2 v1 v0 v2 v1 v0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1272	por mm0, mm1 ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load duke parents: diff changeset	1273	movq mm3, mm0 ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load duke parents: diff changeset	1274	psllq mm0, 16 ; v0 v2 v1 v0 v2 v1 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1275	movq mm4, mm3 ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load duke parents: diff changeset	1276	punpckhdq mm3, mm0 ; v0 v2 v1 v0 v2 v1 v0 v2
90ce3da70b43 Initial load duke parents: diff changeset	1277	movq [edi+16] , mm4
90ce3da70b43 Initial load duke parents: diff changeset	1278	psrlq mm0, 32 ; 0 0 0 0 v0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1279	movq [edi+8] , mm3
90ce3da70b43 Initial load duke parents: diff changeset	1280	punpckldq mm0, mm4 ; v1 v0 v2 v1 v0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1281	sub esi, 3
90ce3da70b43 Initial load duke parents: diff changeset	1282	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1283	sub edi, 24
90ce3da70b43 Initial load duke parents: diff changeset	1284	//sub esi, 3
90ce3da70b43 Initial load duke parents: diff changeset	1285	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	1286	jnz loop_pass0
90ce3da70b43 Initial load duke parents: diff changeset	1287	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1288	}
90ce3da70b43 Initial load duke parents: diff changeset	1289	}
90ce3da70b43 Initial load duke parents: diff changeset	1290	else if (((pass == 2) \|\| (pass == 3)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1291	{
90ce3da70b43 Initial load duke parents: diff changeset	1292	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1293	{
90ce3da70b43 Initial load duke parents: diff changeset	1294	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1295	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1296	mov ecx, width
90ce3da70b43 Initial load duke parents: diff changeset	1297	sub edi, 9 // (png_pass_inc[pass] - 1)*pixel_bytes
90ce3da70b43 Initial load duke parents: diff changeset	1298	loop_pass2:
90ce3da70b43 Initial load duke parents: diff changeset	1299	movd mm0, [esi] ; X X X X X v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1300	pand mm0, const4 ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1301	movq mm1, mm0 ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1302	psllq mm0, 16 ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1303	movq mm2, mm0 ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1304	psllq mm0, 24 ; v2 v1 v0 0 0 0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1305	psrlq mm1, 8 ; 0 0 0 0 0 0 v2 v1
90ce3da70b43 Initial load duke parents: diff changeset	1306	por mm0, mm2 ; v2 v1 v0 v2 v1 v0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1307	por mm0, mm1 ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load duke parents: diff changeset	1308	movq [edi+4], mm0 ; move to memory
90ce3da70b43 Initial load duke parents: diff changeset	1309	psrlq mm0, 16 ; 0 0 v2 v1 v0 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1310	movd [edi], mm0 ; move to memory
90ce3da70b43 Initial load duke parents: diff changeset	1311	sub esi, 3
90ce3da70b43 Initial load duke parents: diff changeset	1312	sub edi, 12
90ce3da70b43 Initial load duke parents: diff changeset	1313	dec ecx
90ce3da70b43 Initial load duke parents: diff changeset	1314	jnz loop_pass2
90ce3da70b43 Initial load duke parents: diff changeset	1315	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1316	}
90ce3da70b43 Initial load duke parents: diff changeset	1317	}
90ce3da70b43 Initial load duke parents: diff changeset	1318	else if (width) /* && ((pass == 4) \|\| (pass == 5)) */
90ce3da70b43 Initial load duke parents: diff changeset	1319	{
90ce3da70b43 Initial load duke parents: diff changeset	1320	int width_mmx = ((width >> 1) << 1) - 8;
90ce3da70b43 Initial load duke parents: diff changeset	1321	if (width_mmx < 0)
90ce3da70b43 Initial load duke parents: diff changeset	1322	width_mmx = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1323	width -= width_mmx; // 8 or 9 pix, 24 or 27 bytes
90ce3da70b43 Initial load duke parents: diff changeset	1324	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1325	{
90ce3da70b43 Initial load duke parents: diff changeset	1326	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1327	{
90ce3da70b43 Initial load duke parents: diff changeset	1328	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1329	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1330	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1331	sub esi, 3
90ce3da70b43 Initial load duke parents: diff changeset	1332	sub edi, 9
90ce3da70b43 Initial load duke parents: diff changeset	1333	loop_pass4:
90ce3da70b43 Initial load duke parents: diff changeset	1334	movq mm0, [esi] ; X X v2 v1 v0 v5 v4 v3
90ce3da70b43 Initial load duke parents: diff changeset	1335	movq mm7, mm0 ; X X v2 v1 v0 v5 v4 v3
90ce3da70b43 Initial load duke parents: diff changeset	1336	movq mm6, mm0 ; X X v2 v1 v0 v5 v4 v3
90ce3da70b43 Initial load duke parents: diff changeset	1337	psllq mm0, 24 ; v1 v0 v5 v4 v3 0 0 0
90ce3da70b43 Initial load duke parents: diff changeset	1338	pand mm7, const4 ; 0 0 0 0 0 v5 v4 v3
90ce3da70b43 Initial load duke parents: diff changeset	1339	psrlq mm6, 24 ; 0 0 0 X X v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1340	por mm0, mm7 ; v1 v0 v5 v4 v3 v5 v4 v3
90ce3da70b43 Initial load duke parents: diff changeset	1341	movq mm5, mm6 ; 0 0 0 X X v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1342	psllq mm6, 8 ; 0 0 X X v2 v1 v0 0
90ce3da70b43 Initial load duke parents: diff changeset	1343	movq [edi], mm0 ; move quad to memory
90ce3da70b43 Initial load duke parents: diff changeset	1344	psrlq mm5, 16 ; 0 0 0 0 0 X X v2
90ce3da70b43 Initial load duke parents: diff changeset	1345	pand mm5, const6 ; 0 0 0 0 0 0 0 v2
90ce3da70b43 Initial load duke parents: diff changeset	1346	por mm6, mm5 ; 0 0 X X v2 v1 v0 v2
90ce3da70b43 Initial load duke parents: diff changeset	1347	movd [edi+8], mm6 ; move double to memory
90ce3da70b43 Initial load duke parents: diff changeset	1348	sub esi, 6
90ce3da70b43 Initial load duke parents: diff changeset	1349	sub edi, 12
90ce3da70b43 Initial load duke parents: diff changeset	1350	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1351	jnz loop_pass4
90ce3da70b43 Initial load duke parents: diff changeset	1352	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1353	}
90ce3da70b43 Initial load duke parents: diff changeset	1354	}
90ce3da70b43 Initial load duke parents: diff changeset	1355
90ce3da70b43 Initial load duke parents: diff changeset	1356	sptr -= width_mmx*3;
90ce3da70b43 Initial load duke parents: diff changeset	1357	dp -= width_mmx*6;
90ce3da70b43 Initial load duke parents: diff changeset	1358	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1359	{
90ce3da70b43 Initial load duke parents: diff changeset	1360	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1361	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1362
90ce3da70b43 Initial load duke parents: diff changeset	1363	png_memcpy(v, sptr, 3);
90ce3da70b43 Initial load duke parents: diff changeset	1364	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1365	{
90ce3da70b43 Initial load duke parents: diff changeset	1366	png_memcpy(dp, v, 3);
90ce3da70b43 Initial load duke parents: diff changeset	1367	dp -= 3;
90ce3da70b43 Initial load duke parents: diff changeset	1368	}
90ce3da70b43 Initial load duke parents: diff changeset	1369	sptr -= 3;
90ce3da70b43 Initial load duke parents: diff changeset	1370	}
90ce3da70b43 Initial load duke parents: diff changeset	1371	}
90ce3da70b43 Initial load duke parents: diff changeset	1372	} /* end of pixel_bytes == 3 */
90ce3da70b43 Initial load duke parents: diff changeset	1373
90ce3da70b43 Initial load duke parents: diff changeset	1374	else if (pixel_bytes == 1)
90ce3da70b43 Initial load duke parents: diff changeset	1375	{
90ce3da70b43 Initial load duke parents: diff changeset	1376	if (((pass == 0) \|\| (pass == 1)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1377	{
90ce3da70b43 Initial load duke parents: diff changeset	1378	int width_mmx = ((width >> 2) << 2);
90ce3da70b43 Initial load duke parents: diff changeset	1379	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1380	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1381	{
90ce3da70b43 Initial load duke parents: diff changeset	1382	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1383	{
90ce3da70b43 Initial load duke parents: diff changeset	1384	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1385	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1386	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1387	sub edi, 31
90ce3da70b43 Initial load duke parents: diff changeset	1388	sub esi, 3
90ce3da70b43 Initial load duke parents: diff changeset	1389	loop1_pass0:
90ce3da70b43 Initial load duke parents: diff changeset	1390	movd mm0, [esi] ; X X X X v0 v1 v2 v3
90ce3da70b43 Initial load duke parents: diff changeset	1391	movq mm1, mm0 ; X X X X v0 v1 v2 v3
90ce3da70b43 Initial load duke parents: diff changeset	1392	punpcklbw mm0, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1393	movq mm2, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1394	punpcklwd mm0, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1395	movq mm3, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1396	punpckldq mm0, mm0 ; v3 v3 v3 v3 v3 v3 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1397	punpckhdq mm3, mm3 ; v2 v2 v2 v2 v2 v2 v2 v2
90ce3da70b43 Initial load duke parents: diff changeset	1398	movq [edi], mm0 ; move to memory v3
90ce3da70b43 Initial load duke parents: diff changeset	1399	punpckhwd mm2, mm2 ; v0 v0 v0 v0 v1 v1 v1 v1
90ce3da70b43 Initial load duke parents: diff changeset	1400	movq [edi+8], mm3 ; move to memory v2
90ce3da70b43 Initial load duke parents: diff changeset	1401	movq mm4, mm2 ; v0 v0 v0 v0 v1 v1 v1 v1
90ce3da70b43 Initial load duke parents: diff changeset	1402	punpckldq mm2, mm2 ; v1 v1 v1 v1 v1 v1 v1 v1
90ce3da70b43 Initial load duke parents: diff changeset	1403	punpckhdq mm4, mm4 ; v0 v0 v0 v0 v0 v0 v0 v0
90ce3da70b43 Initial load duke parents: diff changeset	1404	movq [edi+16], mm2 ; move to memory v1
90ce3da70b43 Initial load duke parents: diff changeset	1405	movq [edi+24], mm4 ; move to memory v0
90ce3da70b43 Initial load duke parents: diff changeset	1406	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1407	sub edi, 32
90ce3da70b43 Initial load duke parents: diff changeset	1408	sub ecx, 4
90ce3da70b43 Initial load duke parents: diff changeset	1409	jnz loop1_pass0
90ce3da70b43 Initial load duke parents: diff changeset	1410	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1411	}
90ce3da70b43 Initial load duke parents: diff changeset	1412	}
90ce3da70b43 Initial load duke parents: diff changeset	1413
90ce3da70b43 Initial load duke parents: diff changeset	1414	sptr -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1415	dp -= width_mmx*8;
90ce3da70b43 Initial load duke parents: diff changeset	1416	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1417	{
90ce3da70b43 Initial load duke parents: diff changeset	1418	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1419
90ce3da70b43 Initial load duke parents: diff changeset	1420	/* I simplified this part in version 1.0.4e
90ce3da70b43 Initial load duke parents: diff changeset	1421	* here and in several other instances where
90ce3da70b43 Initial load duke parents: diff changeset	1422	* pixel_bytes == 1 -- GR-P
90ce3da70b43 Initial load duke parents: diff changeset	1423	*
90ce3da70b43 Initial load duke parents: diff changeset	1424	* Original code:
90ce3da70b43 Initial load duke parents: diff changeset	1425	*
90ce3da70b43 Initial load duke parents: diff changeset	1426	* png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1427	* png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1428	* for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1429	* {
90ce3da70b43 Initial load duke parents: diff changeset	1430	* png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1431	* dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1432	* }
90ce3da70b43 Initial load duke parents: diff changeset	1433	* sptr -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1434	*
90ce3da70b43 Initial load duke parents: diff changeset	1435	* Replacement code is in the next three lines:
90ce3da70b43 Initial load duke parents: diff changeset	1436	*/
90ce3da70b43 Initial load duke parents: diff changeset	1437
90ce3da70b43 Initial load duke parents: diff changeset	1438	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1439	dp-- = sptr;
90ce3da70b43 Initial load duke parents: diff changeset	1440	sptr--;
90ce3da70b43 Initial load duke parents: diff changeset	1441	}
90ce3da70b43 Initial load duke parents: diff changeset	1442	}
90ce3da70b43 Initial load duke parents: diff changeset	1443	else if (((pass == 2) \|\| (pass == 3)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1444	{
90ce3da70b43 Initial load duke parents: diff changeset	1445	int width_mmx = ((width >> 2) << 2);
90ce3da70b43 Initial load duke parents: diff changeset	1446	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1447	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1448	{
90ce3da70b43 Initial load duke parents: diff changeset	1449	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1450	{
90ce3da70b43 Initial load duke parents: diff changeset	1451	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1452	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1453	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1454	sub edi, 15
90ce3da70b43 Initial load duke parents: diff changeset	1455	sub esi, 3
90ce3da70b43 Initial load duke parents: diff changeset	1456	loop1_pass2:
90ce3da70b43 Initial load duke parents: diff changeset	1457	movd mm0, [esi] ; X X X X v0 v1 v2 v3
90ce3da70b43 Initial load duke parents: diff changeset	1458	punpcklbw mm0, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1459	movq mm1, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1460	punpcklwd mm0, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1461	punpckhwd mm1, mm1 ; v0 v0 v0 v0 v1 v1 v1 v1
90ce3da70b43 Initial load duke parents: diff changeset	1462	movq [edi], mm0 ; move to memory v2 and v3
90ce3da70b43 Initial load duke parents: diff changeset	1463	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1464	movq [edi+8], mm1 ; move to memory v1 and v0
90ce3da70b43 Initial load duke parents: diff changeset	1465	sub edi, 16
90ce3da70b43 Initial load duke parents: diff changeset	1466	sub ecx, 4
90ce3da70b43 Initial load duke parents: diff changeset	1467	jnz loop1_pass2
90ce3da70b43 Initial load duke parents: diff changeset	1468	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1469	}
90ce3da70b43 Initial load duke parents: diff changeset	1470	}
90ce3da70b43 Initial load duke parents: diff changeset	1471
90ce3da70b43 Initial load duke parents: diff changeset	1472	sptr -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1473	dp -= width_mmx*4;
90ce3da70b43 Initial load duke parents: diff changeset	1474	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1475	{
90ce3da70b43 Initial load duke parents: diff changeset	1476	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1477
90ce3da70b43 Initial load duke parents: diff changeset	1478	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1479	{
90ce3da70b43 Initial load duke parents: diff changeset	1480	dp-- = sptr;
90ce3da70b43 Initial load duke parents: diff changeset	1481	}
90ce3da70b43 Initial load duke parents: diff changeset	1482	sptr --;
90ce3da70b43 Initial load duke parents: diff changeset	1483	}
90ce3da70b43 Initial load duke parents: diff changeset	1484	}
90ce3da70b43 Initial load duke parents: diff changeset	1485	else if (width) /* && ((pass == 4) \|\| (pass == 5))) */
90ce3da70b43 Initial load duke parents: diff changeset	1486	{
90ce3da70b43 Initial load duke parents: diff changeset	1487	int width_mmx = ((width >> 3) << 3);
90ce3da70b43 Initial load duke parents: diff changeset	1488	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1489	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1490	{
90ce3da70b43 Initial load duke parents: diff changeset	1491	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1492	{
90ce3da70b43 Initial load duke parents: diff changeset	1493	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1494	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1495	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1496	sub edi, 15
90ce3da70b43 Initial load duke parents: diff changeset	1497	sub esi, 7
90ce3da70b43 Initial load duke parents: diff changeset	1498	loop1_pass4:
90ce3da70b43 Initial load duke parents: diff changeset	1499	movq mm0, [esi] ; v0 v1 v2 v3 v4 v5 v6 v7
90ce3da70b43 Initial load duke parents: diff changeset	1500	movq mm1, mm0 ; v0 v1 v2 v3 v4 v5 v6 v7
90ce3da70b43 Initial load duke parents: diff changeset	1501	punpcklbw mm0, mm0 ; v4 v4 v5 v5 v6 v6 v7 v7
90ce3da70b43 Initial load duke parents: diff changeset	1502	//movq mm1, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1503	punpckhbw mm1, mm1 ;v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load duke parents: diff changeset	1504	movq [edi+8], mm1 ; move to memory v0 v1 v2 and v3
90ce3da70b43 Initial load duke parents: diff changeset	1505	sub esi, 8
90ce3da70b43 Initial load duke parents: diff changeset	1506	movq [edi], mm0 ; move to memory v4 v5 v6 and v7
90ce3da70b43 Initial load duke parents: diff changeset	1507	//sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1508	sub edi, 16
90ce3da70b43 Initial load duke parents: diff changeset	1509	sub ecx, 8
90ce3da70b43 Initial load duke parents: diff changeset	1510	jnz loop1_pass4
90ce3da70b43 Initial load duke parents: diff changeset	1511	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1512	}
90ce3da70b43 Initial load duke parents: diff changeset	1513	}
90ce3da70b43 Initial load duke parents: diff changeset	1514
90ce3da70b43 Initial load duke parents: diff changeset	1515	sptr -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1516	dp -= width_mmx*2;
90ce3da70b43 Initial load duke parents: diff changeset	1517	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1518	{
90ce3da70b43 Initial load duke parents: diff changeset	1519	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1520
90ce3da70b43 Initial load duke parents: diff changeset	1521	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1522	{
90ce3da70b43 Initial load duke parents: diff changeset	1523	dp-- = sptr;
90ce3da70b43 Initial load duke parents: diff changeset	1524	}
90ce3da70b43 Initial load duke parents: diff changeset	1525	sptr --;
90ce3da70b43 Initial load duke parents: diff changeset	1526	}
90ce3da70b43 Initial load duke parents: diff changeset	1527	}
90ce3da70b43 Initial load duke parents: diff changeset	1528	} /* end of pixel_bytes == 1 */
90ce3da70b43 Initial load duke parents: diff changeset	1529
90ce3da70b43 Initial load duke parents: diff changeset	1530	else if (pixel_bytes == 2)
90ce3da70b43 Initial load duke parents: diff changeset	1531	{
90ce3da70b43 Initial load duke parents: diff changeset	1532	if (((pass == 0) \|\| (pass == 1)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1533	{
90ce3da70b43 Initial load duke parents: diff changeset	1534	int width_mmx = ((width >> 1) << 1);
90ce3da70b43 Initial load duke parents: diff changeset	1535	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1536	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1537	{
90ce3da70b43 Initial load duke parents: diff changeset	1538	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1539	{
90ce3da70b43 Initial load duke parents: diff changeset	1540	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1541	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1542	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1543	sub esi, 2
90ce3da70b43 Initial load duke parents: diff changeset	1544	sub edi, 30
90ce3da70b43 Initial load duke parents: diff changeset	1545	loop2_pass0:
90ce3da70b43 Initial load duke parents: diff changeset	1546	movd mm0, [esi] ; X X X X v1 v0 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1547	punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1548	movq mm1, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1549	punpckldq mm0, mm0 ; v3 v2 v3 v2 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1550	punpckhdq mm1, mm1 ; v1 v0 v1 v0 v1 v0 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1551	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1552	movq [edi + 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1553	movq [edi + 16], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1554	movq [edi + 24], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1555	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1556	sub edi, 32
90ce3da70b43 Initial load duke parents: diff changeset	1557	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1558	jnz loop2_pass0
90ce3da70b43 Initial load duke parents: diff changeset	1559	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1560	}
90ce3da70b43 Initial load duke parents: diff changeset	1561	}
90ce3da70b43 Initial load duke parents: diff changeset	1562
90ce3da70b43 Initial load duke parents: diff changeset	1563	sptr -= (width_mmx*2 - 2); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1564	dp -= (width_mmx*16 - 2); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1565	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1566	{
90ce3da70b43 Initial load duke parents: diff changeset	1567	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1568	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1569	sptr -= 2;
90ce3da70b43 Initial load duke parents: diff changeset	1570	png_memcpy(v, sptr, 2);
90ce3da70b43 Initial load duke parents: diff changeset	1571	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1572	{
90ce3da70b43 Initial load duke parents: diff changeset	1573	dp -= 2;
90ce3da70b43 Initial load duke parents: diff changeset	1574	png_memcpy(dp, v, 2);
90ce3da70b43 Initial load duke parents: diff changeset	1575	}
90ce3da70b43 Initial load duke parents: diff changeset	1576	}
90ce3da70b43 Initial load duke parents: diff changeset	1577	}
90ce3da70b43 Initial load duke parents: diff changeset	1578	else if (((pass == 2) \|\| (pass == 3)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1579	{
90ce3da70b43 Initial load duke parents: diff changeset	1580	int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load duke parents: diff changeset	1581	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1582	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1583	{
90ce3da70b43 Initial load duke parents: diff changeset	1584	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1585	{
90ce3da70b43 Initial load duke parents: diff changeset	1586	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1587	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1588	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1589	sub esi, 2
90ce3da70b43 Initial load duke parents: diff changeset	1590	sub edi, 14
90ce3da70b43 Initial load duke parents: diff changeset	1591	loop2_pass2:
90ce3da70b43 Initial load duke parents: diff changeset	1592	movd mm0, [esi] ; X X X X v1 v0 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1593	punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1594	movq mm1, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1595	punpckldq mm0, mm0 ; v3 v2 v3 v2 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1596	punpckhdq mm1, mm1 ; v1 v0 v1 v0 v1 v0 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1597	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1598	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1599	movq [edi + 8], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1600	//sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1601	sub edi, 16
90ce3da70b43 Initial load duke parents: diff changeset	1602	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1603	jnz loop2_pass2
90ce3da70b43 Initial load duke parents: diff changeset	1604	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1605	}
90ce3da70b43 Initial load duke parents: diff changeset	1606	}
90ce3da70b43 Initial load duke parents: diff changeset	1607
90ce3da70b43 Initial load duke parents: diff changeset	1608	sptr -= (width_mmx*2 - 2); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1609	dp -= (width_mmx*8 - 2); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1610	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1611	{
90ce3da70b43 Initial load duke parents: diff changeset	1612	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1613	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1614	sptr -= 2;
90ce3da70b43 Initial load duke parents: diff changeset	1615	png_memcpy(v, sptr, 2);
90ce3da70b43 Initial load duke parents: diff changeset	1616	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1617	{
90ce3da70b43 Initial load duke parents: diff changeset	1618	dp -= 2;
90ce3da70b43 Initial load duke parents: diff changeset	1619	png_memcpy(dp, v, 2);
90ce3da70b43 Initial load duke parents: diff changeset	1620	}
90ce3da70b43 Initial load duke parents: diff changeset	1621	}
90ce3da70b43 Initial load duke parents: diff changeset	1622	}
90ce3da70b43 Initial load duke parents: diff changeset	1623	else if (width) // pass == 4 or 5
90ce3da70b43 Initial load duke parents: diff changeset	1624	{
90ce3da70b43 Initial load duke parents: diff changeset	1625	int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load duke parents: diff changeset	1626	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1627	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1628	{
90ce3da70b43 Initial load duke parents: diff changeset	1629	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1630	{
90ce3da70b43 Initial load duke parents: diff changeset	1631	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1632	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1633	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1634	sub esi, 2
90ce3da70b43 Initial load duke parents: diff changeset	1635	sub edi, 6
90ce3da70b43 Initial load duke parents: diff changeset	1636	loop2_pass4:
90ce3da70b43 Initial load duke parents: diff changeset	1637	movd mm0, [esi] ; X X X X v1 v0 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1638	punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load duke parents: diff changeset	1639	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1640	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1641	sub edi, 8
90ce3da70b43 Initial load duke parents: diff changeset	1642	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1643	jnz loop2_pass4
90ce3da70b43 Initial load duke parents: diff changeset	1644	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1645	}
90ce3da70b43 Initial load duke parents: diff changeset	1646	}
90ce3da70b43 Initial load duke parents: diff changeset	1647
90ce3da70b43 Initial load duke parents: diff changeset	1648	sptr -= (width_mmx*2 - 2); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1649	dp -= (width_mmx*4 - 2); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1650	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1651	{
90ce3da70b43 Initial load duke parents: diff changeset	1652	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1653	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1654	sptr -= 2;
90ce3da70b43 Initial load duke parents: diff changeset	1655	png_memcpy(v, sptr, 2);
90ce3da70b43 Initial load duke parents: diff changeset	1656	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1657	{
90ce3da70b43 Initial load duke parents: diff changeset	1658	dp -= 2;
90ce3da70b43 Initial load duke parents: diff changeset	1659	png_memcpy(dp, v, 2);
90ce3da70b43 Initial load duke parents: diff changeset	1660	}
90ce3da70b43 Initial load duke parents: diff changeset	1661	}
90ce3da70b43 Initial load duke parents: diff changeset	1662	}
90ce3da70b43 Initial load duke parents: diff changeset	1663	} /* end of pixel_bytes == 2 */
90ce3da70b43 Initial load duke parents: diff changeset	1664
90ce3da70b43 Initial load duke parents: diff changeset	1665	else if (pixel_bytes == 4)
90ce3da70b43 Initial load duke parents: diff changeset	1666	{
90ce3da70b43 Initial load duke parents: diff changeset	1667	if (((pass == 0) \|\| (pass == 1)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1668	{
90ce3da70b43 Initial load duke parents: diff changeset	1669	int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load duke parents: diff changeset	1670	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1671	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1672	{
90ce3da70b43 Initial load duke parents: diff changeset	1673	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1674	{
90ce3da70b43 Initial load duke parents: diff changeset	1675	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1676	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1677	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1678	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1679	sub edi, 60
90ce3da70b43 Initial load duke parents: diff changeset	1680	loop4_pass0:
90ce3da70b43 Initial load duke parents: diff changeset	1681	movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1682	movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1683	punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1684	punpckhdq mm1, mm1 ; v3 v2 v1 v0 v3 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1685	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1686	movq [edi + 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1687	movq [edi + 16], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1688	movq [edi + 24], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1689	movq [edi+32], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1690	movq [edi + 40], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1691	movq [edi+ 48], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1692	sub esi, 8
90ce3da70b43 Initial load duke parents: diff changeset	1693	movq [edi + 56], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1694	sub edi, 64
90ce3da70b43 Initial load duke parents: diff changeset	1695	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1696	jnz loop4_pass0
90ce3da70b43 Initial load duke parents: diff changeset	1697	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1698	}
90ce3da70b43 Initial load duke parents: diff changeset	1699	}
90ce3da70b43 Initial load duke parents: diff changeset	1700
90ce3da70b43 Initial load duke parents: diff changeset	1701	sptr -= (width_mmx*4 - 4); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1702	dp -= (width_mmx*32 - 4); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1703	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1704	{
90ce3da70b43 Initial load duke parents: diff changeset	1705	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1706	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1707	sptr -= 4;
90ce3da70b43 Initial load duke parents: diff changeset	1708	png_memcpy(v, sptr, 4);
90ce3da70b43 Initial load duke parents: diff changeset	1709	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1710	{
90ce3da70b43 Initial load duke parents: diff changeset	1711	dp -= 4;
90ce3da70b43 Initial load duke parents: diff changeset	1712	png_memcpy(dp, v, 4);
90ce3da70b43 Initial load duke parents: diff changeset	1713	}
90ce3da70b43 Initial load duke parents: diff changeset	1714	}
90ce3da70b43 Initial load duke parents: diff changeset	1715	}
90ce3da70b43 Initial load duke parents: diff changeset	1716	else if (((pass == 2) \|\| (pass == 3)) && width)
90ce3da70b43 Initial load duke parents: diff changeset	1717	{
90ce3da70b43 Initial load duke parents: diff changeset	1718	int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load duke parents: diff changeset	1719	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1720	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1721	{
90ce3da70b43 Initial load duke parents: diff changeset	1722	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1723	{
90ce3da70b43 Initial load duke parents: diff changeset	1724	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1725	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1726	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1727	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1728	sub edi, 28
90ce3da70b43 Initial load duke parents: diff changeset	1729	loop4_pass2:
90ce3da70b43 Initial load duke parents: diff changeset	1730	movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1731	movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1732	punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1733	punpckhdq mm1, mm1 ; v3 v2 v1 v0 v3 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1734	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1735	movq [edi + 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1736	movq [edi+16], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1737	movq [edi + 24], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1738	sub esi, 8
90ce3da70b43 Initial load duke parents: diff changeset	1739	sub edi, 32
90ce3da70b43 Initial load duke parents: diff changeset	1740	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1741	jnz loop4_pass2
90ce3da70b43 Initial load duke parents: diff changeset	1742	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1743	}
90ce3da70b43 Initial load duke parents: diff changeset	1744	}
90ce3da70b43 Initial load duke parents: diff changeset	1745
90ce3da70b43 Initial load duke parents: diff changeset	1746	sptr -= (width_mmx*4 - 4); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1747	dp -= (width_mmx*16 - 4); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1748	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1749	{
90ce3da70b43 Initial load duke parents: diff changeset	1750	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1751	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1752	sptr -= 4;
90ce3da70b43 Initial load duke parents: diff changeset	1753	png_memcpy(v, sptr, 4);
90ce3da70b43 Initial load duke parents: diff changeset	1754	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1755	{
90ce3da70b43 Initial load duke parents: diff changeset	1756	dp -= 4;
90ce3da70b43 Initial load duke parents: diff changeset	1757	png_memcpy(dp, v, 4);
90ce3da70b43 Initial load duke parents: diff changeset	1758	}
90ce3da70b43 Initial load duke parents: diff changeset	1759	}
90ce3da70b43 Initial load duke parents: diff changeset	1760	}
90ce3da70b43 Initial load duke parents: diff changeset	1761	else if (width) // pass == 4 or 5
90ce3da70b43 Initial load duke parents: diff changeset	1762	{
90ce3da70b43 Initial load duke parents: diff changeset	1763	int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load duke parents: diff changeset	1764	width -= width_mmx;
90ce3da70b43 Initial load duke parents: diff changeset	1765	if (width_mmx)
90ce3da70b43 Initial load duke parents: diff changeset	1766	{
90ce3da70b43 Initial load duke parents: diff changeset	1767	_asm
90ce3da70b43 Initial load duke parents: diff changeset	1768	{
90ce3da70b43 Initial load duke parents: diff changeset	1769	mov esi, sptr
90ce3da70b43 Initial load duke parents: diff changeset	1770	mov edi, dp
90ce3da70b43 Initial load duke parents: diff changeset	1771	mov ecx, width_mmx
90ce3da70b43 Initial load duke parents: diff changeset	1772	sub esi, 4
90ce3da70b43 Initial load duke parents: diff changeset	1773	sub edi, 12
90ce3da70b43 Initial load duke parents: diff changeset	1774	loop4_pass4:
90ce3da70b43 Initial load duke parents: diff changeset	1775	movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1776	movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1777	punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
90ce3da70b43 Initial load duke parents: diff changeset	1778	punpckhdq mm1, mm1 ; v3 v2 v1 v0 v3 v2 v1 v0
90ce3da70b43 Initial load duke parents: diff changeset	1779	movq [edi], mm0
90ce3da70b43 Initial load duke parents: diff changeset	1780	sub esi, 8
90ce3da70b43 Initial load duke parents: diff changeset	1781	movq [edi + 8], mm1
90ce3da70b43 Initial load duke parents: diff changeset	1782	sub edi, 16
90ce3da70b43 Initial load duke parents: diff changeset	1783	sub ecx, 2
90ce3da70b43 Initial load duke parents: diff changeset	1784	jnz loop4_pass4
90ce3da70b43 Initial load duke parents: diff changeset	1785	EMMS
90ce3da70b43 Initial load duke parents: diff changeset	1786	}
90ce3da70b43 Initial load duke parents: diff changeset	1787	}
90ce3da70b43 Initial load duke parents: diff changeset	1788
90ce3da70b43 Initial load duke parents: diff changeset	1789	sptr -= (width_mmx*4 - 4); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1790	dp -= (width_mmx*8 - 4); // sign fixed
90ce3da70b43 Initial load duke parents: diff changeset	1791	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1792	{
90ce3da70b43 Initial load duke parents: diff changeset	1793	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1794	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1795	sptr -= 4;
90ce3da70b43 Initial load duke parents: diff changeset	1796	png_memcpy(v, sptr, 4);
90ce3da70b43 Initial load duke parents: diff changeset	1797	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1798	{
90ce3da70b43 Initial load duke parents: diff changeset	1799	dp -= 4;
90ce3da70b43 Initial load duke parents: diff changeset	1800	png_memcpy(dp, v, 4);
90ce3da70b43 Initial load duke parents: diff changeset	1801	}
90ce3da70b43 Initial load duke parents: diff changeset	1802	}
90ce3da70b43 Initial load duke parents: diff changeset	1803	}
90ce3da70b43 Initial load duke parents: diff changeset	1804
90ce3da70b43 Initial load duke parents: diff changeset	1805	} /* end of pixel_bytes == 4 */
90ce3da70b43 Initial load duke parents: diff changeset	1806
90ce3da70b43 Initial load duke parents: diff changeset	1807	else if (pixel_bytes == 6)
90ce3da70b43 Initial load duke parents: diff changeset	1808	{
90ce3da70b43 Initial load duke parents: diff changeset	1809	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1810	{
90ce3da70b43 Initial load duke parents: diff changeset	1811	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1812	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1813	png_memcpy(v, sptr, 6);
90ce3da70b43 Initial load duke parents: diff changeset	1814	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1815	{
90ce3da70b43 Initial load duke parents: diff changeset	1816	png_memcpy(dp, v, 6);
90ce3da70b43 Initial load duke parents: diff changeset	1817	dp -= 6;
90ce3da70b43 Initial load duke parents: diff changeset	1818	}
90ce3da70b43 Initial load duke parents: diff changeset	1819	sptr -= 6;
90ce3da70b43 Initial load duke parents: diff changeset	1820	}
90ce3da70b43 Initial load duke parents: diff changeset	1821	} /* end of pixel_bytes == 6 */
90ce3da70b43 Initial load duke parents: diff changeset	1822
90ce3da70b43 Initial load duke parents: diff changeset	1823	else
90ce3da70b43 Initial load duke parents: diff changeset	1824	{
90ce3da70b43 Initial load duke parents: diff changeset	1825	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1826	{
90ce3da70b43 Initial load duke parents: diff changeset	1827	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1828	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1829	png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1830	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1831	{
90ce3da70b43 Initial load duke parents: diff changeset	1832	png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1833	dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1834	}
90ce3da70b43 Initial load duke parents: diff changeset	1835	sptr-= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1836	}
90ce3da70b43 Initial load duke parents: diff changeset	1837	}
90ce3da70b43 Initial load duke parents: diff changeset	1838	} /* end of mmx_supported */
90ce3da70b43 Initial load duke parents: diff changeset	1839
90ce3da70b43 Initial load duke parents: diff changeset	1840	else /* MMX not supported: use modified C code - takes advantage
90ce3da70b43 Initial load duke parents: diff changeset	1841	* of inlining of memcpy for a constant */
90ce3da70b43 Initial load duke parents: diff changeset	1842	{
90ce3da70b43 Initial load duke parents: diff changeset	1843	if (pixel_bytes == 1)
90ce3da70b43 Initial load duke parents: diff changeset	1844	{
90ce3da70b43 Initial load duke parents: diff changeset	1845	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1846	{
90ce3da70b43 Initial load duke parents: diff changeset	1847	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1848	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1849	dp-- = sptr;
90ce3da70b43 Initial load duke parents: diff changeset	1850	sptr--;
90ce3da70b43 Initial load duke parents: diff changeset	1851	}
90ce3da70b43 Initial load duke parents: diff changeset	1852	}
90ce3da70b43 Initial load duke parents: diff changeset	1853	else if (pixel_bytes == 3)
90ce3da70b43 Initial load duke parents: diff changeset	1854	{
90ce3da70b43 Initial load duke parents: diff changeset	1855	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1856	{
90ce3da70b43 Initial load duke parents: diff changeset	1857	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1858	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1859	png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1860	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1861	{
90ce3da70b43 Initial load duke parents: diff changeset	1862	png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1863	dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1864	}
90ce3da70b43 Initial load duke parents: diff changeset	1865	sptr -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1866	}
90ce3da70b43 Initial load duke parents: diff changeset	1867	}
90ce3da70b43 Initial load duke parents: diff changeset	1868	else if (pixel_bytes == 2)
90ce3da70b43 Initial load duke parents: diff changeset	1869	{
90ce3da70b43 Initial load duke parents: diff changeset	1870	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1871	{
90ce3da70b43 Initial load duke parents: diff changeset	1872	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1873	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1874	png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1875	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1876	{
90ce3da70b43 Initial load duke parents: diff changeset	1877	png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1878	dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1879	}
90ce3da70b43 Initial load duke parents: diff changeset	1880	sptr -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1881	}
90ce3da70b43 Initial load duke parents: diff changeset	1882	}
90ce3da70b43 Initial load duke parents: diff changeset	1883	else if (pixel_bytes == 4)
90ce3da70b43 Initial load duke parents: diff changeset	1884	{
90ce3da70b43 Initial load duke parents: diff changeset	1885	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1886	{
90ce3da70b43 Initial load duke parents: diff changeset	1887	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1888	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1889	png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1890	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1891	{
90ce3da70b43 Initial load duke parents: diff changeset	1892	png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1893	dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1894	}
90ce3da70b43 Initial load duke parents: diff changeset	1895	sptr -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1896	}
90ce3da70b43 Initial load duke parents: diff changeset	1897	}
90ce3da70b43 Initial load duke parents: diff changeset	1898	else if (pixel_bytes == 6)
90ce3da70b43 Initial load duke parents: diff changeset	1899	{
90ce3da70b43 Initial load duke parents: diff changeset	1900	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1901	{
90ce3da70b43 Initial load duke parents: diff changeset	1902	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1903	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1904	png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1905	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1906	{
90ce3da70b43 Initial load duke parents: diff changeset	1907	png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1908	dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1909	}
90ce3da70b43 Initial load duke parents: diff changeset	1910	sptr -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1911	}
90ce3da70b43 Initial load duke parents: diff changeset	1912	}
90ce3da70b43 Initial load duke parents: diff changeset	1913	else
90ce3da70b43 Initial load duke parents: diff changeset	1914	{
90ce3da70b43 Initial load duke parents: diff changeset	1915	for (i = width; i; i--)
90ce3da70b43 Initial load duke parents: diff changeset	1916	{
90ce3da70b43 Initial load duke parents: diff changeset	1917	png_byte v[8];
90ce3da70b43 Initial load duke parents: diff changeset	1918	int j;
90ce3da70b43 Initial load duke parents: diff changeset	1919	png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1920	for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load duke parents: diff changeset	1921	{
90ce3da70b43 Initial load duke parents: diff changeset	1922	png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load duke parents: diff changeset	1923	dp -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1924	}
90ce3da70b43 Initial load duke parents: diff changeset	1925	sptr -= pixel_bytes;
90ce3da70b43 Initial load duke parents: diff changeset	1926	}
90ce3da70b43 Initial load duke parents: diff changeset	1927	}
90ce3da70b43 Initial load duke parents: diff changeset	1928
90ce3da70b43 Initial load duke parents: diff changeset	1929	} /* end of MMX not supported */
90ce3da70b43 Initial load duke parents: diff changeset	1930	break;
90ce3da70b43 Initial load duke parents: diff changeset	1931	}
90ce3da70b43 Initial load duke parents: diff changeset	1932	} /* end switch (row_info->pixel_depth) */
90ce3da70b43 Initial load duke parents: diff changeset	1933
90ce3da70b43 Initial load duke parents: diff changeset	1934	row_info->width = final_width;
90ce3da70b43 Initial load duke parents: diff changeset	1935
90ce3da70b43 Initial load duke parents: diff changeset	1936	row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
90ce3da70b43 Initial load duke parents: diff changeset	1937	}
90ce3da70b43 Initial load duke parents: diff changeset	1938
90ce3da70b43 Initial load duke parents: diff changeset	1939	}
90ce3da70b43 Initial load duke parents: diff changeset	1940
90ce3da70b43 Initial load duke parents: diff changeset	1941	#endif /* PNG_READ_INTERLACING_SUPPORTED */
90ce3da70b43 Initial load duke parents: diff changeset	1942
90ce3da70b43 Initial load duke parents: diff changeset	1943
90ce3da70b43 Initial load duke parents: diff changeset	1944	// These variables are utilized in the functions below. They are declared
90ce3da70b43 Initial load duke parents: diff changeset	1945	// globally here to ensure alignment on 8-byte boundaries.
90ce3da70b43 Initial load duke parents: diff changeset	1946
90ce3da70b43 Initial load duke parents: diff changeset	1947	union uAll { // one 64-bit slot used as a memory operand by the MMX inline assembly below
90ce3da70b43 Initial load duke parents: diff changeset	1948	__int64 use; // the 64-bit payload: assigned from C (e.g. ActiveMask.use = ...), read by the asm via movq/psrlq
90ce3da70b43 Initial load duke parents: diff changeset	1949	double align; // not accessed in the visible code; presumably present only to force 8-byte alignment -- TODO confirm
90ce3da70b43 Initial load duke parents: diff changeset	1950	} LBCarryMask = {0x0101010101010101}, // bit 0 set in every byte: pand'ed with a row to pick out each byte's low bit
90ce3da70b43 Initial load duke parents: diff changeset	1951	HBClearMask = {0x7f7f7f7f7f7f7f7f}, // bit 7 clear in every byte: pand'ed after "psrlq x, 1" to drop the bit shifted in from the neighbouring byte
90ce3da70b43 Initial load duke parents: diff changeset	1952	ActiveMask, ActiveMask2, ActiveMaskEnd, ShiftBpp, ShiftRem; // no initializers here; the bpp == 3 path of png_read_filter_row_mmx_avg assigns ActiveMask, ShiftBpp and ShiftRem before its MMX loop (ActiveMask2/ActiveMaskEnd are not used in the visible code). NOTE(review): these are shared, writable file-scope objects -- confirm callers never run the filter routines concurrently
90ce3da70b43 Initial load duke parents: diff changeset	1953
90ce3da70b43 Initial load duke parents: diff changeset	1954
90ce3da70b43 Initial load duke parents: diff changeset	1955	// Optimized code for PNG Average filter decoder
90ce3da70b43 Initial load duke parents: diff changeset	1956	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	1957	png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row
90ce3da70b43 Initial load duke parents: diff changeset	1958	, png_bytep prev_row)
90ce3da70b43 Initial load duke parents: diff changeset	1959	{
90ce3da70b43 Initial load duke parents: diff changeset	1960	int bpp;
90ce3da70b43 Initial load duke parents: diff changeset	1961	png_uint_32 FullLength;
90ce3da70b43 Initial load duke parents: diff changeset	1962	png_uint_32 MMXLength;
90ce3da70b43 Initial load duke parents: diff changeset	1963	//png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	1964	int diff;
90ce3da70b43 Initial load duke parents: diff changeset	1965
90ce3da70b43 Initial load duke parents: diff changeset	1966	bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
90ce3da70b43 Initial load duke parents: diff changeset	1967	FullLength = row_info->rowbytes; // # of bytes to filter
90ce3da70b43 Initial load duke parents: diff changeset	1968	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	1969	// Init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	1970	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	1971	xor ebx, ebx // ebx ==> x
90ce3da70b43 Initial load duke parents: diff changeset	1972	mov edx, edi
90ce3da70b43 Initial load duke parents: diff changeset	1973	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	1974	sub edx, bpp // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	1975
90ce3da70b43 Initial load duke parents: diff changeset	1976	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	1977	// Compute the Raw value for the first bpp bytes
90ce3da70b43 Initial load duke parents: diff changeset	1978	// Raw(x) = Avg(x) + (Prior(x)/2)
90ce3da70b43 Initial load duke parents: diff changeset	1979	davgrlp:
90ce3da70b43 Initial load duke parents: diff changeset	1980	mov al, [esi + ebx] // Load al with Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	1981	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	1982	shr al, 1 // divide by 2
90ce3da70b43 Initial load duke parents: diff changeset	1983	add al, [edi+ebx-1] // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	1984	cmp ebx, bpp
90ce3da70b43 Initial load duke parents: diff changeset	1985	mov [edi+ebx-1], al // Write back Raw(x);
90ce3da70b43 Initial load duke parents: diff changeset	1986	// mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	1987	jb davgrlp
90ce3da70b43 Initial load duke parents: diff changeset	1988	// get # of bytes to alignment
90ce3da70b43 Initial load duke parents: diff changeset	1989	mov diff, edi // take start of row
90ce3da70b43 Initial load duke parents: diff changeset	1990	add diff, ebx // add bpp
90ce3da70b43 Initial load duke parents: diff changeset	1991	add diff, 0xf // add 7 + 8 to incr past alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	1992	and diff, 0xfffffff8 // mask to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	1993	sub diff, edi // subtract from start ==> value ebx at alignment
90ce3da70b43 Initial load duke parents: diff changeset	1994	jz davggo
90ce3da70b43 Initial load duke parents: diff changeset	1995	// fix alignment
90ce3da70b43 Initial load duke parents: diff changeset	1996	// Compute the Raw value for the bytes upto the alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	1997	// Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
90ce3da70b43 Initial load duke parents: diff changeset	1998	xor ecx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	1999	davglp1:
90ce3da70b43 Initial load duke parents: diff changeset	2000	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	2001	mov cl, [esi + ebx] // load cl with Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2002	mov al, [edx + ebx] // load al with Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2003	add ax, cx
90ce3da70b43 Initial load duke parents: diff changeset	2004	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2005	shr ax, 1 // divide by 2
90ce3da70b43 Initial load duke parents: diff changeset	2006	add al, [edi+ebx-1] // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2007	cmp ebx, diff // Check if at alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2008	mov [edi+ebx-1], al // Write back Raw(x);
90ce3da70b43 Initial load duke parents: diff changeset	2009	// mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2010	jb davglp1 // Repeat until at alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2011	davggo:
90ce3da70b43 Initial load duke parents: diff changeset	2012	mov eax, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	2013	mov ecx, eax
90ce3da70b43 Initial load duke parents: diff changeset	2014	sub eax, ebx // subtract alignment fix
90ce3da70b43 Initial load duke parents: diff changeset	2015	and eax, 0x00000007 // calc bytes over mult of 8
90ce3da70b43 Initial load duke parents: diff changeset	2016	sub ecx, eax // drop over bytes from original length
90ce3da70b43 Initial load duke parents: diff changeset	2017	mov MMXLength, ecx
90ce3da70b43 Initial load duke parents: diff changeset	2018	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2019	// Now do the math for the rest of the row
90ce3da70b43 Initial load duke parents: diff changeset	2020	switch ( bpp )
90ce3da70b43 Initial load duke parents: diff changeset	2021	{
90ce3da70b43 Initial load duke parents: diff changeset	2022	case 3:
90ce3da70b43 Initial load duke parents: diff changeset	2023	{
90ce3da70b43 Initial load duke parents: diff changeset	2024	ActiveMask.use = 0x0000000000ffffff;
90ce3da70b43 Initial load duke parents: diff changeset	2025	ShiftBpp.use = 24; // == 3 * 8
90ce3da70b43 Initial load duke parents: diff changeset	2026	ShiftRem.use = 40; // == 64 - 24
90ce3da70b43 Initial load duke parents: diff changeset	2027	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2028	// Re-init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	2029	movq mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2030	mov ebx, diff // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2031	movq mm5, LBCarryMask
90ce3da70b43 Initial load duke parents: diff changeset	2032	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2033	movq mm4, HBClearMask
90ce3da70b43 Initial load duke parents: diff changeset	2034	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2035	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	2036	movq mm2, [edi + ebx - 8] // Load previous aligned 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2037	// (we correct position in loop below)
90ce3da70b43 Initial load duke parents: diff changeset	2038	davg3lp:
90ce3da70b43 Initial load duke parents: diff changeset	2039	movq mm0, [edi + ebx] // Load mm0 with Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2040	// Add (Prev_row/2) to Average
90ce3da70b43 Initial load duke parents: diff changeset	2041	movq mm3, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2042	psrlq mm2, ShiftRem // Correct position Raw(x-bpp) data
90ce3da70b43 Initial load duke parents: diff changeset	2043	movq mm1, [esi + ebx] // Load mm1 with Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2044	movq mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2045	pand mm3, mm1 // get lsb for each prev_row byte
90ce3da70b43 Initial load duke parents: diff changeset	2046	psrlq mm1, 1 // divide prev_row bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2047	pand mm1, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2048	paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2049	// Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2050	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2051	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2052	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2053	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2054	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2055	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2056	pand mm2, mm6 // Leave only Active Group 1 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2057	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load duke parents: diff changeset	2058	// byte
90ce3da70b43 Initial load duke parents: diff changeset	2059	// Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2060	psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 3-5
90ce3da70b43 Initial load duke parents: diff changeset	2061	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2062	psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2063	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2064	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2065	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2066	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2067	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2068	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2069	pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2070	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load duke parents: diff changeset	2071	// byte
90ce3da70b43 Initial load duke parents: diff changeset	2072
90ce3da70b43 Initial load duke parents: diff changeset	2073	// Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2074	psllq mm6, ShiftBpp // shift the mm6 mask to cover the last two
90ce3da70b43 Initial load duke parents: diff changeset	2075	// bytes
90ce3da70b43 Initial load duke parents: diff changeset	2076	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2077	psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2078	// Data only needs to be shifted once here to
90ce3da70b43 Initial load duke parents: diff changeset	2079	// get the correct x-bpp offset.
90ce3da70b43 Initial load duke parents: diff changeset	2080	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2081	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2082	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2083	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2084	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2085	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2086	pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2087	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2088	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load duke parents: diff changeset	2089	// byte
90ce3da70b43 Initial load duke parents: diff changeset	2090
90ce3da70b43 Initial load duke parents: diff changeset	2091	// Now ready to write back to memory
90ce3da70b43 Initial load duke parents: diff changeset	2092	movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	2093	// Move updated Raw(x) to use as Raw(x-bpp) for next loop
90ce3da70b43 Initial load duke parents: diff changeset	2094	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2095	movq mm2, mm0 // mov updated Raw(x) to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2096	jb davg3lp
90ce3da70b43 Initial load duke parents: diff changeset	2097	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2098	}
90ce3da70b43 Initial load duke parents: diff changeset	2099	break;
90ce3da70b43 Initial load duke parents: diff changeset	2100
90ce3da70b43 Initial load duke parents: diff changeset	2101	case 6:
90ce3da70b43 Initial load duke parents: diff changeset	2102	case 4:
90ce3da70b43 Initial load duke parents: diff changeset	2103	case 7:
90ce3da70b43 Initial load duke parents: diff changeset	2104	case 5:
90ce3da70b43 Initial load duke parents: diff changeset	2105	{
90ce3da70b43 Initial load duke parents: diff changeset	2106	ActiveMask.use = 0xffffffffffffffff; // use shift below to clear
90ce3da70b43 Initial load duke parents: diff changeset	2107	// appropriate inactive bytes
90ce3da70b43 Initial load duke parents: diff changeset	2108	ShiftBpp.use = bpp << 3;
90ce3da70b43 Initial load duke parents: diff changeset	2109	ShiftRem.use = 64 - ShiftBpp.use;
90ce3da70b43 Initial load duke parents: diff changeset	2110	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2111	movq mm4, HBClearMask
90ce3da70b43 Initial load duke parents: diff changeset	2112	// Re-init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	2113	mov ebx, diff // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2114	// Load ActiveMask and clear all bytes except for 1st active group
90ce3da70b43 Initial load duke parents: diff changeset	2115	movq mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2116	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2117	psrlq mm7, ShiftRem
90ce3da70b43 Initial load duke parents: diff changeset	2118	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2119	movq mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2120	movq mm5, LBCarryMask
90ce3da70b43 Initial load duke parents: diff changeset	2121	psllq mm6, ShiftBpp // Create mask for 2nd active group
90ce3da70b43 Initial load duke parents: diff changeset	2122	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	2123	movq mm2, [edi + ebx - 8] // Load previous aligned 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2124	// (we correct position in loop below)
90ce3da70b43 Initial load duke parents: diff changeset	2125	davg4lp:
90ce3da70b43 Initial load duke parents: diff changeset	2126	movq mm0, [edi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2127	psrlq mm2, ShiftRem // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2128	movq mm1, [esi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2129	// Add (Prev_row/2) to Average
90ce3da70b43 Initial load duke parents: diff changeset	2130	movq mm3, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2131	pand mm3, mm1 // get lsb for each prev_row byte
90ce3da70b43 Initial load duke parents: diff changeset	2132	psrlq mm1, 1 // divide prev_row bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2133	pand mm1, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2134	paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2135	// Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2136	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2137	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2138	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2139	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2140	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2141	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2142	pand mm2, mm7 // Leave only Active Group 1 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2143	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load duke parents: diff changeset	2144	// byte
90ce3da70b43 Initial load duke parents: diff changeset	2145	// Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2146	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2147	psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2148	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2149	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2150	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2151	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2152	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2153	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2154	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2155	pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2156	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load duke parents: diff changeset	2157	// byte
90ce3da70b43 Initial load duke parents: diff changeset	2158	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2159	// Now ready to write back to memory
90ce3da70b43 Initial load duke parents: diff changeset	2160	movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	2161	// Prep Raw(x-bpp) for next loop
90ce3da70b43 Initial load duke parents: diff changeset	2162	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2163	jb davg4lp
90ce3da70b43 Initial load duke parents: diff changeset	2164	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2165	}
90ce3da70b43 Initial load duke parents: diff changeset	2166	break;
90ce3da70b43 Initial load duke parents: diff changeset	2167	case 2:
90ce3da70b43 Initial load duke parents: diff changeset	2168	{
90ce3da70b43 Initial load duke parents: diff changeset	2169	ActiveMask.use = 0x000000000000ffff;
90ce3da70b43 Initial load duke parents: diff changeset	2170	ShiftBpp.use = 16; // == 2 * 8 [BUGFIX]
90ce3da70b43 Initial load duke parents: diff changeset	2171	ShiftRem.use = 48; // == 64 - 16 [BUGFIX]
90ce3da70b43 Initial load duke parents: diff changeset	2172	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2173	// Load ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2174	movq mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2175	// Re-init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	2176	mov ebx, diff // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2177	movq mm5, LBCarryMask
90ce3da70b43 Initial load duke parents: diff changeset	2178	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2179	movq mm4, HBClearMask
90ce3da70b43 Initial load duke parents: diff changeset	2180	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2181	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	2182	movq mm2, [edi + ebx - 8] // Load previous aligned 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2183	// (we correct position in loop below)
90ce3da70b43 Initial load duke parents: diff changeset	2184	davg2lp:
90ce3da70b43 Initial load duke parents: diff changeset	2185	movq mm0, [edi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2186	psrlq mm2, ShiftRem // shift data to position correctly [BUGFIX]
90ce3da70b43 Initial load duke parents: diff changeset	2187	movq mm1, [esi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2188	// Add (Prev_row/2) to Average
90ce3da70b43 Initial load duke parents: diff changeset	2189	movq mm3, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2190	pand mm3, mm1 // get lsb for each prev_row byte
90ce3da70b43 Initial load duke parents: diff changeset	2191	psrlq mm1, 1 // divide prev_row bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2192	pand mm1, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2193	movq mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2194	paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2195	// Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2196	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2197	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2198	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2199	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2200	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2201	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2202	pand mm2, mm6 // Leave only Active Group 1 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2203	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load duke parents: diff changeset	2204	// Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2205	psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 2 & 3
90ce3da70b43 Initial load duke parents: diff changeset	2206	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2207	psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2208	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2209	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2210	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2211	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2212	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2213	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2214	pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2215	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load duke parents: diff changeset	2216
90ce3da70b43 Initial load duke parents: diff changeset	2217	// Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2218	psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 4 & 5
90ce3da70b43 Initial load duke parents: diff changeset	2219	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2220	psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2221	// Data only needs to be shifted once here to
90ce3da70b43 Initial load duke parents: diff changeset	2222	// get the correct x-bpp offset.
90ce3da70b43 Initial load duke parents: diff changeset	2223	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2224	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2225	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2226	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2227	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2228	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2229	pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2230	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load duke parents: diff changeset	2231
90ce3da70b43 Initial load duke parents: diff changeset	2232	// Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load duke parents: diff changeset	2233	psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 6 & 7
90ce3da70b43 Initial load duke parents: diff changeset	2234	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2235	psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	2236	// Data only needs to be shifted once here to
90ce3da70b43 Initial load duke parents: diff changeset	2237	// get the correct x-bpp offset.
90ce3da70b43 Initial load duke parents: diff changeset	2238	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2239	movq mm1, mm3 // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load duke parents: diff changeset	2240	pand mm1, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2241	// lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load duke parents: diff changeset	2242	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2243	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2244	paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2245	pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load duke parents: diff changeset	2246	paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load duke parents: diff changeset	2247
90ce3da70b43 Initial load duke parents: diff changeset	2248	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2249	// Now ready to write back to memory
90ce3da70b43 Initial load duke parents: diff changeset	2250	movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	2251	// Prep Raw(x-bpp) for next loop
90ce3da70b43 Initial load duke parents: diff changeset	2252	movq mm2, mm0 // mov updated Raws to mm2
90ce3da70b43 Initial load duke parents: diff changeset	2253	jb davg2lp
90ce3da70b43 Initial load duke parents: diff changeset	2254	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2255	}
90ce3da70b43 Initial load duke parents: diff changeset	2256	break;
90ce3da70b43 Initial load duke parents: diff changeset	2257
90ce3da70b43 Initial load duke parents: diff changeset	2258	case 1: // bpp == 1
90ce3da70b43 Initial load duke parents: diff changeset	2259	{
90ce3da70b43 Initial load duke parents: diff changeset	2260	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2261	// Re-init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	2262	mov ebx, diff // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2263	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2264	cmp ebx, FullLength // Test if offset at end of array
90ce3da70b43 Initial load duke parents: diff changeset	2265	jnb davg1end
90ce3da70b43 Initial load duke parents: diff changeset	2266	// Do Avg decode for remaining bytes
90ce3da70b43 Initial load duke parents: diff changeset	2267	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2268	mov edx, edi
90ce3da70b43 Initial load duke parents: diff changeset	2269	xor ecx, ecx // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load duke parents: diff changeset	2270	sub edx, bpp // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2271	davg1lp:
90ce3da70b43 Initial load duke parents: diff changeset	2272	// Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
90ce3da70b43 Initial load duke parents: diff changeset	2273	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	2274	mov cl, [esi + ebx] // load cl with Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2275	mov al, [edx + ebx] // load al with Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2276	add ax, cx
90ce3da70b43 Initial load duke parents: diff changeset	2277	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2278	shr ax, 1 // divide by 2
90ce3da70b43 Initial load duke parents: diff changeset	2279	add al, [edi+ebx-1] // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2280	cmp ebx, FullLength // Check if at end of array
90ce3da70b43 Initial load duke parents: diff changeset	2281	mov [edi+ebx-1], al // Write back Raw(x);
90ce3da70b43 Initial load duke parents: diff changeset	2282	// mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2283	jb davg1lp
90ce3da70b43 Initial load duke parents: diff changeset	2284	davg1end:
90ce3da70b43 Initial load duke parents: diff changeset	2285	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2286	}
90ce3da70b43 Initial load duke parents: diff changeset	2287	return;
90ce3da70b43 Initial load duke parents: diff changeset	2288
90ce3da70b43 Initial load duke parents: diff changeset	2289	case 8: // bpp == 8
90ce3da70b43 Initial load duke parents: diff changeset	2290	{
90ce3da70b43 Initial load duke parents: diff changeset	2291	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2292	// Re-init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	2293	mov ebx, diff // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2294	movq mm5, LBCarryMask
90ce3da70b43 Initial load duke parents: diff changeset	2295	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2296	movq mm4, HBClearMask
90ce3da70b43 Initial load duke parents: diff changeset	2297	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2298	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	2299	movq mm2, [edi + ebx - 8] // Load previous aligned 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2300	// (NO NEED to correct position in loop below)
90ce3da70b43 Initial load duke parents: diff changeset	2301	davg8lp:
90ce3da70b43 Initial load duke parents: diff changeset	2302	movq mm0, [edi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2303	movq mm3, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2304	movq mm1, [esi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2305	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2306	pand mm3, mm1 // get lsb for each prev_row byte
90ce3da70b43 Initial load duke parents: diff changeset	2307	psrlq mm1, 1 // divide prev_row bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2308	pand mm3, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2309	// lsb's were == 1
90ce3da70b43 Initial load duke parents: diff changeset	2310	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2311	pand mm1, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2312	paddb mm0, mm3 // add LBCarrys to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2313	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2314	paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2315	paddb mm0, mm2 // add (Raw/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2316	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2317	movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	2318	movq mm2, mm0 // reuse as Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2319	jb davg8lp
90ce3da70b43 Initial load duke parents: diff changeset	2320	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2321	}
90ce3da70b43 Initial load duke parents: diff changeset	2322	break;
90ce3da70b43 Initial load duke parents: diff changeset	2323	default: // bpp greater than 8
90ce3da70b43 Initial load duke parents: diff changeset	2324	{
90ce3da70b43 Initial load duke parents: diff changeset	2325	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2326	movq mm5, LBCarryMask
90ce3da70b43 Initial load duke parents: diff changeset	2327	// Re-init address pointers and offset
90ce3da70b43 Initial load duke parents: diff changeset	2328	mov ebx, diff // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2329	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2330	movq mm4, HBClearMask
90ce3da70b43 Initial load duke parents: diff changeset	2331	mov edx, edi
90ce3da70b43 Initial load duke parents: diff changeset	2332	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2333	sub edx, bpp // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2334	davgAlp:
90ce3da70b43 Initial load duke parents: diff changeset	2335	movq mm0, [edi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2336	movq mm3, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2337	movq mm1, [esi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2338	pand mm3, mm1 // get lsb for each prev_row byte
90ce3da70b43 Initial load duke parents: diff changeset	2339	movq mm2, [edx + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2340	psrlq mm1, 1 // divide prev_row bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2341	pand mm3, mm2 // get LBCarrys for each byte where both
90ce3da70b43 Initial load duke parents: diff changeset	2342	// lsb's were == 1
90ce3da70b43 Initial load duke parents: diff changeset	2343	psrlq mm2, 1 // divide raw bytes by 2
90ce3da70b43 Initial load duke parents: diff changeset	2344	pand mm1, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2345	paddb mm0, mm3 // add LBCarrys to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2346	pand mm2, mm4 // clear invalid bit 7 of each byte
90ce3da70b43 Initial load duke parents: diff changeset	2347	paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2348	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2349	paddb mm0, mm2 // add (Raw/2) to Avg for each byte
90ce3da70b43 Initial load duke parents: diff changeset	2350	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2351	movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	2352	jb davgAlp
90ce3da70b43 Initial load duke parents: diff changeset	2353	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2354	}
90ce3da70b43 Initial load duke parents: diff changeset	2355	break;
90ce3da70b43 Initial load duke parents: diff changeset	2356	} // end switch ( bpp )
90ce3da70b43 Initial load duke parents: diff changeset	2357
90ce3da70b43 Initial load duke parents: diff changeset	2358	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2359	// MMX acceleration complete now do clean-up
90ce3da70b43 Initial load duke parents: diff changeset	2360	// Check if any remaining bytes left to decode
90ce3da70b43 Initial load duke parents: diff changeset	2361	mov ebx, MMXLength // ebx ==> x = offset bytes remaining after MMX
90ce3da70b43 Initial load duke parents: diff changeset	2362	mov edi, row // edi ==> Avg(x)
90ce3da70b43 Initial load duke parents: diff changeset	2363	cmp ebx, FullLength // Test if offset at end of array
90ce3da70b43 Initial load duke parents: diff changeset	2364	jnb davgend
90ce3da70b43 Initial load duke parents: diff changeset	2365	// Do Avg decode for remaining bytes
90ce3da70b43 Initial load duke parents: diff changeset	2366	mov esi, prev_row // esi ==> Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2367	mov edx, edi
90ce3da70b43 Initial load duke parents: diff changeset	2368	xor ecx, ecx // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load duke parents: diff changeset	2369	sub edx, bpp // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2370	davglp2:
90ce3da70b43 Initial load duke parents: diff changeset	2371	// Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
90ce3da70b43 Initial load duke parents: diff changeset	2372	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	2373	mov cl, [esi + ebx] // load cl with Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2374	mov al, [edx + ebx] // load al with Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2375	add ax, cx
90ce3da70b43 Initial load duke parents: diff changeset	2376	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2377	shr ax, 1 // divide by 2
90ce3da70b43 Initial load duke parents: diff changeset	2378	add al, [edi+ebx-1] // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2379	cmp ebx, FullLength // Check if at end of array
90ce3da70b43 Initial load duke parents: diff changeset	2380	mov [edi+ebx-1], al // Write back Raw(x);
90ce3da70b43 Initial load duke parents: diff changeset	2381	// mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2382	jb davglp2
90ce3da70b43 Initial load duke parents: diff changeset	2383	davgend:
90ce3da70b43 Initial load duke parents: diff changeset	2384	emms // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load duke parents: diff changeset	2385	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2386	}
90ce3da70b43 Initial load duke parents: diff changeset	2387
90ce3da70b43 Initial load duke parents: diff changeset	2388	// Optimized code for PNG Paeth filter decoder
90ce3da70b43 Initial load duke parents: diff changeset	2389	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	2390	png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row,
90ce3da70b43 Initial load duke parents: diff changeset	2391	png_bytep prev_row)
90ce3da70b43 Initial load duke parents: diff changeset	2392	{
90ce3da70b43 Initial load duke parents: diff changeset	2393	png_uint_32 FullLength;
90ce3da70b43 Initial load duke parents: diff changeset	2394	png_uint_32 MMXLength;
90ce3da70b43 Initial load duke parents: diff changeset	2395	//png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	2396	int bpp;
90ce3da70b43 Initial load duke parents: diff changeset	2397	int diff;
90ce3da70b43 Initial load duke parents: diff changeset	2398	//int ptemp;
90ce3da70b43 Initial load duke parents: diff changeset	2399	int patemp, pbtemp, pctemp;
90ce3da70b43 Initial load duke parents: diff changeset	2400
90ce3da70b43 Initial load duke parents: diff changeset	2401	bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
90ce3da70b43 Initial load duke parents: diff changeset	2402	FullLength = row_info->rowbytes; // # of bytes to filter
90ce3da70b43 Initial load duke parents: diff changeset	2403	_asm
90ce3da70b43 Initial load duke parents: diff changeset	2404	{
90ce3da70b43 Initial load duke parents: diff changeset	2405	xor ebx, ebx // ebx ==> x offset
90ce3da70b43 Initial load duke parents: diff changeset	2406	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	2407	xor edx, edx // edx ==> x-bpp offset
90ce3da70b43 Initial load duke parents: diff changeset	2408	mov esi, prev_row
90ce3da70b43 Initial load duke parents: diff changeset	2409	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	2410
90ce3da70b43 Initial load duke parents: diff changeset	2411	// Compute the Raw value for the first bpp bytes
90ce3da70b43 Initial load duke parents: diff changeset	2412	// Note: the formula works out to be always
90ce3da70b43 Initial load duke parents: diff changeset	2413	// Raw(x) = Paeth(x) + Prior(x) where x < bpp
90ce3da70b43 Initial load duke parents: diff changeset	2414	dpthrlp:
90ce3da70b43 Initial load duke parents: diff changeset	2415	mov al, [edi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2416	add al, [esi + ebx]
90ce3da70b43 Initial load duke parents: diff changeset	2417	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2418	cmp ebx, bpp
90ce3da70b43 Initial load duke parents: diff changeset	2419	mov [edi + ebx - 1], al
90ce3da70b43 Initial load duke parents: diff changeset	2420	jb dpthrlp
90ce3da70b43 Initial load duke parents: diff changeset	2421	// get # of bytes to alignment
90ce3da70b43 Initial load duke parents: diff changeset	2422	mov diff, edi // take start of row
90ce3da70b43 Initial load duke parents: diff changeset	2423	add diff, ebx // add bpp
90ce3da70b43 Initial load duke parents: diff changeset	2424	xor ecx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	2425	add diff, 0xf // add 7 + 8 to incr past alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2426	and diff, 0xfffffff8 // mask to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	2427	sub diff, edi // subtract from start ==> value ebx at alignment
90ce3da70b43 Initial load duke parents: diff changeset	2428	jz dpthgo
90ce3da70b43 Initial load duke parents: diff changeset	2429	// fix alignment
90ce3da70b43 Initial load duke parents: diff changeset	2430	dpthlp1:
90ce3da70b43 Initial load duke parents: diff changeset	2431	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	2432	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2433	mov al, [esi + ebx] // load Prior(x) into al
90ce3da70b43 Initial load duke parents: diff changeset	2434	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	2435	sub eax, ecx // subtract Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2436	mov patemp, eax // Save pav for later use
90ce3da70b43 Initial load duke parents: diff changeset	2437	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	2438	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2439	mov al, [edi + edx] // load Raw(x-bpp) into al
90ce3da70b43 Initial load duke parents: diff changeset	2440	sub eax, ecx // subtract Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2441	mov ecx, eax
90ce3da70b43 Initial load duke parents: diff changeset	2442	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2443	add eax, patemp // pcv = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2444	// pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2445	test eax, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	2446	jz dpthpca
90ce3da70b43 Initial load duke parents: diff changeset	2447	neg eax // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	2448	dpthpca:
90ce3da70b43 Initial load duke parents: diff changeset	2449	mov pctemp, eax // save pc for later use
90ce3da70b43 Initial load duke parents: diff changeset	2450	// pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2451	test ecx, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	2452	jz dpthpba
90ce3da70b43 Initial load duke parents: diff changeset	2453	neg ecx // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	2454	dpthpba:
90ce3da70b43 Initial load duke parents: diff changeset	2455	mov pbtemp, ecx // save pb for later use
90ce3da70b43 Initial load duke parents: diff changeset	2456	// pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2457	mov eax, patemp
90ce3da70b43 Initial load duke parents: diff changeset	2458	test eax, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	2459	jz dpthpaa
90ce3da70b43 Initial load duke parents: diff changeset	2460	neg eax // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	2461	dpthpaa:
90ce3da70b43 Initial load duke parents: diff changeset	2462	mov patemp, eax // save pa for later use
90ce3da70b43 Initial load duke parents: diff changeset	2463	// test if pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2464	cmp eax, ecx
90ce3da70b43 Initial load duke parents: diff changeset	2465	jna dpthabb
90ce3da70b43 Initial load duke parents: diff changeset	2466	// pa > pb; now test if pb <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2467	cmp ecx, pctemp
90ce3da70b43 Initial load duke parents: diff changeset	2468	jna dpthbbc
90ce3da70b43 Initial load duke parents: diff changeset	2469	// pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2470	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	2471	jmp dpthpaeth
90ce3da70b43 Initial load duke parents: diff changeset	2472	dpthbbc:
90ce3da70b43 Initial load duke parents: diff changeset	2473	// pb <= pc; Raw(x) = Paeth(x) + Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2474	mov cl, [esi + ebx] // load Prior(x) into cl
90ce3da70b43 Initial load duke parents: diff changeset	2475	jmp dpthpaeth
90ce3da70b43 Initial load duke parents: diff changeset	2476	dpthabb:
90ce3da70b43 Initial load duke parents: diff changeset	2477	// pa <= pb; now test if pa <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2478	cmp eax, pctemp
90ce3da70b43 Initial load duke parents: diff changeset	2479	jna dpthabc
90ce3da70b43 Initial load duke parents: diff changeset	2480	// pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2481	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	2482	jmp dpthpaeth
90ce3da70b43 Initial load duke parents: diff changeset	2483	dpthabc:
90ce3da70b43 Initial load duke parents: diff changeset	2484	// pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2485	mov cl, [edi + edx] // load Raw(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	2486	dpthpaeth:
90ce3da70b43 Initial load duke parents: diff changeset	2487	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	2488	inc edx
90ce3da70b43 Initial load duke parents: diff changeset	2489	// Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
90ce3da70b43 Initial load duke parents: diff changeset	2490	add [edi + ebx - 1], cl
90ce3da70b43 Initial load duke parents: diff changeset	2491	cmp ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	2492	jb dpthlp1
90ce3da70b43 Initial load duke parents: diff changeset	2493	dpthgo:
90ce3da70b43 Initial load duke parents: diff changeset	2494	mov ecx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	2495	mov eax, ecx
90ce3da70b43 Initial load duke parents: diff changeset	2496	sub eax, ebx // subtract alignment fix
90ce3da70b43 Initial load duke parents: diff changeset	2497	and eax, 0x00000007 // calc bytes over mult of 8
90ce3da70b43 Initial load duke parents: diff changeset	2498	sub ecx, eax // drop over bytes from original length
90ce3da70b43 Initial load duke parents: diff changeset	2499	mov MMXLength, ecx
90ce3da70b43 Initial load duke parents: diff changeset	2500	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2501	// Now do the math for the rest of the row
90ce3da70b43 Initial load duke parents: diff changeset	2502	switch ( bpp )
90ce3da70b43 Initial load duke parents: diff changeset	2503	{
90ce3da70b43 Initial load duke parents: diff changeset	2504	case 3:
90ce3da70b43 Initial load duke parents: diff changeset	2505	{
90ce3da70b43 Initial load duke parents: diff changeset	2506	ActiveMask.use = 0x0000000000ffffff;
90ce3da70b43 Initial load duke parents: diff changeset	2507	ActiveMaskEnd.use = 0xffff000000000000;
90ce3da70b43 Initial load duke parents: diff changeset	2508	ShiftBpp.use = 24; // == bpp(3) * 8
90ce3da70b43 Initial load duke parents: diff changeset	2509	ShiftRem.use = 40; // == 64 - 24
90ce3da70b43 Initial load duke parents: diff changeset	2510	_asm
90ce3da70b43 Initial load duke parents: diff changeset	2511	{
90ce3da70b43 Initial load duke parents: diff changeset	2512	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	2513	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	2514	mov esi, prev_row
90ce3da70b43 Initial load duke parents: diff changeset	2515	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2516	// PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load duke parents: diff changeset	2517	movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load duke parents: diff changeset	2518	dpth3lp:
90ce3da70b43 Initial load duke parents: diff changeset	2519	psrlq mm1, ShiftRem // shift last 3 bytes to 1st 3 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2520	movq mm2, [esi + ebx] // load b=Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2521	punpcklbw mm1, mm0 // Unpack Low bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2522	movq mm3, [esi+ebx-8] // Prep c=Prior(x-bpp) bytes
90ce3da70b43 Initial load duke parents: diff changeset	2523	punpcklbw mm2, mm0 // Unpack Low bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2524	psrlq mm3, ShiftRem // shift last 3 bytes to 1st 3 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2525	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2526	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2527	punpcklbw mm3, mm0 // Unpack Low bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2528	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2529	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2530	psubw mm4, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2531	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2532	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2533	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2534	psubw mm5, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2535
90ce3da70b43 Initial load duke parents: diff changeset	2536	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2537	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2538	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2539	pcmpgtw mm0, mm4 // Create mask pav bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2540	paddw mm6, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2541	pand mm0, mm4 // Only pav bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2542	pcmpgtw mm7, mm5 // Create mask pbv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2543	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2544	pand mm7, mm5 // Only pbv bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2545	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2546	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2547	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2548	pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2549	pand mm0, mm6 // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2550	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2551	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2552	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2553	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2554	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2555	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2556	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2557	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2558	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2559	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2560	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2561	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2562	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2563	paddw mm7, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2564	paddw mm0, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2565	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2566	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2567	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2568	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2569	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2570	paddw mm7, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2571	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2572	packuswb mm7, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2573	movq mm3, [esi + ebx] // load c=Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2574	pand mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2575	movq mm2, mm3 // load b=Prior(x) step 1
90ce3da70b43 Initial load duke parents: diff changeset	2576	paddb mm7, [edi + ebx] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2577	punpcklbw mm3, mm0 // Unpack Low bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2578	movq [edi + ebx], mm7 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2579	movq mm1, mm7 // Now mm1 will be used as Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2580	// Now do Paeth for 2nd set of bytes (3-5)
90ce3da70b43 Initial load duke parents: diff changeset	2581	psrlq mm2, ShiftBpp // load b=Prior(x) step 2
90ce3da70b43 Initial load duke parents: diff changeset	2582	punpcklbw mm1, mm0 // Unpack Low bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2583	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2584	punpcklbw mm2, mm0 // Unpack Low bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2585	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2586	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2587	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2588	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2589	psubw mm5, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2590	psubw mm4, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2591	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) =
90ce3da70b43 Initial load duke parents: diff changeset	2592	// pav + pbv = pbv + pav
90ce3da70b43 Initial load duke parents: diff changeset	2593	movq mm6, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2594	paddw mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2595
90ce3da70b43 Initial load duke parents: diff changeset	2596	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2597	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2598	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2599	pcmpgtw mm0, mm5 // Create mask pbv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2600	pcmpgtw mm7, mm4 // Create mask pav bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2601	pand mm0, mm5 // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2602	pand mm7, mm4 // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2603	psubw mm5, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2604	psubw mm4, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2605	psubw mm5, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2606	psubw mm4, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2607	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2608	pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2609	pand mm0, mm6 // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2610	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2611	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2612	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2613	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2614	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2615	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2616	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2617	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2618	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2619	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2620	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2621	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2622	paddw mm7, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2623	paddw mm0, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2624	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2625	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2626	movq mm2, [esi + ebx] // load b=Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2627	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2628	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2629	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2630	paddw mm7, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2631	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2632	packuswb mm7, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2633	movq mm3, mm2 // load c=Prior(x-bpp) step 1
90ce3da70b43 Initial load duke parents: diff changeset	2634	pand mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2635	punpckhbw mm2, mm0 // Unpack High bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2636	psllq mm7, ShiftBpp // Shift bytes to 2nd group of 3 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2637	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2638	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2639	paddb mm7, [edi + ebx] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2640	psllq mm3, ShiftBpp // load c=Prior(x-bpp) step 2
90ce3da70b43 Initial load duke parents: diff changeset	2641	movq [edi + ebx], mm7 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2642	movq mm1, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2643	punpckhbw mm3, mm0 // Unpack High bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2644	psllq mm1, ShiftBpp // Shift bytes
90ce3da70b43 Initial load duke parents: diff changeset	2645	// Now mm1 will be used as Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2646	// Now do Paeth for 3rd, and final, set of bytes (6-7)
90ce3da70b43 Initial load duke parents: diff changeset	2647	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2648	punpckhbw mm1, mm0 // Unpack High bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2649	psubw mm4, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2650	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2651	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2652	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2653	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2654	psubw mm5, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2655	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2656	paddw mm6, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2657
90ce3da70b43 Initial load duke parents: diff changeset	2658	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2659	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2660	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2661	pcmpgtw mm0, mm4 // Create mask pav bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2662	pcmpgtw mm7, mm5 // Create mask pbv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2663	pand mm0, mm4 // Only pav bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2664	pand mm7, mm5 // Only pbv bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2665	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2666	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2667	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2668	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2669	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2670	pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2671	pand mm0, mm6 // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2672	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2673	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2674	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2675	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2676	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2677	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2678	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2679	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2680	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2681	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2682	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2683	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2684	paddw mm0, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2685	paddw mm7, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2686	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2687	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2688	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2689	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2690	paddw mm7, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2691	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2692	packuswb mm1, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2693	// Step ebx to next set of 8 bytes and repeat loop til done
90ce3da70b43 Initial load duke parents: diff changeset	2694	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2695	pand mm1, ActiveMaskEnd
90ce3da70b43 Initial load duke parents: diff changeset	2696	paddb mm1, [edi + ebx - 8] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2697
90ce3da70b43 Initial load duke parents: diff changeset	2698	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2699	pxor mm0, mm0 // pxor does not affect flags
90ce3da70b43 Initial load duke parents: diff changeset	2700	movq [edi + ebx - 8], mm1 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2701	// mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load duke parents: diff changeset	2702	// mm3 ready to be used as Prior(x-bpp) next loop
90ce3da70b43 Initial load duke parents: diff changeset	2703	jb dpth3lp
90ce3da70b43 Initial load duke parents: diff changeset	2704	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2705	}
90ce3da70b43 Initial load duke parents: diff changeset	2706	break;
90ce3da70b43 Initial load duke parents: diff changeset	2707
90ce3da70b43 Initial load duke parents: diff changeset	2708	case 6:
90ce3da70b43 Initial load duke parents: diff changeset	2709	case 7:
90ce3da70b43 Initial load duke parents: diff changeset	2710	case 5:
90ce3da70b43 Initial load duke parents: diff changeset	2711	{
90ce3da70b43 Initial load duke parents: diff changeset	2712	ActiveMask.use = 0x00000000ffffffff;
90ce3da70b43 Initial load duke parents: diff changeset	2713	ActiveMask2.use = 0xffffffff00000000;
90ce3da70b43 Initial load duke parents: diff changeset	2714	ShiftBpp.use = bpp << 3; // == bpp * 8
90ce3da70b43 Initial load duke parents: diff changeset	2715	ShiftRem.use = 64 - ShiftBpp.use;
90ce3da70b43 Initial load duke parents: diff changeset	2716	_asm
90ce3da70b43 Initial load duke parents: diff changeset	2717	{
90ce3da70b43 Initial load duke parents: diff changeset	2718	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	2719	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	2720	mov esi, prev_row
90ce3da70b43 Initial load duke parents: diff changeset	2721	// PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load duke parents: diff changeset	2722	movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load duke parents: diff changeset	2723	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2724	dpth6lp:
90ce3da70b43 Initial load duke parents: diff changeset	2725	// Must shift to position Raw(x-bpp) data
90ce3da70b43 Initial load duke parents: diff changeset	2726	psrlq mm1, ShiftRem
90ce3da70b43 Initial load duke parents: diff changeset	2727	// Do first set of 4 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2728	movq mm3, [esi+ebx-8] // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load duke parents: diff changeset	2729	punpcklbw mm1, mm0 // Unpack Low bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2730	movq mm2, [esi + ebx] // load b=Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2731	punpcklbw mm2, mm0 // Unpack Low bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2732	// Must shift to position Prior(x-bpp) data
90ce3da70b43 Initial load duke parents: diff changeset	2733	psrlq mm3, ShiftRem
90ce3da70b43 Initial load duke parents: diff changeset	2734	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2735	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2736	punpcklbw mm3, mm0 // Unpack Low bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2737	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2738	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2739	psubw mm4, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2740	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2741	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2742	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2743	psubw mm5, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2744	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2745	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2746	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2747	pcmpgtw mm0, mm4 // Create mask pav bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2748	paddw mm6, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2749	pand mm0, mm4 // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2750	pcmpgtw mm7, mm5 // Create mask pbv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2751	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2752	pand mm7, mm5 // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2753	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2754	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2755	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2756	pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2757	pand mm0, mm6 // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2758	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2759	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2760	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2761	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2762	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2763	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2764	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2765	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2766	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2767	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2768	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2769	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2770	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2771	paddw mm7, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2772	paddw mm0, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2773	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2774	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2775	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2776	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2777	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2778	paddw mm7, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2779	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2780	packuswb mm7, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2781	movq mm3, [esi + ebx - 8] // load c=Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2782	pand mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2783	psrlq mm3, ShiftRem
90ce3da70b43 Initial load duke parents: diff changeset	2784	movq mm2, [esi + ebx] // load b=Prior(x) step 1
90ce3da70b43 Initial load duke parents: diff changeset	2785	paddb mm7, [edi + ebx] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2786	movq mm6, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2787	movq [edi + ebx], mm7 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2788	movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load duke parents: diff changeset	2789	psllq mm6, ShiftBpp
90ce3da70b43 Initial load duke parents: diff changeset	2790	movq mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2791	psrlq mm1, ShiftRem
90ce3da70b43 Initial load duke parents: diff changeset	2792	por mm3, mm6
90ce3da70b43 Initial load duke parents: diff changeset	2793	psllq mm5, ShiftBpp
90ce3da70b43 Initial load duke parents: diff changeset	2794	punpckhbw mm3, mm0 // Unpack High bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2795	por mm1, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2796	// Do second set of 4 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2797	punpckhbw mm2, mm0 // Unpack High bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2798	punpckhbw mm1, mm0 // Unpack High bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2799	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2800	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2801	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2802	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2803	psubw mm4, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2804	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2805	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2806	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2807	psubw mm5, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2808	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2809	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2810	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2811	pcmpgtw mm0, mm4 // Create mask pav bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2812	paddw mm6, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2813	pand mm0, mm4 // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2814	pcmpgtw mm7, mm5 // Create mask pbv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2815	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2816	pand mm7, mm5 // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2817	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2818	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2819	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2820	pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
90ce3da70b43 Initial load duke parents: diff changeset	2821	pand mm0, mm6 // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2822	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2823	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2824	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2825	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2826	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2827	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2828	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2829	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2830	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2831	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2832	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2833	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2834	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2835	paddw mm7, mm5
90ce3da70b43 Initial load duke parents: diff changeset	2836	paddw mm0, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2837	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2838	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2839	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2840	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2841	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2842	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2843	paddw mm7, mm3
90ce3da70b43 Initial load duke parents: diff changeset	2844	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2845	// Step ebx to next set of 8 bytes and repeat loop until done
90ce3da70b43 Initial load duke parents: diff changeset	2846	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2847	packuswb mm1, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2848	paddb mm1, [edi + ebx - 8] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2849	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	2850	movq [edi + ebx - 8], mm1 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2851	// mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load duke parents: diff changeset	2852	jb dpth6lp
90ce3da70b43 Initial load duke parents: diff changeset	2853	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2854	}
90ce3da70b43 Initial load duke parents: diff changeset	2855	break;
90ce3da70b43 Initial load duke parents: diff changeset	2856
90ce3da70b43 Initial load duke parents: diff changeset	2857	case 4: // bpp == 4: each loop pass Paeth-decodes 8 row bytes as two 4-byte sets
90ce3da70b43 Initial load duke parents: diff changeset	2858	{
90ce3da70b43 Initial load duke parents: diff changeset	2859	ActiveMask.use = 0x00000000ffffffff; // keeps only the low 4 bytes of a quadword
90ce3da70b43 Initial load duke parents: diff changeset	2860	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2861	mov ebx, diff // ebx = byte offset used to index both rows
90ce3da70b43 Initial load duke parents: diff changeset	2862	mov edi, row // edi = row being decoded (read and written back)
90ce3da70b43 Initial load duke parents: diff changeset	2863	mov esi, prev_row // esi = previous row (only read in this block)
90ce3da70b43 Initial load duke parents: diff changeset	2864	pxor mm0, mm0 // mm0 = 0, the zero operand for byte->word unpacking
90ce3da70b43 Initial load duke parents: diff changeset	2865	// PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load duke parents: diff changeset	2866	movq mm1, [edi+ebx-8] // Only time should need to read
90ce3da70b43 Initial load duke parents: diff changeset	2867	// a=Raw(x-bpp) bytes from memory; later passes reuse mm1
90ce3da70b43 Initial load duke parents: diff changeset	2868	dpth4lp:
90ce3da70b43 Initial load duke parents: diff changeset	2869	// Do first set of 4 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2870	movq mm3, [esi+ebx-8] // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load duke parents: diff changeset	2871	punpckhbw mm1, mm0 // Unpack High bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2872	movq mm2, [esi + ebx] // load b=Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	2873	punpcklbw mm2, mm0 // Unpack Low bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2874	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2875	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2876	punpckhbw mm3, mm0 // Unpack High bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2877	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2878	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2879	psubw mm4, mm3 // mm4 = pav
90ce3da70b43 Initial load duke parents: diff changeset	2880	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2881	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2882	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2883	psubw mm5, mm3 // mm5 = pbv
90ce3da70b43 Initial load duke parents: diff changeset	2884	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2885	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2886	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2887	pcmpgtw mm0, mm4 // Create mask pav words < 0
90ce3da70b43 Initial load duke parents: diff changeset	2888	paddw mm6, mm5 // mm6 = pcv
90ce3da70b43 Initial load duke parents: diff changeset	2889	pand mm0, mm4 // Only pav words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2890	pcmpgtw mm7, mm5 // Create mask pbv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	2891	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2892	pand mm7, mm5 // Only pbv words < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2893	psubw mm4, mm0 // negative words subtracted twice: mm4 = pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2894	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2895	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2896	pcmpgtw mm0, mm6 // Create mask pcv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	2897	pand mm0, mm6 // Only pcv words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2898	psubw mm5, mm7 // mm5 = pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2899	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2900	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2901	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2902	psubw mm6, mm0 // mm6 = pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2903	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2904	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2905	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2906	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2907	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2908	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2909	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2910	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2911	paddw mm7, mm5 // mm7 = pab = (pa <= pb) ? pa : pb
90ce3da70b43 Initial load duke parents: diff changeset	2912	paddw mm0, mm2 // mm0 = (pa <= pb) ? a : b
90ce3da70b43 Initial load duke parents: diff changeset	2913	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2914	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2915	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2916	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2917	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2918	paddw mm7, mm3 // mm7 = predictor words: c where pab > pc, else a or b
90ce3da70b43 Initial load duke parents: diff changeset	2919	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2920	packuswb mm7, mm1 // predictor bytes in the low 4 bytes; high 4 are zero
90ce3da70b43 Initial load duke parents: diff changeset	2921	movq mm3, [esi + ebx] // load Prior(x) quadword; its low half is c for the second set
90ce3da70b43 Initial load duke parents: diff changeset	2922	pand mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	2923	movq mm2, mm3 // copy it; its high half is b for the second set
90ce3da70b43 Initial load duke parents: diff changeset	2924	paddb mm7, [edi + ebx] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2925	punpcklbw mm3, mm0 // Unpack Low bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	2926	movq [edi + ebx], mm7 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2927	movq mm1, mm7 // Now mm1 will be used as Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	2928	// Do second set of 4 bytes
90ce3da70b43 Initial load duke parents: diff changeset	2929	punpckhbw mm2, mm0 // Unpack High bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	2930	punpcklbw mm1, mm0 // Unpack Low bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	2931	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	2932	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	2933	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	2934	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2935	psubw mm4, mm3 // mm4 = pav
90ce3da70b43 Initial load duke parents: diff changeset	2936	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2937	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	2938	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2939	psubw mm5, mm3 // mm5 = pbv
90ce3da70b43 Initial load duke parents: diff changeset	2940	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2941	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2942	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2943	pcmpgtw mm0, mm4 // Create mask pav words < 0
90ce3da70b43 Initial load duke parents: diff changeset	2944	paddw mm6, mm5 // mm6 = pcv
90ce3da70b43 Initial load duke parents: diff changeset	2945	pand mm0, mm4 // Only pav words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2946	pcmpgtw mm7, mm5 // Create mask pbv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	2947	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2948	pand mm7, mm5 // Only pbv words < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	2949	psubw mm4, mm0 // negative words subtracted twice: mm4 = pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	2950	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2951	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2952	pcmpgtw mm0, mm6 // Create mask pcv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	2953	pand mm0, mm6 // Only pcv words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	2954	psubw mm5, mm7 // mm5 = pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	2955	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2956	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	2957	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2958	psubw mm6, mm0 // mm6 = pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	2959	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	2960	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2961	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	2962	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2963	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	2964	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2965	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	2966	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2967	paddw mm7, mm5 // mm7 = pab = (pa <= pb) ? pa : pb
90ce3da70b43 Initial load duke parents: diff changeset	2968	paddw mm0, mm2 // mm0 = (pa <= pb) ? a : b
90ce3da70b43 Initial load duke parents: diff changeset	2969	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	2970	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	2971	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	2972	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	2973	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2974	pxor mm1, mm1 // mm1 is already zero from the pxor three lines up
90ce3da70b43 Initial load duke parents: diff changeset	2975	paddw mm7, mm3 // mm7 = predictor words: c where pab > pc, else a or b
90ce3da70b43 Initial load duke parents: diff changeset	2976	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	2977	// Step ebx to next set of 8 bytes and repeat loop until done
90ce3da70b43 Initial load duke parents: diff changeset	2978	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	2979	packuswb mm1, mm7 // predictor bytes in the high 4 bytes; low 4 are zero
90ce3da70b43 Initial load duke parents: diff changeset	2980	paddb mm1, [edi + ebx - 8] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	2981	cmp ebx, MMXLength // flags survive the movq below and feed the jb
90ce3da70b43 Initial load duke parents: diff changeset	2982	movq [edi + ebx - 8], mm1 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	2983	// mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load duke parents: diff changeset	2984	jb dpth4lp
90ce3da70b43 Initial load duke parents: diff changeset	2985	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	2986	}
90ce3da70b43 Initial load duke parents: diff changeset	2987	break;
90ce3da70b43 Initial load duke parents: diff changeset	2988	case 8: // bpp == 8: each loop pass Paeth-decodes 8 row bytes as two 4-byte sets
90ce3da70b43 Initial load duke parents: diff changeset	2989	{
90ce3da70b43 Initial load duke parents: diff changeset	2990	ActiveMask.use = 0x00000000ffffffff; // keeps only the low 4 bytes of a quadword
90ce3da70b43 Initial load duke parents: diff changeset	2991	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	2992	mov ebx, diff // ebx = byte offset used to index both rows
90ce3da70b43 Initial load duke parents: diff changeset	2993	mov edi, row // edi = row being decoded (read and written back)
90ce3da70b43 Initial load duke parents: diff changeset	2994	mov esi, prev_row // esi = previous row (only read in this block)
90ce3da70b43 Initial load duke parents: diff changeset	2995	pxor mm0, mm0 // mm0 = 0, the zero operand for byte->word unpacking
90ce3da70b43 Initial load duke parents: diff changeset	2996	// PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load duke parents: diff changeset	2997	movq mm1, [edi+ebx-8] // Only time should need to read
90ce3da70b43 Initial load duke parents: diff changeset	2998	// a=Raw(x-bpp) bytes for the first set; later passes reuse mm1
90ce3da70b43 Initial load duke parents: diff changeset	2999	dpth8lp:
90ce3da70b43 Initial load duke parents: diff changeset	3000	// Do first set of 4 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3001	movq mm3, [esi+ebx-8] // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load duke parents: diff changeset	3002	punpcklbw mm1, mm0 // Unpack Low bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	3003	movq mm2, [esi + ebx] // load b=Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	3004	punpcklbw mm2, mm0 // Unpack Low bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	3005	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	3006	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	3007	punpcklbw mm3, mm0 // Unpack Low bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	3008	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	3009	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3010	psubw mm4, mm3 // mm4 = pav
90ce3da70b43 Initial load duke parents: diff changeset	3011	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3012	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	3013	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3014	psubw mm5, mm3 // mm5 = pbv
90ce3da70b43 Initial load duke parents: diff changeset	3015	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	3016	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	3017	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	3018	pcmpgtw mm0, mm4 // Create mask pav words < 0
90ce3da70b43 Initial load duke parents: diff changeset	3019	paddw mm6, mm5 // mm6 = pcv
90ce3da70b43 Initial load duke parents: diff changeset	3020	pand mm0, mm4 // Only pav words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	3021	pcmpgtw mm7, mm5 // Create mask pbv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	3022	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3023	pand mm7, mm5 // Only pbv words < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	3024	psubw mm4, mm0 // negative words subtracted twice: mm4 = pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	3025	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3026	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3027	pcmpgtw mm0, mm6 // Create mask pcv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	3028	pand mm0, mm6 // Only pcv words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	3029	psubw mm5, mm7 // mm5 = pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	3030	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3031	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	3032	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3033	psubw mm6, mm0 // mm6 = pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	3034	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	3035	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3036	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	3037	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3038	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	3039	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3040	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3041	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3042	paddw mm7, mm5 // mm7 = pab = (pa <= pb) ? pa : pb
90ce3da70b43 Initial load duke parents: diff changeset	3043	paddw mm0, mm2 // mm0 = (pa <= pb) ? a : b
90ce3da70b43 Initial load duke parents: diff changeset	3044	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	3045	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	3046	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3047	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3048	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3049	paddw mm7, mm3 // mm7 = predictor words: c where pab > pc, else a or b
90ce3da70b43 Initial load duke parents: diff changeset	3050	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3051	packuswb mm7, mm1 // predictor bytes in the low 4 bytes; high 4 are zero
90ce3da70b43 Initial load duke parents: diff changeset	3052	movq mm3, [esi+ebx-8] // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load duke parents: diff changeset	3053	pand mm7, ActiveMask
90ce3da70b43 Initial load duke parents: diff changeset	3054	movq mm2, [esi + ebx] // load b=Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	3055	paddb mm7, [edi + ebx] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	3056	punpckhbw mm3, mm0 // Unpack High bytes of c
90ce3da70b43 Initial load duke parents: diff changeset	3057	movq [edi + ebx], mm7 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	3058	movq mm1, [edi+ebx-8] // read a=Raw(x-bpp) bytes
90ce3da70b43 Initial load duke parents: diff changeset	3059
90ce3da70b43 Initial load duke parents: diff changeset	3060	// Do second set of 4 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3061	punpckhbw mm2, mm0 // Unpack High bytes of b
90ce3da70b43 Initial load duke parents: diff changeset	3062	punpckhbw mm1, mm0 // Unpack High bytes of a
90ce3da70b43 Initial load duke parents: diff changeset	3063	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	3064	movq mm4, mm2
90ce3da70b43 Initial load duke parents: diff changeset	3065	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	3066	movq mm5, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3067	psubw mm4, mm3 // mm4 = pav
90ce3da70b43 Initial load duke parents: diff changeset	3068	pxor mm7, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3069	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	3070	movq mm6, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3071	psubw mm5, mm3 // mm5 = pbv
90ce3da70b43 Initial load duke parents: diff changeset	3072	// pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	3073	// pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	3074	// pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	3075	pcmpgtw mm0, mm4 // Create mask pav words < 0
90ce3da70b43 Initial load duke parents: diff changeset	3076	paddw mm6, mm5 // mm6 = pcv
90ce3da70b43 Initial load duke parents: diff changeset	3077	pand mm0, mm4 // Only pav words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	3078	pcmpgtw mm7, mm5 // Create mask pbv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	3079	psubw mm4, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3080	pand mm7, mm5 // Only pbv words < 0 in mm7
90ce3da70b43 Initial load duke parents: diff changeset	3081	psubw mm4, mm0 // negative words subtracted twice: mm4 = pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	3082	psubw mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3083	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3084	pcmpgtw mm0, mm6 // Create mask pcv words < 0
90ce3da70b43 Initial load duke parents: diff changeset	3085	pand mm0, mm6 // Only pcv words < 0 in mm0
90ce3da70b43 Initial load duke parents: diff changeset	3086	psubw mm5, mm7 // mm5 = pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	3087	psubw mm6, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3088	// test pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	3089	movq mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3090	psubw mm6, mm0 // mm6 = pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	3091	pcmpgtw mm7, mm5 // pa > pb?
90ce3da70b43 Initial load duke parents: diff changeset	3092	movq mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3093	// use mm7 mask to merge pa & pb
90ce3da70b43 Initial load duke parents: diff changeset	3094	pand mm5, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3095	// use mm0 mask copy to merge a & b
90ce3da70b43 Initial load duke parents: diff changeset	3096	pand mm2, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3097	pandn mm7, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3098	pandn mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3099	paddw mm7, mm5 // mm7 = pab = (pa <= pb) ? pa : pb
90ce3da70b43 Initial load duke parents: diff changeset	3100	paddw mm0, mm2 // mm0 = (pa <= pb) ? a : b
90ce3da70b43 Initial load duke parents: diff changeset	3101	// test ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load duke parents: diff changeset	3102	pcmpgtw mm7, mm6 // pab > pc?
90ce3da70b43 Initial load duke parents: diff changeset	3103	pxor mm1, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3104	pand mm3, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3105	pandn mm7, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3106	pxor mm1, mm1 // mm1 is already zero from the pxor three lines up
90ce3da70b43 Initial load duke parents: diff changeset	3107	paddw mm7, mm3 // mm7 = predictor words: c where pab > pc, else a or b
90ce3da70b43 Initial load duke parents: diff changeset	3108	pxor mm0, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3109	// Step ebx to next set of 8 bytes and repeat loop until done
90ce3da70b43 Initial load duke parents: diff changeset	3110	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3111	packuswb mm1, mm7 // predictor bytes in the high 4 bytes; low 4 are zero
90ce3da70b43 Initial load duke parents: diff changeset	3112	paddb mm1, [edi + ebx - 8] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load duke parents: diff changeset	3113	cmp ebx, MMXLength // flags survive the movq below and feed the jb
90ce3da70b43 Initial load duke parents: diff changeset	3114	movq [edi + ebx - 8], mm1 // write back updated value
90ce3da70b43 Initial load duke parents: diff changeset	3115	// mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load duke parents: diff changeset	3116	jb dpth8lp
90ce3da70b43 Initial load duke parents: diff changeset	3117	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3118	}
90ce3da70b43 Initial load duke parents: diff changeset	3119	break;
90ce3da70b43 Initial load duke parents: diff changeset	3120
90ce3da70b43 Initial load duke parents: diff changeset	3121	case 1: // bpp = 1
90ce3da70b43 Initial load duke parents: diff changeset	3122	case 2: // bpp = 2
90ce3da70b43 Initial load duke parents: diff changeset	3123	default: // bpp > 8
90ce3da70b43 Initial load duke parents: diff changeset	3124	{
90ce3da70b43 Initial load duke parents: diff changeset	3125	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3126	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3127	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3128	jnb dpthdend
90ce3da70b43 Initial load duke parents: diff changeset	3129	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3130	mov esi, prev_row
90ce3da70b43 Initial load duke parents: diff changeset	3131	// Do Paeth decode for remaining bytes
90ce3da70b43 Initial load duke parents: diff changeset	3132	mov edx, ebx
90ce3da70b43 Initial load duke parents: diff changeset	3133	xor ecx, ecx // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load duke parents: diff changeset	3134	sub edx, bpp // Set edx = ebx - bpp
90ce3da70b43 Initial load duke parents: diff changeset	3135	dpthdlp:
90ce3da70b43 Initial load duke parents: diff changeset	3136	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3137	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	3138	mov al, [esi + ebx] // load Prior(x) into al
90ce3da70b43 Initial load duke parents: diff changeset	3139	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3140	sub eax, ecx // subtract Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3141	mov patemp, eax // Save pav for later use
90ce3da70b43 Initial load duke parents: diff changeset	3142	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3143	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	3144	mov al, [edi + edx] // load Raw(x-bpp) into al
90ce3da70b43 Initial load duke parents: diff changeset	3145	sub eax, ecx // subtract Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3146	mov ecx, eax
90ce3da70b43 Initial load duke parents: diff changeset	3147	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	3148	add eax, patemp // pcv = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	3149	// pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	3150	test eax, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	3151	jz dpthdpca
90ce3da70b43 Initial load duke parents: diff changeset	3152	neg eax // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	3153	dpthdpca:
90ce3da70b43 Initial load duke parents: diff changeset	3154	mov pctemp, eax // save pc for later use
90ce3da70b43 Initial load duke parents: diff changeset	3155	// pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	3156	test ecx, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	3157	jz dpthdpba
90ce3da70b43 Initial load duke parents: diff changeset	3158	neg ecx // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	3159	dpthdpba:
90ce3da70b43 Initial load duke parents: diff changeset	3160	mov pbtemp, ecx // save pb for later use
90ce3da70b43 Initial load duke parents: diff changeset	3161	// pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	3162	mov eax, patemp
90ce3da70b43 Initial load duke parents: diff changeset	3163	test eax, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	3164	jz dpthdpaa
90ce3da70b43 Initial load duke parents: diff changeset	3165	neg eax // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	3166	dpthdpaa:
90ce3da70b43 Initial load duke parents: diff changeset	3167	mov patemp, eax // save pa for later use
90ce3da70b43 Initial load duke parents: diff changeset	3168	// test if pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	3169	cmp eax, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3170	jna dpthdabb
90ce3da70b43 Initial load duke parents: diff changeset	3171	// pa > pb; now test if pb <= pc
90ce3da70b43 Initial load duke parents: diff changeset	3172	cmp ecx, pctemp
90ce3da70b43 Initial load duke parents: diff changeset	3173	jna dpthdbbc
90ce3da70b43 Initial load duke parents: diff changeset	3174	// pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3175	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3176	jmp dpthdpaeth
90ce3da70b43 Initial load duke parents: diff changeset	3177	dpthdbbc:
90ce3da70b43 Initial load duke parents: diff changeset	3178	// pb <= pc; Raw(x) = Paeth(x) + Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	3179	mov cl, [esi + ebx] // load Prior(x) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3180	jmp dpthdpaeth
90ce3da70b43 Initial load duke parents: diff changeset	3181	dpthdabb:
90ce3da70b43 Initial load duke parents: diff changeset	3182	// pa <= pb; now test if pa <= pc
90ce3da70b43 Initial load duke parents: diff changeset	3183	cmp eax, pctemp
90ce3da70b43 Initial load duke parents: diff changeset	3184	jna dpthdabc
90ce3da70b43 Initial load duke parents: diff changeset	3185	// pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3186	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3187	jmp dpthdpaeth
90ce3da70b43 Initial load duke parents: diff changeset	3188	dpthdabc:
90ce3da70b43 Initial load duke parents: diff changeset	3189	// pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3190	mov cl, [edi + edx] // load Raw(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3191	dpthdpaeth:
90ce3da70b43 Initial load duke parents: diff changeset	3192	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3193	inc edx
90ce3da70b43 Initial load duke parents: diff changeset	3194	// Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
90ce3da70b43 Initial load duke parents: diff changeset	3195	add [edi + ebx - 1], cl
90ce3da70b43 Initial load duke parents: diff changeset	3196	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3197	jb dpthdlp
90ce3da70b43 Initial load duke parents: diff changeset	3198	dpthdend:
90ce3da70b43 Initial load duke parents: diff changeset	3199	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3200	}
90ce3da70b43 Initial load duke parents: diff changeset	3201	return; // No need to go further with this one
90ce3da70b43 Initial load duke parents: diff changeset	3202	} // end switch ( bpp )
90ce3da70b43 Initial load duke parents: diff changeset	3203	_asm
90ce3da70b43 Initial load duke parents: diff changeset	3204	{
90ce3da70b43 Initial load duke parents: diff changeset	3205	// MMX acceleration complete now do clean-up
90ce3da70b43 Initial load duke parents: diff changeset	3206	// Check if any remaining bytes left to decode
90ce3da70b43 Initial load duke parents: diff changeset	3207	mov ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3208	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3209	jnb dpthend
90ce3da70b43 Initial load duke parents: diff changeset	3210	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3211	mov esi, prev_row
90ce3da70b43 Initial load duke parents: diff changeset	3212	// Do Paeth decode for remaining bytes
90ce3da70b43 Initial load duke parents: diff changeset	3213	mov edx, ebx
90ce3da70b43 Initial load duke parents: diff changeset	3214	xor ecx, ecx // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load duke parents: diff changeset	3215	sub edx, bpp // Set edx = ebx - bpp
90ce3da70b43 Initial load duke parents: diff changeset	3216	dpthlp2:
90ce3da70b43 Initial load duke parents: diff changeset	3217	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3218	// pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load duke parents: diff changeset	3219	mov al, [esi + ebx] // load Prior(x) into al
90ce3da70b43 Initial load duke parents: diff changeset	3220	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3221	sub eax, ecx // subtract Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3222	mov patemp, eax // Save pav for later use
90ce3da70b43 Initial load duke parents: diff changeset	3223	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3224	// pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load duke parents: diff changeset	3225	mov al, [edi + edx] // load Raw(x-bpp) into al
90ce3da70b43 Initial load duke parents: diff changeset	3226	sub eax, ecx // subtract Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3227	mov ecx, eax
90ce3da70b43 Initial load duke parents: diff changeset	3228	// pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	3229	add eax, patemp // pcv = pav + pbv
90ce3da70b43 Initial load duke parents: diff changeset	3230	// pc = abs(pcv)
90ce3da70b43 Initial load duke parents: diff changeset	3231	test eax, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	3232	jz dpthpca2
90ce3da70b43 Initial load duke parents: diff changeset	3233	neg eax // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	3234	dpthpca2:
90ce3da70b43 Initial load duke parents: diff changeset	3235	mov pctemp, eax // save pc for later use
90ce3da70b43 Initial load duke parents: diff changeset	3236	// pb = abs(pbv)
90ce3da70b43 Initial load duke parents: diff changeset	3237	test ecx, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	3238	jz dpthpba2
90ce3da70b43 Initial load duke parents: diff changeset	3239	neg ecx // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	3240	dpthpba2:
90ce3da70b43 Initial load duke parents: diff changeset	3241	mov pbtemp, ecx // save pb for later use
90ce3da70b43 Initial load duke parents: diff changeset	3242	// pa = abs(pav)
90ce3da70b43 Initial load duke parents: diff changeset	3243	mov eax, patemp
90ce3da70b43 Initial load duke parents: diff changeset	3244	test eax, 0x80000000
90ce3da70b43 Initial load duke parents: diff changeset	3245	jz dpthpaa2
90ce3da70b43 Initial load duke parents: diff changeset	3246	neg eax // reverse sign of neg values
90ce3da70b43 Initial load duke parents: diff changeset	3247	dpthpaa2:
90ce3da70b43 Initial load duke parents: diff changeset	3248	mov patemp, eax // save pa for later use
90ce3da70b43 Initial load duke parents: diff changeset	3249	// test if pa <= pb
90ce3da70b43 Initial load duke parents: diff changeset	3250	cmp eax, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3251	jna dpthabb2
90ce3da70b43 Initial load duke parents: diff changeset	3252	// pa > pb; now test if pb <= pc
90ce3da70b43 Initial load duke parents: diff changeset	3253	cmp ecx, pctemp
90ce3da70b43 Initial load duke parents: diff changeset	3254	jna dpthbbc2
90ce3da70b43 Initial load duke parents: diff changeset	3255	// pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3256	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3257	jmp dpthpaeth2
90ce3da70b43 Initial load duke parents: diff changeset	3258	dpthbbc2:
90ce3da70b43 Initial load duke parents: diff changeset	3259	// pb <= pc; Raw(x) = Paeth(x) + Prior(x)
90ce3da70b43 Initial load duke parents: diff changeset	3260	mov cl, [esi + ebx] // load Prior(x) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3261	jmp dpthpaeth2
90ce3da70b43 Initial load duke parents: diff changeset	3262	dpthabb2:
90ce3da70b43 Initial load duke parents: diff changeset	3263	// pa <= pb; now test if pa <= pc
90ce3da70b43 Initial load duke parents: diff changeset	3264	cmp eax, pctemp
90ce3da70b43 Initial load duke parents: diff changeset	3265	jna dpthabc2
90ce3da70b43 Initial load duke parents: diff changeset	3266	// pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3267	mov cl, [esi + edx] // load Prior(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3268	jmp dpthpaeth2
90ce3da70b43 Initial load duke parents: diff changeset	3269	dpthabc2:
90ce3da70b43 Initial load duke parents: diff changeset	3270	// pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3271	mov cl, [edi + edx] // load Raw(x-bpp) into cl
90ce3da70b43 Initial load duke parents: diff changeset	3272	dpthpaeth2:
90ce3da70b43 Initial load duke parents: diff changeset	3273	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3274	inc edx
90ce3da70b43 Initial load duke parents: diff changeset	3275	// Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
90ce3da70b43 Initial load duke parents: diff changeset	3276	add [edi + ebx - 1], cl
90ce3da70b43 Initial load duke parents: diff changeset	3277	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3278	jb dpthlp2
90ce3da70b43 Initial load duke parents: diff changeset	3279	dpthend:
90ce3da70b43 Initial load duke parents: diff changeset	3280	emms // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load duke parents: diff changeset	3281	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3282	}
90ce3da70b43 Initial load duke parents: diff changeset	3283
90ce3da70b43 Initial load duke parents: diff changeset	3284	// Optimized code for PNG Sub filter decoder
90ce3da70b43 Initial load duke parents: diff changeset	3285	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	3286	png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row)
90ce3da70b43 Initial load duke parents: diff changeset	3287	{
90ce3da70b43 Initial load duke parents: diff changeset	3288	//int test;
90ce3da70b43 Initial load duke parents: diff changeset	3289	int bpp;
90ce3da70b43 Initial load duke parents: diff changeset	3290	png_uint_32 FullLength;
90ce3da70b43 Initial load duke parents: diff changeset	3291	png_uint_32 MMXLength;
90ce3da70b43 Initial load duke parents: diff changeset	3292	int diff;
90ce3da70b43 Initial load duke parents: diff changeset	3293
90ce3da70b43 Initial load duke parents: diff changeset	3294	bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
90ce3da70b43 Initial load duke parents: diff changeset	3295	FullLength = row_info->rowbytes - bpp; // # of bytes to filter
90ce3da70b43 Initial load duke parents: diff changeset	3296	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3297	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3298	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3299	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3300	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3301	// get # of bytes to alignment
90ce3da70b43 Initial load duke parents: diff changeset	3302	mov diff, edi // take start of row
90ce3da70b43 Initial load duke parents: diff changeset	3303	add diff, 0xf // add 7 + 8 to incr past
90ce3da70b43 Initial load duke parents: diff changeset	3304	// alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	3305	xor ebx, ebx
90ce3da70b43 Initial load duke parents: diff changeset	3306	and diff, 0xfffffff8 // mask to alignment boundary
90ce3da70b43 Initial load duke parents: diff changeset	3307	sub diff, edi // subtract from start ==> value
90ce3da70b43 Initial load duke parents: diff changeset	3308	// ebx at alignment
90ce3da70b43 Initial load duke parents: diff changeset	3309	jz dsubgo
90ce3da70b43 Initial load duke parents: diff changeset	3310	// fix alignment
90ce3da70b43 Initial load duke parents: diff changeset	3311	dsublp1:
90ce3da70b43 Initial load duke parents: diff changeset	3312	mov al, [esi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3313	add [edi+ebx], al
90ce3da70b43 Initial load duke parents: diff changeset	3314	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3315	cmp ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3316	jb dsublp1
90ce3da70b43 Initial load duke parents: diff changeset	3317	dsubgo:
90ce3da70b43 Initial load duke parents: diff changeset	3318	mov ecx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3319	mov edx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3320	sub edx, ebx // subtract alignment fix
90ce3da70b43 Initial load duke parents: diff changeset	3321	and edx, 0x00000007 // calc bytes over mult of 8
90ce3da70b43 Initial load duke parents: diff changeset	3322	sub ecx, edx // drop over bytes from length
90ce3da70b43 Initial load duke parents: diff changeset	3323	mov MMXLength, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3324	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3325
90ce3da70b43 Initial load duke parents: diff changeset	3326	// Now do the math for the rest of the row
90ce3da70b43 Initial load duke parents: diff changeset	3327	switch ( bpp )
90ce3da70b43 Initial load duke parents: diff changeset	3328	{
90ce3da70b43 Initial load duke parents: diff changeset	3329	case 3:
90ce3da70b43 Initial load duke parents: diff changeset	3330	{
90ce3da70b43 Initial load duke parents: diff changeset	3331	ActiveMask.use = 0x0000ffffff000000;
90ce3da70b43 Initial load duke parents: diff changeset	3332	ShiftBpp.use = 24; // == 3 * 8
90ce3da70b43 Initial load duke parents: diff changeset	3333	ShiftRem.use = 40; // == 64 - 24
90ce3da70b43 Initial load duke parents: diff changeset	3334	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3335	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3336	movq mm7, ActiveMask // Load ActiveMask for 2nd active byte group
90ce3da70b43 Initial load duke parents: diff changeset	3337	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3338	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3339	movq mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3340	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3341	psllq mm6, ShiftBpp // Move mask in mm6 to cover 3rd active
90ce3da70b43 Initial load duke parents: diff changeset	3342	// byte group
90ce3da70b43 Initial load duke parents: diff changeset	3343	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	3344	movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load duke parents: diff changeset	3345	dsub3lp:
90ce3da70b43 Initial load duke parents: diff changeset	3346	psrlq mm1, ShiftRem // Shift data for adding 1st bpp bytes
90ce3da70b43 Initial load duke parents: diff changeset	3347	// no need for mask; shift clears inactive bytes
90ce3da70b43 Initial load duke parents: diff changeset	3348	// Add 1st active group
90ce3da70b43 Initial load duke parents: diff changeset	3349	movq mm0, [edi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3350	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3351	// Add 2nd active group
90ce3da70b43 Initial load duke parents: diff changeset	3352	movq mm1, mm0 // mov updated Raws to mm1
90ce3da70b43 Initial load duke parents: diff changeset	3353	psllq mm1, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	3354	pand mm1, mm7 // mask to use only 2nd active group
90ce3da70b43 Initial load duke parents: diff changeset	3355	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3356	// Add 3rd active group
90ce3da70b43 Initial load duke parents: diff changeset	3357	movq mm1, mm0 // mov updated Raws to mm1
90ce3da70b43 Initial load duke parents: diff changeset	3358	psllq mm1, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	3359	pand mm1, mm6 // mask to use only 3rd active group
90ce3da70b43 Initial load duke parents: diff changeset	3360	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3361	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3362	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3363	movq [edi+ebx-8], mm0 // Write updated Raws back to array
90ce3da70b43 Initial load duke parents: diff changeset	3364	// Prep for doing 1st add at top of loop
90ce3da70b43 Initial load duke parents: diff changeset	3365	movq mm1, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3366	jb dsub3lp
90ce3da70b43 Initial load duke parents: diff changeset	3367	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3368	}
90ce3da70b43 Initial load duke parents: diff changeset	3369	break;
90ce3da70b43 Initial load duke parents: diff changeset	3370
90ce3da70b43 Initial load duke parents: diff changeset	3371	case 1:
90ce3da70b43 Initial load duke parents: diff changeset	3372	{
90ce3da70b43 Initial load duke parents: diff changeset	3373	// Placed here just in case this is a duplicate of the
90ce3da70b43 Initial load duke parents: diff changeset	3374	// non-MMX code for the SUB filter in png_read_filter_row below
90ce3da70b43 Initial load duke parents: diff changeset	3375	//
90ce3da70b43 Initial load duke parents: diff changeset	3376	// png_bytep rp;
90ce3da70b43 Initial load duke parents: diff changeset	3377	// png_bytep lp;
90ce3da70b43 Initial load duke parents: diff changeset	3378	// png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	3379	// bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load duke parents: diff changeset	3380	// for (i = (png_uint_32)bpp, rp = row + bpp, lp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3381	// i < row_info->rowbytes; i++, rp++, lp++)
90ce3da70b43 Initial load duke parents: diff changeset	3382	// {
90ce3da70b43 Initial load duke parents: diff changeset	3383	// *rp = (png_byte)(((int)(*rp) + (int)(*lp)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3384	// }
90ce3da70b43 Initial load duke parents: diff changeset	3385	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3386	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3387	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3388	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3389	jnb dsub1end
90ce3da70b43 Initial load duke parents: diff changeset	3390	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3391	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3392	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3393	dsub1lp:
90ce3da70b43 Initial load duke parents: diff changeset	3394	mov al, [esi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3395	add [edi+ebx], al
90ce3da70b43 Initial load duke parents: diff changeset	3396	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3397	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3398	jb dsub1lp
90ce3da70b43 Initial load duke parents: diff changeset	3399	dsub1end:
90ce3da70b43 Initial load duke parents: diff changeset	3400	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3401	}
90ce3da70b43 Initial load duke parents: diff changeset	3402	return;
90ce3da70b43 Initial load duke parents: diff changeset	3403
90ce3da70b43 Initial load duke parents: diff changeset	3404	case 6:
90ce3da70b43 Initial load duke parents: diff changeset	3405	case 7:
90ce3da70b43 Initial load duke parents: diff changeset	3406	case 4:
90ce3da70b43 Initial load duke parents: diff changeset	3407	case 5:
90ce3da70b43 Initial load duke parents: diff changeset	3408	{
90ce3da70b43 Initial load duke parents: diff changeset	3409	ShiftBpp.use = bpp << 3;
90ce3da70b43 Initial load duke parents: diff changeset	3410	ShiftRem.use = 64 - ShiftBpp.use;
90ce3da70b43 Initial load duke parents: diff changeset	3411	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3412	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3413	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3414	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3415	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3416	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	3417	movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load duke parents: diff changeset	3418	dsub4lp:
90ce3da70b43 Initial load duke parents: diff changeset	3419	psrlq mm1, ShiftRem // Shift data for adding 1st bpp bytes
90ce3da70b43 Initial load duke parents: diff changeset	3420	// no need for mask; shift clears inactive bytes
90ce3da70b43 Initial load duke parents: diff changeset	3421	movq mm0, [edi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3422	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3423	// Add 2nd active group
90ce3da70b43 Initial load duke parents: diff changeset	3424	movq mm1, mm0 // mov updated Raws to mm1
90ce3da70b43 Initial load duke parents: diff changeset	3425	psllq mm1, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	3426	// there is no need for any mask
90ce3da70b43 Initial load duke parents: diff changeset	3427	// since shift clears inactive bits/bytes
90ce3da70b43 Initial load duke parents: diff changeset	3428	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3429	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3430	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3431	movq [edi+ebx-8], mm0
90ce3da70b43 Initial load duke parents: diff changeset	3432	movq mm1, mm0 // Prep for doing 1st add at top of loop
90ce3da70b43 Initial load duke parents: diff changeset	3433	jb dsub4lp
90ce3da70b43 Initial load duke parents: diff changeset	3434	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3435	}
90ce3da70b43 Initial load duke parents: diff changeset	3436	break;
90ce3da70b43 Initial load duke parents: diff changeset	3437
90ce3da70b43 Initial load duke parents: diff changeset	3438	case 2:
90ce3da70b43 Initial load duke parents: diff changeset	3439	{
90ce3da70b43 Initial load duke parents: diff changeset	3440	ActiveMask.use = 0x00000000ffff0000;
90ce3da70b43 Initial load duke parents: diff changeset	3441	ShiftBpp.use = 16; // == 2 * 8
90ce3da70b43 Initial load duke parents: diff changeset	3442	ShiftRem.use = 48; // == 64 - 16
90ce3da70b43 Initial load duke parents: diff changeset	3443	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3444	movq mm7, ActiveMask // Load ActiveMask for 2nd active byte group
90ce3da70b43 Initial load duke parents: diff changeset	3445	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3446	movq mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3447	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3448	psllq mm6, ShiftBpp // Move mask in mm6 to cover 3rd active
90ce3da70b43 Initial load duke parents: diff changeset	3449	// byte group
90ce3da70b43 Initial load duke parents: diff changeset	3450	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3451	movq mm5, mm6
90ce3da70b43 Initial load duke parents: diff changeset	3452	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3453	psllq mm5, ShiftBpp // Move mask in mm5 to cover 4th active
90ce3da70b43 Initial load duke parents: diff changeset	3454	// byte group
90ce3da70b43 Initial load duke parents: diff changeset	3455	// PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	3456	movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load duke parents: diff changeset	3457	dsub2lp:
90ce3da70b43 Initial load duke parents: diff changeset	3458	// Add 1st active group
90ce3da70b43 Initial load duke parents: diff changeset	3459	psrlq mm1, ShiftRem // Shift data for adding 1st bpp bytes
90ce3da70b43 Initial load duke parents: diff changeset	3460	// no need for mask; shift clears inactive
90ce3da70b43 Initial load duke parents: diff changeset	3461	// bytes
90ce3da70b43 Initial load duke parents: diff changeset	3462	movq mm0, [edi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3463	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3464	// Add 2nd active group
90ce3da70b43 Initial load duke parents: diff changeset	3465	movq mm1, mm0 // mov updated Raws to mm1
90ce3da70b43 Initial load duke parents: diff changeset	3466	psllq mm1, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	3467	pand mm1, mm7 // mask to use only 2nd active group
90ce3da70b43 Initial load duke parents: diff changeset	3468	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3469	// Add 3rd active group
90ce3da70b43 Initial load duke parents: diff changeset	3470	movq mm1, mm0 // mov updated Raws to mm1
90ce3da70b43 Initial load duke parents: diff changeset	3471	psllq mm1, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	3472	pand mm1, mm6 // mask to use only 3rd active group
90ce3da70b43 Initial load duke parents: diff changeset	3473	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3474	// Add 4th active group
90ce3da70b43 Initial load duke parents: diff changeset	3475	movq mm1, mm0 // mov updated Raws to mm1
90ce3da70b43 Initial load duke parents: diff changeset	3476	psllq mm1, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load duke parents: diff changeset	3477	pand mm1, mm5 // mask to use only 4th active group
90ce3da70b43 Initial load duke parents: diff changeset	3478	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3479	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3480	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3481	movq [edi+ebx-8], mm0 // Write updated Raws back to array
90ce3da70b43 Initial load duke parents: diff changeset	3482	movq mm1, mm0 // Prep for doing 1st add at top of loop
90ce3da70b43 Initial load duke parents: diff changeset	3483	jb dsub2lp
90ce3da70b43 Initial load duke parents: diff changeset	3484	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3485	}
90ce3da70b43 Initial load duke parents: diff changeset	3486	break;
90ce3da70b43 Initial load duke parents: diff changeset	3487	case 8:
90ce3da70b43 Initial load duke parents: diff changeset	3488	{
90ce3da70b43 Initial load duke parents: diff changeset	3489	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3490	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3491	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3492	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3493	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3494	mov ecx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3495	movq mm7, [edi+ebx-8] // PRIME the pump (load the first
90ce3da70b43 Initial load duke parents: diff changeset	3496	// Raw(x-bpp) data set
90ce3da70b43 Initial load duke parents: diff changeset	3497	and ecx, 0x0000003f // calc bytes over mult of 64
90ce3da70b43 Initial load duke parents: diff changeset	3498	dsub8lp:
90ce3da70b43 Initial load duke parents: diff changeset	3499	movq mm0, [edi+ebx] // Load Sub(x) for 1st 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3500	paddb mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3501	movq mm1, [edi+ebx+8] // Load Sub(x) for 2nd 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3502	movq [edi+ebx], mm0 // Write Raw(x) for 1st 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3503	// Now mm0 will be used as Raw(x-bpp) for
90ce3da70b43 Initial load duke parents: diff changeset	3504	// the 2nd group of 8 bytes. This will be
90ce3da70b43 Initial load duke parents: diff changeset	3505	// repeated for each group of 8 bytes with
90ce3da70b43 Initial load duke parents: diff changeset	3506	// the 8th group being used as the Raw(x-bpp)
90ce3da70b43 Initial load duke parents: diff changeset	3507	// for the 1st group of the next loop.
90ce3da70b43 Initial load duke parents: diff changeset	3508	paddb mm1, mm0
90ce3da70b43 Initial load duke parents: diff changeset	3509	movq mm2, [edi+ebx+16] // Load Sub(x) for 3rd 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3510	movq [edi+ebx+8], mm1 // Write Raw(x) for 2nd 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3511	paddb mm2, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3512	movq mm3, [edi+ebx+24] // Load Sub(x) for 4th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3513	movq [edi+ebx+16], mm2 // Write Raw(x) for 3rd 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3514	paddb mm3, mm2
90ce3da70b43 Initial load duke parents: diff changeset	3515	movq mm4, [edi+ebx+32] // Load Sub(x) for 5th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3516	movq [edi+ebx+24], mm3 // Write Raw(x) for 4th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3517	paddb mm4, mm3
90ce3da70b43 Initial load duke parents: diff changeset	3518	movq mm5, [edi+ebx+40] // Load Sub(x) for 6th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3519	movq [edi+ebx+32], mm4 // Write Raw(x) for 5th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3520	paddb mm5, mm4
90ce3da70b43 Initial load duke parents: diff changeset	3521	movq mm6, [edi+ebx+48] // Load Sub(x) for 7th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3522	movq [edi+ebx+40], mm5 // Write Raw(x) for 6th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3523	paddb mm6, mm5
90ce3da70b43 Initial load duke parents: diff changeset	3524	movq mm7, [edi+ebx+56] // Load Sub(x) for 8th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3525	movq [edi+ebx+48], mm6 // Write Raw(x) for 7th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3526	add ebx, 64
90ce3da70b43 Initial load duke parents: diff changeset	3527	paddb mm7, mm6
90ce3da70b43 Initial load duke parents: diff changeset	3528	cmp ebx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3529	movq [edi+ebx-8], mm7 // Write Raw(x) for 8th 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3530	jb dsub8lp
90ce3da70b43 Initial load duke parents: diff changeset	3531	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3532	jnb dsub8lt8
90ce3da70b43 Initial load duke parents: diff changeset	3533	dsub8lpA:
90ce3da70b43 Initial load duke parents: diff changeset	3534	movq mm0, [edi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3535	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3536	paddb mm0, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3537	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3538	movq [edi+ebx-8], mm0 // use -8 to offset early add to ebx
90ce3da70b43 Initial load duke parents: diff changeset	3539	movq mm7, mm0 // Move calculated Raw(x) data to mm1 to
90ce3da70b43 Initial load duke parents: diff changeset	3540	// be the new Raw(x-bpp) for the next loop
90ce3da70b43 Initial load duke parents: diff changeset	3541	jb dsub8lpA
90ce3da70b43 Initial load duke parents: diff changeset	3542	dsub8lt8:
90ce3da70b43 Initial load duke parents: diff changeset	3543	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3544	}
90ce3da70b43 Initial load duke parents: diff changeset	3545	break;
90ce3da70b43 Initial load duke parents: diff changeset	3546
90ce3da70b43 Initial load duke parents: diff changeset	3547	default: // bpp greater than 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3548	{
90ce3da70b43 Initial load duke parents: diff changeset	3549	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3550	mov ebx, diff
90ce3da70b43 Initial load duke parents: diff changeset	3551	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3552	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3553	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3554	dsubAlp:
90ce3da70b43 Initial load duke parents: diff changeset	3555	movq mm0, [edi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3556	movq mm1, [esi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3557	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3558	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3559	cmp ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3560	movq [edi+ebx-8], mm0 // mov does not affect flags; -8 to offset
90ce3da70b43 Initial load duke parents: diff changeset	3561	// add ebx
90ce3da70b43 Initial load duke parents: diff changeset	3562	jb dsubAlp
90ce3da70b43 Initial load duke parents: diff changeset	3563	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3564	}
90ce3da70b43 Initial load duke parents: diff changeset	3565	break;
90ce3da70b43 Initial load duke parents: diff changeset	3566
90ce3da70b43 Initial load duke parents: diff changeset	3567	} // end switch ( bpp )
90ce3da70b43 Initial load duke parents: diff changeset	3568
90ce3da70b43 Initial load duke parents: diff changeset	3569	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3570	mov ebx, MMXLength
90ce3da70b43 Initial load duke parents: diff changeset	3571	mov edi, row
90ce3da70b43 Initial load duke parents: diff changeset	3572	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3573	jnb dsubend
90ce3da70b43 Initial load duke parents: diff changeset	3574	mov esi, edi // lp = row
90ce3da70b43 Initial load duke parents: diff changeset	3575	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3576	add edi, bpp // rp = row + bpp
90ce3da70b43 Initial load duke parents: diff changeset	3577	dsublp2:
90ce3da70b43 Initial load duke parents: diff changeset	3578	mov al, [esi+ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3579	add [edi+ebx], al
90ce3da70b43 Initial load duke parents: diff changeset	3580	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3581	cmp ebx, FullLength
90ce3da70b43 Initial load duke parents: diff changeset	3582	jb dsublp2
90ce3da70b43 Initial load duke parents: diff changeset	3583	dsubend:
90ce3da70b43 Initial load duke parents: diff changeset	3584	emms // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load duke parents: diff changeset	3585	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3586	}
90ce3da70b43 Initial load duke parents: diff changeset	3587
90ce3da70b43 Initial load duke parents: diff changeset	3588	// Optimized code for PNG Up filter decoder
//
// Reverses the "Up" filter on one row, in place: every byte of row has the
// byte at the same index in prev_row added to it (byte add, wraps at 256).
//
//   row_info - only row_info->rowbytes is read (count of bytes to process).
//   row      - filtered bytes on entry; each is overwritten with its sum.
//   prev_row - bytes that get added to row; only read, never written.
//
// The work is split into four phases that share one running index (ebx):
//   1. duplp1  - byte-at-a-time until the address of row reaches an 8-byte
//                boundary (alignment is computed from row only, not prev_row).
//   2. duploop - 64 bytes per pass using all eight MMX registers.
//   3. duplpA  - 8 bytes per pass for what is left of the 64-byte remainder.
//   4. duplp2  - byte-at-a-time for the final 1..7 bytes.
// Each loop issues its cmp early and then a store before the jb; the stores
// (mov/movq) leave the flags alone, so the jb still sees the cmp result.
//
// NOTE(review): duplp1 and duploop both test their bound only after the
// first pass and nothing compares against len beforehand. A row shorter than
// the alignment gap, or with fewer than 64 bytes left after alignment, would
// therefore have bytes beyond rowbytes read and written. The non-PNG_1_0_X
// path of png_read_filter_row calls this only when rowbytes >=
// png_ptr->mmx_rowbytes_threshold, while the PNG_1_0_X path calls it whenever
// mmx_supported is nonzero; whether callers always pass a long enough row is
// not visible here - TODO confirm.
90ce3da70b43 Initial load duke parents: diff changeset	3589	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	3590	png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
90ce3da70b43 Initial load duke parents: diff changeset	3591	png_bytep prev_row)
90ce3da70b43 Initial load duke parents: diff changeset	3592	{
90ce3da70b43 Initial load duke parents: diff changeset	3593	png_uint_32 len;
90ce3da70b43 Initial load duke parents: diff changeset	3594	len = row_info->rowbytes; // # of bytes to filter
90ce3da70b43 Initial load duke parents: diff changeset	3595	_asm {
90ce3da70b43 Initial load duke parents: diff changeset	3596	mov edi, row // edi = base address of the row being rewritten
90ce3da70b43 Initial load duke parents: diff changeset	3597	// ecx = # of bytes from row up to the next 8-byte boundary
90ce3da70b43 Initial load duke parents: diff changeset	3598	mov ecx, edi
90ce3da70b43 Initial load duke parents: diff changeset	3599	xor ebx, ebx // ebx = byte index used for both rows, starts at 0
90ce3da70b43 Initial load duke parents: diff changeset	3600	add ecx, 0x7
90ce3da70b43 Initial load duke parents: diff changeset	3601	xor eax, eax // clear eax; al is the scratch byte in the scalar loop
90ce3da70b43 Initial load duke parents: diff changeset	3602	and ecx, 0xfffffff8 // ecx = address of row rounded up to a multiple of 8
90ce3da70b43 Initial load duke parents: diff changeset	3603	mov esi, prev_row // esi = base address of the previous row
90ce3da70b43 Initial load duke parents: diff changeset	3604	sub ecx, edi // ecx = rounded address - row, i.e. 0..7
90ce3da70b43 Initial load duke parents: diff changeset	3605	jz dupgo // row is already 8-byte aligned: skip the scalar prologue
90ce3da70b43 Initial load duke parents: diff changeset	3606	// fix alignment: add prev_row into row one byte at a time up to index ecx
90ce3da70b43 Initial load duke parents: diff changeset	3607	duplp1:
90ce3da70b43 Initial load duke parents: diff changeset	3608	mov al, [edi+ebx] // al = row[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3609	add al, [esi+ebx] // al += prev_row[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3610	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3611	cmp ebx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3612	mov [edi + ebx-1], al // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3613	jb duplp1 // repeat while ebx < ecx (flags come from the cmp above)
90ce3da70b43 Initial load duke parents: diff changeset	3614	dupgo:
90ce3da70b43 Initial load duke parents: diff changeset	3615	mov ecx, len
90ce3da70b43 Initial load duke parents: diff changeset	3616	mov edx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3617	sub edx, ebx // subtract alignment fix
90ce3da70b43 Initial load duke parents: diff changeset	3618	and edx, 0x0000003f // calc bytes over mult of 64
90ce3da70b43 Initial load duke parents: diff changeset	3619	sub ecx, edx // drop over bytes from length; ecx = index where duploop stops
90ce3da70b43 Initial load duke parents: diff changeset	3620	// Unrolled loop - use all MMX registers and interleave to reduce
90ce3da70b43 Initial load duke parents: diff changeset	3621	// number of branch instructions (loops) and reduce partial stalls
90ce3da70b43 Initial load duke parents: diff changeset	3622	duploop:
90ce3da70b43 Initial load duke parents: diff changeset	3623	movq mm1, [esi+ebx] // prev_row bytes 0-7 of this 64-byte chunk
90ce3da70b43 Initial load duke parents: diff changeset	3624	movq mm0, [edi+ebx] // row bytes 0-7
90ce3da70b43 Initial load duke parents: diff changeset	3625	movq mm3, [esi+ebx+8]
90ce3da70b43 Initial load duke parents: diff changeset	3626	paddb mm0, mm1 // per-byte add with wraparound: row + prev_row
90ce3da70b43 Initial load duke parents: diff changeset	3627	movq mm2, [edi+ebx+8]
90ce3da70b43 Initial load duke parents: diff changeset	3628	movq [edi+ebx], mm0 // store result for bytes 0-7
90ce3da70b43 Initial load duke parents: diff changeset	3629	paddb mm2, mm3
90ce3da70b43 Initial load duke parents: diff changeset	3630	movq mm5, [esi+ebx+16]
90ce3da70b43 Initial load duke parents: diff changeset	3631	movq [edi+ebx+8], mm2 // store result for bytes 8-15
90ce3da70b43 Initial load duke parents: diff changeset	3632	movq mm4, [edi+ebx+16]
90ce3da70b43 Initial load duke parents: diff changeset	3633	movq mm7, [esi+ebx+24]
90ce3da70b43 Initial load duke parents: diff changeset	3634	paddb mm4, mm5
90ce3da70b43 Initial load duke parents: diff changeset	3635	movq mm6, [edi+ebx+24]
90ce3da70b43 Initial load duke parents: diff changeset	3636	movq [edi+ebx+16], mm4 // store result for bytes 16-23
90ce3da70b43 Initial load duke parents: diff changeset	3637	paddb mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3638	movq mm1, [esi+ebx+32] // second half: mm0-mm7 are reused for bytes 32-63
90ce3da70b43 Initial load duke parents: diff changeset	3639	movq [edi+ebx+24], mm6 // store result for bytes 24-31
90ce3da70b43 Initial load duke parents: diff changeset	3640	movq mm0, [edi+ebx+32]
90ce3da70b43 Initial load duke parents: diff changeset	3641	movq mm3, [esi+ebx+40]
90ce3da70b43 Initial load duke parents: diff changeset	3642	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3643	movq mm2, [edi+ebx+40]
90ce3da70b43 Initial load duke parents: diff changeset	3644	movq [edi+ebx+32], mm0 // store result for bytes 32-39
90ce3da70b43 Initial load duke parents: diff changeset	3645	paddb mm2, mm3
90ce3da70b43 Initial load duke parents: diff changeset	3646	movq mm5, [esi+ebx+48]
90ce3da70b43 Initial load duke parents: diff changeset	3647	movq [edi+ebx+40], mm2 // store result for bytes 40-47
90ce3da70b43 Initial load duke parents: diff changeset	3648	movq mm4, [edi+ebx+48]
90ce3da70b43 Initial load duke parents: diff changeset	3649	movq mm7, [esi+ebx+56]
90ce3da70b43 Initial load duke parents: diff changeset	3650	paddb mm4, mm5
90ce3da70b43 Initial load duke parents: diff changeset	3651	movq mm6, [edi+ebx+56]
90ce3da70b43 Initial load duke parents: diff changeset	3652	movq [edi+ebx+48], mm4 // store result for bytes 48-55
90ce3da70b43 Initial load duke parents: diff changeset	3653	add ebx, 64 // advance the index before the last store of this pass
90ce3da70b43 Initial load duke parents: diff changeset	3654	paddb mm6, mm7
90ce3da70b43 Initial load duke parents: diff changeset	3655	cmp ebx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3656	movq [edi+ebx-8], mm6 // (+56)movq does not affect flags;
90ce3da70b43 Initial load duke parents: diff changeset	3657	// -8 to offset add ebx
90ce3da70b43 Initial load duke parents: diff changeset	3658	jb duploop // repeat while ebx < ecx (flags come from the cmp above)
90ce3da70b43 Initial load duke parents: diff changeset	3659
90ce3da70b43 Initial load duke parents: diff changeset	3660	cmp edx, 0 // Test for bytes over mult of 64
90ce3da70b43 Initial load duke parents: diff changeset	3661	jz dupend // nothing left over: done
90ce3da70b43 Initial load duke parents: diff changeset	3662
90ce3da70b43 Initial load duke parents: diff changeset	3663
90ce3da70b43 Initial load duke parents: diff changeset	3664	// 2 lines added by lcreeve at netins.net
90ce3da70b43 Initial load duke parents: diff changeset	3665	// (mail 11 Jul 98 in png-implement list)
90ce3da70b43 Initial load duke parents: diff changeset	3666	cmp edx, 8 // test for less than 8 bytes
90ce3da70b43 Initial load duke parents: diff changeset	3667	jb duplt8 // fewer than 8 left: go straight to the byte loop
90ce3da70b43 Initial load duke parents: diff changeset	3668
90ce3da70b43 Initial load duke parents: diff changeset	3669
90ce3da70b43 Initial load duke parents: diff changeset	3670	add ecx, edx // ecx = len again (undo the earlier subtraction)
90ce3da70b43 Initial load duke parents: diff changeset	3671	and edx, 0x00000007 // calc bytes over mult of 8
90ce3da70b43 Initial load duke parents: diff changeset	3672	sub ecx, edx // drop over bytes from length; ecx = index where duplpA stops
90ce3da70b43 Initial load duke parents: diff changeset	3673	jz duplt8 // taken only if the subtraction left ecx == 0
90ce3da70b43 Initial load duke parents: diff changeset	3674	// Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously
90ce3da70b43 Initial load duke parents: diff changeset	3675	duplpA:
90ce3da70b43 Initial load duke parents: diff changeset	3676	movq mm1, [esi+ebx] // 8 bytes of prev_row
90ce3da70b43 Initial load duke parents: diff changeset	3677	movq mm0, [edi+ebx] // 8 bytes of row
90ce3da70b43 Initial load duke parents: diff changeset	3678	add ebx, 8
90ce3da70b43 Initial load duke parents: diff changeset	3679	paddb mm0, mm1
90ce3da70b43 Initial load duke parents: diff changeset	3680	cmp ebx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3681	movq [edi+ebx-8], mm0 // movq does not affect flags; -8 to offset add ebx
90ce3da70b43 Initial load duke parents: diff changeset	3682	jb duplpA // repeat while ebx < ecx (flags come from the cmp above)
90ce3da70b43 Initial load duke parents: diff changeset	3683	cmp edx, 0 // Test for bytes over mult of 8
90ce3da70b43 Initial load duke parents: diff changeset	3684	jz dupend // no 1..7 byte tail: done
90ce3da70b43 Initial load duke parents: diff changeset	3685	duplt8:
90ce3da70b43 Initial load duke parents: diff changeset	3686	xor eax, eax
90ce3da70b43 Initial load duke parents: diff changeset	3687	add ecx, edx // add the leftover count back: ecx = len, end of the byte loop
90ce3da70b43 Initial load duke parents: diff changeset	3688	// Loop using x86 registers to update remaining bytes
90ce3da70b43 Initial load duke parents: diff changeset	3689	duplp2:
90ce3da70b43 Initial load duke parents: diff changeset	3690	mov al, [edi + ebx] // al = row[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3691	add al, [esi + ebx] // al += prev_row[ebx]
90ce3da70b43 Initial load duke parents: diff changeset	3692	inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3693	cmp ebx, ecx
90ce3da70b43 Initial load duke parents: diff changeset	3694	mov [edi + ebx-1], al // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load duke parents: diff changeset	3695	jb duplp2 // repeat while ebx < ecx (flags come from the cmp above)
90ce3da70b43 Initial load duke parents: diff changeset	3696	dupend:
90ce3da70b43 Initial load duke parents: diff changeset	3697	// Conversion of filtered row completed
90ce3da70b43 Initial load duke parents: diff changeset	3698	emms // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load duke parents: diff changeset	3699	} // end _asm block
90ce3da70b43 Initial load duke parents: diff changeset	3700	}
90ce3da70b43 Initial load duke parents: diff changeset	3701
90ce3da70b43 Initial load duke parents: diff changeset	3702
90ce3da70b43 Initial load duke parents: diff changeset	3703	// Optimized png_read_filter_row routines
90ce3da70b43 Initial load duke parents: diff changeset	3704	void /* PRIVATE */
90ce3da70b43 Initial load duke parents: diff changeset	3705	png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
90ce3da70b43 Initial load duke parents: diff changeset	3706	row, png_bytep prev_row, int filter)
90ce3da70b43 Initial load duke parents: diff changeset	3707	{
90ce3da70b43 Initial load duke parents: diff changeset	3708	#ifdef PNG_DEBUG
90ce3da70b43 Initial load duke parents: diff changeset	3709	char filnm[10];
90ce3da70b43 Initial load duke parents: diff changeset	3710	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3711
90ce3da70b43 Initial load duke parents: diff changeset	3712	if (mmx_supported == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	3713	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	3714	/* this should have happened in png_init_mmx_flags() already */
90ce3da70b43 Initial load duke parents: diff changeset	3715	png_warning(png_ptr, "asm_flags may not have been initialized");
90ce3da70b43 Initial load duke parents: diff changeset	3716	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3717	png_mmx_support();
90ce3da70b43 Initial load duke parents: diff changeset	3718	}
90ce3da70b43 Initial load duke parents: diff changeset	3719
90ce3da70b43 Initial load duke parents: diff changeset	3720	#ifdef PNG_DEBUG
90ce3da70b43 Initial load duke parents: diff changeset	3721	png_debug(1, "in png_read_filter_row\n");
90ce3da70b43 Initial load duke parents: diff changeset	3722	switch (filter)
90ce3da70b43 Initial load duke parents: diff changeset	3723	{
90ce3da70b43 Initial load duke parents: diff changeset	3724	case 0: sprintf(filnm, "none");
90ce3da70b43 Initial load duke parents: diff changeset	3725	break;
90ce3da70b43 Initial load duke parents: diff changeset	3726	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	3727	case 1: sprintf(filnm, "sub-%s",
90ce3da70b43 Initial load duke parents: diff changeset	3728	(png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB)? "MMX" : "x86");
90ce3da70b43 Initial load duke parents: diff changeset	3729	break;
90ce3da70b43 Initial load duke parents: diff changeset	3730	case 2: sprintf(filnm, "up-%s",
90ce3da70b43 Initial load duke parents: diff changeset	3731	(png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP)? "MMX" : "x86");
90ce3da70b43 Initial load duke parents: diff changeset	3732	break;
90ce3da70b43 Initial load duke parents: diff changeset	3733	case 3: sprintf(filnm, "avg-%s",
90ce3da70b43 Initial load duke parents: diff changeset	3734	(png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG)? "MMX" : "x86");
90ce3da70b43 Initial load duke parents: diff changeset	3735	break;
90ce3da70b43 Initial load duke parents: diff changeset	3736	case 4: sprintf(filnm, "Paeth-%s",
90ce3da70b43 Initial load duke parents: diff changeset	3737	(png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH)? "MMX":"x86");
90ce3da70b43 Initial load duke parents: diff changeset	3738	break;
90ce3da70b43 Initial load duke parents: diff changeset	3739	#else
90ce3da70b43 Initial load duke parents: diff changeset	3740	case 1: sprintf(filnm, "sub");
90ce3da70b43 Initial load duke parents: diff changeset	3741	break;
90ce3da70b43 Initial load duke parents: diff changeset	3742	case 2: sprintf(filnm, "up");
90ce3da70b43 Initial load duke parents: diff changeset	3743	break;
90ce3da70b43 Initial load duke parents: diff changeset	3744	case 3: sprintf(filnm, "avg");
90ce3da70b43 Initial load duke parents: diff changeset	3745	break;
90ce3da70b43 Initial load duke parents: diff changeset	3746	case 4: sprintf(filnm, "Paeth");
90ce3da70b43 Initial load duke parents: diff changeset	3747	break;
90ce3da70b43 Initial load duke parents: diff changeset	3748	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3749	default: sprintf(filnm, "unknw");
90ce3da70b43 Initial load duke parents: diff changeset	3750	break;
90ce3da70b43 Initial load duke parents: diff changeset	3751	}
90ce3da70b43 Initial load duke parents: diff changeset	3752	png_debug2(0,"row=%5d, %s, ", png_ptr->row_number, filnm);
90ce3da70b43 Initial load duke parents: diff changeset	3753	png_debug2(0, "pd=%2d, b=%d, ", (int)row_info->pixel_depth,
90ce3da70b43 Initial load duke parents: diff changeset	3754	(int)((row_info->pixel_depth + 7) >> 3));
90ce3da70b43 Initial load duke parents: diff changeset	3755	png_debug1(0,"len=%8d, ", row_info->rowbytes);
90ce3da70b43 Initial load duke parents: diff changeset	3756	#endif /* PNG_DEBUG */
90ce3da70b43 Initial load duke parents: diff changeset	3757
90ce3da70b43 Initial load duke parents: diff changeset	3758	switch (filter)
90ce3da70b43 Initial load duke parents: diff changeset	3759	{
90ce3da70b43 Initial load duke parents: diff changeset	3760	case PNG_FILTER_VALUE_NONE:
90ce3da70b43 Initial load duke parents: diff changeset	3761	break;
90ce3da70b43 Initial load duke parents: diff changeset	3762
90ce3da70b43 Initial load duke parents: diff changeset	3763	case PNG_FILTER_VALUE_SUB:
90ce3da70b43 Initial load duke parents: diff changeset	3764	{
90ce3da70b43 Initial load duke parents: diff changeset	3765	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	3766	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB) &&
90ce3da70b43 Initial load duke parents: diff changeset	3767	(row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load duke parents: diff changeset	3768	(row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load duke parents: diff changeset	3769	#else
90ce3da70b43 Initial load duke parents: diff changeset	3770	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	3771	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3772	{
90ce3da70b43 Initial load duke parents: diff changeset	3773	png_read_filter_row_mmx_sub(row_info, row);
90ce3da70b43 Initial load duke parents: diff changeset	3774	}
90ce3da70b43 Initial load duke parents: diff changeset	3775	else
90ce3da70b43 Initial load duke parents: diff changeset	3776	{
90ce3da70b43 Initial load duke parents: diff changeset	3777	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	3778	png_uint_32 istop = row_info->rowbytes;
90ce3da70b43 Initial load duke parents: diff changeset	3779	png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load duke parents: diff changeset	3780	png_bytep rp = row + bpp;
90ce3da70b43 Initial load duke parents: diff changeset	3781	png_bytep lp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3782
90ce3da70b43 Initial load duke parents: diff changeset	3783	for (i = bpp; i < istop; i++)
90ce3da70b43 Initial load duke parents: diff changeset	3784	{
90ce3da70b43 Initial load duke parents: diff changeset	3785	rp = (png_byte)(((int)(rp) + (int)(*lp++)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3786	rp++;
90ce3da70b43 Initial load duke parents: diff changeset	3787	}
90ce3da70b43 Initial load duke parents: diff changeset	3788	}
90ce3da70b43 Initial load duke parents: diff changeset	3789	break;
90ce3da70b43 Initial load duke parents: diff changeset	3790	}
90ce3da70b43 Initial load duke parents: diff changeset	3791
90ce3da70b43 Initial load duke parents: diff changeset	3792	case PNG_FILTER_VALUE_UP:
90ce3da70b43 Initial load duke parents: diff changeset	3793	{
90ce3da70b43 Initial load duke parents: diff changeset	3794	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	3795	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP) &&
90ce3da70b43 Initial load duke parents: diff changeset	3796	(row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load duke parents: diff changeset	3797	(row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load duke parents: diff changeset	3798	#else
90ce3da70b43 Initial load duke parents: diff changeset	3799	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	3800	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3801	{
90ce3da70b43 Initial load duke parents: diff changeset	3802	png_read_filter_row_mmx_up(row_info, row, prev_row);
90ce3da70b43 Initial load duke parents: diff changeset	3803	}
90ce3da70b43 Initial load duke parents: diff changeset	3804	else
90ce3da70b43 Initial load duke parents: diff changeset	3805	{
90ce3da70b43 Initial load duke parents: diff changeset	3806	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	3807	png_uint_32 istop = row_info->rowbytes;
90ce3da70b43 Initial load duke parents: diff changeset	3808	png_bytep rp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3809	png_bytep pp = prev_row;
90ce3da70b43 Initial load duke parents: diff changeset	3810
90ce3da70b43 Initial load duke parents: diff changeset	3811	for (i = 0; i < istop; ++i)
90ce3da70b43 Initial load duke parents: diff changeset	3812	{
90ce3da70b43 Initial load duke parents: diff changeset	3813	rp = (png_byte)(((int)(rp) + (int)(*pp++)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3814	rp++;
90ce3da70b43 Initial load duke parents: diff changeset	3815	}
90ce3da70b43 Initial load duke parents: diff changeset	3816	}
90ce3da70b43 Initial load duke parents: diff changeset	3817	break;
90ce3da70b43 Initial load duke parents: diff changeset	3818	}
90ce3da70b43 Initial load duke parents: diff changeset	3819
90ce3da70b43 Initial load duke parents: diff changeset	3820	case PNG_FILTER_VALUE_AVG:
90ce3da70b43 Initial load duke parents: diff changeset	3821	{
90ce3da70b43 Initial load duke parents: diff changeset	3822	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	3823	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG) &&
90ce3da70b43 Initial load duke parents: diff changeset	3824	(row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load duke parents: diff changeset	3825	(row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load duke parents: diff changeset	3826	#else
90ce3da70b43 Initial load duke parents: diff changeset	3827	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	3828	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3829	{
90ce3da70b43 Initial load duke parents: diff changeset	3830	png_read_filter_row_mmx_avg(row_info, row, prev_row);
90ce3da70b43 Initial load duke parents: diff changeset	3831	}
90ce3da70b43 Initial load duke parents: diff changeset	3832	else
90ce3da70b43 Initial load duke parents: diff changeset	3833	{
90ce3da70b43 Initial load duke parents: diff changeset	3834	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	3835	png_bytep rp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3836	png_bytep pp = prev_row;
90ce3da70b43 Initial load duke parents: diff changeset	3837	png_bytep lp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3838	png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load duke parents: diff changeset	3839	png_uint_32 istop = row_info->rowbytes - bpp;
90ce3da70b43 Initial load duke parents: diff changeset	3840
90ce3da70b43 Initial load duke parents: diff changeset	3841	for (i = 0; i < bpp; i++)
90ce3da70b43 Initial load duke parents: diff changeset	3842	{
90ce3da70b43 Initial load duke parents: diff changeset	3843	rp = (png_byte)(((int)(rp) +
90ce3da70b43 Initial load duke parents: diff changeset	3844	((int)(*pp++) >> 1)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3845	rp++;
90ce3da70b43 Initial load duke parents: diff changeset	3846	}
90ce3da70b43 Initial load duke parents: diff changeset	3847
90ce3da70b43 Initial load duke parents: diff changeset	3848	for (i = 0; i < istop; i++)
90ce3da70b43 Initial load duke parents: diff changeset	3849	{
90ce3da70b43 Initial load duke parents: diff changeset	3850	rp = (png_byte)(((int)(rp) +
90ce3da70b43 Initial load duke parents: diff changeset	3851	((int)(pp++ + lp++) >> 1)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3852	rp++;
90ce3da70b43 Initial load duke parents: diff changeset	3853	}
90ce3da70b43 Initial load duke parents: diff changeset	3854	}
90ce3da70b43 Initial load duke parents: diff changeset	3855	break;
90ce3da70b43 Initial load duke parents: diff changeset	3856	}
90ce3da70b43 Initial load duke parents: diff changeset	3857
90ce3da70b43 Initial load duke parents: diff changeset	3858	case PNG_FILTER_VALUE_PAETH:
90ce3da70b43 Initial load duke parents: diff changeset	3859	{
90ce3da70b43 Initial load duke parents: diff changeset	3860	#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load duke parents: diff changeset	3861	if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH) &&
90ce3da70b43 Initial load duke parents: diff changeset	3862	(row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load duke parents: diff changeset	3863	(row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load duke parents: diff changeset	3864	#else
90ce3da70b43 Initial load duke parents: diff changeset	3865	if (mmx_supported)
90ce3da70b43 Initial load duke parents: diff changeset	3866	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3867	{
90ce3da70b43 Initial load duke parents: diff changeset	3868	png_read_filter_row_mmx_paeth(row_info, row, prev_row);
90ce3da70b43 Initial load duke parents: diff changeset	3869	}
90ce3da70b43 Initial load duke parents: diff changeset	3870	else
90ce3da70b43 Initial load duke parents: diff changeset	3871	{
90ce3da70b43 Initial load duke parents: diff changeset	3872	png_uint_32 i;
90ce3da70b43 Initial load duke parents: diff changeset	3873	png_bytep rp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3874	png_bytep pp = prev_row;
90ce3da70b43 Initial load duke parents: diff changeset	3875	png_bytep lp = row;
90ce3da70b43 Initial load duke parents: diff changeset	3876	png_bytep cp = prev_row;
90ce3da70b43 Initial load duke parents: diff changeset	3877	png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load duke parents: diff changeset	3878	png_uint_32 istop=row_info->rowbytes - bpp;
90ce3da70b43 Initial load duke parents: diff changeset	3879
90ce3da70b43 Initial load duke parents: diff changeset	3880	for (i = 0; i < bpp; i++)
90ce3da70b43 Initial load duke parents: diff changeset	3881	{
90ce3da70b43 Initial load duke parents: diff changeset	3882	rp = (png_byte)(((int)(rp) + (int)(*pp++)) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3883	rp++;
90ce3da70b43 Initial load duke parents: diff changeset	3884	}
90ce3da70b43 Initial load duke parents: diff changeset	3885
90ce3da70b43 Initial load duke parents: diff changeset	3886	for (i = 0; i < istop; i++) // use leftover rp,pp
90ce3da70b43 Initial load duke parents: diff changeset	3887	{
90ce3da70b43 Initial load duke parents: diff changeset	3888	int a, b, c, pa, pb, pc, p;
90ce3da70b43 Initial load duke parents: diff changeset	3889
90ce3da70b43 Initial load duke parents: diff changeset	3890	a = *lp++;
90ce3da70b43 Initial load duke parents: diff changeset	3891	b = *pp++;
90ce3da70b43 Initial load duke parents: diff changeset	3892	c = *cp++;
90ce3da70b43 Initial load duke parents: diff changeset	3893
90ce3da70b43 Initial load duke parents: diff changeset	3894	p = b - c;
90ce3da70b43 Initial load duke parents: diff changeset	3895	pc = a - c;
90ce3da70b43 Initial load duke parents: diff changeset	3896
90ce3da70b43 Initial load duke parents: diff changeset	3897	#ifdef PNG_USE_ABS
90ce3da70b43 Initial load duke parents: diff changeset	3898	pa = abs(p);
90ce3da70b43 Initial load duke parents: diff changeset	3899	pb = abs(pc);
90ce3da70b43 Initial load duke parents: diff changeset	3900	pc = abs(p + pc);
90ce3da70b43 Initial load duke parents: diff changeset	3901	#else
90ce3da70b43 Initial load duke parents: diff changeset	3902	pa = p < 0 ? -p : p;
90ce3da70b43 Initial load duke parents: diff changeset	3903	pb = pc < 0 ? -pc : pc;
90ce3da70b43 Initial load duke parents: diff changeset	3904	pc = (p + pc) < 0 ? -(p + pc) : p + pc;
90ce3da70b43 Initial load duke parents: diff changeset	3905	#endif
90ce3da70b43 Initial load duke parents: diff changeset	3906
90ce3da70b43 Initial load duke parents: diff changeset	3907	/*
90ce3da70b43 Initial load duke parents: diff changeset	3908	if (pa <= pb && pa <= pc)
90ce3da70b43 Initial load duke parents: diff changeset	3909	p = a;
90ce3da70b43 Initial load duke parents: diff changeset	3910	else if (pb <= pc)
90ce3da70b43 Initial load duke parents: diff changeset	3911	p = b;
90ce3da70b43 Initial load duke parents: diff changeset	3912	else
90ce3da70b43 Initial load duke parents: diff changeset	3913	p = c;
90ce3da70b43 Initial load duke parents: diff changeset	3914	*/
90ce3da70b43 Initial load duke parents: diff changeset	3915
90ce3da70b43 Initial load duke parents: diff changeset	3916	p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
90ce3da70b43 Initial load duke parents: diff changeset	3917
90ce3da70b43 Initial load duke parents: diff changeset	3918	rp = (png_byte)(((int)(rp) + p) & 0xff);
90ce3da70b43 Initial load duke parents: diff changeset	3919	rp++;
90ce3da70b43 Initial load duke parents: diff changeset	3920	}
90ce3da70b43 Initial load duke parents: diff changeset	3921	}
90ce3da70b43 Initial load duke parents: diff changeset	3922	break;
90ce3da70b43 Initial load duke parents: diff changeset	3923	}
90ce3da70b43 Initial load duke parents: diff changeset	3924
90ce3da70b43 Initial load duke parents: diff changeset	3925	default:
90ce3da70b43 Initial load duke parents: diff changeset	3926	png_warning(png_ptr, "Ignoring bad row filter type");
90ce3da70b43 Initial load duke parents: diff changeset	3927	*row=0;
90ce3da70b43 Initial load duke parents: diff changeset	3928	break;
90ce3da70b43 Initial load duke parents: diff changeset	3929	}
90ce3da70b43 Initial load duke parents: diff changeset	3930	}
90ce3da70b43 Initial load duke parents: diff changeset	3931
90ce3da70b43 Initial load duke parents: diff changeset	3932	#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGVCRD */

author	duke
	Sat, 01 Dec 2007 00:00:00 +0000
changeset 2	90ce3da70b43
child 5506	202f599c92aa
permissions	-rw-r--r--