jdk/src/share/native/sun/awt/libpng/pngvcrd.c
author duke
Sat, 01 Dec 2007 00:00:00 +0000
changeset 2 90ce3da70b43
child 5506 202f599c92aa
permissions -rw-r--r--
Initial load
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
     2
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * published by the Free Software Foundation.  Sun designates this
90ce3da70b43 Initial load
duke
parents:
diff changeset
     7
 * particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     9
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    21
 * CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    22
 * have any questions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    23
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 * This file is available under and governed by the GNU General Public
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * License version 2 only, as published by the Free Software Foundation.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * However, the following notice accompanied the original version of this
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 * file and, per its terms, should not be removed:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * For Intel x86 CPU and Microsoft Visual C++ compiler
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * Last changed in libpng 1.2.6 - August 15, 2004
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * For conditions of distribution and use, see copyright notice in png.h
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 * Copyright (c) 1998-2004 Glenn Randers-Pehrson
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * Copyright (c) 1998, Intel Corporation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 * Contributed by Nirav Chhatrapati, Intel Corporation, 1998
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
 * Interface to libpng contributed by Gilles Vollant, 1999
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 * a sign error in the post-MMX cleanup code for each pixel_depth resulted
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * in bad pixels at the beginning of some rows of some images, and also
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * (due to out-of-range memory reads and writes) caused heap corruption
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * when compiled with MSVC 6.0.  The error was fixed in version 1.0.4e.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * [runtime MMX configuration, GRR 20010102]
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
#define PNG_INTERNAL
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
#include "png.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
/* Cached result of MMX detection.  The initial value 2 means "not probed
 * yet": png_combine_row() calls png_mmx_support() when it still sees 2.
 * png_mmx_support() overwrites it with 1 (MMX supported) or 0 (not
 * supported). */
static int mmx_supported=2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
int PNGAPI
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
png_mmx_support(void)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
  int mmx_supported_local = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
  _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
    push ebx          //CPUID will trash these
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
    push ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
    push edx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
    pushfd            //Save Eflag to stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
    pop eax           //Get Eflag from stack into eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
    mov ecx, eax      //Make another copy of Eflag in ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
    xor eax, 0x200000 //Toggle ID bit in Eflag [i.e. bit(21)]
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
    push eax          //Save modified Eflag back to stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
    popfd             //Restored modified value back to Eflag reg
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
    pushfd            //Save Eflag to stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
    pop eax           //Get Eflag from stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
    push ecx          // save original Eflag to stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
    popfd             // restore original Eflag
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    xor eax, ecx      //Compare the new Eflag with the original Eflag
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    jz NOT_SUPPORTED  //If the same, CPUID instruction is not supported,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
                      //skip following instructions and jump to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
                      //NOT_SUPPORTED label
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
    xor eax, eax      //Set eax to zero
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
    _asm _emit 0x0f   //CPUID instruction  (two bytes opcode)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
    _asm _emit 0xa2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
    cmp eax, 1        //make sure eax return non-zero value
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    jl NOT_SUPPORTED  //If eax is zero, mmx not supported
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
    xor eax, eax      //set eax to zero
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    inc eax           //Now increment eax to 1.  This instruction is
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
                      //faster than the instruction "mov eax, 1"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
    _asm _emit 0x0f   //CPUID instruction
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
    _asm _emit 0xa2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    and edx, 0x00800000  //mask out all bits but mmx bit(24)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
    cmp edx, 0        // 0 = mmx not supported
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
    jz  NOT_SUPPORTED // non-zero = Yes, mmx IS supported
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
    mov  mmx_supported_local, 1  //set return value to 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
NOT_SUPPORTED:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
    mov  eax, mmx_supported_local  //move return value to eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
    pop edx          //CPUID trashed these
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
    pop ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
    pop ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
  //mmx_supported_local=0; // test code for force don't support MMX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
  //printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
  mmx_supported = mmx_supported_local;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
  return mmx_supported_local;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
/* Combines the row recently read in with the previous row.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
   This routine takes care of alpha and transparency if requested.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
   This routine also handles the two methods of progressive display
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
   of interlaced images, depending on the mask value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
   The mask value describes which pixels are to be combined with
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
   the row.  The pattern always repeats every 8 pixels, so just 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
   bits are needed.  A one indicates the pixel is to be combined; a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
   zero indicates the pixel is to be skipped.  This is in addition
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
   to any alpha or transparency value associated with the pixel.  If
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
   you want all pixels to be combined, pass 0xff (255) in mask.  */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
/* Use this routine for x86 platform - uses faster MMX routine if machine
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
   supports MMX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
#ifdef PNG_USE_LOCAL_ARRAYS
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
   png_debug(1,"in png_combine_row_asm\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
   if (mmx_supported == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
       /* this should have happened in png_init_mmx_flags() already */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
       png_warning(png_ptr, "asm_flags may not have been initialized");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
       png_mmx_support();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
   if (mask == 0xff)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
      png_memcpy(row, png_ptr->row_buf + 1,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
       (png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
       png_ptr->width));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
   /* GRR:  add "else if (mask == 0)" case?
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
    *       or does png_combine_row() not even get called in that case? */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
   else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
      switch (png_ptr->row_info.pixel_depth)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
         case 1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
            png_bytep sp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
            png_bytep dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
            int s_inc, s_start, s_end;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
            int m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
            int shift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
            sp = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
            dp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
            m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
            if (png_ptr->transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
                s_start = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
                s_end = 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
                s_inc = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
            else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
                s_start = 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
                s_end = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
                s_inc = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
            shift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
            for (i = 0; i < png_ptr->width; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
               if (m & mask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
                  int value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
                  value = (*sp >> shift) & 0x1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
                  *dp |= (png_byte)(value << shift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
               if (shift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
                  shift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
                  sp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
                  dp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
                  shift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
               if (m == 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
                  m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
                  m >>= 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
         case 2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
            png_bytep sp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
            png_bytep dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
            int s_start, s_end, s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
            int m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
            int shift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
            int value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
            sp = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
            dp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
            m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
            if (png_ptr->transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
               s_start = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
               s_end = 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
               s_inc = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
            else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
               s_start = 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
               s_end = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
               s_inc = -2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
            shift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
            for (i = 0; i < png_ptr->width; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
               if (m & mask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
                  value = (*sp >> shift) & 0x3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
                  *dp |= (png_byte)(value << shift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
               if (shift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
                  shift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
                  sp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
                  dp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
                  shift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
               if (m == 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
                  m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
                  m >>= 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
         case 4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
            png_bytep sp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
            png_bytep dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
            int s_start, s_end, s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
            int m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
            int shift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
            int value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
            sp = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
            dp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
            m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
            if (png_ptr->transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
               s_start = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
               s_end = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
               s_inc = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
            else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
               s_start = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
               s_end = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
               s_inc = -4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
            shift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
            for (i = 0; i < png_ptr->width; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
               if (m & mask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
                  value = (*sp >> shift) & 0xf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
                  *dp |= (png_byte)(value << shift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
               if (shift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
                  shift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
                  sp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
                  dp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
                  shift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
               if (m == 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
                  m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
                  m >>= 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
         case 8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
            png_bytep srcptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
            png_bytep dstptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
            png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
            int m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
            int diff, unmask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
            __int64 mask0=0x0102040810204080;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
                /* && mmx_supported */ )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
            if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
               srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
               dstptr = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
               m = 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
               unmask = ~mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
               len  = png_ptr->width &~7;  //reduce to multiple of 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
               diff = png_ptr->width & 7;  //amount lost
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
               _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
                  movd       mm7, unmask   //load bit pattern
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
                  psubb      mm6,mm6       //zero mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
                  punpcklbw  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
                  punpcklwd  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
                  punpckldq  mm7,mm7       //fill register with 8 masks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
                  movq       mm0,mask0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
                  pand       mm0,mm7       //nonzero if keep byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
                  pcmpeqb    mm0,mm6       //zeros->1s, v versa
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
                  mov        ecx,len       //load length of line (pixels)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
                  mov        esi,srcptr    //load source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
                  mov        ebx,dstptr    //load dest
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
                  cmp        ecx,0         //lcr
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
                  je         mainloop8end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
mainloop8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
                  movq       mm4,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
                  pand       mm4,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
                  movq       mm6,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
                  pandn      mm6,[ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
                  por        mm4,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
                  movq       [ebx],mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
                  add        esi,8         //inc by 8 bytes processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
                  add        ebx,8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
                  sub        ecx,8         //dec by 8 pixels processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
                  ja         mainloop8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
mainloop8end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
                  mov        ecx,diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
                  jz         end8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
                  mov        edx,mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
                  sal        edx,24        //make low byte the high byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
secondloop8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
                  sal        edx,1         //move high bit to CF
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
                  jnc        skip8         //if CF = 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
                  mov        al,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
                  mov        [ebx],al
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
skip8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
                  inc        esi
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
                  inc        ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
                  dec        ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
                  jnz        secondloop8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
end8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
                  emms
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
            else /* mmx not supported - use modified C routine */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
               register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
               png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
               png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
               register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
                  pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
               final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
               incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
               for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
                  png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
                  srcptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
                  dstptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
            } /* end of else */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
         }       // end 8 bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
         case 16:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
            png_bytep srcptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
            png_bytep dstptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
            png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
            int unmask, diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
            __int64 mask1=0x0101020204040808,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
                    mask0=0x1010202040408080;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                /* && mmx_supported */ )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
            if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
               srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
               dstptr = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
               unmask = ~mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
               len     = (png_ptr->width)&~7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
               diff = (png_ptr->width)&7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
               _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
                  movd       mm7, unmask       //load bit pattern
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
                  psubb      mm6,mm6           //zero mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
                  punpcklbw  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
                  punpcklwd  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
                  punpckldq  mm7,mm7           //fill register with 8 masks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
                  movq       mm0,mask0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
                  movq       mm1,mask1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
                  pand       mm0,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                  pand       mm1,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
                  pcmpeqb    mm0,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
                  pcmpeqb    mm1,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
                  mov        ecx,len           //load length of line
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
                  mov        esi,srcptr        //load source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                  mov        ebx,dstptr        //load dest
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
                  cmp        ecx,0             //lcr
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
                  jz         mainloop16end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
mainloop16:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
                  movq       mm4,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
                  pand       mm4,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
                  movq       mm6,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
                  movq       mm7,[ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
                  pandn      mm6,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
                  por        mm4,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
                  movq       [ebx],mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
                  movq       mm5,[esi+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
                  pand       mm5,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
                  movq       mm7,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
                  movq       mm6,[ebx+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
                  pandn      mm7,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
                  por        mm5,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
                  movq       [ebx+8],mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
                  add        esi,16            //inc by 16 bytes processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
                  add        ebx,16
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
                  sub        ecx,8             //dec by 8 pixels processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
                  ja         mainloop16
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
mainloop16end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
                  mov        ecx,diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
                  jz         end16
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
                  mov        edx,mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
                  sal        edx,24            //make low byte the high byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
secondloop16:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
                  sal        edx,1             //move high bit to CF
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
                  jnc        skip16            //if CF = 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
                  mov        ax,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
                  mov        [ebx],ax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
skip16:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
                  add        esi,2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
                  add        ebx,2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
                  dec        ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
                  jnz        secondloop16
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
end16:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
                  emms
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
            else /* mmx not supported - use modified C routine */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
               register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
               png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
               png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
               register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
                  pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
               final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
               incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
               for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
                  png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
                  srcptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
                  dstptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
            } /* end of else */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
         }       // end 16 bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
         case 24:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
            png_bytep srcptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
            png_bytep dstptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
            png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
            int unmask, diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
            __int64 mask2=0x0101010202020404,  //24bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
                    mask1=0x0408080810101020,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
                    mask0=0x2020404040808080;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
            srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
            dstptr = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
            unmask = ~mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
            len     = (png_ptr->width)&~7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
            diff = (png_ptr->width)&7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
                /* && mmx_supported */ )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
            if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
               _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
                  movd       mm7, unmask       //load bit pattern
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
                  psubb      mm6,mm6           //zero mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
                  punpcklbw  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
                  punpcklwd  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
                  punpckldq  mm7,mm7           //fill register with 8 masks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
                  movq       mm0,mask0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
                  movq       mm1,mask1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
                  movq       mm2,mask2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
                  pand       mm0,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
                  pand       mm1,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
                  pand       mm2,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
                  pcmpeqb    mm0,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
                  pcmpeqb    mm1,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
                  pcmpeqb    mm2,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
                  mov        ecx,len           //load length of line
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
                  mov        esi,srcptr        //load source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
                  mov        ebx,dstptr        //load dest
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
                  jz         mainloop24end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
mainloop24:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
                  movq       mm4,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
                  pand       mm4,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
                  movq       mm6,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
                  movq       mm7,[ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
                  pandn      mm6,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
                  por        mm4,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
                  movq       [ebx],mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
                  movq       mm5,[esi+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
                  pand       mm5,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
                  movq       mm7,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
                  movq       mm6,[ebx+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
                  pandn      mm7,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
                  por        mm5,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
                  movq       [ebx+8],mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
                  movq       mm6,[esi+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
                  pand       mm6,mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
                  movq       mm4,mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
                  movq       mm7,[ebx+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
                  pandn      mm4,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
                  por        mm6,mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
                  movq       [ebx+16],mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
                  add        esi,24            //inc by 24 bytes processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
                  add        ebx,24
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
                  sub        ecx,8             //dec by 8 pixels processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
                  ja         mainloop24
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
mainloop24end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
                  mov        ecx,diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
                  jz         end24
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
                  mov        edx,mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
                  sal        edx,24            //make low byte the high byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
secondloop24:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
                  sal        edx,1             //move high bit to CF
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
                  jnc        skip24            //if CF = 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
                  mov        ax,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
                  mov        [ebx],ax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
                  xor        eax,eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
                  mov        al,[esi+2]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
                  mov        [ebx+2],al
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
skip24:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
                  add        esi,3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
                  add        ebx,3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
                  dec        ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
                  jnz        secondloop24
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
end24:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
                  emms
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
            else /* mmx not supported - use modified C routine */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
               register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
               png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
               png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
               register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
                  pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
               final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
               incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
               for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
                  png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
                  srcptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
                  dstptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
            } /* end of else */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
         }       // end 24 bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
         case 32:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
            png_bytep srcptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
            png_bytep dstptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
            png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
            int unmask, diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
            __int64 mask3=0x0101010102020202,  //32bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
                    mask2=0x0404040408080808,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
                    mask1=0x1010101020202020,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
                    mask0=0x4040404080808080;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
            srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
            dstptr = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
            unmask = ~mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
            len     = (png_ptr->width)&~7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
            diff = (png_ptr->width)&7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
                /* && mmx_supported */ )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
            if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
               _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
                  movd       mm7, unmask       //load bit pattern
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
                  psubb      mm6,mm6           //zero mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
                  punpcklbw  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
                  punpcklwd  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
                  punpckldq  mm7,mm7           //fill register with 8 masks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
                  movq       mm0,mask0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
                  movq       mm1,mask1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
                  movq       mm2,mask2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
                  movq       mm3,mask3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
                  pand       mm0,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
                  pand       mm1,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
                  pand       mm2,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
                  pand       mm3,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
                  pcmpeqb    mm0,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
                  pcmpeqb    mm1,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
                  pcmpeqb    mm2,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
                  pcmpeqb    mm3,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
                  mov        ecx,len           //load length of line
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
                  mov        esi,srcptr        //load source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
                  mov        ebx,dstptr        //load dest
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
                  cmp        ecx,0             //lcr
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
                  jz         mainloop32end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
mainloop32:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
                  movq       mm4,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
                  pand       mm4,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
                  movq       mm6,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
                  movq       mm7,[ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
                  pandn      mm6,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
                  por        mm4,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
                  movq       [ebx],mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
                  movq       mm5,[esi+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
                  pand       mm5,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
                  movq       mm7,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
                  movq       mm6,[ebx+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
                  pandn      mm7,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
                  por        mm5,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
                  movq       [ebx+8],mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
                  movq       mm6,[esi+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
                  pand       mm6,mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
                  movq       mm4,mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
                  movq       mm7,[ebx+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
                  pandn      mm4,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
                  por        mm6,mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
                  movq       [ebx+16],mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
                  movq       mm7,[esi+24]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
                  pand       mm7,mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
                  movq       mm5,mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
                  movq       mm4,[ebx+24]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
                  pandn      mm5,mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
                  por        mm7,mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
                  movq       [ebx+24],mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
                  add        esi,32            //inc by 32 bytes processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
                  add        ebx,32
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
                  sub        ecx,8             //dec by 8 pixels processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
                  ja         mainloop32
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
mainloop32end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
                  mov        ecx,diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
                  jz         end32
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
                  mov        edx,mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
                  sal        edx,24            //make low byte the high byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
secondloop32:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
                  sal        edx,1             //move high bit to CF
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
                  jnc        skip32            //if CF = 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
                  mov        eax,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
                  mov        [ebx],eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
skip32:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
                  add        esi,4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
                  add        ebx,4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
                  dec        ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
                  jnz        secondloop32
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
end32:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
                  emms
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
            else /* mmx _not supported - Use modified C routine */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
               register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
               png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
               png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
               register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
                  pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
               final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
               incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
               for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
                  png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
                  srcptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
                  dstptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
            } /* end of else */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
         }       // end 32 bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
         case 48:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
            png_bytep srcptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
            png_bytep dstptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
            png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
            int unmask, diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
            __int64 mask5=0x0101010101010202,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
                    mask4=0x0202020204040404,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
                    mask3=0x0404080808080808,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
                    mask2=0x1010101010102020,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
                    mask1=0x2020202040404040,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
                    mask0=0x4040808080808080;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
                /* && mmx_supported */ )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
            if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
               srcptr = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
               dstptr = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
               unmask = ~mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
               len     = (png_ptr->width)&~7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
               diff = (png_ptr->width)&7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
               _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
                  movd       mm7, unmask       //load bit pattern
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
                  psubb      mm6,mm6           //zero mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
                  punpcklbw  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
                  punpcklwd  mm7,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
                  punpckldq  mm7,mm7           //fill register with 8 masks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
                  movq       mm0,mask0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
                  movq       mm1,mask1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
                  movq       mm2,mask2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
                  movq       mm3,mask3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
                  movq       mm4,mask4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
                  movq       mm5,mask5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
                  pand       mm0,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
                  pand       mm1,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
                  pand       mm2,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
                  pand       mm3,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
                  pand       mm4,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
                  pand       mm5,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
                  pcmpeqb    mm0,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
                  pcmpeqb    mm1,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
                  pcmpeqb    mm2,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
                  pcmpeqb    mm3,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
                  pcmpeqb    mm4,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
                  pcmpeqb    mm5,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
                  mov        ecx,len           //load length of line
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
                  mov        esi,srcptr        //load source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
                  mov        ebx,dstptr        //load dest
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
                  jz         mainloop48end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
mainloop48:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
                  movq       mm7,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
                  pand       mm7,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
                  movq       mm6,mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
                  pandn      mm6,[ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
                  por        mm7,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
                  movq       [ebx],mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
                  movq       mm6,[esi+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
                  pand       mm6,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
                  movq       mm7,mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
                  pandn      mm7,[ebx+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
                  por        mm6,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
                  movq       [ebx+8],mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
                  movq       mm6,[esi+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
                  pand       mm6,mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
                  movq       mm7,mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
                  pandn      mm7,[ebx+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
                  por        mm6,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
                  movq       [ebx+16],mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
                  movq       mm7,[esi+24]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
                  pand       mm7,mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
                  movq       mm6,mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
                  pandn      mm6,[ebx+24]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
                  por        mm7,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
                  movq       [ebx+24],mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
                  movq       mm6,[esi+32]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
                  pand       mm6,mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
                  movq       mm7,mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
                  pandn      mm7,[ebx+32]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
                  por        mm6,mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
                  movq       [ebx+32],mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
                  movq       mm7,[esi+40]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
                  pand       mm7,mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
                  movq       mm6,mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
                  pandn      mm6,[ebx+40]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
                  por        mm7,mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
                  movq       [ebx+40],mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
                  add        esi,48            //inc by 48 bytes processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
                  add        ebx,48
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
                  sub        ecx,8             //dec by 8 pixels processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
                  ja         mainloop48
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
mainloop48end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
                  mov        ecx,diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
                  cmp        ecx,0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
                  jz         end48
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
                  mov        edx,mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
                  sal        edx,24            //make low byte the high byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
secondloop48:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
                  sal        edx,1             //move high bit to CF
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
                  jnc        skip48            //if CF = 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
                  mov        eax,[esi]
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
                  mov        [ebx],eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
skip48:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
                  add        esi,4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
                  add        ebx,4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
                  dec        ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
                  jnz        secondloop48
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
end48:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
                  emms
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
            else /* mmx _not supported - Use modified C routine */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
               register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
               png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
               png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
               register int disp = png_pass_inc[png_ptr->pass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
                  pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
               final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
               incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
               for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
                  png_memcpy(dstptr, srcptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
                  srcptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
                  dstptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
            } /* end of else */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
         }       // end 48 bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
         default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
            png_bytep sptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
            png_bytep dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
            png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
            int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
            unsigned int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
            register int disp = png_pass_inc[png_ptr->pass];  // get the offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
            register unsigned int incr1, initial_val, final_val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
            pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
            sptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
               pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
            dp = row + offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
            initial_val = offset_table[png_ptr->pass]*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
            final_val = png_ptr->width*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
            incr1 = (disp)*pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
            for (i = initial_val; i < final_val; i += incr1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
               png_memcpy(dp, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
               sptr += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
               dp += incr1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
      } /* end switch (png_ptr->row_info.pixel_depth) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
   } /* end if (non-trivial mask) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
} /* end png_combine_row() */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
#if defined(PNG_READ_INTERLACING_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
png_do_read_interlace(png_structp png_ptr)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
   png_row_infop row_info = &(png_ptr->row_info);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
   png_bytep row = png_ptr->row_buf + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
   int pass = png_ptr->pass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
   png_uint_32 transformations = png_ptr->transformations;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
#ifdef PNG_USE_LOCAL_ARRAYS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
   png_debug(1,"in png_do_read_interlace\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
   if (mmx_supported == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
       /* this should have happened in png_init_mmx_flags() already */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
       png_warning(png_ptr, "asm_flags may not have been initialized");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
       png_mmx_support();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
   if (row != NULL && row_info != NULL)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
      png_uint_32 final_width;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
      final_width = row_info->width * png_pass_inc[pass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
      switch (row_info->pixel_depth)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
         case 1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
            png_bytep sp, dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
            int sshift, dshift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
            int s_start, s_end, s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
            png_byte v;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
            int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
            sp = row + (png_size_t)((row_info->width - 1) >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
            dp = row + (png_size_t)((final_width - 1) >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
            if (transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
               sshift = (int)((row_info->width + 7) & 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
               dshift = (int)((final_width + 7) & 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
               s_start = 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
               s_end = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
               s_inc = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
            else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
               sshift = 7 - (int)((row_info->width + 7) & 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
               dshift = 7 - (int)((final_width + 7) & 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
               s_start = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
               s_end = 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
               s_inc = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
            for (i = row_info->width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
               v = (png_byte)((*sp >> sshift) & 0x1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
               for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
                  *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
                  *dp |= (png_byte)(v << dshift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
                  if (dshift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
                     dshift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
                     dp--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
                  else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
                     dshift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
               if (sshift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
                  sshift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
                  sp--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
                  sshift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
         case 2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
            png_bytep sp, dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
            int sshift, dshift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
            int s_start, s_end, s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
            sp = row + (png_size_t)((row_info->width - 1) >> 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
            dp = row + (png_size_t)((final_width - 1) >> 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
            if (transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
               sshift = (png_size_t)(((row_info->width + 3) & 3) << 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
               dshift = (png_size_t)(((final_width + 3) & 3) << 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
               s_start = 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
               s_end = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
               s_inc = -2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
            else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
               sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
               dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
               s_start = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
               s_end = 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
               s_inc = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
            for (i = row_info->width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
               png_byte v;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
               int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
               v = (png_byte)((*sp >> sshift) & 0x3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
               for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                  *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                  *dp |= (png_byte)(v << dshift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
                  if (dshift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
                     dshift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
                     dp--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
                  else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
                     dshift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
               if (sshift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
                  sshift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
                  sp--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
                  sshift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
         case 4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
            png_bytep sp, dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
            int sshift, dshift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
            int s_start, s_end, s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
            sp = row + (png_size_t)((row_info->width - 1) >> 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
            dp = row + (png_size_t)((final_width - 1) >> 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
            if (transformations & PNG_PACKSWAP)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
               sshift = (png_size_t)(((row_info->width + 1) & 1) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
               dshift = (png_size_t)(((final_width + 1) & 1) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
               s_start = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
               s_end = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
               s_inc = -4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
            else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
               sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
               dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
               s_start = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
               s_end = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
               s_inc = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
            for (i = row_info->width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
               png_byte v;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
               int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
               v = (png_byte)((*sp >> sshift) & 0xf);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
               for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
                  *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
                  *dp |= (png_byte)(v << dshift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
                  if (dshift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
                     dshift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
                     dp--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
                  else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
                     dshift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
               if (sshift == s_end)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
                  sshift = s_start;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
                  sp--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
                  sshift += s_inc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
         default:         // This is the place where the routine is modified
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
            __int64 const4 = 0x0000000000FFFFFF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
            // __int64 const5 = 0x000000FFFFFF0000;  // unused...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
            __int64 const6 = 0x00000000000000FF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
            png_bytep sptr, dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
            png_size_t pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
            int width = row_info->width;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
            pixel_bytes = (row_info->pixel_depth >> 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
            sptr = row + (width - 1) * pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
            dp = row + (final_width - 1) * pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
            // New code by Nirav Chhatrapati - Intel Corporation
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
            // sign fix by GRR
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
            // NOTE:  there is NO MMX code for 48-bit and 64-bit images
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
            // use MMX routine if machine supports it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_INTERLACE)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
                /* && mmx_supported */ )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
            if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
               if (pixel_bytes == 3)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
                  if (((pass == 0) || (pass == 1)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
                     _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
                        mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
                        mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
                        mov ecx, width
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
                        sub edi, 21   // (png_pass_inc[pass] - 1)*pixel_bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
loop_pass0:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
                        movd mm0, [esi]     ; X X X X X v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
                        pand mm0, const4    ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
                        movq mm1, mm0       ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
                        psllq mm0, 16       ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
                        movq mm2, mm0       ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
                        psllq mm0, 24       ; v2 v1 v0 0 0 0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
                        psrlq mm1, 8        ; 0 0 0 0 0 0 v2 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
                        por mm0, mm2        ; v2 v1 v0 v2 v1 v0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
                        por mm0, mm1        ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
                        movq mm3, mm0       ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
                        psllq mm0, 16       ; v0 v2 v1 v0 v2 v1 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
                        movq mm4, mm3       ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
                        punpckhdq mm3, mm0  ; v0 v2 v1 v0 v2 v1 v0 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
                        movq [edi+16] , mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
                        psrlq mm0, 32       ; 0 0 0 0 v0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
                        movq [edi+8] , mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
                        punpckldq mm0, mm4  ; v1 v0 v2 v1 v0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
                        sub esi, 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
                        movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
                        sub edi, 24
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
                        //sub esi, 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
                        dec ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
                        jnz loop_pass0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
                        EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
                  else if (((pass == 2) || (pass == 3)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
                     _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
                        mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
                        mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
                        mov ecx, width
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
                        sub edi, 9   // (png_pass_inc[pass] - 1)*pixel_bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
loop_pass2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
                        movd mm0, [esi]     ; X X X X X v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
                        pand mm0, const4    ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
                        movq mm1, mm0       ; 0 0 0 0 0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
                        psllq mm0, 16       ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
                        movq mm2, mm0       ; 0 0 0 v2 v1 v0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
                        psllq mm0, 24       ; v2 v1 v0 0 0 0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
                        psrlq mm1, 8        ; 0 0 0 0 0 0 v2 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
                        por mm0, mm2        ; v2 v1 v0 v2 v1 v0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
                        por mm0, mm1        ; v2 v1 v0 v2 v1 v0 v2 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
                        movq [edi+4], mm0   ; move to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
                        psrlq mm0, 16       ; 0 0 v2 v1 v0 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
                        movd [edi], mm0     ; move to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
                        sub esi, 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
                        sub edi, 12
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
                        dec ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
                        jnz loop_pass2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
                        EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
                  else if (width) /* && ((pass == 4) || (pass == 5)) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
                     int width_mmx = ((width >> 1) << 1) - 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
                     if (width_mmx < 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
                         width_mmx = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
                     width -= width_mmx;        // 8 or 9 pix, 24 or 27 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
                           sub esi, 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
                           sub edi, 9
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
loop_pass4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
                           movq mm0, [esi]     ; X X v2 v1 v0 v5 v4 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
                           movq mm7, mm0       ; X X v2 v1 v0 v5 v4 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
                           movq mm6, mm0       ; X X v2 v1 v0 v5 v4 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
                           psllq mm0, 24       ; v1 v0 v5 v4 v3 0 0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
                           pand mm7, const4    ; 0 0 0 0 0 v5 v4 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
                           psrlq mm6, 24       ; 0 0 0 X X v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
                           por mm0, mm7        ; v1 v0 v5 v4 v3 v5 v4 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
                           movq mm5, mm6       ; 0 0 0 X X v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
                           psllq mm6, 8        ; 0 0 X X v2 v1 v0 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
                           movq [edi], mm0     ; move quad to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
                           psrlq mm5, 16       ; 0 0 0 0 0 X X v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
                           pand mm5, const6    ; 0 0 0 0 0 0 0 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
                           por mm6, mm5        ; 0 0 X X v2 v1 v0 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
                           movd [edi+8], mm6   ; move double to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
                           sub esi, 6
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
                           sub edi, 12
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
                           jnz loop_pass4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
                     sptr -= width_mmx*3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
                     dp -= width_mmx*6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
                        png_memcpy(v, sptr, 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
                           png_memcpy(dp, v, 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
                           dp -= 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
                        sptr -= 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
               } /* end of pixel_bytes == 3 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
               else if (pixel_bytes == 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
                  if (((pass == 0) || (pass == 1)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
                     int width_mmx = ((width >> 2) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
                           sub edi, 31
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
                           sub esi, 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
loop1_pass0:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
                           movd mm0, [esi]     ; X X X X v0 v1 v2 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
                           movq mm1, mm0       ; X X X X v0 v1 v2 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
                           punpcklbw mm0, mm0  ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
                           movq mm2, mm0       ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
                           punpcklwd mm0, mm0  ; v2 v2 v2 v2 v3 v3 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
                           movq mm3, mm0       ; v2 v2 v2 v2 v3 v3 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
                           punpckldq mm0, mm0  ; v3 v3 v3 v3 v3 v3 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
                           punpckhdq mm3, mm3  ; v2 v2 v2 v2 v2 v2 v2 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
                           movq [edi], mm0     ; move to memory v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
                           punpckhwd mm2, mm2  ; v0 v0 v0 v0 v1 v1 v1 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
                           movq [edi+8], mm3   ; move to memory v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
                           movq mm4, mm2       ; v0 v0 v0 v0 v1 v1 v1 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
                           punpckldq mm2, mm2  ; v1 v1 v1 v1 v1 v1 v1 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
                           punpckhdq mm4, mm4  ; v0 v0 v0 v0 v0 v0 v0 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
                           movq [edi+16], mm2  ; move to memory v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
                           movq [edi+24], mm4  ; move to memory v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
                           sub edi, 32
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
                           sub ecx, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
                           jnz loop1_pass0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
                     sptr -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
                     dp -= width_mmx*8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
                       /* I simplified this part in version 1.0.4e
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
                        * here and in several other instances where
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
                        * pixel_bytes == 1  -- GR-P
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
                        *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
                        * Original code:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
                        *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
                        * png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
                        * png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
                        * for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
                        * {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
                        *    png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
                        *    dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
                        * }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
                        * sptr -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
                        *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
                        * Replacement code is in the next three lines:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
                        */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
                           *dp-- = *sptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
                        sptr--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
                  else if (((pass == 2) || (pass == 3)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
                     int width_mmx = ((width >> 2) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
                           sub edi, 15
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
                           sub esi, 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
loop1_pass2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
                           movd mm0, [esi]     ; X X X X v0 v1 v2 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
                           punpcklbw mm0, mm0  ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
                           movq mm1, mm0       ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
                           punpcklwd mm0, mm0  ; v2 v2 v2 v2 v3 v3 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
                           punpckhwd mm1, mm1  ; v0 v0 v0 v0 v1 v1 v1 v1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
                           movq [edi], mm0     ; move to memory v2 and v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
                           movq [edi+8], mm1   ; move to memory v1     and v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
                           sub edi, 16
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
                           sub ecx, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
                           jnz loop1_pass2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
                     sptr -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
                     dp -= width_mmx*4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
                           *dp-- = *sptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
                        sptr --;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
                  else if (width) /* && ((pass == 4) || (pass == 5))) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
                     int width_mmx = ((width >> 3) << 3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
                           sub edi, 15
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
                           sub esi, 7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
loop1_pass4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
                           movq mm0, [esi]     ; v0 v1 v2 v3 v4 v5 v6 v7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
                           movq mm1, mm0       ; v0 v1 v2 v3 v4 v5 v6 v7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
                           punpcklbw mm0, mm0  ; v4 v4 v5 v5 v6 v6 v7 v7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
                           //movq mm1, mm0     ; v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
                           punpckhbw mm1, mm1  ;v0 v0 v1 v1 v2 v2 v3 v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
                           movq [edi+8], mm1   ; move to memory v0 v1 v2 and v3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
                           sub esi, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
                           movq [edi], mm0     ; move to memory v4 v5 v6 and v7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
                           //sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
                           sub edi, 16
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
                           sub ecx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
                           jnz loop1_pass4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
                     sptr -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
                     dp -= width_mmx*2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
                           *dp-- = *sptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
                        sptr --;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
               } /* end of pixel_bytes == 1 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
               else if (pixel_bytes == 2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
                  if (((pass == 0) || (pass == 1)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
                     int width_mmx = ((width >> 1) << 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
                           sub esi, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
                           sub edi, 30
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
loop2_pass0:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1546
                           movd mm0, [esi]        ; X X X X v1 v0 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
                           punpcklwd mm0, mm0     ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
                           movq mm1, mm0          ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
                           punpckldq mm0, mm0     ; v3 v2 v3 v2 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
                           punpckhdq mm1, mm1     ; v1 v0 v1 v0 v1 v0 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
                           movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
                           movq [edi + 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
                           movq [edi + 16], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
                           movq [edi + 24], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1555
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
                           sub edi, 32
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
                           jnz loop2_pass0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1563
                     sptr -= (width_mmx*2 - 2);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
                     dp -= (width_mmx*16 - 2);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1567
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
                        sptr -= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
                        png_memcpy(v, sptr, 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1571
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1572
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1573
                           dp -= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1574
                           png_memcpy(dp, v, 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1575
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1576
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1577
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1578
                  else if (((pass == 2) || (pass == 3)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1579
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1580
                     int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1581
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
                           sub esi, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
                           sub edi, 14
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
loop2_pass2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
                           movd mm0, [esi]        ; X X X X v1 v0 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
                           punpcklwd mm0, mm0     ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
                           movq mm1, mm0          ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
                           punpckldq mm0, mm0     ; v3 v2 v3 v2 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
                           punpckhdq mm1, mm1     ; v1 v0 v1 v0 v1 v0 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
                           movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
                           movq [edi + 8], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
                           //sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1601
                           sub edi, 16
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1602
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1603
                           jnz loop2_pass2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1604
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1605
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1606
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1607
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1608
                     sptr -= (width_mmx*2 - 2);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1609
                     dp -= (width_mmx*8 - 2);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1610
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1611
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1612
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1613
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1614
                        sptr -= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1615
                        png_memcpy(v, sptr, 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1616
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1617
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1618
                           dp -= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1619
                           png_memcpy(dp, v, 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1622
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
                  else if (width)  // pass == 4 or 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1624
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
                     int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1626
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1628
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
                           sub esi, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
                           sub edi, 6
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
loop2_pass4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
                           movd mm0, [esi]        ; X X X X v1 v0 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
                           punpcklwd mm0, mm0     ; v1 v0 v1 v0 v3 v2 v3 v2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
                           movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
                           sub edi, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
                           jnz loop2_pass4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1648
                     sptr -= (width_mmx*2 - 2);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
                     dp -= (width_mmx*4 - 2);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
                        sptr -= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
                        png_memcpy(v, sptr, 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
                           dp -= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
                           png_memcpy(dp, v, 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
               } /* end of pixel_bytes == 2 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
               else if (pixel_bytes == 4)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
                  if (((pass == 0) || (pass == 1)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
                     int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1674
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1675
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1676
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1677
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1678
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1679
                           sub edi, 60
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1680
loop4_pass0:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1681
                           movq mm0, [esi]        ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1682
                           movq mm1, mm0          ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1683
                           punpckldq mm0, mm0     ; v7 v6 v5 v4 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1684
                           punpckhdq mm1, mm1     ; v3 v2 v1 v0 v3 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1685
                           movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1686
                           movq [edi + 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1687
                           movq [edi + 16], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1688
                           movq [edi + 24], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1689
                           movq [edi+32], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1690
                           movq [edi + 40], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1691
                           movq [edi+ 48], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1692
                           sub esi, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1693
                           movq [edi + 56], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1694
                           sub edi, 64
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1695
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1696
                           jnz loop4_pass0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1697
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1698
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1699
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1700
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1701
                     sptr -= (width_mmx*4 - 4);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1702
                     dp -= (width_mmx*32 - 4);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1703
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1704
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1705
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1706
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1707
                        sptr -= 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1708
                        png_memcpy(v, sptr, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1709
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1710
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1711
                           dp -= 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1712
                           png_memcpy(dp, v, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1713
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1714
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1715
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1716
                  else if (((pass == 2) || (pass == 3)) && width)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1717
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1718
                     int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1719
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1720
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1721
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1722
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1723
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1724
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1725
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1726
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1727
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1728
                           sub edi, 28
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1729
loop4_pass2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1730
                           movq mm0, [esi]      ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1731
                           movq mm1, mm0        ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1732
                           punpckldq mm0, mm0   ; v7 v6 v5 v4 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1733
                           punpckhdq mm1, mm1   ; v3 v2 v1 v0 v3 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1734
                           movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1735
                           movq [edi + 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1736
                           movq [edi+16], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1737
                           movq [edi + 24], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1738
                           sub esi, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1739
                           sub edi, 32
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1740
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1741
                           jnz loop4_pass2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1742
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1743
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1744
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1745
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1746
                     sptr -= (width_mmx*4 - 4);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1747
                     dp -= (width_mmx*16 - 4);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1748
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1749
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1750
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1751
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1752
                        sptr -= 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1753
                        png_memcpy(v, sptr, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1754
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1755
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1756
                           dp -= 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1757
                           png_memcpy(dp, v, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1758
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1759
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1760
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1761
                  else if (width)  // pass == 4 or 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1762
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1763
                     int width_mmx = ((width >> 1) << 1) ;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1764
                     width -= width_mmx;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1765
                     if (width_mmx)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1766
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1767
                        _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1768
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1769
                           mov esi, sptr
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1770
                           mov edi, dp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1771
                           mov ecx, width_mmx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1772
                           sub esi, 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1773
                           sub edi, 12
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1774
loop4_pass4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1775
                           movq mm0, [esi]      ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1776
                           movq mm1, mm0        ; v3 v2 v1 v0 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1777
                           punpckldq mm0, mm0   ; v7 v6 v5 v4 v7 v6 v5 v4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1778
                           punpckhdq mm1, mm1   ; v3 v2 v1 v0 v3 v2 v1 v0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1779
                           movq [edi], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1780
                           sub esi, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1781
                           movq [edi + 8], mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1782
                           sub edi, 16
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1783
                           sub ecx, 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1784
                           jnz loop4_pass4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1785
                           EMMS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1786
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1787
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1788
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1789
                     sptr -= (width_mmx*4 - 4);          // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1790
                     dp -= (width_mmx*8 - 4);            // sign fixed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1791
                     for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1792
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1793
                        png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1794
                        int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1795
                        sptr -= 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1796
                        png_memcpy(v, sptr, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1797
                        for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1798
                        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1799
                           dp -= 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1800
                           png_memcpy(dp, v, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1801
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1802
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1803
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1804
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1805
               } /* end of pixel_bytes == 4 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1806
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1807
               else if (pixel_bytes == 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1808
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1809
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1810
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1811
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1812
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1813
                     png_memcpy(v, sptr, 6);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1814
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1815
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1816
                        png_memcpy(dp, v, 6);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1817
                        dp -= 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1818
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1819
                     sptr -= 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1820
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1821
               } /* end of pixel_bytes == 6 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1822
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1823
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1824
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1825
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1826
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1827
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1828
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1829
                     png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1830
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1831
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1832
                        png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1833
                        dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1834
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1835
                     sptr-= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1836
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1837
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1838
            } /* end of mmx_supported */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1839
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1840
            else /* MMX not supported:  use modified C code - takes advantage
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1841
                  * of inlining of memcpy for a constant */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1842
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1843
               if (pixel_bytes == 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1844
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1845
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1846
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1847
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1848
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1849
                        *dp-- = *sptr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1850
                     sptr--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1851
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1852
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1853
               else if (pixel_bytes == 3)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1854
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1855
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1856
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1857
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1858
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1859
                     png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1860
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1861
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1862
                        png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1863
                        dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1864
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1865
                     sptr -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1866
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1867
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1868
               else if (pixel_bytes == 2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1869
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1870
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1871
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1872
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1873
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1874
                     png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1875
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1876
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1877
                        png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1878
                        dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1879
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1880
                     sptr -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1881
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1882
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1883
               else if (pixel_bytes == 4)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1884
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1885
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1886
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1887
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1888
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1889
                     png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1890
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1891
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1892
                        png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1893
                        dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1894
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1895
                     sptr -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1896
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1897
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1898
               else if (pixel_bytes == 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1899
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1900
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1901
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1902
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1903
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1904
                     png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1905
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1906
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1907
                        png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1908
                        dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1909
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1910
                     sptr -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1911
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1912
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1913
               else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1914
               {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1915
                  for (i = width; i; i--)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1916
                  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1917
                     png_byte v[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1918
                     int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1919
                     png_memcpy(v, sptr, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1920
                     for (j = 0; j < png_pass_inc[pass]; j++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1921
                     {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1922
                        png_memcpy(dp, v, pixel_bytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1923
                        dp -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1924
                     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1925
                     sptr -= pixel_bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1926
                  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1927
               }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1928
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1929
            } /* end of MMX not supported */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1930
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1931
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1932
      } /* end switch (row_info->pixel_depth) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1933
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1934
      row_info->width = final_width;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1935
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1936
      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1937
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1938
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1939
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1940
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1941
#endif /* PNG_READ_INTERLACING_SUPPORTED */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1942
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1943
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1944
// These variables are utilized in the functions below.  They are declared
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1945
// globally here to ensure alignment on 8-byte boundaries.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1946
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1947
union uAll {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1948
   __int64 use;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1949
   double  align;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1950
} LBCarryMask = {0x0101010101010101},
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1951
  HBClearMask = {0x7f7f7f7f7f7f7f7f},
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1952
  ActiveMask, ActiveMask2, ActiveMaskEnd, ShiftBpp, ShiftRem;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1953
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1954
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1955
// Optimized code for PNG Average filter decoder
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1956
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1957
png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1958
                            , png_bytep prev_row)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1959
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1960
   int bpp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1961
   png_uint_32 FullLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1962
   png_uint_32 MMXLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1963
   //png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1964
   int diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1965
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1966
   bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1967
   FullLength  = row_info->rowbytes; // # of bytes to filter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1968
   _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1969
         // Init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1970
         mov edi, row          // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1971
         xor ebx, ebx          // ebx ==> x
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1972
         mov edx, edi
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1973
         mov esi, prev_row           // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1974
         sub edx, bpp          // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1975
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1976
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1977
         // Compute the Raw value for the first bpp bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1978
         //    Raw(x) = Avg(x) + (Prior(x)/2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1979
davgrlp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1980
         mov al, [esi + ebx]   // Load al with Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1981
         inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1982
         shr al, 1             // divide by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1983
         add al, [edi+ebx-1]   // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1984
         cmp ebx, bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1985
         mov [edi+ebx-1], al    // Write back Raw(x);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1986
                            // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1987
         jb davgrlp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1988
         // get # of bytes to alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1989
         mov diff, edi         // take start of row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1990
         add diff, ebx         // add bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1991
         add diff, 0xf         // add 7 + 8 to incr past alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1992
         and diff, 0xfffffff8  // mask to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1993
         sub diff, edi         // subtract from start ==> value ebx at alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1994
         jz davggo
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1995
         // fix alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1996
         // Compute the Raw value for the bytes up to the alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1997
         //    Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1998
         xor ecx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1999
davglp1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2000
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2001
         mov cl, [esi + ebx]        // load cl with Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2002
         mov al, [edx + ebx]  // load al with Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2003
         add ax, cx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2004
         inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2005
         shr ax, 1            // divide by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2006
         add al, [edi+ebx-1]  // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2007
         cmp ebx, diff              // Check if at alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2008
         mov [edi+ebx-1], al        // Write back Raw(x);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2009
                            // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2010
         jb davglp1               // Repeat until at alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2011
davggo:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2012
         mov eax, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2013
         mov ecx, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2014
         sub eax, ebx          // subtract alignment fix
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2015
         and eax, 0x00000007   // calc bytes over mult of 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2016
         sub ecx, eax          // drop over bytes from original length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2017
         mov MMXLength, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2018
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2019
   // Now do the math for the rest of the row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2020
   switch ( bpp )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2021
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2022
      case 3:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2023
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2024
         ActiveMask.use  = 0x0000000000ffffff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2025
         ShiftBpp.use = 24;    // == 3 * 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2026
         ShiftRem.use = 40;    // == 64 - 24
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2027
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2028
            // Re-init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2029
            movq mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2030
            mov ebx, diff      // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2031
            movq mm5, LBCarryMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2032
            mov edi, row       // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2033
            movq mm4, HBClearMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2034
            mov esi, prev_row        // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2035
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2036
            movq mm2, [edi + ebx - 8]  // Load previous aligned 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2037
                               // (we correct position in loop below)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2038
davg3lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2039
            movq mm0, [edi + ebx]      // Load mm0 with Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2040
            // Add (Prev_row/2) to Average
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2041
            movq mm3, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2042
            psrlq mm2, ShiftRem      // Correct position Raw(x-bpp) data
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2043
            movq mm1, [esi + ebx]    // Load mm1 with Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2044
            movq mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2045
            pand mm3, mm1      // get lsb for each prev_row byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2046
            psrlq mm1, 1       // divide prev_row bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2047
            pand  mm1, mm4     // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2048
            paddb mm0, mm1     // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2049
            // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2050
            movq mm1, mm3      // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2051
            pand mm1, mm2      // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2052
                               // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2053
            psrlq mm2, 1       // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2054
            pand  mm2, mm4     // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2055
            paddb mm2, mm1     // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2056
            pand mm2, mm6      // Leave only Active Group 1 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2057
            paddb mm0, mm2     // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2058
                               //  byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2059
            // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2060
            psllq mm6, ShiftBpp  // shift the mm6 mask to cover bytes 3-5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2061
            movq mm2, mm0        // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2062
            psllq mm2, ShiftBpp  // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2063
            movq mm1, mm3        // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2064
            pand mm1, mm2      // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2065
                               // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2066
            psrlq mm2, 1       // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2067
            pand  mm2, mm4     // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2068
            paddb mm2, mm1     // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2069
            pand mm2, mm6      // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2070
            paddb mm0, mm2     // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2071
                               //  byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2072
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2073
            // Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2074
            psllq mm6, ShiftBpp  // shift the mm6 mask to cover the last two
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2075
                                 // bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2076
            movq mm2, mm0        // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2077
            psllq mm2, ShiftBpp  // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2078
                              // Data only needs to be shifted once here to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2079
                              // get the correct x-bpp offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2080
            movq mm1, mm3     // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2081
            pand mm1, mm2     // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2082
                              // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2083
            psrlq mm2, 1      // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2084
            pand  mm2, mm4    // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2085
            paddb mm2, mm1    // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2086
            pand mm2, mm6     // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2087
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2088
            paddb mm0, mm2    // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2089
                              // byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2090
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2091
            // Now ready to write back to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2092
            movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2093
            // Move updated Raw(x) to use as Raw(x-bpp) for next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2094
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2095
            movq mm2, mm0     // mov updated Raw(x) to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2096
            jb davg3lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2097
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2098
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2099
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2100
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2101
      case 6:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2102
      case 4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2103
      case 7:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2104
      case 5:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2105
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2106
         ActiveMask.use  = 0xffffffffffffffff;  // use shift below to clear
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2107
                                                // appropriate inactive bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2108
         ShiftBpp.use = bpp << 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2109
         ShiftRem.use = 64 - ShiftBpp.use;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2110
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2111
            movq mm4, HBClearMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2112
            // Re-init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2113
            mov ebx, diff       // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2114
            // Load ActiveMask and clear all bytes except for 1st active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2115
            movq mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2116
            mov edi, row         // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2117
            psrlq mm7, ShiftRem
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2118
            mov esi, prev_row    // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2119
            movq mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2120
            movq mm5, LBCarryMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2121
            psllq mm6, ShiftBpp  // Create mask for 2nd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2122
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2123
            movq mm2, [edi + ebx - 8]  // Load previous aligned 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2124
                                 // (we correct position in loop below)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2125
davg4lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2126
            movq mm0, [edi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2127
            psrlq mm2, ShiftRem  // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2128
            movq mm1, [esi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2129
            // Add (Prev_row/2) to Average
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2130
            movq mm3, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2131
            pand mm3, mm1     // get lsb for each prev_row byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2132
            psrlq mm1, 1      // divide prev_row bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2133
            pand  mm1, mm4    // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2134
            paddb mm0, mm1    // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2135
            // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2136
            movq mm1, mm3     // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2137
            pand mm1, mm2     // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2138
                              // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2139
            psrlq mm2, 1      // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2140
            pand  mm2, mm4    // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2141
            paddb mm2, mm1    // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2142
            pand mm2, mm7     // Leave only Active Group 1 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2143
            paddb mm0, mm2    // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2144
                              // byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2145
            // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2146
            movq mm2, mm0     // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2147
            psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2148
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2149
            movq mm1, mm3     // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2150
            pand mm1, mm2     // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2151
                              // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2152
            psrlq mm2, 1      // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2153
            pand  mm2, mm4    // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2154
            paddb mm2, mm1    // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2155
            pand mm2, mm6     // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2156
            paddb mm0, mm2    // add (Raw/2) + LBCarrys to Avg for each Active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2157
                              // byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2158
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2159
            // Now ready to write back to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2160
            movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2161
            // Prep Raw(x-bpp) for next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2162
            movq mm2, mm0     // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2163
            jb davg4lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2164
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2165
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2166
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2167
      case 2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2168
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2169
         ActiveMask.use  = 0x000000000000ffff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2170
         ShiftBpp.use = 16;   // == 2 * 8     [BUGFIX]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2171
         ShiftRem.use = 48;   // == 64 - 16   [BUGFIX]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2172
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2173
            // Load ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2174
            movq mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2175
            // Re-init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2176
            mov ebx, diff     // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2177
            movq mm5, LBCarryMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2178
            mov edi, row      // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2179
            movq mm4, HBClearMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2180
            mov esi, prev_row  // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2181
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2182
            movq mm2, [edi + ebx - 8]  // Load previous aligned 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2183
                              // (we correct position in loop below)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2184
davg2lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2185
            movq mm0, [edi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2186
            psrlq mm2, ShiftRem  // shift data to position correctly   [BUGFIX]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2187
            movq mm1, [esi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2188
            // Add (Prev_row/2) to Average
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2189
            movq mm3, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2190
            pand mm3, mm1     // get lsb for each prev_row byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2191
            psrlq mm1, 1      // divide prev_row bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2192
            pand  mm1, mm4    // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2193
            movq mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2194
            paddb mm0, mm1    // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2195
            // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2196
            movq mm1, mm3     // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2197
            pand mm1, mm2     // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2198
                              // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2199
            psrlq mm2, 1      // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2200
            pand  mm2, mm4    // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2201
            paddb mm2, mm1    // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2202
            pand mm2, mm6     // Leave only Active Group 1 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2203
            paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2204
            // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2205
            psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 2 & 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2206
            movq mm2, mm0       // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2207
            psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2208
            movq mm1, mm3       // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2209
            pand mm1, mm2       // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2210
                                // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2211
            psrlq mm2, 1        // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2212
            pand  mm2, mm4      // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2213
            paddb mm2, mm1      // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2214
            pand mm2, mm6       // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2215
            paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2216
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2217
            // Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2218
            psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 4 & 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2219
            movq mm2, mm0       // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2220
            psllq mm2, ShiftBpp // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2221
                                // Data only needs to be shifted once here to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2222
                                // get the correct x-bpp offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2223
            movq mm1, mm3       // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2224
            pand mm1, mm2       // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2225
                                // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2226
            psrlq mm2, 1        // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2227
            pand  mm2, mm4      // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2228
            paddb mm2, mm1      // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2229
            pand mm2, mm6       // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2230
            paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2231
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2232
            // Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2233
            psllq mm6, ShiftBpp  // shift the mm6 mask to cover bytes 6 & 7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2234
            movq mm2, mm0        // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2235
            psllq mm2, ShiftBpp  // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2236
                                 // Data only needs to be shifted once here to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2237
                                 // get the correct x-bpp offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2238
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2239
            movq mm1, mm3    // now use mm1 for getting LBCarrys
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2240
            pand mm1, mm2    // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2241
                             // lsb's were == 1 (Only valid for active group)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2242
            psrlq mm2, 1     // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2243
            pand  mm2, mm4   // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2244
            paddb mm2, mm1   // add LBCarrys to (Raw(x-bpp)/2) for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2245
            pand mm2, mm6    // Leave only Active Group 2 bytes to add to Avg
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2246
            paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2247
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2248
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2249
            // Now ready to write back to memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2250
            movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2251
            // Prep Raw(x-bpp) for next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2252
            movq mm2, mm0    // mov updated Raws to mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2253
            jb davg2lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2254
        } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2255
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2256
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2257
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2258
      case 1:                 // bpp == 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2259
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2260
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2261
            // Re-init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2262
            mov ebx, diff     // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2263
            mov edi, row      // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2264
            cmp ebx, FullLength  // Test if offset at end of array
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2265
            jnb davg1end
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2266
            // Do Avg decode for remaining bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2267
            mov esi, prev_row    // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2268
            mov edx, edi
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2269
            xor ecx, ecx         // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2270
            sub edx, bpp         // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2271
davg1lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2272
            // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2273
            xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2274
            mov cl, [esi + ebx]  // load cl with Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2275
            mov al, [edx + ebx]  // load al with Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2276
            add ax, cx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2277
            inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2278
            shr ax, 1            // divide by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2279
            add al, [edi+ebx-1]  // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2280
            cmp ebx, FullLength  // Check if at end of array
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2281
            mov [edi+ebx-1], al  // Write back Raw(x);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2282
                         // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2283
            jb davg1lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2284
davg1end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2285
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2286
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2287
      return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2288
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2289
      case 8:             // bpp == 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2290
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2291
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2292
            // Re-init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2293
            mov ebx, diff           // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2294
            movq mm5, LBCarryMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2295
            mov edi, row            // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2296
            movq mm4, HBClearMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2297
            mov esi, prev_row       // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2298
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2299
            movq mm2, [edi + ebx - 8]  // Load previous aligned 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2300
                                // (NO NEED to correct position in loop below)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2301
davg8lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2302
            movq mm0, [edi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2303
            movq mm3, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2304
            movq mm1, [esi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2305
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2306
            pand mm3, mm1       // get lsb for each prev_row byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2307
            psrlq mm1, 1        // divide prev_row bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2308
            pand mm3, mm2       // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2309
                                // lsb's were == 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2310
            psrlq mm2, 1        // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2311
            pand  mm1, mm4      // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2312
            paddb mm0, mm3      // add LBCarrys to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2313
            pand  mm2, mm4      // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2314
            paddb mm0, mm1      // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2315
            paddb mm0, mm2      // add (Raw/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2316
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2317
            movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2318
            movq mm2, mm0       // reuse as Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2319
            jb davg8lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2320
        } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2321
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2322
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2323
      default:                  // bpp greater than 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2324
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2325
        _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2326
            movq mm5, LBCarryMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2327
            // Re-init address pointers and offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2328
            mov ebx, diff       // ebx ==> x = offset to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2329
            mov edi, row        // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2330
            movq mm4, HBClearMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2331
            mov edx, edi
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2332
            mov esi, prev_row   // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2333
            sub edx, bpp        // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2334
davgAlp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2335
            movq mm0, [edi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2336
            movq mm3, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2337
            movq mm1, [esi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2338
            pand mm3, mm1       // get lsb for each prev_row byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2339
            movq mm2, [edx + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2340
            psrlq mm1, 1        // divide prev_row bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2341
            pand mm3, mm2       // get LBCarrys for each byte where both
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2342
                                // lsb's were == 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2343
            psrlq mm2, 1        // divide raw bytes by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2344
            pand  mm1, mm4      // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2345
            paddb mm0, mm3      // add LBCarrys to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2346
            pand  mm2, mm4      // clear invalid bit 7 of each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2347
            paddb mm0, mm1      // add (Prev_row/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2348
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2349
            paddb mm0, mm2      // add (Raw/2) to Avg for each byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2350
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2351
            movq [edi + ebx - 8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2352
            jb davgAlp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2353
        } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2354
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2355
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2356
   }                         // end switch ( bpp )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2357
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2358
   _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2359
         // MMX acceleration complete now do clean-up
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2360
         // Check if any remaining bytes left to decode
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2361
         mov ebx, MMXLength    // ebx ==> x = offset bytes remaining after MMX
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2362
         mov edi, row          // edi ==> Avg(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2363
         cmp ebx, FullLength   // Test if offset at end of array
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2364
         jnb davgend
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2365
         // Do Avg decode for remaining bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2366
         mov esi, prev_row     // esi ==> Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2367
         mov edx, edi
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2368
         xor ecx, ecx          // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2369
         sub edx, bpp          // edx ==> Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2370
davglp2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2371
         // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2372
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2373
         mov cl, [esi + ebx]   // load cl with Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2374
         mov al, [edx + ebx]   // load al with Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2375
         add ax, cx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2376
         inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2377
         shr ax, 1              // divide by 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2378
         add al, [edi+ebx-1]    // Add Avg(x); -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2379
         cmp ebx, FullLength    // Check if at end of array
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2380
         mov [edi+ebx-1], al    // Write back Raw(x);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2381
                          // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2382
         jb davglp2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2383
davgend:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2384
         emms             // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2385
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2386
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2387
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2388
// Optimized code for PNG Paeth filter decoder
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2389
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2390
png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2391
                              png_bytep prev_row)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2392
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2393
   png_uint_32 FullLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2394
   png_uint_32 MMXLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2395
   //png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2396
   int bpp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2397
   int diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2398
   //int ptemp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2399
   int patemp, pbtemp, pctemp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2400
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2401
   bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2402
   FullLength  = row_info->rowbytes; // # of bytes to filter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2403
   _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2404
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2405
         xor ebx, ebx        // ebx ==> x offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2406
         mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2407
         xor edx, edx        // edx ==> x-bpp offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2408
         mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2409
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2410
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2411
         // Compute the Raw value for the first bpp bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2412
         // Note: the formula works out to be always
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2413
         //   Raw(x) = Paeth(x) + Prior(x)      where x < bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2414
dpthrlp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2415
         mov al, [edi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2416
         add al, [esi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2417
         inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2418
         cmp ebx, bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2419
         mov [edi + ebx - 1], al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2420
         jb dpthrlp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2421
         // get # of bytes to alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2422
         mov diff, edi         // take start of row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2423
         add diff, ebx         // add bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2424
         xor ecx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2425
         add diff, 0xf         // add 7 + 8 to incr past alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2426
         and diff, 0xfffffff8  // mask to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2427
         sub diff, edi         // subtract from start ==> value ebx at alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2428
         jz dpthgo
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2429
         // fix alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2430
dpthlp1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2431
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2432
         // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2433
         mov al, [esi + ebx]   // load Prior(x) into al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2434
         mov cl, [esi + edx]   // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2435
         sub eax, ecx          // subtract Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2436
         mov patemp, eax       // Save pav for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2437
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2438
         // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2439
         mov al, [edi + edx]   // load Raw(x-bpp) into al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2440
         sub eax, ecx          // subtract Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2441
         mov ecx, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2442
         // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2443
         add eax, patemp       // pcv = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2444
         // pc = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2445
         test eax, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2446
         jz dpthpca
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2447
         neg eax               // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2448
dpthpca:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2449
         mov pctemp, eax       // save pc for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2450
         // pb = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2451
         test ecx, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2452
         jz dpthpba
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2453
         neg ecx               // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2454
dpthpba:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2455
         mov pbtemp, ecx       // save pb for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2456
         // pa = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2457
         mov eax, patemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2458
         test eax, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2459
         jz dpthpaa
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2460
         neg eax               // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2461
dpthpaa:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2462
         mov patemp, eax       // save pa for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2463
         // test if pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2464
         cmp eax, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2465
         jna dpthabb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2466
         // pa > pb; now test if pb <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2467
         cmp ecx, pctemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2468
         jna dpthbbc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2469
         // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2470
         mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2471
         jmp dpthpaeth
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2472
dpthbbc:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2473
         // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2474
         mov cl, [esi + ebx]   // load Prior(x) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2475
         jmp dpthpaeth
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2476
dpthabb:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2477
         // pa <= pb; now test if pa <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2478
         cmp eax, pctemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2479
         jna dpthabc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2480
         // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2481
         mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2482
         jmp dpthpaeth
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2483
dpthabc:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2484
         // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2485
         mov cl, [edi + edx]  // load Raw(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2486
dpthpaeth:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2487
         inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2488
         inc edx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2489
         // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2490
         add [edi + ebx - 1], cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2491
         cmp ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2492
         jb dpthlp1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2493
dpthgo:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2494
         mov ecx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2495
         mov eax, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2496
         sub eax, ebx          // subtract alignment fix
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2497
         and eax, 0x00000007   // calc bytes over mult of 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2498
         sub ecx, eax          // drop over bytes from original length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2499
         mov MMXLength, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2500
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2501
   // Now do the math for the rest of the row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2502
   switch ( bpp )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2503
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2504
      case 3:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2505
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2506
         ActiveMask.use = 0x0000000000ffffff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2507
         ActiveMaskEnd.use = 0xffff000000000000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2508
         ShiftBpp.use = 24;    // == bpp(3) * 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2509
         ShiftRem.use = 40;    // == 64 - 24
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2510
         _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2511
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2512
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2513
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2514
            mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2515
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2516
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2517
            movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2518
dpth3lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2519
            psrlq mm1, ShiftRem     // shift last 3 bytes to 1st 3 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2520
            movq mm2, [esi + ebx]   // load b=Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2521
            punpcklbw mm1, mm0      // Unpack High bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2522
            movq mm3, [esi+ebx-8]   // Prep c=Prior(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2523
            punpcklbw mm2, mm0      // Unpack High bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2524
            psrlq mm3, ShiftRem     // shift last 3 bytes to 1st 3 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2525
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2526
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2527
            punpcklbw mm3, mm0      // Unpack High bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2528
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2529
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2530
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2531
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2532
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2533
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2534
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2535
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2536
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2537
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2538
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2539
            pcmpgtw mm0, mm4    // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2540
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2541
            pand mm0, mm4       // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2542
            pcmpgtw mm7, mm5    // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2543
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2544
            pand mm7, mm5       // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2545
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2546
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2547
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2548
            pcmpgtw mm0, mm6    // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2549
            pand mm0, mm6       // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2550
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2551
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2552
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2553
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2554
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2555
            pcmpgtw mm7, mm5    // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2556
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2557
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2558
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2559
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2560
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2561
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2562
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2563
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2564
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2565
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2566
            pcmpgtw mm7, mm6       // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2567
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2568
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2569
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2570
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2571
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2572
            packuswb mm7, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2573
            movq mm3, [esi + ebx]   // load c=Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2574
            pand mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2575
            movq mm2, mm3           // load b=Prior(x) step 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2576
            paddb mm7, [edi + ebx]  // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2577
            punpcklbw mm3, mm0      // Unpack Low bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2578
            movq [edi + ebx], mm7   // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2579
            movq mm1, mm7           // Now mm1 will be used as Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2580
            // Now do Paeth for 2nd set of bytes (3-5)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2581
            psrlq mm2, ShiftBpp     // load b=Prior(x) step 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2582
            punpcklbw mm1, mm0      // Unpack Low bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2583
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2584
            punpcklbw mm2, mm0      // Unpack Low bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2585
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2586
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2587
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2588
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2589
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2590
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2591
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2592
            //       pav + pbv = pbv + pav
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2593
            movq mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2594
            paddw mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2595
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2596
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2597
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2598
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2599
            pcmpgtw mm0, mm5       // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2600
            pcmpgtw mm7, mm4       // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2601
            pand mm0, mm5          // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2602
            pand mm7, mm4          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2603
            psubw mm5, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2604
            psubw mm4, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2605
            psubw mm5, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2606
            psubw mm4, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2607
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2608
            pcmpgtw mm0, mm6       // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2609
            pand mm0, mm6          // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2610
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2611
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2612
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2613
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2614
            pcmpgtw mm7, mm5       // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2615
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2616
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2617
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2618
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2619
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2620
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2621
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2622
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2623
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2624
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2625
            pcmpgtw mm7, mm6       // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2626
            movq mm2, [esi + ebx]  // load b=Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2627
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2628
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2629
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2630
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2631
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2632
            packuswb mm7, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2633
            movq mm3, mm2           // load c=Prior(x-bpp) step 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2634
            pand mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2635
            punpckhbw mm2, mm0      // Unpack High bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2636
            psllq mm7, ShiftBpp     // Shift bytes to 2nd group of 3 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2637
             // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2638
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2639
            paddb mm7, [edi + ebx]  // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2640
            psllq mm3, ShiftBpp     // load c=Prior(x-bpp) step 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2641
            movq [edi + ebx], mm7   // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2642
            movq mm1, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2643
            punpckhbw mm3, mm0      // Unpack High bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2644
            psllq mm1, ShiftBpp     // Shift bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2645
                                    // Now mm1 will be used as Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2646
            // Now do Paeth for 3rd, and final, set of bytes (6-7)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2647
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2648
            punpckhbw mm1, mm0      // Unpack High bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2649
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2650
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2651
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2652
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2653
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2654
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2655
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2656
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2657
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2658
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2659
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2660
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2661
            pcmpgtw mm0, mm4    // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2662
            pcmpgtw mm7, mm5    // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2663
            pand mm0, mm4       // Only pav bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2664
            pand mm7, mm5       // Only pbv bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2665
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2666
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2667
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2668
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2669
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2670
            pcmpgtw mm0, mm6    // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2671
            pand mm0, mm6       // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2672
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2673
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2674
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2675
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2676
            pcmpgtw mm7, mm5    // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2677
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2678
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2679
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2680
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2681
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2682
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2683
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2684
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2685
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2686
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2687
            pcmpgtw mm7, mm6    // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2688
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2689
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2690
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2691
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2692
            packuswb mm1, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2693
            // Step ebx to next set of 8 bytes and repeat loop til done
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2694
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2695
            pand mm1, ActiveMaskEnd
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2696
            paddb mm1, [edi + ebx - 8] // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2697
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2698
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2699
            pxor mm0, mm0              // pxor does not affect flags
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2700
            movq [edi + ebx - 8], mm1  // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2701
                                 // mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2702
                           // mm3 ready to be used as Prior(x-bpp) next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2703
            jb dpth3lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2704
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2705
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2706
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2707
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2708
      case 6:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2709
      case 7:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2710
      case 5:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2711
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2712
         ActiveMask.use  = 0x00000000ffffffff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2713
         ActiveMask2.use = 0xffffffff00000000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2714
         ShiftBpp.use = bpp << 3;    // == bpp * 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2715
         ShiftRem.use = 64 - ShiftBpp.use;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2716
         _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2717
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2718
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2719
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2720
            mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2721
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2722
            movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2723
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2724
dpth6lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2725
            // Must shift to position Raw(x-bpp) data
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2726
            psrlq mm1, ShiftRem
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2727
            // Do first set of 4 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2728
            movq mm3, [esi+ebx-8]      // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2729
            punpcklbw mm1, mm0      // Unpack Low bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2730
            movq mm2, [esi + ebx]   // load b=Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2731
            punpcklbw mm2, mm0      // Unpack Low bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2732
            // Must shift to position Prior(x-bpp) data
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2733
            psrlq mm3, ShiftRem
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2734
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2735
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2736
            punpcklbw mm3, mm0      // Unpack Low bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2737
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2738
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2739
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2740
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2741
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2742
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2743
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2744
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2745
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2746
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2747
            pcmpgtw mm0, mm4    // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2748
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2749
            pand mm0, mm4       // Only pav bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2750
            pcmpgtw mm7, mm5    // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2751
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2752
            pand mm7, mm5       // Only pbv bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2753
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2754
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2755
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2756
            pcmpgtw mm0, mm6    // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2757
            pand mm0, mm6       // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2758
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2759
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2760
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2761
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2762
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2763
            pcmpgtw mm7, mm5    // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2764
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2765
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2766
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2767
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2768
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2769
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2770
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2771
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2772
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2773
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2774
            pcmpgtw mm7, mm6    // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2775
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2776
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2777
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2778
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2779
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2780
            packuswb mm7, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2781
            movq mm3, [esi + ebx - 8]  // load c=Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2782
            pand mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2783
            psrlq mm3, ShiftRem
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2784
            movq mm2, [esi + ebx]      // load b=Prior(x) step 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2785
            paddb mm7, [edi + ebx]     // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2786
            movq mm6, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2787
            movq [edi + ebx], mm7      // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2788
            movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2789
            psllq mm6, ShiftBpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2790
            movq mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2791
            psrlq mm1, ShiftRem
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2792
            por mm3, mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2793
            psllq mm5, ShiftBpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2794
            punpckhbw mm3, mm0         // Unpack High bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2795
            por mm1, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2796
            // Do second set of 4 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2797
            punpckhbw mm2, mm0         // Unpack High bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2798
            punpckhbw mm1, mm0         // Unpack High bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2799
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2800
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2801
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2802
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2803
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2804
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2805
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2806
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2807
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2808
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2809
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2810
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2811
            pcmpgtw mm0, mm4       // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2812
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2813
            pand mm0, mm4          // Only pav bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2814
            pcmpgtw mm7, mm5       // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2815
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2816
            pand mm7, mm5          // Only pbv bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2817
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2818
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2819
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2820
            pcmpgtw mm0, mm6       // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2821
            pand mm0, mm6          // Only pcv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2822
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2823
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2824
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2825
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2826
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2827
            pcmpgtw mm7, mm5       // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2828
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2829
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2830
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2831
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2832
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2833
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2834
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2835
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2836
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2837
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2838
            pcmpgtw mm7, mm6           // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2839
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2840
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2841
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2842
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2843
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2844
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2845
            // Step ebx to next set of 8 bytes and repeat loop until done
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2846
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2847
            packuswb mm1, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2848
            paddb mm1, [edi + ebx - 8]     // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2849
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2850
            movq [edi + ebx - 8], mm1      // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2851
                                // mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2852
            jb dpth6lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2853
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2854
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2855
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2856
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2857
      case 4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2858
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2859
         ActiveMask.use  = 0x00000000ffffffff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2860
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2861
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2862
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2863
            mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2864
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2865
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2866
            movq mm1, [edi+ebx-8]    // Only time should need to read
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2867
                                     //  a=Raw(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2868
dpth4lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2869
            // Do first set of 4 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2870
            movq mm3, [esi+ebx-8]    // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2871
            punpckhbw mm1, mm0       // Unpack High bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2872
            movq mm2, [esi + ebx]    // load b=Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2873
            punpcklbw mm2, mm0       // Unpack High bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2874
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2875
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2876
            punpckhbw mm3, mm0       // Unpack High bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2877
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2878
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2879
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2880
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2881
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2882
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2883
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2884
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2885
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2886
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2887
            pcmpgtw mm0, mm4       // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2888
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2889
            pand mm0, mm4          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2890
            pcmpgtw mm7, mm5       // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2891
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2892
            pand mm7, mm5          // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2893
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2894
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2895
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2896
            pcmpgtw mm0, mm6       // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2897
            pand mm0, mm6          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2898
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2899
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2900
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2901
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2902
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2903
            pcmpgtw mm7, mm5       // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2904
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2905
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2906
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2907
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2908
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2909
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2910
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2911
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2912
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2913
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2914
            pcmpgtw mm7, mm6       // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2915
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2916
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2917
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2918
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2919
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2920
            packuswb mm7, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2921
            movq mm3, [esi + ebx]      // load c=Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2922
            pand mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2923
            movq mm2, mm3              // load b=Prior(x) step 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2924
            paddb mm7, [edi + ebx]     // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2925
            punpcklbw mm3, mm0         // Unpack High bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2926
            movq [edi + ebx], mm7      // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2927
            movq mm1, mm7              // Now mm1 will be used as Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2928
            // Do second set of 4 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2929
            punpckhbw mm2, mm0         // Unpack Low bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2930
            punpcklbw mm1, mm0         // Unpack Low bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2931
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2932
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2933
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2934
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2935
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2936
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2937
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2938
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2939
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2940
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2941
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2942
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2943
            pcmpgtw mm0, mm4       // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2944
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2945
            pand mm0, mm4          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2946
            pcmpgtw mm7, mm5       // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2947
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2948
            pand mm7, mm5          // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2949
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2950
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2951
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2952
            pcmpgtw mm0, mm6       // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2953
            pand mm0, mm6          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2954
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2955
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2956
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2957
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2958
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2959
            pcmpgtw mm7, mm5       // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2960
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2961
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2962
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2963
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2964
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2965
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2966
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2967
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2968
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2969
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2970
            pcmpgtw mm7, mm6       // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2971
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2972
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2973
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2974
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2975
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2976
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2977
            // Step ebx to next set of 8 bytes and repeat loop until done
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2978
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2979
            packuswb mm1, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2980
            paddb mm1, [edi + ebx - 8]     // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2981
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2982
            movq [edi + ebx - 8], mm1      // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2983
                                // mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2984
            jb dpth4lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2985
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2986
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2987
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2988
      case 8:                          // bpp == 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2989
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2990
         ActiveMask.use  = 0x00000000ffffffff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2991
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2992
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2993
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2994
            mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2995
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2996
            // PRIME the pump (load the first Raw(x-bpp) data set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2997
            movq mm1, [edi+ebx-8]      // Only time should need to read
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2998
                                       //  a=Raw(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2999
dpth8lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3000
            // Do first set of 4 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3001
            movq mm3, [esi+ebx-8]      // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3002
            punpcklbw mm1, mm0         // Unpack Low bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3003
            movq mm2, [esi + ebx]      // load b=Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3004
            punpcklbw mm2, mm0         // Unpack Low bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3005
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3006
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3007
            punpcklbw mm3, mm0         // Unpack Low bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3008
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3009
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3010
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3011
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3012
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3013
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3014
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3015
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3016
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3017
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3018
            pcmpgtw mm0, mm4       // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3019
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3020
            pand mm0, mm4          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3021
            pcmpgtw mm7, mm5       // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3022
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3023
            pand mm7, mm5          // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3024
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3025
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3026
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3027
            pcmpgtw mm0, mm6       // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3028
            pand mm0, mm6          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3029
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3030
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3031
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3032
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3033
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3034
            pcmpgtw mm7, mm5       // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3035
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3036
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3037
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3038
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3039
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3040
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3041
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3042
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3043
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3044
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3045
            pcmpgtw mm7, mm6       // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3046
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3047
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3048
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3049
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3050
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3051
            packuswb mm7, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3052
            movq mm3, [esi+ebx-8]    // read c=Prior(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3053
            pand mm7, ActiveMask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3054
            movq mm2, [esi + ebx]    // load b=Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3055
            paddb mm7, [edi + ebx]   // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3056
            punpckhbw mm3, mm0       // Unpack High bytes of c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3057
            movq [edi + ebx], mm7    // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3058
            movq mm1, [edi+ebx-8]    // read a=Raw(x-bpp) bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3059
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3060
            // Do second set of 4 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3061
            punpckhbw mm2, mm0       // Unpack High bytes of b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3062
            punpckhbw mm1, mm0       // Unpack High bytes of a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3063
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3064
            movq mm4, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3065
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3066
            movq mm5, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3067
            psubw mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3068
            pxor mm7, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3069
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3070
            movq mm6, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3071
            psubw mm5, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3072
            // pa = abs(p-a) = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3073
            // pb = abs(p-b) = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3074
            // pc = abs(p-c) = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3075
            pcmpgtw mm0, mm4       // Create mask pav bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3076
            paddw mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3077
            pand mm0, mm4          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3078
            pcmpgtw mm7, mm5       // Create mask pbv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3079
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3080
            pand mm7, mm5          // Only pbv bytes < 0 in mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3081
            psubw mm4, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3082
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3083
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3084
            pcmpgtw mm0, mm6       // Create mask pcv bytes < 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3085
            pand mm0, mm6          // Only pav bytes < 0 in mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3086
            psubw mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3087
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3088
            //  test pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3089
            movq mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3090
            psubw mm6, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3091
            pcmpgtw mm7, mm5       // pa > pb?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3092
            movq mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3093
            // use mm7 mask to merge pa & pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3094
            pand mm5, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3095
            // use mm0 mask copy to merge a & b
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3096
            pand mm2, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3097
            pandn mm7, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3098
            pandn mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3099
            paddw mm7, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3100
            paddw mm0, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3101
            //  test  ((pa <= pb)? pa:pb) <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3102
            pcmpgtw mm7, mm6       // pab > pc?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3103
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3104
            pand mm3, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3105
            pandn mm7, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3106
            pxor mm1, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3107
            paddw mm7, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3108
            pxor mm0, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3109
            // Step ebx to next set of 8 bytes and repeat loop until done
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3110
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3111
            packuswb mm1, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3112
            paddb mm1, [edi + ebx - 8]     // add Paeth predictor with Raw(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3113
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3114
            movq [edi + ebx - 8], mm1      // write back updated value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3115
                            // mm1 will be used as Raw(x-bpp) next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3116
            jb dpth8lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3117
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3118
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3119
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3120
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3121
      case 1:                // bpp = 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3122
      case 2:                // bpp = 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3123
      default:               // bpp > 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3124
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3125
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3126
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3127
            cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3128
            jnb dpthdend
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3129
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3130
            mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3131
            // Do Paeth decode for remaining bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3132
            mov edx, ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3133
            xor ecx, ecx        // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3134
            sub edx, bpp        // Set edx = ebx - bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3135
dpthdlp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3136
            xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3137
            // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3138
            mov al, [esi + ebx]        // load Prior(x) into al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3139
            mov cl, [esi + edx]        // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3140
            sub eax, ecx                 // subtract Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3141
            mov patemp, eax                 // Save pav for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3142
            xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3143
            // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3144
            mov al, [edi + edx]        // load Raw(x-bpp) into al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3145
            sub eax, ecx                 // subtract Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3146
            mov ecx, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3147
            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3148
            add eax, patemp                 // pcv = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3149
            // pc = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3150
            test eax, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3151
            jz dpthdpca
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3152
            neg eax                     // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3153
dpthdpca:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3154
            mov pctemp, eax             // save pc for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3155
            // pb = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3156
            test ecx, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3157
            jz dpthdpba
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3158
            neg ecx                     // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3159
dpthdpba:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3160
            mov pbtemp, ecx             // save pb for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3161
            // pa = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3162
            mov eax, patemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3163
            test eax, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3164
            jz dpthdpaa
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3165
            neg eax                     // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3166
dpthdpaa:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3167
            mov patemp, eax             // save pa for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3168
            // test if pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3169
            cmp eax, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3170
            jna dpthdabb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3171
            // pa > pb; now test if pb <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3172
            cmp ecx, pctemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3173
            jna dpthdbbc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3174
            // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3175
            mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3176
            jmp dpthdpaeth
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3177
dpthdbbc:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3178
            // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3179
            mov cl, [esi + ebx]        // load Prior(x) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3180
            jmp dpthdpaeth
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3181
dpthdabb:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3182
            // pa <= pb; now test if pa <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3183
            cmp eax, pctemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3184
            jna dpthdabc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3185
            // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3186
            mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3187
            jmp dpthdpaeth
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3188
dpthdabc:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3189
            // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3190
            mov cl, [edi + edx]  // load Raw(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3191
dpthdpaeth:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3192
            inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3193
            inc edx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3194
            // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3195
            add [edi + ebx - 1], cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3196
            cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3197
            jb dpthdlp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3198
dpthdend:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3199
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3200
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3201
      return;                   // No need to go further with this one
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3202
   }                         // end switch ( bpp )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3203
   _asm
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3204
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3205
         // MMX acceleration complete now do clean-up
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3206
         // Check if any remaining bytes left to decode
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3207
         mov ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3208
         cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3209
         jnb dpthend
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3210
         mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3211
         mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3212
         // Do Paeth decode for remaining bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3213
         mov edx, ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3214
         xor ecx, ecx         // zero ecx before using cl & cx in loop below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3215
         sub edx, bpp         // Set edx = ebx - bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3216
dpthlp2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3217
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3218
         // pav = p - a = (a + b - c) - a = b - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3219
         mov al, [esi + ebx]  // load Prior(x) into al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3220
         mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3221
         sub eax, ecx         // subtract Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3222
         mov patemp, eax      // Save pav for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3223
         xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3224
         // pbv = p - b = (a + b - c) - b = a - c
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3225
         mov al, [edi + edx]  // load Raw(x-bpp) into al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3226
         sub eax, ecx         // subtract Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3227
         mov ecx, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3228
         // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3229
         add eax, patemp      // pcv = pav + pbv
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3230
         // pc = abs(pcv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3231
         test eax, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3232
         jz dpthpca2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3233
         neg eax              // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3234
dpthpca2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3235
         mov pctemp, eax      // save pc for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3236
         // pb = abs(pbv)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3237
         test ecx, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3238
         jz dpthpba2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3239
         neg ecx              // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3240
dpthpba2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3241
         mov pbtemp, ecx      // save pb for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3242
         // pa = abs(pav)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3243
         mov eax, patemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3244
         test eax, 0x80000000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3245
         jz dpthpaa2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3246
         neg eax              // reverse sign of neg values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3247
dpthpaa2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3248
         mov patemp, eax      // save pa for later use
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3249
         // test if pa <= pb
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3250
         cmp eax, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3251
         jna dpthabb2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3252
         // pa > pb; now test if pb <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3253
         cmp ecx, pctemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3254
         jna dpthbbc2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3255
         // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3256
         mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3257
         jmp dpthpaeth2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3258
dpthbbc2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3259
         // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3260
         mov cl, [esi + ebx]        // load Prior(x) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3261
         jmp dpthpaeth2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3262
dpthabb2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3263
         // pa <= pb; now test if pa <= pc
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3264
         cmp eax, pctemp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3265
         jna dpthabc2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3266
         // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3267
         mov cl, [esi + edx]  // load Prior(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3268
         jmp dpthpaeth2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3269
dpthabc2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3270
         // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3271
         mov cl, [edi + edx]  // load Raw(x-bpp) into cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3272
dpthpaeth2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3273
         inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3274
         inc edx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3275
         // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3276
         add [edi + ebx - 1], cl
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3277
         cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3278
         jb dpthlp2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3279
dpthend:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3280
         emms             // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3281
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3282
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3283
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3284
// MMX-optimized decoder for the PNG Sub filter: Raw(x) = Sub(x) + Raw(x-bpp), computed in place on row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3285
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3286
png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3287
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3288
   //int test;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3289
   int bpp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3290
   png_uint_32 FullLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3291
   png_uint_32 MMXLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3292
   int diff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3293
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3294
   bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3295
   FullLength  = row_info->rowbytes - bpp; // # of bytes to filter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3296
   _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3297
        mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3298
        mov esi, edi               // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3299
        add edi, bpp               // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3300
        xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3301
        // get # of bytes to alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3302
        mov diff, edi               // take start of row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3303
        add diff, 0xf               // add 7 + 8 to incr past
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3304
                                        // alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3305
        xor ebx, ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3306
        and diff, 0xfffffff8        // mask to alignment boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3307
        sub diff, edi               // subtract from start ==> value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3308
                                        //  ebx at alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3309
        jz dsubgo
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3310
        // fix alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3311
dsublp1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3312
        mov al, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3313
        add [edi+ebx], al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3314
        inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3315
        cmp ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3316
        jb dsublp1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3317
dsubgo:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3318
        mov ecx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3319
        mov edx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3320
        sub edx, ebx                  // subtract alignment fix
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3321
        and edx, 0x00000007           // calc bytes over mult of 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3322
        sub ecx, edx                  // drop over bytes from length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3323
        mov MMXLength, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3324
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3325
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3326
   // Now do the math for the rest of the row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3327
   switch ( bpp )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3328
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3329
        case 3:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3330
        {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3331
         ActiveMask.use  = 0x0000ffffff000000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3332
         ShiftBpp.use = 24;       // == 3 * 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3333
         ShiftRem.use  = 40;      // == 64 - 24
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3334
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3335
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3336
            movq mm7, ActiveMask  // Load ActiveMask for 2nd active byte group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3337
            mov esi, edi              // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3338
            add edi, bpp          // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3339
            movq mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3340
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3341
            psllq mm6, ShiftBpp   // Move mask in mm6 to cover 3rd active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3342
                                  // byte group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3343
            // PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3344
            movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3345
dsub3lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3346
            psrlq mm1, ShiftRem   // Shift data for adding 1st bpp bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3347
                          // no need for mask; shift clears inactive bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3348
            // Add 1st active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3349
            movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3350
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3351
            // Add 2nd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3352
            movq mm1, mm0         // mov updated Raws to mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3353
            psllq mm1, ShiftBpp   // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3354
            pand mm1, mm7         // mask to use only 2nd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3355
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3356
            // Add 3rd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3357
            movq mm1, mm0         // mov updated Raws to mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3358
            psllq mm1, ShiftBpp   // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3359
            pand mm1, mm6         // mask to use only 3rd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3360
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3361
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3362
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3363
            movq [edi+ebx-8], mm0     // Write updated Raws back to array
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3364
            // Prep for doing 1st add at top of loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3365
            movq mm1, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3366
            jb dsub3lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3367
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3368
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3369
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3370
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3371
      case 1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3372
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3373
         // Placed here just in case this is a duplicate of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3374
         // non-MMX code for the SUB filter in png_read_filter_row below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3375
         //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3376
         //         png_bytep rp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3377
         //         png_bytep lp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3378
         //         png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3379
         //         bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3380
         //         for (i = (png_uint_32)bpp, rp = row + bpp, lp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3381
         //            i < row_info->rowbytes; i++, rp++, lp++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3382
         //      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3383
         //            *rp = (png_byte)(((int)(*rp) + (int)(*lp)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3384
         //      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3385
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3386
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3387
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3388
            cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3389
            jnb dsub1end
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3390
            mov esi, edi          // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3391
            xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3392
            add edi, bpp      // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3393
dsub1lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3394
            mov al, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3395
            add [edi+ebx], al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3396
            inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3397
            cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3398
            jb dsub1lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3399
dsub1end:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3400
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3401
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3402
      return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3403
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3404
      case 6:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3405
      case 7:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3406
      case 4:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3407
      case 5:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3408
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3409
         ShiftBpp.use = bpp << 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3410
         ShiftRem.use = 64 - ShiftBpp.use;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3411
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3412
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3413
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3414
            mov esi, edi               // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3415
            add edi, bpp           // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3416
            // PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3417
            movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3418
dsub4lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3419
            psrlq mm1, ShiftRem // Shift data for adding 1st bpp bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3420
                          // no need for mask; shift clears inactive bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3421
            movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3422
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3423
            // Add 2nd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3424
            movq mm1, mm0          // mov updated Raws to mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3425
            psllq mm1, ShiftBpp    // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3426
                                   // there is no need for any mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3427
                                   // since shift clears inactive bits/bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3428
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3429
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3430
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3431
            movq [edi+ebx-8], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3432
            movq mm1, mm0          // Prep for doing 1st add at top of loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3433
            jb dsub4lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3434
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3435
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3436
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3437
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3438
      case 2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3439
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3440
         ActiveMask.use  = 0x00000000ffff0000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3441
         ShiftBpp.use = 16;       // == 2 * 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3442
         ShiftRem.use = 48;       // == 64 - 16
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3443
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3444
            movq mm7, ActiveMask  // Load ActiveMask for 2nd active byte group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3445
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3446
            movq mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3447
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3448
            psllq mm6, ShiftBpp     // Move mask in mm6 to cover 3rd active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3449
                                    //  byte group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3450
            mov esi, edi            // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3451
            movq mm5, mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3452
            add edi, bpp            // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3453
            psllq mm5, ShiftBpp     // Move mask in mm5 to cover 4th active
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3454
                                    //  byte group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3455
            // PRIME the pump (load the first Raw(x-bpp) data set
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3456
            movq mm1, [edi+ebx-8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3457
dsub2lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3458
            // Add 1st active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3459
            psrlq mm1, ShiftRem     // Shift data for adding 1st bpp bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3460
                                    // no need for mask; shift clears inactive
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3461
                                    //  bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3462
            movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3463
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3464
            // Add 2nd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3465
            movq mm1, mm0           // mov updated Raws to mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3466
            psllq mm1, ShiftBpp     // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3467
            pand mm1, mm7           // mask to use only 2nd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3468
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3469
            // Add 3rd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3470
            movq mm1, mm0           // mov updated Raws to mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3471
            psllq mm1, ShiftBpp     // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3472
            pand mm1, mm6           // mask to use only 3rd active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3473
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3474
            // Add 4th active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3475
            movq mm1, mm0           // mov updated Raws to mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3476
            psllq mm1, ShiftBpp     // shift data to position correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3477
            pand mm1, mm5           // mask to use only 4th active group
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3478
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3479
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3480
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3481
            movq [edi+ebx-8], mm0   // Write updated Raws back to array
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3482
            movq mm1, mm0           // Prep for doing 1st add at top of loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3483
            jb dsub2lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3484
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3485
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3486
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3487
      case 8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3488
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3489
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3490
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3491
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3492
            mov esi, edi            // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3493
            add edi, bpp            // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3494
            mov ecx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3495
            movq mm7, [edi+ebx-8]   // PRIME the pump (load the first
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3496
                                    // Raw(x-bpp) data set
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3497
            and ecx, 0x0000003f     // calc bytes over mult of 64
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3498
dsub8lp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3499
            movq mm0, [edi+ebx]     // Load Sub(x) for 1st 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3500
            paddb mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3501
            movq mm1, [edi+ebx+8]   // Load Sub(x) for 2nd 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3502
            movq [edi+ebx], mm0    // Write Raw(x) for 1st 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3503
                                   // Now mm0 will be used as Raw(x-bpp) for
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3504
                                   // the 2nd group of 8 bytes.  This will be
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3505
                                   // repeated for each group of 8 bytes with
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3506
                                   // the 8th group being used as the Raw(x-bpp)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3507
                                   // for the 1st group of the next loop.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3508
            paddb mm1, mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3509
            movq mm2, [edi+ebx+16]  // Load Sub(x) for 3rd 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3510
            movq [edi+ebx+8], mm1   // Write Raw(x) for 2nd 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3511
            paddb mm2, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3512
            movq mm3, [edi+ebx+24]  // Load Sub(x) for 4th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3513
            movq [edi+ebx+16], mm2  // Write Raw(x) for 3rd 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3514
            paddb mm3, mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3515
            movq mm4, [edi+ebx+32]  // Load Sub(x) for 5th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3516
            movq [edi+ebx+24], mm3  // Write Raw(x) for 4th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3517
            paddb mm4, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3518
            movq mm5, [edi+ebx+40]  // Load Sub(x) for 6th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3519
            movq [edi+ebx+32], mm4  // Write Raw(x) for 5th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3520
            paddb mm5, mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3521
            movq mm6, [edi+ebx+48]  // Load Sub(x) for 7th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3522
            movq [edi+ebx+40], mm5  // Write Raw(x) for 6th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3523
            paddb mm6, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3524
            movq mm7, [edi+ebx+56]  // Load Sub(x) for 8th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3525
            movq [edi+ebx+48], mm6  // Write Raw(x) for 7th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3526
            add ebx, 64
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3527
            paddb mm7, mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3528
            cmp ebx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3529
            movq [edi+ebx-8], mm7   // Write Raw(x) for 8th 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3530
            jb dsub8lp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3531
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3532
            jnb dsub8lt8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3533
dsub8lpA:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3534
            movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3535
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3536
            paddb mm0, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3537
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3538
            movq [edi+ebx-8], mm0   // use -8 to offset early add to ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3539
            movq mm7, mm0           // Move calculated Raw(x) data to mm1 to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3540
                                    // be the new Raw(x-bpp) for the next loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3541
            jb dsub8lpA
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3542
dsub8lt8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3543
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3544
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3545
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3546
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3547
      default:                // bpp greater than 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3548
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3549
         _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3550
            mov ebx, diff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3551
            mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3552
            mov esi, edi           // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3553
            add edi, bpp           // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3554
dsubAlp:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3555
            movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3556
            movq mm1, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3557
            add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3558
            paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3559
            cmp ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3560
            movq [edi+ebx-8], mm0  // mov does not affect flags; -8 to offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3561
                                   //  add ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3562
            jb dsubAlp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3563
         } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3564
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3565
      break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3566
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3567
   } // end switch ( bpp )
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3568
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3569
   _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3570
        mov ebx, MMXLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3571
        mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3572
        cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3573
        jnb dsubend
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3574
        mov esi, edi               // lp = row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3575
        xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3576
        add edi, bpp               // rp = row + bpp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3577
dsublp2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3578
        mov al, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3579
        add [edi+ebx], al
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3580
        inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3581
        cmp ebx, FullLength
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3582
        jb dsublp2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3583
dsubend:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3584
        emms             // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3585
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3586
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3587
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3588
// Optimized code for PNG Up filter decoder
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3589
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3590
png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3591
   png_bytep prev_row)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3592
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3593
   png_uint_32 len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3594
   len  = row_info->rowbytes;       // # of bytes to filter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3595
   _asm {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3596
      mov edi, row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3597
      // get # of bytes to alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3598
      mov ecx, edi
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3599
      xor ebx, ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3600
      add ecx, 0x7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3601
      xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3602
      and ecx, 0xfffffff8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3603
      mov esi, prev_row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3604
      sub ecx, edi
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3605
      jz dupgo
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3606
      // fix alignment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3607
duplp1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3608
      mov al, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3609
      add al, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3610
      inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3611
      cmp ebx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3612
      mov [edi + ebx-1], al  // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3613
      jb duplp1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3614
dupgo:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3615
      mov ecx, len
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3616
      mov edx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3617
      sub edx, ebx                  // subtract alignment fix
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3618
      and edx, 0x0000003f           // calc bytes over mult of 64
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3619
      sub ecx, edx                  // drop over bytes from length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3620
      // Unrolled loop - use all MMX registers and interleave to reduce
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3621
      // number of branch instructions (loops) and reduce partial stalls
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3622
duploop:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3623
      movq mm1, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3624
      movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3625
      movq mm3, [esi+ebx+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3626
      paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3627
      movq mm2, [edi+ebx+8]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3628
      movq [edi+ebx], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3629
      paddb mm2, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3630
      movq mm5, [esi+ebx+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3631
      movq [edi+ebx+8], mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3632
      movq mm4, [edi+ebx+16]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3633
      movq mm7, [esi+ebx+24]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3634
      paddb mm4, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3635
      movq mm6, [edi+ebx+24]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3636
      movq [edi+ebx+16], mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3637
      paddb mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3638
      movq mm1, [esi+ebx+32]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3639
      movq [edi+ebx+24], mm6
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3640
      movq mm0, [edi+ebx+32]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3641
      movq mm3, [esi+ebx+40]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3642
      paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3643
      movq mm2, [edi+ebx+40]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3644
      movq [edi+ebx+32], mm0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3645
      paddb mm2, mm3
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3646
      movq mm5, [esi+ebx+48]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3647
      movq [edi+ebx+40], mm2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3648
      movq mm4, [edi+ebx+48]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3649
      movq mm7, [esi+ebx+56]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3650
      paddb mm4, mm5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3651
      movq mm6, [edi+ebx+56]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3652
      movq [edi+ebx+48], mm4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3653
      add ebx, 64
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3654
      paddb mm6, mm7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3655
      cmp ebx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3656
      movq [edi+ebx-8], mm6 // (+56)movq does not affect flags;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3657
                                     // -8 to offset add ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3658
      jb duploop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3659
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3660
      cmp edx, 0                     // Test for bytes over mult of 64
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3661
      jz dupend
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3662
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3663
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3664
      // 2 lines added by lcreeve at netins.net
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3665
      // (mail 11 Jul 98 in png-implement list)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3666
      cmp edx, 8 //test for less than 8 bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3667
      jb duplt8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3668
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3669
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3670
      add ecx, edx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3671
      and edx, 0x00000007           // calc bytes over mult of 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3672
      sub ecx, edx                  // drop over bytes from length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3673
      jz duplt8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3674
      // Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3675
duplpA:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3676
      movq mm1, [esi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3677
      movq mm0, [edi+ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3678
      add ebx, 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3679
      paddb mm0, mm1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3680
      cmp ebx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3681
      movq [edi+ebx-8], mm0 // movq does not affect flags; -8 to offset add ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3682
      jb duplpA
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3683
      cmp edx, 0            // Test for bytes over mult of 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3684
      jz dupend
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3685
duplt8:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3686
      xor eax, eax
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3687
      add ecx, edx          // move over byte count into counter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3688
      // Loop using x86 registers to update remaining bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3689
duplp2:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3690
      mov al, [edi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3691
      add al, [esi + ebx]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3692
      inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3693
      cmp ebx, ecx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3694
      mov [edi + ebx-1], al // mov does not affect flags; -1 to offset inc ebx
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3695
      jb duplp2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3696
dupend:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3697
      // Conversion of filtered row completed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3698
      emms          // End MMX instructions; prep for possible FP instrs.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3699
   } // end _asm block
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3700
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3701
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3702
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3703
// Optimized png_read_filter_row routines
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3704
void /* PRIVATE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3705
png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3706
   row, png_bytep prev_row, int filter)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3707
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3708
#ifdef PNG_DEBUG
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3709
   char filnm[10];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3710
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3711
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3712
   if (mmx_supported == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3713
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3714
       /* this should have happened in png_init_mmx_flags() already */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3715
       png_warning(png_ptr, "asm_flags may not have been initialized");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3716
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3717
       png_mmx_support();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3718
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3719
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3720
#ifdef PNG_DEBUG
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3721
   png_debug(1, "in png_read_filter_row\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3722
   switch (filter)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3723
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3724
      case 0: sprintf(filnm, "none");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3725
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3726
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3727
      case 1: sprintf(filnm, "sub-%s",
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3728
        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB)? "MMX" : "x86");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3729
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3730
      case 2: sprintf(filnm, "up-%s",
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3731
        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP)? "MMX" : "x86");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3732
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3733
      case 3: sprintf(filnm, "avg-%s",
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3734
        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG)? "MMX" : "x86");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3735
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3736
      case 4: sprintf(filnm, "Paeth-%s",
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3737
        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH)? "MMX":"x86");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3738
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3739
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3740
      case 1: sprintf(filnm, "sub");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3741
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3742
      case 2: sprintf(filnm, "up");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3743
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3744
      case 3: sprintf(filnm, "avg");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3745
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3746
      case 4: sprintf(filnm, "Paeth");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3747
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3748
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3749
      default: sprintf(filnm, "unknw");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3750
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3751
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3752
   png_debug2(0,"row=%5d, %s, ", png_ptr->row_number, filnm);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3753
   png_debug2(0, "pd=%2d, b=%d, ", (int)row_info->pixel_depth,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3754
      (int)((row_info->pixel_depth + 7) >> 3));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3755
   png_debug1(0,"len=%8d, ", row_info->rowbytes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3756
#endif /* PNG_DEBUG */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3757
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3758
   switch (filter)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3759
   {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3760
      case PNG_FILTER_VALUE_NONE:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3761
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3762
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3763
      case PNG_FILTER_VALUE_SUB:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3764
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3765
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3766
         if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3767
             (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3768
             (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3769
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3770
         if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3771
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3772
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3773
            png_read_filter_row_mmx_sub(row_info, row);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3774
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3775
         else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3776
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3777
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3778
            png_uint_32 istop = row_info->rowbytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3779
            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3780
            png_bytep rp = row + bpp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3781
            png_bytep lp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3782
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3783
            for (i = bpp; i < istop; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3784
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3785
               *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3786
               rp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3787
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3788
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3789
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3790
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3791
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3792
      case PNG_FILTER_VALUE_UP:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3793
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3794
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3795
         if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3796
             (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3797
             (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3798
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3799
         if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3800
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3801
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3802
            png_read_filter_row_mmx_up(row_info, row, prev_row);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3803
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3804
         else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3805
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3806
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3807
            png_uint_32 istop = row_info->rowbytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3808
            png_bytep rp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3809
            png_bytep pp = prev_row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3810
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3811
            for (i = 0; i < istop; ++i)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3812
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3813
               *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3814
               rp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3815
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3816
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3817
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3818
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3819
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3820
      case PNG_FILTER_VALUE_AVG:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3821
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3822
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3823
         if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3824
             (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3825
             (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3826
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3827
         if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3828
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3829
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3830
            png_read_filter_row_mmx_avg(row_info, row, prev_row);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3831
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3832
         else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3833
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3834
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3835
            png_bytep rp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3836
            png_bytep pp = prev_row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3837
            png_bytep lp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3838
            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3839
            png_uint_32 istop = row_info->rowbytes - bpp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3840
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3841
            for (i = 0; i < bpp; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3842
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3843
               *rp = (png_byte)(((int)(*rp) +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3844
                  ((int)(*pp++) >> 1)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3845
               rp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3846
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3847
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3848
            for (i = 0; i < istop; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3849
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3850
               *rp = (png_byte)(((int)(*rp) +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3851
                  ((int)(*pp++ + *lp++) >> 1)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3852
               rp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3853
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3854
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3855
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3856
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3857
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3858
      case PNG_FILTER_VALUE_PAETH:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3859
      {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3860
#if !defined(PNG_1_0_X)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3861
         if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3862
             (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3863
             (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3864
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3865
         if (mmx_supported)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3866
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3867
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3868
            png_read_filter_row_mmx_paeth(row_info, row, prev_row);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3869
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3870
         else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3871
         {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3872
            png_uint_32 i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3873
            png_bytep rp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3874
            png_bytep pp = prev_row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3875
            png_bytep lp = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3876
            png_bytep cp = prev_row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3877
            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3878
            png_uint_32 istop=row_info->rowbytes - bpp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3879
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3880
            for (i = 0; i < bpp; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3881
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3882
               *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3883
               rp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3884
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3885
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3886
            for (i = 0; i < istop; i++)   // use leftover rp,pp
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3887
            {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3888
               int a, b, c, pa, pb, pc, p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3889
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3890
               a = *lp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3891
               b = *pp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3892
               c = *cp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3893
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3894
               p = b - c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3895
               pc = a - c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3896
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3897
#ifdef PNG_USE_ABS
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3898
               pa = abs(p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3899
               pb = abs(pc);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3900
               pc = abs(p + pc);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3901
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3902
               pa = p < 0 ? -p : p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3903
               pb = pc < 0 ? -pc : pc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3904
               pc = (p + pc) < 0 ? -(p + pc) : p + pc;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3905
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3906
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3907
               /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3908
                  if (pa <= pb && pa <= pc)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3909
                     p = a;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3910
                  else if (pb <= pc)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3911
                     p = b;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3912
                  else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3913
                     p = c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3914
                */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3915
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3916
               p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3917
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3918
               *rp = (png_byte)(((int)(*rp) + p) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3919
               rp++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3920
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3921
         }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3922
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3923
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3924
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3925
      default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3926
         png_warning(png_ptr, "Ignoring bad row filter type");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3927
         *row=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3928
         break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3929
   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3930
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3931
90ce3da70b43 Initial load
duke
parents:
diff changeset
  3932
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGVCRD */