jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageConvMxN_8ext.c
author bae
Fri, 15 Oct 2010 10:42:39 +0400
changeset 6814 c6e347fb5b20
parent 5506 202f599c92aa
permissions -rw-r--r--
6725821: Compiler warnings in medialib code Reviewed-by: igor, prr
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     2
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * FUNCTION
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *      mlib_v_convMxN_8ext - convolve a 8-bit image, MxN kernel,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 *                            edge = src extended
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * SYNOPSIS
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 *      mlib_status mlib_v_convMxNext_u8(mlib_image       *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 *                                       const mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *                                       mlib_s32         kwid,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 *                                       mlib_s32         khgt,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 *                                       mlib_s32         dx_l,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 *                                       mlib_s32         dx_r,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
 *                                       mlib_s32         dy_t,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 *                                       mlib_s32         dy_b,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 *                                       const mlib_s32   *skernel,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 *                                       mlib_s32         discardbits,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 *                                       mlib_s32         cmask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * ARGUMENT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 *      src       Ptr to source image structure
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 *      dst       Ptr to destination image structure
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 *      khgt         Kernel height (# of rows)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 *      kwid         Kernel width (# of cols)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 *      skernel      Ptr to convolution kernel
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 *      discardbits  The number of LSBits of the 32-bit accumulator that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 *                   are discarded when the 32-bit accumulator is converted
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 *                   to 16-bit output data; discardbits must be 1-15 (it
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 *                   cannot be zero). Same as exponent N for scalefac=2**N.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 *      cmask        Channel mask to indicate the channels to be convolved.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 *                   Each bit of which represents a channel in the image. The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 *                   channels corresponded to 1 bits are those to be processed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * DESCRIPTION
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 *      A 2-D convolution (MxN kernel) for 8-bit images.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
#include "vis_proto.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
#include "mlib_image.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
#include "mlib_ImageCopy.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
#include "mlib_ImageConv.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
#include "mlib_c_ImageConv.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
#include "mlib_v_ImageChannelExtract.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
#include "mlib_v_ImageChannelInsert.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
static mlib_status mlib_convMxN_8ext_f(mlib_image       *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
                                       const mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
                                       mlib_s32         m,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
                                       mlib_s32         n,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
                                       mlib_s32         dx_l,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
                                       mlib_s32         dx_r,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
                                       mlib_s32         dy_t,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
                                       mlib_s32         dy_b,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
                                       const mlib_s32   *kern,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
                                       mlib_s32         scale);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
static mlib_status mlib_convMxN_8ext_mask(mlib_image       *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
                                          const mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
                                          mlib_s32         m,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
                                          mlib_s32         n,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
                                          mlib_s32         dx_l,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
                                          mlib_s32         dx_r,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
                                          mlib_s32         dy_t,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
                                          mlib_s32         dy_b,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
                                          const mlib_s32   *kern,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
                                          mlib_s32         scale,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
                                          mlib_s32         cmask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
/*
 * Initial accumulator values, looked up as mlib_round_8[31 - scale] and
 * duplicated into a 64-bit value before the accumulation buffer is
 * filled with it.  Each entry carries the same 16-bit value in both
 * halves: 0x0040 >> index for indices 0..6, and zero for indices 7..15.
 * NOTE(review): presumably a rounding bias for the final pack step --
 * confirm against the VIS pack/scale semantics.
 *
 * The table is only ever read, so it is const-qualified.
 */
static const mlib_s32 mlib_round_8[16] = {
  0x00400040, 0x00200020, 0x00100010, 0x00080008,
  0x00040004, 0x00020002, 0x00010001, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000
};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
mlib_status mlib_convMxNext_u8(mlib_image       *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
                               const mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
                               const mlib_s32   *kernel,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
                               mlib_s32         kwid,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
                               mlib_s32         khgt,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
                               mlib_s32         dx_l,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
                               mlib_s32         dx_r,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
                               mlib_s32         dy_t,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
                               mlib_s32         dy_b,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
                               mlib_s32         discardbits,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
                               mlib_s32         cmask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
  mlib_s32 nchannel, amask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
  if (mlib_ImageConvVersion(kwid, khgt, discardbits, MLIB_BYTE) == 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
    return mlib_c_convMxNext_u8(dst, src, kernel, kwid, khgt,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
                                dx_l, dx_r, dy_t, dy_b, discardbits, cmask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
  nchannel = mlib_ImageGetChannels(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
  if (nchannel == 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
    cmask = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
  amask = (1 << nchannel) - 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
  if ((cmask & amask) == amask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
    return mlib_convMxN_8ext_f(dst, src, kwid, khgt, dx_l, dx_r, dy_t, dy_b, kernel,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
                               discardbits);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
  else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
    return mlib_convMxN_8ext_mask(dst, src, kwid, khgt, dx_l, dx_r, dy_t, dy_b, kernel,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
                                  discardbits, cmask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
/*
 * Largest kernel height n for which mlib_convMxN_8ext_f keeps its table of
 * row-buffer pointers in the on-stack array buffs_local, which is sized
 * 3 * (MAX_N + 1).  For n > MAX_N the table is allocated with mlib_malloc.
 */
#define MAX_N   11
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
mlib_status mlib_convMxN_8ext_f(mlib_image       *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
                                const mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
                                mlib_s32         m,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
                                mlib_s32         n,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
                                mlib_s32         dx_l,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
                                mlib_s32         dx_r,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
                                mlib_s32         dy_t,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
                                mlib_s32         dy_b,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
                                const mlib_s32   *kern,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
                                mlib_s32         scale)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
  mlib_d64 *buffs_local[3 * (MAX_N + 1)], **buffs = buffs_local, **buff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
  mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
  mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
  mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
  mlib_d64 dd, d0, d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
  mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
  mlib_u8 *sl, *dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
  mlib_s32 hgt = mlib_ImageGetHeight(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
  mlib_s32 wid = mlib_ImageGetWidth(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
  mlib_s32 sll = mlib_ImageGetStride(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
  mlib_s32 dll = mlib_ImageGetStride(dst);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
  mlib_u8 *adr_src = (mlib_u8 *) mlib_ImageGetData(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
  mlib_u8 *adr_dst = (mlib_u8 *) mlib_ImageGetData(dst);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
  mlib_s32 ssize, xsize, dsize, esize, buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
  mlib_d64 *pbuff, *dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
  mlib_f32 *karr = (mlib_f32 *) kern;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
  mlib_s32 gsr_scale = (31 - scale) << 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
  mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
  mlib_s32 i, j, l, ii;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
  mlib_s32 nchan = mlib_ImageGetChannels(dst);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
  if (n > MAX_N) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
    buffs = mlib_malloc(3 * (n + 1) * sizeof(mlib_d64 *));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
    if (buffs == NULL)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
      return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
  buff = buffs + 2 * (n + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
  sl = adr_src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
  dl = adr_dst;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
  ssize = nchan * (wid + (m - 1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
  dsize = (ssize + 7) / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
  esize = dsize + 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
  pbuff = mlib_malloc((n + 4) * esize * sizeof(mlib_d64));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
  if (pbuff == NULL) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
    if (buffs != buffs_local)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
      mlib_free(buffs);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
    return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
  for (i = 0; i < (n + 1); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
    buffs[i] = pbuff + i * esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
  for (i = 0; i < (n + 1); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
    buffs[(n + 1) + i] = buffs[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
  buffd = buffs[n] + esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
  buffe = buffd + 2 * esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
  xsize = ssize - nchan * (m - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
  ssize -= nchan * (dx_l + dx_r);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
  vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
  for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
    mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
    mlib_ImageCopy_na((mlib_u8 *) sl, (mlib_u8 *) buffn + dx_l * nchan, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
    for (i = 0; i < nchan; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
      for (ii = 0; ii < dx_l; ii++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
        *((mlib_u8 *) buffn + i + nchan * ii) = *((mlib_u8 *) buffn + i + nchan * dx_l);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
    for (i = 0; i < nchan; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
      for (ii = 0; ii < dx_r; ii++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
        *((mlib_u8 *) buffn + i + nchan * ii + ssize + dx_l * nchan) =
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
          *((mlib_u8 *) buffn + i + nchan * (dx_l - 1) + ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
    if ((l >= dy_t) && (l < hgt + n - dy_b - 2))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
      sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
  /* init buffer */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
  for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
    buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
    buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
  for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
    mlib_d64 **buffc = buffs + buff_ind;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
    mlib_f32 *pk = karr, k0, k1, k2, k3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
    for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
      buff[l] = buffc[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
    buffn = buffc[n];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
    mlib_ImageCopy_na((mlib_u8 *) sl, (mlib_u8 *) buffn + dx_l * nchan, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
    for (i = 0; i < nchan; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
      for (ii = 0; ii < dx_l; ii++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        *((mlib_u8 *) buffn + i + nchan * ii) = *((mlib_u8 *) buffn + i + nchan * dx_l);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
    for (i = 0; i < nchan; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
      for (ii = 0; ii < dx_r; ii++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
        *((mlib_u8 *) buffn + i + nchan * ii + ssize + dx_l * nchan) =
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
          *((mlib_u8 *) buffn + i + nchan * (dx_l - 1) + ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
    ik_last = (m - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
    for (jk = 0; jk < n; jk += jk_size) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
      jk_size = n - jk;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
      if (jk_size >= 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
        jk_size = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
      if (jk_size == 5)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
        jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
      coff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
      if (jk_size == 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
        for (ik = 0; ik < m; ik++, coff += nchan) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
          if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
          doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
          buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
          s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
          for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
            s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
            s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
            s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
            d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
            d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
            d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
            d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
            d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
            buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
            buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
        pk += m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
      else if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        for (ik = 0; ik < m; ik++, coff += nchan) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
          if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
          k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
          doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
          buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
          buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
          s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
          s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
          for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
            s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
            s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
            s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
            s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
            s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
            s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
            d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
            d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
            d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
            d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
            d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
            d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
            d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
            d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
            d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
            d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
            buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
            buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        pk += 2 * m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
      else if (jk_size == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
        for (ik = 0; ik < m; ik++, coff += nchan) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
          if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
          k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
          k2 = pk[ik + 2 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
          doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
          buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
          buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
          buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
          if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
              s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
              s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
              s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
              d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
              d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
              d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
              d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
          else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
              d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
              d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
              d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
              d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
          else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
              s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
              s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
              s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
              d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
              d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
              d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
              d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        pk += 3 * m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
      else {                                /* jk_size == 4 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
        for (ik = 0; ik < m; ik++, coff += nchan) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
          if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
          k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
          k2 = pk[ik + 2 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
          k3 = pk[ik + 3 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
          doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
          buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
          buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
          buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
          buff3 = buff[jk + 3] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
          if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
              s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
              s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
              s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
              s3 = buff3[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
              d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
              d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
              d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
              d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
              d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
              d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
          else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
            s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
              s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
              s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
              d30 = vis_fmul8x16au(vis_read_lo(s30), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
              d31 = vis_fmul8x16au(vis_read_hi(s31), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
              d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
              d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
              d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
              d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
              d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
              d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
          else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
            s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
              s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
              s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
              s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
              s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
              s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
              s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
              d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
              d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
              d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
              d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
              d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
              d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
        pk += 4 * m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
    /*****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
     *****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
     **          Final iteration            **
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
     *****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
     *****************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
    jk_size = n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
    if (jk_size >= 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
      jk_size = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
    if (jk_size == 5)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
      jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
    k0 = karr[ik_last];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
    k1 = karr[ik_last + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
    k2 = karr[ik_last + 2 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
    k3 = karr[ik_last + 3 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
    off = ik_last * nchan;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
    doff = off / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
    off &= 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
    buff0 = buff[0] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
    buff1 = buff[1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
    buff2 = buff[2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
    buff3 = buff[3] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
    vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
    if (jk_size == 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
      dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
      for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
        s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
        d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
        d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
        buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
        buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
    else if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
      dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
      s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
      for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
        s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
        s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
        d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
        d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
        buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
        buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
    else if (jk_size == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
      dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
      s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
      s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
      for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
        s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
        s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
        s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
        s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
        s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
        d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
        d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
        buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
        buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
    else {                                  /* if (jk_size == 4) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
      dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
      s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
      s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
      s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
      for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
        s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
        s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
        s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
        s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
        s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
        s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
        s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
        s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
        d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
        d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
        d0 = vis_fpadd16(d0, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        d1 = vis_fpadd16(d1, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
        buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
        buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
    mlib_ImageCopy_na((mlib_u8 *) buffe, dl, xsize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
    if (j < hgt - dy_b - 2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
      sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
    dl += dll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
    buff_ind++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
    if (buff_ind >= (n + 1))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
      buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
  mlib_free(pbuff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
  if (buffs != buffs_local)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
    mlib_free(buffs);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
  return MLIB_SUCCESS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
mlib_status mlib_convMxN_8ext_mask(mlib_image       *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
                                   const mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
                                   mlib_s32         m,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
                                   mlib_s32         n,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
                                   mlib_s32         dx_l,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
                                   mlib_s32         dx_r,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
                                   mlib_s32         dy_t,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
                                   mlib_s32         dy_b,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
                                   const mlib_s32   *kern,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
                                   mlib_s32         scale,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
                                   mlib_s32         cmask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
  mlib_d64 *buffs_local[3 * (MAX_N + 1)], **buffs = buffs_local, **buff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
  mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
  mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
  mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
  mlib_d64 dd, d0, d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
  mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
  mlib_u8 *sl, *dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
  mlib_s32 hgt = mlib_ImageGetHeight(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
  mlib_s32 wid = mlib_ImageGetWidth(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
  mlib_s32 sll = mlib_ImageGetStride(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
  mlib_s32 dll = mlib_ImageGetStride(dst);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
  mlib_u8 *adr_src = (mlib_u8 *) mlib_ImageGetData(src);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
  mlib_u8 *adr_dst = (mlib_u8 *) mlib_ImageGetData(dst);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
  mlib_s32 ssize, xsize, dsize, esize, buff_ind;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
  mlib_d64 *pbuff, *dp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
  mlib_f32 *karr = (mlib_f32 *) kern;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
  mlib_s32 gsr_scale = (31 - scale) << 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
  mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
  mlib_s32 i, j, l, chan, testchan;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
  mlib_s32 nchan = mlib_ImageGetChannels(dst);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
  void (*p_proc_load) (const mlib_u8 *, mlib_u8 *, mlib_s32, mlib_s32);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
  void (*p_proc_store) (const mlib_u8 *, mlib_u8 *, mlib_s32, mlib_s32);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
  if (n > MAX_N) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
    buffs = mlib_malloc(3 * (n + 1) * sizeof(mlib_d64 *));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
    if (buffs == NULL)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
      return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
  buff = buffs + 2 * (n + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
  ssize = (wid + (m - 1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
  dsize = (ssize + 7) / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
  esize = dsize + 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
  pbuff = mlib_malloc((n + 4) * esize * sizeof(mlib_d64));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
  if (pbuff == NULL) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
    if (buffs != buffs_local)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
      mlib_free(buffs);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
    return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
  for (i = 0; i < (n + 1); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
    buffs[i] = pbuff + i * esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
  for (i = 0; i < (n + 1); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
    buffs[(n + 1) + i] = buffs[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
  buffd = buffs[n] + esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
  buffe = buffd + 2 * esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
  xsize = wid;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
  ssize -= (dx_l + dx_r);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
  vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
  if (nchan == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
    p_proc_load = &mlib_v_ImageChannelExtract_U8_21_D1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
    p_proc_store = &mlib_v_ImageChannelInsert_U8_12_D1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
  else if (nchan == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
    p_proc_load = &mlib_v_ImageChannelExtract_U8_31_D1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
    p_proc_store = &mlib_v_ImageChannelInsert_U8_13_D1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
  else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
    p_proc_load = &mlib_v_ImageChannelExtract_U8_41_D1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
    p_proc_store = &mlib_v_ImageChannelInsert_U8_14_D1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
  testchan = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
  for (chan = 0; chan < nchan; chan++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
    buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
    sl = adr_src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
    dl = adr_dst;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
    if ((cmask & testchan) == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
      testchan <<= 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
      continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
    for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
      mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
      (*p_proc_load) ((mlib_u8 *) sl, (mlib_u8 *) buffn + dx_l, ssize, testchan);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
      for (i = 0; i < dx_l; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
        *((mlib_u8 *) buffn + i) = *((mlib_u8 *) buffn + dx_l);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
      for (i = 0; i < dx_r; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
        *((mlib_u8 *) buffn + i + ssize + dx_l) =
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
          *((mlib_u8 *) buffn + (dx_l - 1) + ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
      if ((l >= dy_t) && (l < hgt + n - dy_b - 2))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
        sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
    /* init buffer */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
    for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
      buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
      buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
    for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
      mlib_d64 **buffc = buffs + buff_ind;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
      mlib_f32 *pk = karr, k0, k1, k2, k3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
      for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
        buff[l] = buffc[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
      buffn = buffc[n];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
      (*p_proc_load) ((mlib_u8 *) sl, (mlib_u8 *) buffn + dx_l, ssize, testchan);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
      for (i = 0; i < dx_l; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
        *((mlib_u8 *) buffn + i) = *((mlib_u8 *) buffn + dx_l);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
      for (i = 0; i < dx_r; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
        *((mlib_u8 *) buffn + i + ssize + dx_l) =
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
          *((mlib_u8 *) buffn + (dx_l - 1) + ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
      ik_last = (m - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
      for (jk = 0; jk < n; jk += jk_size) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
        jk_size = n - jk;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
        if (jk_size >= 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
          jk_size = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
        if (jk_size == 5)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
          jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
        coff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
        if (jk_size == 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
          for (ik = 0; ik < m; ik++, coff++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
            if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
              continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
            k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
            doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
            buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
            off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
            vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
              s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
              d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
              d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
          pk += m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
        else if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
          for (ik = 0; ik < m; ik++, coff++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
            if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
              continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
            k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
            k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
            doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
            buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
            buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
            off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
            vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
            for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
              s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
              s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
              d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
              d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
              d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
              d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
              d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
              d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
              buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
              buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
          pk += 2 * m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
        else if (jk_size == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
          for (ik = 0; ik < m; ik++, coff++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
            if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
              continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
            k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
            k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
            k2 = pk[ik + 2 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
            doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
            buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
            buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
            buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
            off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
            vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
            if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
              for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
                d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
                d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
                s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
                s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
                s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
                d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
                d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
                d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
                d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
                d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
                d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
                d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
                d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
                d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
                d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
                d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
                d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
                buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
                buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
              }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
            else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
              s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
              s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
              s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
              for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
                d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
                d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
                s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
                s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
                s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
                s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
                s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
                s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
                d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
                d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
                d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
                d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
                d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
                d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
                d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
                d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
                d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
                d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
                d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
                d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
                buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
                buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
              }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
              s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
              s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
              s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
              for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
                d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
                d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
                s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
                s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
                s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
                s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
                s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
                s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
                s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
                d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
                d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
                d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
                d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
                d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
                d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
                d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
                d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
                d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
                d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
                d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
                d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
                buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
                buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
              }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
          pk += 3 * m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
        else {                              /* jk_size == 4 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
          for (ik = 0; ik < m; ik++, coff++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
            if (!jk && ik == ik_last)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
              continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
            k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
            k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
            k2 = pk[ik + 2 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
            k3 = pk[ik + 3 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
            doff = coff / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
            buff0 = buff[jk] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
            buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
            buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
            buff3 = buff[jk + 3] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
            off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
            vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
            if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
              for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
                d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
                d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
                s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
                s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
                s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
                s3 = buff3[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
                d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
                d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
                d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
                d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
                d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
                d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
                d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
                d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
                d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
                d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
                d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
                d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
                d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
                d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
                d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
                d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
                buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
                buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
              }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
            else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
              s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
              s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
              s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
              s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
              for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
                d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
                d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
                s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
                s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
                s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
                s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
                s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
                s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
                s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
                s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
                d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
                d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
                d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
                d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
                d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
                d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
                d30 = vis_fmul8x16au(vis_read_lo(s30), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
                d31 = vis_fmul8x16au(vis_read_hi(s31), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
                d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
                d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
                d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
                d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
                d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
                d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
                d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
                d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
                buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
                buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
              }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
              s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
              s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
              s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
              s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
              for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
                d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
                d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
                s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
                s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
                s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
                s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
                s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
                s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
                s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
                s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
                s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
                s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
                s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
                s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
                d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
                d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
                d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
                d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
                d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
                d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
                d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
                d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
                d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
                d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
                d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
                d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
                d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
                d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
                d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
                d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
                buffd[2 * i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
                buffd[2 * i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
              }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
          pk += 4 * m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
      /*****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
       *****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
       **          Final iteration            **
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
       *****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
       *****************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
      jk_size = n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
      if (jk_size >= 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
        jk_size = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
      if (jk_size == 5)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
        jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
      k0 = karr[ik_last];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
      k1 = karr[ik_last + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
      k2 = karr[ik_last + 2 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
      k3 = karr[ik_last + 3 * m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
      off = ik_last;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
      doff = off / 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
      off &= 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
      buff0 = buff[0] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
      buff1 = buff[1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
      buff2 = buff[2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
      buff3 = buff[3] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
      vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
      if (jk_size == 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
        dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
        s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
        for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
          s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
          s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
          s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
          d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
          d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
          d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
          d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
          dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
          dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
          buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
          buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
      else if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
        dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
        s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
        s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
        for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
          s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
          s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
          s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
          s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
          s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
          s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
          d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
          d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
          d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
          d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
          d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
          d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
          dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
          dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
          buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
          buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
      else if (jk_size == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
        dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
        s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
        s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
        s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
        for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
          s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
          s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
          s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
          s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
          s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
          s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
          s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
          s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
          s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
          d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
          d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
          d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
          d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
          d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
          d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
          d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
          d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
          dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
          dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
          buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
          buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
      else {                                /* if (jk_size == 4) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
        dp = buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
        s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
        s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
        s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
        s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
        for (i = 0; i < (xsize + 7) / 8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
          s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
          s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
          s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
          s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
          s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
          s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
          s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
          s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
          s0 = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
          s1 = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
          s2 = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
          s3 = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
          d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
          d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
          d0 = buffd[2 * i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
          d1 = buffd[2 * i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
          d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
          d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
          d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
          d0 = vis_fpadd16(d0, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
          d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
          d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
          d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
          d1 = vis_fpadd16(d1, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
          dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
          dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
          buffd[2 * i] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
          buffd[2 * i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
      (*p_proc_store) ((mlib_u8 *) buffe, (mlib_u8 *) dl, xsize, testchan);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
      if (j < hgt - dy_b - 2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
        sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
      dl += dll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
      buff_ind++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
      if (buff_ind >= (n + 1))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
        buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
    testchan <<= 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
  mlib_free(pbuff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
  if (buffs != buffs_local)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
    mlib_free(buffs);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
  return MLIB_SUCCESS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
/***************************************************************/