jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageConvIndex3_8_16nw.c
author duke
Sat, 01 Dec 2007 00:00:00 +0000
changeset 2 90ce3da70b43
child 5506 202f599c92aa
permissions -rw-r--r--
Initial load
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
     2
 * Copyright 2000-2003 Sun Microsystems, Inc.  All Rights Reserved.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Sun designates this
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load
duke
parents:
diff changeset
     9
 * by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    21
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    22
 * CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    23
 * have any questions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * FUNCTION
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *      Internal functions for mlib_ImageConv* on U8 type
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 *      and MLIB_EDGE_DST_NO_WRITE mask
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
#include <vis_proto.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
#include <mlib_image.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
#include <mlib_ImageCheck.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
#include <mlib_ImageColormap.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
  This defines switches between functions in
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
  files: mlib_v_ImageConv_8nw.c,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
         mlib_v_ImageConvIndex3_8_8nw.c,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
         mlib_v_ImageConvIndex4_8_8nw.c,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
         mlib_v_ImageConvIndex3_8_16nw.c,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
         mlib_v_ImageConvIndex4_8_16nw.c
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
/*
 * Build the indexed (colormap) variant of this template.  When defined,
 * CONV_FUNC takes an extra "colormap" argument, NCHAN is fixed at 3,
 * DEF_EXTRA_VARS sets up the lookup tables, and source rows are expanded
 * through LOAD_SRC() instead of being copied with mlib_ImageCopy_na().
 */
#define CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
/*
 * Element type of the source and destination image data as addressed by
 * DEF_VARS (sl, sp, dl, adr_src, adr_dst).  Strides are converted to
 * units of this type, and LOAD_SRC() uses the values read through sp as
 * lookup-table indices.
 */
#define DTYPE mlib_s16
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
/*
 * Element type of the colormap lookup tables set up in DEF_EXTRA_VARS
 * (lut_table and ltbl0..ltbl3).
 */
#define LTYPE mlib_u8
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
/*
 * CONV_FUNC(KERN) expands to the name and parameter list of the
 * convolution entry point for kernel size KERN; it is used below as
 * "mlib_status CONV_FUNC(2x2)".
 *
 *   CONV_INDEX defined : mlib_conv<KERN>_Index3_8_16nw(dst, src, kern,
 *                                                      scale, colormap)
 *   otherwise          : mlib_conv<KERN>_8nw_f(dst, src, kern, scale)
 *
 * The parameter names are part of the contract: DEF_VARS refers to
 * dst, src, kern and scale by name, and the indexed DEF_EXTRA_VARS
 * refers to colormap.
 */
#ifdef CONV_INDEX

#define CONV_FUNC(KERN)                                         \
  mlib_conv##KERN##_Index3_8_16nw(mlib_image *dst,              \
                                  mlib_image *src,              \
                                  mlib_s32   *kern,             \
                                  mlib_s32   scale,             \
                                  void       *colormap)

#else

#define CONV_FUNC(KERN)                         \
  mlib_conv##KERN##_8nw_f(mlib_image *dst,      \
                          mlib_image *src,      \
                          mlib_s32   *kern,     \
                          mlib_s32   scale)

#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
/*
 * NCHAN is the number of interleaved channels per pixel.
 * The indexed variant always expands to 3 channels, so NCHAN is a
 * compile-time constant there; otherwise it is the run-time local
 * "nchan" declared by DEF_EXTRA_VARS.
 */
#ifdef CONV_INDEX

#define NCHAN  3

#else

#define NCHAN  nchan

#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
/*
 * DEF_VARS declares the locals shared by the convolution routines.
 * Expand it at the top of a function whose parameters are named
 * dst, src, kern and scale; the caller supplies the closing ';'.
 *
 *   sl, sp, dl        pointers into the source / destination data
 *   hgt, wid          height and width of src
 *   sll, dll          strides of src / dst in DTYPE elements
 *   adr_src, adr_dst  start of the src / dst data
 *   ssize, xsize, dsize, esize, emask
 *                     size and edge-mask values filled in by the function
 *   buff_ind          offset applied to the buffs[] line-buffer table
 *   pbuff, dp         work buffer from mlib_malloc and output pointer
 *   karr              kern viewed as an array of mlib_f32
 *   gsr_scale         (31 - scale) << 3; an alignment value is added to
 *                     it before it is passed to vis_write_gsr()
 *   drnd              rounding constant mlib_round_8[31 - scale]
 *   i, j, l           loop counters
 *
 * mlib_round_8 has 16 entries and scale is not validated here, so the
 * caller must pass a scale for which 0 <= 31 - scale <= 15.
 *
 * The strides are divided by (mlib_s32)sizeof(DTYPE) so the division is
 * done in signed arithmetic; a bare sizeof would first convert the
 * stride to an unsigned type.
 */
#define DEF_VARS                                                        \
  DTYPE    *sl, *sp, *dl;                                               \
  mlib_s32 hgt = mlib_ImageGetHeight(src);                              \
  mlib_s32 wid = mlib_ImageGetWidth(src);                               \
  mlib_s32 sll = mlib_ImageGetStride(src) / (mlib_s32)sizeof(DTYPE);    \
  mlib_s32 dll = mlib_ImageGetStride(dst) / (mlib_s32)sizeof(DTYPE);    \
  DTYPE    *adr_src = (DTYPE *)mlib_ImageGetData(src);                  \
  DTYPE    *adr_dst = (DTYPE *)mlib_ImageGetData(dst);                  \
  mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0;             \
  mlib_d64 *pbuff, *dp;                                                 \
  mlib_f32 *karr = (mlib_f32 *)kern;                                    \
  mlib_s32 gsr_scale = (31 - scale) << 3;                               \
  mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]);          \
  mlib_s32 i, j, l
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
/*
 * DEF_EXTRA_VARS declares the variant-specific locals; expand it right
 * after DEF_VARS (the caller supplies the closing ';').
 *
 * Indexed variant (CONV_INDEX):
 *   offset     LUT offset of colormap
 *   lut_table  per-channel lookup tables of colormap
 *   ltbl0..2   lut_table[n] moved back by offset, so LOAD_SRC() can
 *              index them directly with the values read from the source
 *   ltbl3      lut_table[3] - offset when NCHAN > 3, otherwise just an
 *              alias of ltbl2 (lut_table[3] is not touched)
 *
 * Non-indexed variant:
 *   nchan      channel count of dst; this is what NCHAN expands to
 */
#ifdef CONV_INDEX

#define DEF_EXTRA_VARS                                                  \
  int    offset = mlib_ImageGetLutOffset(colormap);                     \
  LTYPE  **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap);         \
  LTYPE  *ltbl0 = lut_table[0] - offset;                                \
  LTYPE  *ltbl1 = lut_table[1] - offset;                                \
  LTYPE  *ltbl2 = lut_table[2] - offset;                                \
  LTYPE  *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2

#else

#define DEF_EXTRA_VARS                          \
  mlib_s32 nchan = mlib_ImageGetChannels(dst)

#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
/*
 * LOAD_SRC() expands a run of source indices through the lookup tables
 * into three mlib_d64 words (24 bytes) and stores them at buffn[i],
 * buffn[i + 1] and buffn[i + 2].  It expects sp, buffn, i and the
 * ltblN pointers from DEF_EXTRA_VARS to be in scope, and advances sp.
 *
 *   NCHAN == 3 : consumes 8 indices (8 * 3 bytes), sp += 8
 *   otherwise  : consumes 6 indices (6 * 4 bytes), sp += 6
 *
 * Each statement pushes one table byte into t0/t1/t2 with
 * vis_faligndata().  Bytes are pushed in reverse order (last index,
 * last channel first), and every accumulator receives exactly eight
 * pushes, so t0..t2 are deliberately left uninitialised: their initial
 * contents are completely shifted out.
 * NOTE(review): this relies on the GSR alignment offset being 7 when the
 * macro runs (the 2x2 routine calls vis_write_gsr(gsr_scale + 7) before
 * its first LOAD_SRC loop) - confirm for every call site.
 */
#if NCHAN == 3

#define LOAD_SRC() {                                            \
    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
    mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7];    \
    mlib_d64 t0, t1, t2;                                        \
                                                                \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
                                                                \
    buffn[i] = t0;                                              \
    buffn[i + 1] = t1;                                          \
    buffn[i + 2] = t2;                                          \
                                                                \
    sp += 8;                                                    \
  }

#else

/* Only six indices are consumed per expansion, so sp[6] and sp[7] are
   not loaded here. */
#define LOAD_SRC() {                                            \
    mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3];    \
    mlib_s32 s4 = sp[4], s5 = sp[5];                            \
    mlib_d64 t0, t1, t2;                                        \
                                                                \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2);            \
    t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1);            \
    t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0);            \
    t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0);            \
                                                                \
    buffn[i] = t0;                                              \
    buffn[i + 1] = t1;                                          \
    buffn[i + 2] = t2;                                          \
                                                                \
    sp += 6;                                                    \
  }

#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
/*
 * Rounding constants, indexed by (31 - scale) in DEF_VARS and replicated
 * into drnd with vis_to_double_dup().  Entries 0..6 hold 0x40 >> index in
 * both 16-bit halves of the word; entries 7..15 are zero.
 * The table is only ever read, so it is declared const.
 */
static const mlib_s32 mlib_round_8[16] = {
  0x00400040, 0x00200020, 0x00100010, 0x00080008,
  0x00040004, 0x00020002, 0x00010001, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000
};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
/*
 * Local prototype; the function is not defined in the visible part of
 * this file.  It is called only in the non-indexed build, as
 * mlib_ImageCopy_na(sp, buffn, ssize), when the source row pointer sp is
 * not 8-byte aligned.
 * NOTE(review): presumably copies "size" bytes from sa to da without any
 * alignment requirement ("_na" = non-aligned) - confirm against the
 * definition.
 */
void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, int size);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
/*
 * Kernel edge length used by the 2x2 routine that follows: it sizes the
 * buffs[] line-buffer table and the work-buffer allocation, and
 * (KSIZE - 1) rows/columns are trimmed from hgt, wid and xsize.
 */
#define KSIZE  2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
mlib_status CONV_FUNC(2x2)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
  mlib_d64 *buffs[2*(KSIZE + 1)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
  mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
  mlib_d64 s00, s01, s10, s11, s0, s1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
  mlib_d64 d0, d1, d00, d01, d10, d11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
  DEF_VARS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
  DEF_EXTRA_VARS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
  sl = adr_src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
  dl = adr_dst;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
  ssize = NCHAN*wid;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
  dsize = (ssize + 7)/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
  esize = dsize + 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
  pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
  if (pbuff == NULL) return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
  for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
  for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
  buffd = buffs[KSIZE] + esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
  buffe = buffd + 2*esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
  wid -= (KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
  hgt -= (KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
  xsize = ssize - NCHAN*(KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
  vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
  for (l = 0; l < KSIZE; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
    mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
    sp = sl + l*sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
    for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
      LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
  for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
    mlib_d64 **buffc = buffs + buff_ind;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
    mlib_f32 *pk = karr, k0, k1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
    sp = sl + KSIZE*sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
    buff0 = buffc[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
    buff1 = buffc[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
    buffn = buffc[KSIZE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
    if ((((mlib_addr)(sl      )) & 7) == 0) buff0 = (mlib_d64*)sl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
    if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
    k0 = pk[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
    k1 = pk[3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
    vis_write_gsr(gsr_scale + NCHAN);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
    s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
    s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
      s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
      s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
      s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
      s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
      s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
      s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
      d0 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
      d1 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
      buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
      buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
    k0 = pk[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
    k1 = pk[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
    dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
    for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
      s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
      s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
      d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
      d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
      d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
      d0  = vis_fpadd16(d0, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
      d0  = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
      d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
      d1  = vis_fpadd16(d1, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
      d1  = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
      dp[i] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
    if (emask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
      s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
      s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
      d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
      d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
      d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
      d0  = vis_fpadd16(d0, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
      d0  = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
      d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
      d1  = vis_fpadd16(d1, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
      d1  = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
      d0 = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
      vis_pst_8(d0, dp + i, emask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
    if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
    vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
    for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
      mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
      mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
      mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
      mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
      mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
      mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
      mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
      mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
      mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
      d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
      d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
      d2 = buffd[2*i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
      d3 = buffd[2*i + 3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
      d4 = buffd[2*i + 4];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
      d5 = buffd[2*i + 5];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
      d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
      d0  = vis_fpadd16(d0, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
      d0  = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
      d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
      d1  = vis_fpadd16(d1, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
      d1  = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
      d02 = vis_fpadd16(d02, d12);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
      d2  = vis_fpadd16(d2, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
      d2  = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
      d03 = vis_fpadd16(d03, d13);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
      d3  = vis_fpadd16(d3, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
      d3  = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
      d04 = vis_fpadd16(d04, d14);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
      d4  = vis_fpadd16(d4, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
      d4  = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
      d05 = vis_fpadd16(d05, d15);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
      d5  = vis_fpadd16(d5, drnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
      d5  = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
      buffe[i    ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
      buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
      buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
      LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
    mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
    sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
    dl += dll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
    buff_ind++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
    if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
  mlib_free(pbuff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
  return MLIB_SUCCESS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
#undef  KSIZE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
/*
 * KSIZE is redefined to 3 for the CONV_FUNC(3x3) routine that follows.
 * In that routine it sizes the ring of row buffers (KSIZE + 1 entries,
 * mirrored once in buffs[]), gives the stride between kernel rows in
 * the coefficient array (pk[ik], pk[ik + KSIZE], pk[ik + 2*KSIZE]),
 * and bounds the loop over kernel columns.
 */
#define KSIZE  3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
mlib_status CONV_FUNC(3x3)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
  mlib_d64 *buffs[2*(KSIZE + 1)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
  mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
  mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
  mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
  mlib_s32 ik, ik_last, off, doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
  DEF_VARS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
  DEF_EXTRA_VARS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
  sl = adr_src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
  dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
  dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
  ssize = NCHAN*wid;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
  dsize = (ssize + 7)/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
  esize = dsize + 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
  pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
  if (pbuff == NULL) return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
  for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
  for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
  buffd = buffs[KSIZE] + esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
  buffe = buffd + 2*esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
  wid -= (KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
  hgt -= (KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
  xsize = ssize - NCHAN*(KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
  vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
  for (l = 0; l < KSIZE; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
    mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
    sp = sl + l*sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
    for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
      LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
  /* init buffer */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
  for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
    buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
    buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
  for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
    mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
    mlib_f32 *pk = karr, k0, k1, k2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
    sp = sl + KSIZE*sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
    pbuff0 = buffc[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
    pbuff1 = buffc[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
    pbuff2 = buffc[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
    buffn  = buffc[KSIZE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
    if ((((mlib_addr)(sl        )) & 7) == 0) pbuff0 = (mlib_d64*)sl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
    if ((((mlib_addr)(sl +   sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
    if ((((mlib_addr)(sl + 2*sll)) & 7) == 0) pbuff2 = (mlib_d64*)(sl + 2*sll);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
    ik_last = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
    ik_last = (KSIZE - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
    for (ik = 0; ik < KSIZE; ik++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
      k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
      k1 = pk[ik + KSIZE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
      k2 = pk[ik + 2*KSIZE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
      off  = ik*NCHAN;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
      doff = off/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
      off &= 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
      buff0 = pbuff0 + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
      buff1 = pbuff1 + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
      buff2 = pbuff2 + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
      vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
      if (ik == ik_last) continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
      /*if (!ik_last) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
        if ((off & 3) || (ik == (KSIZE - 1))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
          ik_last = ik;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
          continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
      }*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
      if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
        for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
          s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
          s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
          s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
          d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
          d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
          d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
          d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
          d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
          d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
          d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
          d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
          buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
          buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
      } else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
        s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
        s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
        s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
        for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
          s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
          s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
          s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
          s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
          s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
          s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
          d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
          d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
          d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
          d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
          d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
          d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
          d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
          d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
          d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
          d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
          d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
          d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
          d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
          d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
          buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
          buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
      } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
        s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
        s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
        s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
        for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
          s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
          s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
          s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
          s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
          s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
          s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
          s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
          s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
          s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
          d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
          d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
          d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
          d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
          d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
          d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
          d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
          d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
          d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
          d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
          d0 = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
          d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
          d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
          d1 = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
          buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
          buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
    k0 = pk[ik_last];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
    k1 = pk[ik_last + KSIZE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
    k2 = pk[ik_last + 2*KSIZE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
    off  = ik_last*NCHAN;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
    doff = off/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
    off &= 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
    buff0 = pbuff0 + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
    buff1 = pbuff1 + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
    buff2 = pbuff2 + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
    vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
    dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
    s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
    s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
    s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
    for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
      s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
      s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
      s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
      s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
      s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
      s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
      s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
      s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
      s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
      d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
      d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
      d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
      d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
      d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
      d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
      d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
      d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
      dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
      dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
      buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
      buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
    if (emask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
      s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
      s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
      s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
      s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
      s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
      s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
      s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
      s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
      s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
      d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
      d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
      d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
      d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
      d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
      d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
      d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
      d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
      d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
      d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
      d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
      d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
      d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
      d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
      dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
      vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
      buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
      buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
    if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
    vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
    for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
      mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
      mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
      mlib_d64 d20, d21, d22, d23, d24, d25;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
      mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
      mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
      mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
      mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
      mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
      mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
      mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
      mlib_d64 s20 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
      mlib_d64 s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
      mlib_d64 s22 = buff2[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
      d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
      d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
      d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
      d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
      d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
      d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
      d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
      d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
      d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
      d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
      d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
      d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
      d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
      d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
      d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
      d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
      d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
      d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
      d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
      d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
      d2 = buffd[2*i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
      d3 = buffd[2*i + 3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
      d4 = buffd[2*i + 4];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
      d5 = buffd[2*i + 5];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
      d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
      d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
      d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
      d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
      d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
      d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
      d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
      d2 = vis_fpadd16(d2, d12);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
      d2 = vis_fpadd16(d2, d22);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
      d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
      d3 = vis_fpadd16(d3, d13);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
      d3 = vis_fpadd16(d3, d23);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
      d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
      d4 = vis_fpadd16(d4, d14);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
      d4 = vis_fpadd16(d4, d24);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
      d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
      d5 = vis_fpadd16(d5, d15);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
      d5 = vis_fpadd16(d5, d25);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
      buffe[i    ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
      buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
      buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
      buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
      buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
      buffd[2*i + 2] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
      buffd[2*i + 3] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
      buffd[2*i + 4] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
      buffd[2*i + 5] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
      LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
    mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
    sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
    dl += dll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
    buff_ind++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
    if (buff_ind >= (KSIZE + 1)) buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
  mlib_free(pbuff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
  return MLIB_SUCCESS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
/***************************************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
/* Drop the fixed kernel-size macro used by the routine above; the MxN routine
 * that follows receives its kernel dimensions through the m and n parameters. */
#undef  KSIZE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
/* Largest kernel height n served by the on-stack buffs_local pointer table,
 * which holds 3*(MAX_N + 1) entries; when n exceeds MAX_N the table is
 * allocated with mlib_malloc instead. */
#define MAX_N   11
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
mlib_status mlib_convMxN_Index3_8_16nw(mlib_image *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
                                       mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
                                       mlib_s32   m,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
                                       mlib_s32   n,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
                                       mlib_s32   dm,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
                                       mlib_s32   dn,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
                                       mlib_s32   *kern,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
                                       mlib_s32   scale,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
                                       void       *colormap)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
mlib_status mlib_convMxN_8nw_f(mlib_image *dst,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
                               mlib_image *src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
                               mlib_s32   m,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
                               mlib_s32   n,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
                               mlib_s32   dm,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
                               mlib_s32   dn,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
                               mlib_s32   *kern,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
                               mlib_s32   scale)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
  mlib_d64 *buffs_local[3*(MAX_N + 1)], **buffs = buffs_local, **buff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
  mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
  mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
  mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
  mlib_d64 dd, d0, d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
  mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
  DEF_VARS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
  DEF_EXTRA_VARS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
  if (n > MAX_N) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
    buffs = mlib_malloc(3*(n + 1)*sizeof(mlib_d64*));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
    if (buffs == NULL) return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
  buff = buffs + 2*(n + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
  sl = adr_src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
  dl = adr_dst + dn*dll + dm;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
  dl = adr_dst + dn*dll + dm*NCHAN;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
  ssize = NCHAN*wid;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
  dsize = (ssize + 7)/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
  esize = dsize + 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
  pbuff = mlib_malloc((n + 4)*esize*sizeof(mlib_d64));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
  if (pbuff == NULL) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
    if (buffs != buffs_local) mlib_free(buffs);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
    return MLIB_FAILURE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
  for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
  for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
  buffd = buffs[n] + esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
  buffe = buffd + 2*esize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
  wid -= (m - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
  hgt -= (n - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
  xsize = ssize - NCHAN*(m - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
  emask = (0xFF00 >> (xsize & 7)) & 0xFF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
  vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
  for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
    mlib_d64 *buffn = buffs[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
    sp = sl + l*sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
    for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
      LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
  /* init buffer */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
  for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
    buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
    buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
  for (j = 0; j < hgt; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
    mlib_d64 **buffc = buffs + buff_ind;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
    mlib_f32 *pk = karr, k0, k1, k2, k3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
    sp = sl + n*sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
    for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
      buff[l] = buffc[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
    buffn  = buffc[n];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
    for (l = 0; l < n; l++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
      if ((((mlib_addr)(sl + l*sll)) & 7) == 0) buff[l] = (mlib_d64*)(sl + l*sll);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
    if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
    ik_last = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
    ik_last = (m - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
    for (jk = 0; jk < n; jk += jk_size) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
      jk_size = n - jk;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
      if (jk_size >= 5) jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
      if (jk_size == 4) jk_size = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
      if (jk_size >= 6) jk_size = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
      if (jk_size == 5) jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
      coff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
      if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
        for (ik = 0; ik < m; ik++, coff += NCHAN) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
          if (!jk && ik == ik_last) continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
          k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
          doff  = coff/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
          buff0 = buff[jk    ] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
          buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
          s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
          s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
          for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
            s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
            s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
            s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
            s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
            s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
            s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
            d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
            d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
            d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
            d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
            d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
            d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
            d0 = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
            d0 = vis_fpadd16(d10, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
            d1 = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
            d1 = vis_fpadd16(d11, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
            buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
            buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
        pk += 2*m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
      } else if (jk_size == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
        for (ik = 0; ik < m; ik++, coff += NCHAN) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
          if (!jk && ik == ik_last) continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
          k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
          k2 = pk[ik + 2*m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
          doff  = coff/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
          buff0 = buff[jk    ] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
          buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
          buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
          if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
            for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
              d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
              d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
              s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
              s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
              s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
              d0  = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
              d0  = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
              d1  = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
              d1  = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
              buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
              buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
          } else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
            for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
              d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
              d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
              d0  = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
              d0  = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
              d1  = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
              d1  = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
              buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
              buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
          } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
            for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
              d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
              d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
              s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
              s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
              s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
              d0  = vis_fpadd16(d20, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
              d0  = vis_fpadd16(d00, d0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
              d1  = vis_fpadd16(d21, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
              d1  = vis_fpadd16(d01, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
              buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
              buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
        pk += 3*m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
      } else { /* jk_size == 4 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
        for (ik = 0; ik < m; ik++, coff += NCHAN) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
          if (!jk && ik == ik_last) continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
          k0 = pk[ik];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
          k1 = pk[ik + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
          k2 = pk[ik + 2*m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
          k3 = pk[ik + 3*m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
          doff  = coff/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
          buff0 = buff[jk    ] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
          buff1 = buff[jk + 1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
          buff2 = buff[jk + 2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
          buff3 = buff[jk + 3] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
          off = coff & 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
          vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
          if (off == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
            for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
              d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
              d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
              s0 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
              s1 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
              s2 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
              s3 = buff3[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
              d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
              d0  = vis_fpadd16(d0,  d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
              d0  = vis_fpadd16(d0,  d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
              d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
              d1  = vis_fpadd16(d1,  d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
              d1  = vis_fpadd16(d1,  d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
              buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
              buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
          } else if (off == 4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
            s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
            for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
              d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
              d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
              s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
              s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
              d00 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
              d01 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
              d10 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
              d11 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
              d20 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
              d21 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
              d30 = vis_fmul8x16au(vis_read_lo(s30), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
              d31 = vis_fmul8x16au(vis_read_hi(s31), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
              d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
              d0  = vis_fpadd16(d0,  d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
              d0  = vis_fpadd16(d0,  d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
              d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
              d1  = vis_fpadd16(d1,  d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
              d1  = vis_fpadd16(d1,  d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
              buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
              buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
          } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
            s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
            s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
            s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
            s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
            for (i = 0; i < (xsize + 7)/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
              d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
              d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
              s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
              s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
              s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
              s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
              s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
              s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
              s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
              s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
              s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
              s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
              s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
              s3  = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
              d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
              d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
              d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
              d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
              d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
              d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
              d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
              d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
              d00 = vis_fpadd16(d00, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
              d20 = vis_fpadd16(d20, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
              d0  = vis_fpadd16(d0,  d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
              d0  = vis_fpadd16(d0,  d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
              d01 = vis_fpadd16(d01, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
              d21 = vis_fpadd16(d21, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
              d1  = vis_fpadd16(d1,  d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
              d1  = vis_fpadd16(d1,  d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
              buffd[2*i] = d0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
              buffd[2*i + 1] = d1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
        pk += 4*m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
    /*****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
     *****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
     **          Final iteration            **
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
     *****************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
     *****************************************/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
    jk_size = n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
    if (jk_size >= 5) jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
    if (jk_size == 4) jk_size = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
#else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
    if (jk_size >= 6) jk_size = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
    if (jk_size == 5) jk_size = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
#endif
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
    k0 = karr[ik_last];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
    k1 = karr[ik_last + m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
    k2 = karr[ik_last + 2*m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
    k3 = karr[ik_last + 3*m];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
    off  = ik_last*NCHAN;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
    doff = off/8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
    off &= 7;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
    buff0 = buff[0] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
    buff1 = buff[1] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
    buff2 = buff[2] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
    buff3 = buff[3] + doff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
    vis_write_gsr(gsr_scale + off);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
#ifndef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
    if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
      s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
      for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
        s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
        s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
      if (emask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
        s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
        s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
        vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
    } else if (jk_size == 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
      s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
      s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
      for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
        s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
        s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
        s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
        s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
        s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
      if (emask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
        s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
        s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
        s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
        s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
        s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
        vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
    } else /* if (jk_size == 4) */ {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
      dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
      s01 = buff0[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
      s11 = buff1[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
      s21 = buff2[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
      s31 = buff3[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
      for (i = 0; i < xsize/8; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
        s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
        s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
        s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
        s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
        s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
        s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
        s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
        s3  = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
        d0 = vis_fpadd16(d0, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
        d1 = vis_fpadd16(d1, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
        dp[i] = dd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
      if (emask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
        s00 = s01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
        s10 = s11;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
        s20 = s21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
        s30 = s31;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
        s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
        s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
        s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
        s31 = buff3[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
        s0  = vis_faligndata(s00, s01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
        s1  = vis_faligndata(s10, s11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
        s2  = vis_faligndata(s20, s21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
        s3  = vis_faligndata(s30, s31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
        d00 = vis_fmul8x16au(vis_read_hi(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
        d01 = vis_fmul8x16au(vis_read_lo(s0), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
        d10 = vis_fmul8x16au(vis_read_hi(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
        d11 = vis_fmul8x16au(vis_read_lo(s1), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
        d20 = vis_fmul8x16au(vis_read_hi(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
        d21 = vis_fmul8x16au(vis_read_lo(s2), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
        d30 = vis_fmul8x16au(vis_read_hi(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
        d31 = vis_fmul8x16au(vis_read_lo(s3), k3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
        d0 = vis_fpadd16(d0, d30);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
        d1 = vis_fpadd16(d1, d31);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
        dd = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
        vis_pst_8(dd, dp + i, emask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
      if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
#else /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
    if (jk_size == 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
      vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
      for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
        mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
        mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
        mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
        mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
        mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
        mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
        mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
        mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
        mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1546
        d2 = buffd[2*i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
        d3 = buffd[2*i + 3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
        d4 = buffd[2*i + 4];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
        d5 = buffd[2*i + 5];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
        d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1555
        d2 = vis_fpadd16(d2, d12);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
        d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
        d3 = vis_fpadd16(d3, d13);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
        d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
        d4 = vis_fpadd16(d4, d14);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
        d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
        d5 = vis_fpadd16(d5, d15);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1563
        buffe[i    ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
        buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
        buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1567
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
        buffd[2*i + 2] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
        buffd[2*i + 3] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1571
        buffd[2*i + 4] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1572
        buffd[2*i + 5] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1573
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1574
        LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1575
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1576
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1577
    } else /* if (jk_size == 3) */ {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1578
      vis_write_gsr(gsr_scale + 7);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1579
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1580
#pragma pipeloop(0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1581
      for (i = 0; i < dsize; i += 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
        mlib_d64 d00, d01, d02, d03, d04, d05;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
        mlib_d64 d10, d11, d12, d13, d14, d15;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
        mlib_d64 d20, d21, d22, d23, d24, d25;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
        mlib_d64 d0, d1, d2, d3, d4, d5;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
        mlib_d64 s00 = buff0[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
        mlib_d64 s01 = buff0[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
        mlib_d64 s02 = buff0[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
        mlib_d64 s10 = buff1[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
        mlib_d64 s11 = buff1[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
        mlib_d64 s12 = buff1[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
        mlib_d64 s20 = buff2[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
        mlib_d64 s21 = buff2[i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
        mlib_d64 s22 = buff2[i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
        d00 = vis_fmul8x16au(vis_read_hi(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
        d01 = vis_fmul8x16au(vis_read_lo(s00), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
        d02 = vis_fmul8x16au(vis_read_hi(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
        d03 = vis_fmul8x16au(vis_read_lo(s01), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
        d04 = vis_fmul8x16au(vis_read_hi(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1601
        d05 = vis_fmul8x16au(vis_read_lo(s02), k0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1602
        d10 = vis_fmul8x16au(vis_read_hi(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1603
        d11 = vis_fmul8x16au(vis_read_lo(s10), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1604
        d12 = vis_fmul8x16au(vis_read_hi(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1605
        d13 = vis_fmul8x16au(vis_read_lo(s11), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1606
        d14 = vis_fmul8x16au(vis_read_hi(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1607
        d15 = vis_fmul8x16au(vis_read_lo(s12), k1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1608
        d20 = vis_fmul8x16au(vis_read_hi(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1609
        d21 = vis_fmul8x16au(vis_read_lo(s20), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1610
        d22 = vis_fmul8x16au(vis_read_hi(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1611
        d23 = vis_fmul8x16au(vis_read_lo(s21), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1612
        d24 = vis_fmul8x16au(vis_read_hi(s22), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1613
        d25 = vis_fmul8x16au(vis_read_lo(s22), k2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1614
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1615
        d0 = buffd[2*i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1616
        d1 = buffd[2*i + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1617
        d2 = buffd[2*i + 2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1618
        d3 = buffd[2*i + 3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1619
        d4 = buffd[2*i + 4];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
        d5 = buffd[2*i + 5];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
        d0 = vis_fpadd16(d0, d00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1622
        d0 = vis_fpadd16(d0, d10);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
        d0 = vis_fpadd16(d0, d20);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1624
        d1 = vis_fpadd16(d1, d01);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
        d1 = vis_fpadd16(d1, d11);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1626
        d1 = vis_fpadd16(d1, d21);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
        d2 = vis_fpadd16(d2, d02);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1628
        d2 = vis_fpadd16(d2, d12);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
        d2 = vis_fpadd16(d2, d22);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
        d3 = vis_fpadd16(d3, d03);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
        d3 = vis_fpadd16(d3, d13);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
        d3 = vis_fpadd16(d3, d23);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
        d4 = vis_fpadd16(d4, d04);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
        d4 = vis_fpadd16(d4, d14);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
        d4 = vis_fpadd16(d4, d24);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
        d5 = vis_fpadd16(d5, d05);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
        d5 = vis_fpadd16(d5, d15);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
        d5 = vis_fpadd16(d5, d25);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
        buffe[i    ] = vis_fpack16_pair(d0, d1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
        buffe[i + 1] = vis_fpack16_pair(d2, d3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
        buffe[i + 2] = vis_fpack16_pair(d4, d5);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
        buffd[2*i    ] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
        buffd[2*i + 1] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
        buffd[2*i + 2] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
        buffd[2*i + 3] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1648
        buffd[2*i + 4] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
        buffd[2*i + 5] = drnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
        LOAD_SRC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
      }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
#ifdef CONV_INDEX
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
    mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
#endif /* CONV_INDEX */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
    sl += sll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
    dl += dll;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
    buff_ind++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
    if (buff_ind >= (n + 1)) buff_ind = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
  mlib_free(pbuff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
  if (buffs != buffs_local) mlib_free(buffs);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
  return MLIB_SUCCESS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
/***************************************************************/