jdk/src/share/native/sun/awt/medialib/mlib_c_ImageCopy.c
author bae
Fri, 25 Mar 2011 12:50:59 +0300
changeset 8939 04615dca2a76
parent 5506 202f599c92aa
permissions -rw-r--r--
6989717: media native code compiler warnings Reviewed-by: jgodinez, prr

/*
 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */


/*
 * FUNCTIONS
 *      mlib_ImageCopy - Direct copy from one image to another.
 *
 * SYNOPSIS
 *      mlib_status mlib_ImageCopy(mlib_image       *dst,
 *                                 const mlib_image *src);
 *
 * ARGUMENT
 *      dst     pointer to output or destination image
 *      src     pointer to input or source image
 *
 * RESTRICTION
 *      src and dst must have the same size, type and number of channels.
 *      They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, MLIB_SHORT,
 *      MLIB_USHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type.
 *
 * DESCRIPTION
 *      Direct copy from one image to another
 */

#include <stdlib.h>
#include "mlib_image.h"
#include "mlib_ImageCheck.h"
#include "mlib_ImageCopy.h"

/***************************************************************/
/*
 * Microsoft compilers only: switch optimizations off for the code below
 * (workaround recorded as bug 4195132).  The matching pragma at the end of
 * this file switches them back on.
 */
#ifdef _MSC_VER
#pragma optimize("", off)                   /* Fix bug 4195132 */
#endif /* _MSC_VER */

/***************************************************************/
/*
 * TYPE_64BIT is the unit used by the 8-bytes-at-a-time copy loops.
 * Do not perform the copying through the mlib_d64 data type on x86: when
 * i386 is defined a struct of two 32-bit integers is used instead.
 * NOTE(review): presumably because moving arbitrary bit patterns through
 * x87 floating-point registers is not guaranteed to preserve them - confirm.
 */
#ifdef i386

typedef struct {
  mlib_s32 int0, int1;
} two_int;

#define TYPE_64BIT two_int

#else /* i386 */

#define TYPE_64BIT mlib_d64
#endif /* i386 */

/***************************************************************/
/*
 * Forward declarations of the file-local copy routines defined further down:
 * one per element size (8, 16, 32 and 64 bits) plus the aligned
 * 8-byte-unit copy used for contiguous bit images.
 */
static void mlib_c_ImageCopy_u8(const mlib_image *src,
                                mlib_image       *dst);
static void mlib_c_ImageCopy_s16(const mlib_image *src,
                                 mlib_image       *dst);
static void mlib_c_ImageCopy_s32(const mlib_image *src,
                                 mlib_image       *dst);
static void mlib_c_ImageCopy_d64(const mlib_image *src,
                                 mlib_image       *dst);
static void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
                                TYPE_64BIT       *dp,
                                mlib_s32         size);

/***************************************************************/
/*
 * Copy the pixel data of src into dst.
 *
 *   dst  destination image
 *   src  source image
 *
 * The MLIB_IMAGE_* macros validate the arguments first (presumably they
 * return an error status from this function when a check fails - see
 * mlib_ImageCheck.h).  The copy is then dispatched on the data type of dst:
 * bit images are handled inline, every other supported type is forwarded
 * to the routine for its element size.
 *
 * Returns MLIB_SUCCESS after copying, MLIB_FAILURE for a data type that is
 * not listed in the switch.
 */
mlib_status mlib_ImageCopy(mlib_image       *dst,
                           const mlib_image *src)
{
  MLIB_IMAGE_CHECK(src);
  MLIB_IMAGE_CHECK(dst);
  MLIB_IMAGE_TYPE_EQUAL(src, dst);
  MLIB_IMAGE_CHAN_EQUAL(src, dst);
  MLIB_IMAGE_SIZE_EQUAL(src, dst);

  switch (mlib_ImageGetType(dst)) {
    case MLIB_BYTE:
      mlib_c_ImageCopy_u8(src, dst);
      break;

    case MLIB_SHORT:
    case MLIB_USHORT:
      mlib_c_ImageCopy_s16(src, dst);
      break;

    case MLIB_INT:
    case MLIB_FLOAT:
      mlib_c_ImageCopy_s32(src, dst);
      break;

    case MLIB_DOUBLE:
      mlib_c_ImageCopy_d64(src, dst);
      break;

    case MLIB_BIT: {
      /* length of one row in bits */
      mlib_s32 row_bits = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst);
      mlib_s32 rows = mlib_ImageGetHeight(src);
      mlib_u8 *src_line = (mlib_u8 *) mlib_ImageGetData(src);
      mlib_u8 *dst_line = (mlib_u8 *) mlib_ImageGetData(dst);

      if (mlib_ImageIsNotOneDvector(src) || mlib_ImageIsNotOneDvector(dst)) {
        /* At least one image has gaps between rows: copy row by row. */
        mlib_s32 src_step = mlib_ImageGetStride(src);
        mlib_s32 dst_step = mlib_ImageGetStride(dst);
        mlib_s32 src_bitoff = mlib_ImageGetBitOffset(src);  /* in bits */
        mlib_s32 dst_bitoff = mlib_ImageGetBitOffset(dst);  /* in bits */
        mlib_s32 row;

        for (row = 0; row < rows; row++) {
          if (src_bitoff == dst_bitoff) {
            mlib_ImageCopy_bit_al(src_line, dst_line, row_bits, src_bitoff);
          }
          else {
            mlib_ImageCopy_bit_na(src_line, dst_line, row_bits, src_bitoff, dst_bitoff);
          }

          src_line += src_step;
          dst_line += dst_step;
        }
      }
      else {
        /* Both images are one contiguous vector: copy it in a single pass. */
        mlib_s32 nbytes = rows * (row_bits >> 3);

        if (mlib_ImageIsNotAligned8(src) || mlib_ImageIsNotAligned8(dst) || ((nbytes & 7) != 0)) {
          mlib_ImageCopy_na(src_line, dst_line, nbytes);
        }
        else {
          /* aligned data and a whole number of 8-byte units */
          mlib_c_ImageCopy_a1((TYPE_64BIT *) src_line, (TYPE_64BIT *) dst_line, nbytes >> 3);
        }
      }

      break;
    }

    default:
      return MLIB_FAILURE;                  /* data type not handled by this function */
  }

  return MLIB_SUCCESS;
}

/***************************************************************/
/*
 * PREPAREVARS(type) - common prologue of the typed copy routines.
 *
 * Expects image pointers named `src` and `dst` in the expanding scope and
 * declares the following locals there:
 *   psrc, pdst              image data viewed as arrays of `type`
 *   src_height              number of rows
 *   src_width               elements per row (image width times channels)
 *   src_stride, dst_stride  image strides divided by sizeof(type)
 *   chan                    channel count of dst
 *   i, j                    loop counters for the caller
 *
 * When the row length equals the stride in both images the rows follow each
 * other without gaps, so the image is folded into one long row:
 * src_width becomes the total element count and src_height becomes 1.
 */
#define PREPAREVARS(type)                                        \
  type *psrc = (type *) mlib_ImageGetData(src);                  \
  type *pdst = (type *) mlib_ImageGetData(dst);                  \
  mlib_s32 src_height = mlib_ImageGetHeight(src);                \
  mlib_s32 src_width  = mlib_ImageGetWidth(src);                 \
  mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type); \
  mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type); \
  mlib_s32 chan = mlib_ImageGetChannels(dst);                    \
  mlib_s32 i, j;                                                 \
                                                                 \
  src_width *= chan;                                             \
  if (src_width == src_stride && src_width == dst_stride) {      \
    src_width *= src_height;                                     \
    src_height = 1;                                              \
  }

/***************************************************************/
/*
 * STRIP(pd, ps, w, h, data_type) - plain element-wise copy of an h-row,
 * w-element-wide strip from ps to pd.
 *
 * Uses the variables i, j, src_stride and dst_stride of the expanding scope
 * (strides are in elements).  An odd leading element is copied on its own,
 * the remainder of each row is copied two elements per iteration.
 */
#define STRIP(pd, ps, w, h, data_type) {                         \
  data_type strip_a, strip_b;                                    \
  for (i = 0; i < (h); i++) {                                    \
    j = (w) & 1;                                                 \
    if (j != 0) {                                                \
      (pd)[i * dst_stride] = (ps)[i * src_stride];               \
    }                                                            \
    while (j < (w)) {                                            \
      strip_a = (ps)[i * src_stride + j];                        \
      strip_b = (ps)[i * src_stride + j + 1];                    \
      (pd)[i * dst_stride + j]     = strip_a;                    \
      (pd)[i * dst_stride + j + 1] = strip_b;                    \
      j += 2;                                                    \
    }                                                            \
  }                                                              \
}

/***************************************************************/
/*
 * Bit copy for the case where the bit offsets of source and destination
 * are the same.
 *
 *   sa      first source byte
 *   da      first destination byte
 *   size    number of bits to copy; nothing is done when size <= 0
 *   offset  position of the first bit inside sa[0] / da[0], counted from
 *           the most significant bit (expected range 0..7)
 *
 * Destination bits in front of the first and behind the last copied bit are
 * preserved.  Whole bytes in the middle are moved 8 (or 4) bytes at a time
 * once the destination pointer is aligned.
 */

void mlib_ImageCopy_bit_al(const mlib_u8 *sa,
                           mlib_u8       *da,
                           mlib_s32      size,
                           mlib_s32      offset)
{
  mlib_s32 b_size, i, j;
  TYPE_64BIT *sp, *dp;
  mlib_u8 mask0 = 0xFF;
  mlib_u8 src, mask;

  if (size <= 0) return;

  /* The whole run fits into the first byte: merge it under a mask. */
  if (size <= (8 - offset)) {
    mask = mask0 << (8 - size);
    mask >>= offset;
    src = da[0];
    da[0] = (src & (~mask)) | (sa[0] & mask);
    return;
  }

  /* Leading partial byte: the low (8 - offset) bits come from the source. */
  mask = mask0 >> offset;
  src = da[0];
  da[0] = (src & (~mask)) | (sa[0] & mask);
  da++;
  sa++;
  size = size - 8 + offset;
  b_size = size >> 3;                       /* size in bytes */

  /* Byte copy until the destination is 8-byte aligned (or bytes run out). */
  for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
    *da++ = *sa++;

  if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
    /* Source has the same alignment: straight 8-byte copies. */
    sp = (TYPE_64BIT *) sa;
    dp = (TYPE_64BIT *) da;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (i = 0; j <= (b_size - 8); j += 8, i++) {
      dp[i] = sp[i];
    }

    sa += i << 3;
    da += i << 3;
  }
  else {
#ifdef _NO_LONGLONG
    if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
      /* Same alignment modulo 4: straight 4-byte copies. */
      mlib_u32 *pws, *pwd;

      pws = (mlib_u32 *) sa;
      pwd = (mlib_u32 *) da;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; j <= (b_size - 4); j += 4, i++) {
        pwd[i] = pws[i];
      }

      sa += i << 2;
      da += i << 2;
    }
    else if (j <= (b_size - 4)) {
      /*
       * Source is misaligned relative to the destination: read aligned
       * source words and merge neighbouring pairs with shifts.  Entered
       * only when at least one whole word remains, so the aligned source
       * word is never read needlessly; the alignment loop above then
       * stopped on an aligned destination, so both shift counts are
       * non-zero and below 32.
       */
      mlib_u32 *pws, *pwd, src0, src1;
      mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 3), rshift;

      pwd = (mlib_u32 *) da;
      pws = (mlib_u32 *) (sa - lshift);
      lshift <<= 3;
      rshift = 32 - lshift;

      src1 = pws[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; j <= (b_size - 4); j += 4, i++) {
        src0 = src1;
        src1 = pws[i + 1];
#ifdef _LITTLE_ENDIAN
        pwd[i] = (src0 >> lshift) | (src1 << rshift);
#else
        pwd[i] = (src0 << lshift) | (src1 >> rshift);
#endif /* _LITTLE_ENDIAN */
      }

      sa += i << 2;
      da += i << 2;
    }

#else
    /*
     * Source is misaligned relative to the destination: read aligned
     * 8-byte source words and merge neighbouring pairs with shifts.
     * Entered only when at least one whole word remains, so the aligned
     * source word is never read needlessly; the alignment loop above then
     * stopped on an aligned destination, so both shift counts are non-zero
     * and below 64.
     */
    if (j <= (b_size - 8)) {
      mlib_u64 *pws, *pwd, src0, src1;
      mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;

      pwd = (mlib_u64 *) da;
      pws = (mlib_u64 *) (sa - lshift);
      lshift <<= 3;
      rshift = 64 - lshift;

      src1 = pws[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; j <= (b_size - 8); j += 8, i++) {
        src0 = src1;
        src1 = pws[i + 1];
        /* The byte order decides which way the two halves are shifted. */
#ifdef _LITTLE_ENDIAN
        pwd[i] = (src0 >> lshift) | (src1 << rshift);
#else
        pwd[i] = (src0 << lshift) | (src1 >> rshift);
#endif /* _LITTLE_ENDIAN */
      }

      sa += i << 3;
      da += i << 3;
    }
#endif /* _NO_LONGLONG */
  }

  /* Whole bytes the word loops did not cover. */
  for (; j < b_size; j++)
    *da++ = *sa++;

  /* Trailing partial byte: the high j bits come from the source. */
  j = size & 7;

  if (j > 0) {
    mask = mask0 << (8 - j);
    src = da[0];
    da[0] = (src & (~mask)) | (sa[0] & mask);
  }
}

/***************************************************************/
/*
 * Copy an image of 8-bit elements from src to dst.
 *
 * Rows narrower than 16 bytes are copied element by element (STRIP).
 * Wider rows are copied in three steps: leading bytes up to an alignment
 * boundary, a bulk loop moving 8 (or 4) bytes per iteration, and a final
 * byte loop for whatever the bulk loop left over.
 */
void mlib_c_ImageCopy_u8(const mlib_image *src,
                         mlib_image       *dst)
{
  PREPAREVARS(mlib_u8);
  if (src_width < 16) {
    STRIP(pdst, psrc, src_width, src_height, mlib_u8);
    return;
  }

  for (i = 0; i < src_height; i++) {
    mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;

    /* Both rows have the same address modulo 8. */
    if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
      /* leading bytes up to the next 8-byte boundary */
      for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
        pdst_row[j] = psrc_row[j];
      }

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      /* aligned 8-byte copies */
      for (; j <= (src_width - 8); j += 8) {
        TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));

        *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
      }
    }
    else {

#ifdef _NO_LONGLONG

      /* No 64-bit integers: align the destination to 4 bytes instead. */
      for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
        pdst_row[j] = psrc_row[j];
      }

      /* Same address modulo 4: plain 4-byte copies. */
      if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 4); j += 4) {
          *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
        }
      }
      else {
        /*
         * Source misaligned relative to the destination: read aligned
         * source words and assemble each destination word from two
         * neighbouring source words with shifts.
         */
        mlib_u32 *ps, shl, shr, src0, src1;

        ps = (mlib_u32 *) (psrc_row + j);
        shl = (mlib_addr) ps & 3;
        ps = (mlib_u32 *) ((mlib_addr) ps - shl);
        shl <<= 3;
        shr = 32 - shl;

        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 4); j += 4) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
#else
          *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }

#else

      /* leading bytes until the destination is 8-byte aligned */
      for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
        pdst_row[j] = psrc_row[j];
      }

      {
        mlib_s32 shl, shr;
        mlib_u64 *ps, src0, src1;

        ps = (mlib_u64 *) (psrc_row + j);
        /*
         * shl is the source misalignment in bits.  The destination is
         * 8-byte aligned at this point and the rows differ modulo 8, so
         * the misalignment is non-zero: shl and shr are multiples of 8
         * in the range 8..56.
         */
        shl = (mlib_s32) ((mlib_addr) ps & 7);
        ps = (mlib_u64 *) ((mlib_addr) ps - shl);
        shl <<= 3;
        shr = 64 - shl;

        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        /* merge neighbouring aligned source words into each 8-byte store */
        for (; j <= (src_width - 8); j += 8) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
#else
          *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }
#endif /* _NO_LONGLONG */
    }

    /* bytes left over after the bulk loop */
    for (; j < src_width; j++)
      pdst_row[j] = psrc_row[j];
  }
}

/***************************************************************/
/*
 * Copy an image of 16-bit elements from src to dst (mlib_ImageCopy calls it
 * for both MLIB_SHORT and MLIB_USHORT).
 *
 * Rows narrower than 8 elements are copied element by element (STRIP).
 * Wider rows are copied in three steps: leading elements up to an alignment
 * boundary, a bulk loop moving 8 (or 4) bytes per iteration, and a final
 * element loop for whatever the bulk loop left over.
 * NOTE(review): the alignment arithmetic assumes rows start on a 2-byte
 * boundary - confirm against the image allocation code.
 */
void mlib_c_ImageCopy_s16(const mlib_image       *src,
                          mlib_image *dst)
{
  PREPAREVARS(mlib_u16);
  if (src_width < 8) {
    STRIP(pdst, psrc, src_width, src_height, mlib_u16);
    return;
  }

  for (i = 0; i < src_height; i++) {
    mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;

    /* Both rows have the same address modulo 8. */
    if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
      /* leading elements up to the next 8-byte boundary (byte count / 2) */
      for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
        pdst_row[j] = psrc_row[j];
      }

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      /* aligned 8-byte copies, four elements each */
      for (; j <= (src_width - 4); j += 4) {
        TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));

        *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
      }
    }
    else {

#ifdef _NO_LONGLONG

      /* No 64-bit integers: one element aligns the destination to 4 bytes. */
      if (j = (((mlib_addr) pdst_row & 2) != 0)) {
        pdst_row[0] = psrc_row[0];
      }

      /* Same address modulo 4: plain 4-byte copies, two elements each. */
      if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 2); j += 2) {
          *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
        }
      }
      else {
        /*
         * Source is one element off relative to the destination: start
         * reading one element earlier and build each destination word
         * from the halves of two neighbouring source words.
         */
        mlib_u32 *ps, src0, src1;

        ps = (mlib_u32 *) (psrc_row + j - 1);
        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 2); j += 2) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
#else
          *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }

#else

      /* leading elements until the destination is 8-byte aligned */
      for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
        pdst_row[j] = psrc_row[j];
      }

      {
        mlib_s32 shl, shr;
        mlib_u64 *ps, src0, src1;

        /* shl: source misalignment in bits; ps: source rounded down to 8 bytes */
        ps = (mlib_u64 *) (psrc_row + j);
        shl = (mlib_s32) ((mlib_addr) ps & 7);
        ps = (mlib_u64 *) ((mlib_addr) ps - shl);
        shl <<= 3;
        shr = 64 - shl;

        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        /* merge neighbouring aligned source words into each 8-byte store */
        for (; j <= (src_width - 4); j += 4) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
#else
          *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }
#endif /* _NO_LONGLONG */
    }

    /* elements left over after the bulk loop */
    for (; j < src_width; j++)
      pdst_row[j] = psrc_row[j];
  }
}

/***************************************************************/
/*
 * Copy an image of 32-bit elements from src to dst (mlib_ImageCopy calls it
 * for both MLIB_INT and MLIB_FLOAT).
 *
 * Rows narrower than 4 elements are copied element by element (STRIP).
 * Wider rows are moved two elements (8 bytes) per iteration where possible;
 * a final loop copies the element the pair loop may leave over.
 */
void mlib_c_ImageCopy_s32(const mlib_image       *src,
                          mlib_image *dst)
{
  PREPAREVARS(mlib_u32);

  if (src_width < 4) {
    STRIP(pdst, psrc, src_width, src_height, mlib_u32);
    return;
  }

  for (i = 0; i < src_height; i++) {
    mlib_u32 *s_line = psrc + i * src_stride;
    mlib_u32 *d_line = pdst + i * dst_stride;
    mlib_addr rel_align = ((mlib_addr) s_line ^ (mlib_addr) d_line) & 7;

    if (rel_align == 0) {
      /* Same address modulo 8: at most one element reaches the boundary. */
      j = (((mlib_addr) s_line & 4) != 0) ? 1 : 0;
      if (j != 0) {
        d_line[0] = s_line[0];
      }

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (; j + 2 <= src_width; j += 2) {
        TYPE_64BIT pair = *((TYPE_64BIT *) (s_line + j));

        *((TYPE_64BIT *) (d_line + j)) = pair;
      }
    }
    else {

#ifdef _NO_LONGLONG

      /* No 64-bit integers: copy the whole row one element at a time. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (j = 0; j < src_width; j++) {
        *((mlib_s32 *) (d_line + j)) = *((mlib_s32 *) (s_line + j));
      }

#else

      {
        /*
         * Source is one element off relative to the destination: align the
         * destination, start reading one element earlier and build each
         * 8-byte store from the halves of two neighbouring source words.
         */
        mlib_u64 *wp, prev, next;

        j = (((mlib_addr) d_line & 4) != 0) ? 1 : 0;
        if (j != 0) {
          d_line[0] = s_line[0];
        }

        wp = (mlib_u64 *) (s_line + j - 1);
        next = wp[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j + 2 <= src_width; j += 2) {
          prev = next;
          wp++;
          next = wp[0];
#ifdef _LITTLE_ENDIAN
          *((mlib_s64 *) (d_line + j)) = (prev >> 32) | (next << 32);
#else
          *((mlib_s64 *) (d_line + j)) = (prev << 32) | (next >> 32);
#endif /* _LITTLE_ENDIAN */
        }
      }
#endif /* _NO_LONGLONG */
    }

    /* element left over after the pair loop */
    for (; j < src_width; j++) {
      d_line[j] = s_line[j];
    }
  }
}

/***************************************************************/
/*
 * Copy an image of 64-bit (mlib_d64) elements from src to dst, one element
 * per assignment, row by row.
 */
void mlib_c_ImageCopy_d64(const mlib_image       *src,
                          mlib_image *dst)
{
  PREPAREVARS(mlib_d64);

  for (i = 0; i < src_height; i++) {
    mlib_d64 *s_line = psrc + i * src_stride;
    mlib_d64 *d_line = pdst + i * dst_stride;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (j = 0; j < src_width; j++) {
      d_line[j] = s_line[j];
    }
  }
}

/***************************************************************/
/*
 * Copy `size` 8-byte units from sp to dp.
 *
 * Used when both source and destination image data are 1-D vectors and
 * 8-byte aligned; note that size counts 8-byte units, not bytes.
 */

void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
                         TYPE_64BIT       *dp,
                         mlib_s32         size)
{
  mlib_s32 k;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
  for (k = 0; k < size; k++) {
    dp[k] = sp[k];
  }
}

/***************************************************************/
/*
 * Word type used by the shifted copy in mlib_ImageCopy_na:
 *   TYPE   unsigned integer word (64-bit unless _NO_LONGLONG is defined)
 *   BSIZE  width of TYPE in bits
 *   SIZE   width of TYPE in bytes
 */
#ifndef _NO_LONGLONG
#define TYPE    mlib_u64
#define BSIZE   64
#define SIZE    8
#else
#define TYPE    mlib_u32
#define BSIZE   32
#define SIZE    4
#endif /* _NO_LONGLONG */

/***************************************************************/
/*
 * Copy n bytes from sp to dp; neither pointer needs any particular
 * alignment.
 *
 *   sp  source bytes
 *   dp  destination bytes
 *   n   number of bytes to copy; nothing is copied when n <= 0
 *
 * The destination is first brought to a word boundary with byte copies.
 * The bulk is then moved one word at a time: directly when the source has
 * the same alignment, otherwise by merging two neighbouring aligned source
 * words with shifts.  The word loops stop while more than zero bytes remain
 * (they run only while n exceeds the word size) and the final byte loop
 * copies the rest.
 */
void mlib_ImageCopy_na(const mlib_u8 *sp,
                       mlib_u8       *dp,
                       mlib_s32      n)
{
  mlib_s32 shr, shl;
  TYPE *tmp, s0, s1;

  if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {

    /* Source and destination differ modulo 8: align the destination. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (; (n > 0) && (((mlib_addr) dp & (SIZE - 1)) != 0); n--)
      *dp++ = *sp++;

    /*
     * Touch the source word-wise only when at least one word will really be
     * copied.  Without this guard the shifted path would load an aligned
     * source word even for n == 0, which may lie outside the source buffer.
     * With n > SIZE the loop above stopped on an aligned destination, so in
     * the shifted path the source misalignment - and with it both shift
     * counts - is non-zero and below BSIZE.
     */
    if (n > SIZE) {
#ifdef _NO_LONGLONG

      if (((mlib_addr) sp & (SIZE - 1)) == 0) {
        /* source is word aligned as well: direct word copies */
        for (; n > SIZE; n -= SIZE) {
          *(TYPE *) dp = *(TYPE *) sp;
          dp += SIZE;
          sp += SIZE;
        }
      }
      else
#endif /* _NO_LONGLONG */
      {
        tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
        /* shl: source misalignment in bits, shr: its complement to BSIZE */
        shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
        shr = BSIZE - shl;
        s0 = *tmp++;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; n > SIZE; n -= SIZE) {
          s1 = *tmp++;
#ifdef _LITTLE_ENDIAN
          *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
#else
          *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
#endif /* _LITTLE_ENDIAN */
          s0 = s1;
          dp += SIZE;
          sp += SIZE;
        }
      }
    }
  }
  else {
    /* Same address modulo 8: align both, then copy 8 bytes at a time. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (; (n > 0) && (((mlib_addr) dp & 7) != 0); n--)
      *dp++ = *sp++;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (; n > 8; n -= 8) {
      *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
      dp += 8;
      sp += 8;
    }
  }

  /* remaining bytes */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
  for (; n > 0; n--)
    *dp++ = *sp++;
}

/***************************************************************/
/* Microsoft compilers only: switch optimizations back on (see the matching
 * "off" pragma near the top of this file). */
#ifdef _MSC_VER
#pragma optimize("", on)
#endif /* _MSC_VER */

/***************************************************************/