/* This source file and the corresponding header were ripped from an in-house
   library. It is distributed under the same license as the rest of SGL. */
#include "bilib4.h"

/* POWERPC is defined to 1 when the compiler reports a PowerPC target.
   On every other target it is left undefined, which `#if POWERPC`
   evaluates as 0. */
#if defined(__powerpc__) || defined(__ppc__)
#define POWERPC 1
#endif

/* P2RGB(): split the 32-bit pixel at *p into its three 8-bit channels.
   The macro deliberately works on names from the caller's scope:
     p        - pointer to the 32-bit pixel to read (not advanced),
     r, g, b  - receive bits 16-23, 8-15 and 0-7 of *p respectively
                (the caller declares them as 8-bit, so the plain
                assignments truncate to the low byte).
   Both variants are wrapped in do { ... } while(0) so that the macro
   expands to exactly one statement and stays correct under an unbraced
   if/else or loop. */
#if POWERPC
/* PowerPC variant: rotate-and-insert brings the wanted byte down to the
   low 8 bits of the destination register. */
#define P2RGB() \
do { \
__asm__("rlwimi %0,%1,16,0xFF" : "=r"(r) : "r"(*p)); \
__asm__("rlwimi %0,%1,24,0xFF" : "=r"(g) : "r"(*p)); \
b = *p; \
} while(0)
#else
/* Portable variant: shift, then let the narrowing assignment keep the
   low byte. */
#define P2RGB() \
do { \
r = *p >> 16; \
g = *p >> 8; \
b = *p; \
} while(0)
#endif

/* Resample `src` into `dst` by area averaging.
 *
 * Every destination pixel is mapped back onto the rectangle of source
 * pixels it covers.  The rectangle is computed in 1/8-pixel fixed point,
 * so partially covered source pixels on its border contribute with a
 * weight of 1..8 per axis while fully covered pixels contribute 8.  The
 * weighted channel sums (tr, tg, tb) are divided by the total weight (tc)
 * to produce the output pixel.
 *
 * Pixels on both surfaces are accessed as 32-bit words with red in bits
 * 16-23, green in bits 8-15 and blue in bits 0-7.  Rows are addressed
 * through each surface's `pitch` (in bytes).
 *
 * Notes:
 *  - When a destination pixel falls inside a single source pixel the
 *    whole 32-bit word is copied unchanged.  Averaged pixels are written
 *    with bits 24-31 cleared.
 *  - Nothing is written if either surface pointer is NULL or the source
 *    has no pixels.
 *  - NOTE(review): no locking or pixel-format validation is done here;
 *    presumably the caller guarantees 32-bit, directly accessible
 *    surfaces - confirm against the callers.
 *
 * src: surface to read from.
 * dst: surface to fill; all dst->w * dst->h pixels are overwritten.
 */
void bi4(SDL_Surface* src, SDL_Surface* dst) {
#if POWERPC
  Uint32 pixie;
#endif
  Uint32* d, *p;
  int y, x;
  int sx, sy, ex, ey;     /* weights (1..8) of the first/last column/row */
  int rsx, rex, rsy, rey; /* first/last source column/row of the footprint */
  int cx, cy;
  int tr, tg, tb, tc;     /* weighted channel sums and total weight */
  int sk;                 /* byte step from the last read pixel to the
                             first column of the next source row */
  Uint8 r, g, b;

  /* An empty source has nothing to sample from. */
  if(!src || !dst || src->w <= 0 || src->h <= 0) {
    return;
  }

  for(y = 0; y < dst->h; ++y) {
    /* Re-base the output pointer on every row so that destination
       surfaces whose pitch is larger than w * 4 are filled correctly. */
    d = (Uint32*)((Uint8*)dst->pixels + y * dst->pitch);

    /* Vertical footprint; it only depends on y, so compute it once per
       destination row. */
    sy = ((y * src->h) << 3) / dst->h;
    rsy = sy >> 3;
    sy &= 7;
    sy = 8 - sy;
    ey = ((y * src->h + src->h) << 3) / dst->h;
    rey = ey >> 3;
    ey &= 7;
    if(ey == 0) {
      /* The footprint ends exactly on a pixel boundary: the previous row
         is the last one and it is fully covered. */
      ey = 8;
      --rey;
    }
    if(rey < rsy) {
      /* Start and end coincide on a pixel boundary (possible when
         enlarging by more than 8x).  Treat the footprint as the single
         row rsy instead of stepping outside of it. */
      rey = rsy;
    }

    for(x = 0; x < dst->w; ++x) {
      /* Horizontal footprint, same scheme as the vertical one. */
      sx = ((x * src->w) << 3) / dst->w;
      rsx = sx >> 3;
      sx &= 7;
      sx = 8 - sx;
      ex = ((x * src->w + src->w) << 3) / dst->w;
      rex = ex >> 3;
      ex &= 7;
      if(ex == 0) {
        ex = 8;
        --rex;
      }
      if(rex < rsx) {
        rex = rsx;
      }

      if(rsy == rey) {
        p = (Uint32*)((Uint8*)src->pixels + rsy * src->pitch) + rsx;
        if(rsx == rex) {
          /* Footprint lies inside one source pixel: plain copy. */
          *(d++) = *p;
          continue;
        }
        else {
          /* One source row, several columns: weights sx, 8, ..., 8, ex. */
          tc = sx;
          P2RGB();
          ++p;
          tr = r * sx;
          tg = g * sx;
          tb = b * sx;
          for(cx = rsx + 1; cx < rex; ++cx) {
            tc += 8;
            P2RGB();
            ++p;
            tr += r * 8;
            tg += g * 8;
            tb += b * 8;
          }
          tc += ex;
          P2RGB();
          tr += r * ex;
          tg += g * ex;
          tb += b * ex;
        }
      }
      else {
        p = (Uint32*)((Uint8*)src->pixels + rsy * src->pitch) + rsx;
        /* After a row has been walked p sits on column rex, i.e.
           (rex - rsx) pixels of 4 bytes past the row start. */
        sk = src->pitch - (rex - rsx) * 4;
        if(rsx == rex) {
          /* One source column, several rows: weights sy, 8, ..., 8, ey. */
          tc = sy;
          P2RGB();
          tr = r * sy;
          tg = g * sy;
          tb = b * sy;
          p = (Uint32*)((Uint8*)p + sk);
          for(cy = rsy + 1; cy < rey; ++cy) {
            tc += 8;
            P2RGB();
            tr += r * 8;
            tg += g * 8;
            tb += b * 8;
            p = (Uint32*)((Uint8*)p + sk);
          }
          tc += ey;
          P2RGB();
          tr += r * ey;
          tg += g * ey;
          tb += b * ey;
        }
        else {
          /* General case: the weight of a source pixel is the product of
             its column weight and its row weight. */

          /* First row (row weight sy). */
          tc = sx * sy;
          P2RGB();
          ++p;
          tr = r * sx * sy;
          tg = g * sx * sy;
          tb = b * sx * sy;
          for(cx = rsx + 1; cx < rex; ++cx) {
            tc += 8 * sy;
            P2RGB();
            ++p;
            tr += r * 8 * sy;
            tg += g * 8 * sy;
            tb += b * 8 * sy;
          }
          tc += ex * sy;
          P2RGB();
          tr += r * ex * sy;
          tg += g * ex * sy;
          tb += b * ex * sy;
          p = (Uint32*)((Uint8*)p + sk);

          /* Fully covered middle rows (row weight 8). */
          for(cy = rsy + 1; cy < rey; ++cy) {
            tc += sx * 8;
            P2RGB();
            ++p;
            tr += r * sx * 8;
            tg += g * sx * 8;
            tb += b * sx * 8;
            for(cx = rsx + 1; cx < rex; ++cx) {
              tc += 64;
              P2RGB();
              ++p;
              tr += r * 64;
              tg += g * 64;
              tb += b * 64;
            }
            tc += ex * 8;
            P2RGB();
            tr += r * ex * 8;
            tg += g * ex * 8;
            tb += b * ex * 8;
            p = (Uint32*)((Uint8*)p + sk);
          }

          /* Last row (row weight ey). */
          tc += sx * ey;
          P2RGB();
          ++p;
          tr += r * sx * ey;
          tg += g * sx * ey;
          tb += b * sx * ey;
          for(cx = rsx + 1; cx < rex; ++cx) {
            tc += 8 * ey;
            P2RGB();
            ++p;
            tr += r * 8 * ey;
            tg += g * 8 * ey;
            tb += b * 8 * ey;
          }
          tc += ex * ey;
          P2RGB();
          tr += r * ex * ey;
          tg += g * ex * ey;
          tb += b * ex * ey;
        }
      }

      /* Normalise the sums and pack them back into one 32-bit pixel.
         tc is at least 1 here because every weight is in 1..8. */
#if POWERPC
      __asm__("rlwimi %0,%2,16,0xFF0000" : "=r"(pixie) : "0"(tb / tc), "r"(tr / tc));
      __asm__("rlwimi %0,%2,8,0xFF00" : "=r"(pixie) : "0"(pixie), "r"(tg / tc));
      *(d++) = pixie;
#else
      *(d++) = ((tr / tc) << 16) | ((tg / tc) << 8) | (tb / tc);
#endif
    }
  }
}
