mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-17 22:03:07 -06:00
1064 lines
30 KiB
C
1064 lines
30 KiB
C
/*
 * Copyright © 2007 Luca Barbato
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Luca Barbato not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  Luca Barbato makes no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Luca Barbato (lu_zero@gentoo.org)
 *
 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 */
|
|
|
|
#include <config.h>
|
|
#include "pixman-vmx.h"
|
|
#include "pixman-combine32.h"
|
|
#include <altivec.h>
|
|
|
|
static force_inline vector unsigned int
|
|
splat_alpha (vector unsigned int pix) {
|
|
return vec_perm (pix, pix,
|
|
(vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04,
|
|
0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C));
|
|
}
|
|
|
|
static force_inline vector unsigned int
|
|
pix_multiply (vector unsigned int p, vector unsigned int a)
|
|
{
|
|
vector unsigned short hi, lo, mod;
|
|
/* unpack to short */
|
|
hi = (vector unsigned short)
|
|
vec_mergeh ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)p);
|
|
mod = (vector unsigned short)
|
|
vec_mergeh ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)a);
|
|
|
|
hi = vec_mladd (hi, mod, (vector unsigned short)
|
|
AVV(0x0080,0x0080,0x0080,0x0080,
|
|
0x0080,0x0080,0x0080,0x0080));
|
|
|
|
hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
|
|
|
|
hi = vec_sr (hi, vec_splat_u16 (8));
|
|
|
|
/* unpack to short */
|
|
lo = (vector unsigned short)
|
|
vec_mergel ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)p);
|
|
mod = (vector unsigned short)
|
|
vec_mergel ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)a);
|
|
|
|
lo = vec_mladd (lo, mod, (vector unsigned short)
|
|
AVV(0x0080,0x0080,0x0080,0x0080,
|
|
0x0080,0x0080,0x0080,0x0080));
|
|
|
|
lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
|
|
|
|
lo = vec_sr (lo, vec_splat_u16 (8));
|
|
|
|
return (vector unsigned int)vec_packsu (hi, lo);
|
|
}
|
|
|
|
static force_inline vector unsigned int
|
|
pix_add (vector unsigned int a, vector unsigned int b)
|
|
{
|
|
return (vector unsigned int)vec_adds ((vector unsigned char)a,
|
|
(vector unsigned char)b);
|
|
}
|
|
|
|
static force_inline vector unsigned int
|
|
pix_add_mul (vector unsigned int x, vector unsigned int a,
|
|
vector unsigned int y, vector unsigned int b)
|
|
{
|
|
vector unsigned short hi, lo, mod, hiy, loy, mody;
|
|
|
|
hi = (vector unsigned short)
|
|
vec_mergeh ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)x);
|
|
mod = (vector unsigned short)
|
|
vec_mergeh ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)a);
|
|
hiy = (vector unsigned short)
|
|
vec_mergeh ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)y);
|
|
mody = (vector unsigned short)
|
|
vec_mergeh ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)b);
|
|
|
|
hi = vec_mladd (hi, mod, (vector unsigned short)
|
|
AVV(0x0080,0x0080,0x0080,0x0080,
|
|
0x0080,0x0080,0x0080,0x0080));
|
|
|
|
hi = vec_mladd (hiy, mody, hi);
|
|
|
|
hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
|
|
|
|
hi = vec_sr (hi, vec_splat_u16 (8));
|
|
|
|
lo = (vector unsigned short)
|
|
vec_mergel ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)x);
|
|
mod = (vector unsigned short)
|
|
vec_mergel ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)a);
|
|
|
|
loy = (vector unsigned short)
|
|
vec_mergel ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)y);
|
|
mody = (vector unsigned short)
|
|
vec_mergel ((vector unsigned char)AVV(0),
|
|
(vector unsigned char)b);
|
|
|
|
lo = vec_mladd (lo, mod, (vector unsigned short)
|
|
AVV(0x0080,0x0080,0x0080,0x0080,
|
|
0x0080,0x0080,0x0080,0x0080));
|
|
|
|
lo = vec_mladd (loy, mody, lo);
|
|
|
|
lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
|
|
|
|
lo = vec_sr (lo, vec_splat_u16 (8));
|
|
|
|
return (vector unsigned int)vec_packsu (hi, lo);
|
|
}
|
|
|
|
static force_inline vector unsigned int
|
|
negate (vector unsigned int src)
|
|
{
|
|
return vec_nor (src, src);
|
|
}
|
|
/* dest*~srca + src */
|
|
static force_inline vector unsigned int
|
|
over (vector unsigned int src, vector unsigned int srca,
|
|
vector unsigned int dest)
|
|
{
|
|
vector unsigned char tmp = (vector unsigned char)
|
|
pix_multiply (dest, negate (srca));
|
|
tmp = vec_adds ((vector unsigned char)src, tmp);
|
|
return (vector unsigned int)tmp;
|
|
}
|
|
|
|
/*
 * IN followed by OVER.  IN is simply pix_multiply: both src and srca are
 * multiplied by mask before being composited over dest.
 * `mask` is expanded twice, so pass an expression without side effects.
 */
#define in_over(src, srca, mask, dest)                                  \
    over (pix_multiply (src, mask), pix_multiply (srca, mask), dest)
|
|
|
|
|
|
/*
 * Set <source>_mask to the vec_lvsl permute control for pointer `source`.
 * The caller declares <source>_mask.  The expansion already ends in ';'.
 */
#define COMPUTE_SHIFT_MASK(source)                                      \
    source ## _mask = vec_lvsl (0, source);
|
|
|
|
/*
 * Two-operand variant: sets <source>_mask and <dest>_mask (vec_lvsl, used
 * by the unaligned loads) and store_mask (vec_lvsr of dest, used by
 * STORE_VECTOR).  The caller declares all three variables.  The expansion
 * already ends in ';'.
 */
#define COMPUTE_SHIFT_MASKS(dest, source)                               \
    source ## _mask = vec_lvsl (0, source);                             \
    dest ## _mask = vec_lvsl (0, dest);                                 \
    store_mask = vec_lvsr (0, dest);
|
|
|
|
/*
 * Three-operand variant for the component-alpha combiners: sets
 * <source>_mask, <mask>_mask and <dest>_mask (vec_lvsl) plus store_mask
 * (vec_lvsr of dest).  The caller declares all four variables.  The
 * expansion already ends in ';'.
 */
#define COMPUTE_SHIFT_MASKC(dest, source, mask)                         \
    source ## _mask = vec_lvsl (0, source);                             \
    mask ## _mask = vec_lvsl (0, mask);                                 \
    dest ## _mask = vec_lvsl (0, dest);                                 \
    store_mask = vec_lvsr (0, dest);
|
|
|
|
/*
 * Unaligned load of one 16-byte vector (four 32-bit pixels) from `source`
 * and from `dest` into v<source> and v<dest>.
 *
 * The caller must declare tmp1..tmp4 together with the v<name> and
 * <name>_mask variables.
 * Note: tmp3 and tmp4 hold the two raw quadwords read from dest and are
 * consumed by STORE_VECTOR, so they must remain untouched between the load
 * and the store.
 */
#define LOAD_VECTORS(dest, source)                                      \
    tmp1 = (typeof(tmp1))vec_ld (0, source);                            \
    tmp2 = (typeof(tmp2))vec_ld (15, source);                           \
    v ## source = (typeof(v ## source))                                 \
        vec_perm (tmp1, tmp2, source ## _mask);                         \
    tmp3 = (typeof(tmp3))vec_ld (0, dest);                              \
    tmp4 = (typeof(tmp4))vec_ld (15, dest);                             \
    v ## dest = (typeof(v ## dest))                                     \
        vec_perm (tmp3, tmp4, dest ## _mask);
|
|
|
|
/*
 * Three-operand form of LOAD_VECTORS: additionally loads one vector from
 * `mask` into v<mask>.  tmp1 and tmp2 are reused for the mask once the
 * source vector has been assembled; tmp3 and tmp4 still hold the raw dest
 * quadwords and must stay untouched until STORE_VECTOR.
 */
#define LOAD_VECTORSC(dest, source, mask)                               \
    tmp1 = (typeof(tmp1))vec_ld (0, source);                            \
    tmp2 = (typeof(tmp2))vec_ld (15, source);                           \
    v ## source = (typeof(v ## source))                                 \
        vec_perm (tmp1, tmp2, source ## _mask);                         \
    tmp3 = (typeof(tmp3))vec_ld (0, dest);                              \
    tmp4 = (typeof(tmp4))vec_ld (15, dest);                             \
    v ## dest = (typeof(v ## dest))                                     \
        vec_perm (tmp3, tmp4, dest ## _mask);                           \
    tmp1 = (typeof(tmp1))vec_ld (0, mask);                              \
    tmp2 = (typeof(tmp2))vec_ld (15, mask);                             \
    v ## mask = (typeof(v ## mask))                                     \
        vec_perm (tmp1, tmp2, mask ## _mask);
|
|
/*
 * Unaligned store of v<dest> to `dest`.
 *
 * `edges` is rebuilt from tmp3/tmp4 (the quadwords LOAD_VECTORS* read from
 * dest) and merged with v<dest> through store_mask, presumably so that the
 * bytes surrounding the four pixels are written back unchanged.
 * Clobbers edges, tmp1 and tmp3.
 *
 * The store at offset 15 must be issued before the store at offset 0: for
 * a 16-byte aligned dest both address the same quadword and the second
 * store is the one carrying v<dest>.
 */
#define STORE_VECTOR(dest)                                              \
    edges = vec_perm (tmp4, tmp3, dest ## _mask);                       \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest);                      \
    vec_st ((vector unsigned int) tmp1, 0, dest);
|
|
|
|
static FASTCALL void
|
|
vmxCombineMaskU (uint32_t *src, const uint32_t *msk, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vsrc, vmsk;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
src_mask, msk_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(src, msk)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(src, msk)
|
|
|
|
vsrc = pix_multiply (vsrc, splat_alpha (vmsk));
|
|
|
|
STORE_VECTOR(src)
|
|
|
|
msk+=4;
|
|
src+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >= 0;) {
|
|
uint32_t a = msk[i] >> 24;
|
|
uint32_t s = src[i];
|
|
FbByteMul (s, a);
|
|
src[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = over (vsrc, splat_alpha (vsrc), vdest);
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t ia = Alpha (~s);
|
|
|
|
FbByteMulAdd (d, ia, s);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
|
|
static FASTCALL void
|
|
vmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = over (vdest, splat_alpha (vdest) , vsrc);
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t ia = Alpha (~dest[i]);
|
|
|
|
FbByteMulAdd (s, ia, d);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineInU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_multiply (vsrc, splat_alpha (vdest));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
|
|
uint32_t s = src[i];
|
|
uint32_t a = Alpha (dest[i]);
|
|
FbByteMul (s, a);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_multiply (vdest, splat_alpha (vsrc));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t d = dest[i];
|
|
uint32_t a = Alpha (src[i]);
|
|
FbByteMul (d, a);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOutU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t a = Alpha (~dest[i]);
|
|
FbByteMul (s, a);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t d = dest[i];
|
|
uint32_t a = Alpha (~src[i]);
|
|
FbByteMul (d, a);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_add_mul (vsrc, splat_alpha (vdest),
|
|
vdest, splat_alpha (negate (vsrc)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t dest_a = Alpha (d);
|
|
uint32_t src_ia = Alpha (~s);
|
|
|
|
FbByteAddMul (s, dest_a, d, src_ia);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_add_mul (vdest, splat_alpha (vsrc),
|
|
vsrc, splat_alpha (negate (vdest)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t src_a = Alpha (s);
|
|
uint32_t dest_ia = Alpha (~d);
|
|
|
|
FbByteAddMul (s, dest_ia, d, src_a);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineXorU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS (dest, src)
|
|
|
|
vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
|
|
vdest, splat_alpha (negate (vsrc)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t src_ia = Alpha (~s);
|
|
uint32_t dest_ia = Alpha (~d);
|
|
|
|
FbByteAddMul (s, dest_ia, d, src_ia);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKS(dest, src)
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORS(dest, src)
|
|
|
|
vdest = pix_add (vsrc, vdest);
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
FbByteAdd (d, s);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask);
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_multiply (vsrc, vmask);
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
mask+=4;
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
FbByteMulC (s, a);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask);
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
mask+=4;
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
FbByteMulC (s, a);
|
|
FbByteMulAddC (d, ~a, s);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask);
|
|
/* printf("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC (dest, src, mask)
|
|
|
|
vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
mask+=4;
|
|
src+=4;
|
|
dest+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t da = Alpha (d);
|
|
FbByteMulC (s, a);
|
|
FbByteMulAddC (s, ~da, d);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t da = Alpha (dest[i]);
|
|
FbByteMul (s, a);
|
|
FbByteMul (s, da);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t sa = Alpha (src[i]);
|
|
FbByteMul (a, sa);
|
|
FbByteMulC (d, a);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t da = Alpha (~d);
|
|
FbByteMulC (s, a);
|
|
FbByteMulC (s, da);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_multiply (vdest,
|
|
negate (pix_multiply (vmask, splat_alpha (vsrc))));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t sa = Alpha (s);
|
|
FbByteMulC (a, sa);
|
|
FbByteMulC (d, ~a);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
|
|
vdest,
|
|
negate (pix_multiply (vmask,
|
|
splat_alpha (vmask))));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t sa = Alpha (s);
|
|
uint32_t da = Alpha (d);
|
|
|
|
FbByteMulC (s, a);
|
|
FbByteMul (a, sa);
|
|
FbByteAddMulC (d, ~a, s, da);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_add_mul (vdest,
|
|
pix_multiply (vmask, splat_alpha (vsrc)),
|
|
pix_multiply (vsrc, vmask),
|
|
negate (splat_alpha (vdest)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t sa = Alpha (s);
|
|
uint32_t da = Alpha (d);
|
|
|
|
FbByteMulC (s, a);
|
|
FbByteMul (a, sa);
|
|
FbByteAddMulC (d, a, s, ~da);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_add_mul (vdest,
|
|
negate (pix_multiply (vmask, splat_alpha (vsrc))),
|
|
pix_multiply (vsrc, vmask),
|
|
negate (splat_alpha (vdest)));
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
uint32_t sa = Alpha (s);
|
|
uint32_t da = Alpha (d);
|
|
|
|
FbByteMulC (s, a);
|
|
FbByteMul (a, sa);
|
|
FbByteAddMulC (d, ~a, s, ~da);
|
|
dest[i] = d;
|
|
}
|
|
}
|
|
|
|
static FASTCALL void
|
|
vmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
|
|
{
|
|
int i;
|
|
vector unsigned int vdest, vsrc, vmask;
|
|
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
|
|
dest_mask, mask_mask, src_mask, store_mask;
|
|
|
|
COMPUTE_SHIFT_MASKC(dest, src, mask)
|
|
|
|
/* printf ("%s\n",__PRETTY_FUNCTION__); */
|
|
for (i = width/4; i > 0; i--) {
|
|
|
|
LOAD_VECTORSC(dest, src, mask)
|
|
|
|
vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
|
|
|
|
STORE_VECTOR(dest)
|
|
|
|
src+=4;
|
|
dest+=4;
|
|
mask+=4;
|
|
}
|
|
|
|
for (i = width%4; --i >=0;) {
|
|
uint32_t a = mask[i];
|
|
uint32_t s = src[i];
|
|
uint32_t d = dest[i];
|
|
|
|
FbByteMulC (s, a);
|
|
FbByteAdd (s, d);
|
|
dest[i] = s;
|
|
}
|
|
}
|
|
|
|
|
|
#if 0
/*
 * Disabled solid-fill compositing entry points.  Nothing in this region is
 * compiled.  As written the bodies are incomplete: the 8888 variant has
 * its per-scanline call commented out and the 0565 variant calls
 * vmxCombineOverU565, which is not defined in this file.
 */
void
fbCompositeSolid_nx8888vmx (pixman_operator_t op,
                            pixman_image_t * pSrc,
                            pixman_image_t * pMask,
                            pixman_image_t * pDst,
                            int16_t xSrc,
                            int16_t ySrc,
                            int16_t xMask,
                            int16_t yMask,
                            int16_t xDst,
                            int16_t yDst,
                            uint16_t width,
                            uint16_t height)
{
    uint32_t src;
    uint32_t *dstLine, *dst;
    int dstStride;

    fbComposeGetSolid (pSrc, pDst, src);

    /* A fully transparent source leaves the destination untouched. */
    if (src >> 24 == 0)
        return;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        /* XXX vmxCombineOverU (dst, src, width); */
    }
}

void
fbCompositeSolid_nx0565vmx (pixman_operator_t op,
                            pixman_image_t * pSrc,
                            pixman_image_t * pMask,
                            pixman_image_t * pDst,
                            int16_t xSrc,
                            int16_t ySrc,
                            int16_t xMask,
                            int16_t yMask,
                            int16_t xDst,
                            int16_t yDst,
                            uint16_t width,
                            uint16_t height)
{
    uint32_t src;
    uint16_t *dstLine, *dst;
    uint16_t w;
    int dstStride;

    fbComposeGetSolid (pSrc, pDst, src);

    /* A fully transparent source leaves the destination untouched. */
    if (src >> 24 == 0)
        return;

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        vmxCombineOverU565(dst, src, width);
    }
}

#endif
|
|
|
|
void fbComposeSetupVMX (void)
|
|
{
|
|
/* check if we have VMX support and initialize accordingly */
|
|
if (pixman_have_vmx ()) {
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = vmxCombineOverU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_IN] = vmxCombineInU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = vmxCombineOutU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = vmxCombineAtopU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = vmxCombineXorU;
|
|
pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = vmxCombineAddU;
|
|
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = vmxCombineSrcC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = vmxCombineOverC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_IN] = vmxCombineInC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_OUT] = vmxCombineOutC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_ATOP] = vmxCombineAtopC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = vmxCombineXorC;
|
|
pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = vmxCombineAddC;
|
|
|
|
pixman_composeFunctions.combineMaskU = vmxCombineMaskU;
|
|
}
|
|
}
|