eGospodarka.pl
eGospodarka.pl poleca

eGospodarka.pl › Grupy › pl.comp.programming › cpu shading by sse intrinsics › Re: cpu shading by sse intrinsics
  • Data: 2014-06-29 12:46:22
    Temat: Re: cpu shading by sse intrinsics
    Od: firr <p...@g...com> szukaj wiadomości tego autora
    [ pokaż wszystkie nagłówki ]

    Uzyskałem przez zapytanie na innym forum taki kod:


    #include <pmmintrin.h>

    // Four 3-component vectors stored structure-of-arrays: lane i of x, y and z
    // together form vector i.
    struct FourVec3s {
        __m128 x;   // x components of vectors 0..3
        __m128 y;   // y components of vectors 0..3
        __m128 z;   // z components of vectors 0..3
    };
    // Four triangles stored structure-of-arrays: lane i of a, b and c holds the
    // three vertices of triangle i. `colors` carries the packed 8-bit color
    // data that Shade4Triangles feeds to unpackR/unpackG/unpackB.
    struct FourTris {
        FourVec3s a;        // first vertex of each triangle
        FourVec3s b;        // second vertex of each triangle
        FourVec3s c;        // third vertex of each triangle
        __m128i   colors;   // 16 bytes of color data for the four triangles
    };

    // Four direction vectors in transposed (structure-of-arrays) form: each row
    // below is one component for all four entries, so entry i is
    // (x[i], y[i], z[i]).
    // NOTE(review): the entries are not unit length — confirm that is intended.
    static FourVec3s lightDirs = {
        { 0.2,  0.5, -0.5, -0.5},   // x
        {-1.6, -0.7, -0.3,  1.3},   // y
        {-1.7, 20.3, -0.6,  0.6}    // z
    };

    // Four color triples in transposed (structure-of-arrays) form: entry i is
    // (x[i], y[i], z[i]). Shade4Triangles multiplies each row by the per-lane
    // diffuse term and sums the three products.
    static FourVec3s lightColors = {
        {.4,   .4145, .584,   .41  },   // x
        {.414, .451,  .51414, .44  },   // y
        {.515, .543,  .43,    .3414}    // z
    };

    // Model basis axes and origin, one vector per __m128 (lane 0 = x, lane 1 = y,
    // lane 2 = z). The visible code only reads lanes 0..2 through
    // splatX/splatY/splatZ; lane 3 is not read here.
    static __m128 modelRight = { 1.0, 0.0, 0.0, 0.0 };
    static __m128 modelUp    = { 0.0, 1.0, 0.0, 0.0 };
    static __m128 modelDir   = { 0.0, 0.0, 1.0, 0.0 };
    static __m128 modelPos   = { 0.0, 0.0, 0.0, 1.0 };


    // Broadcast lane 0 (x) of v into all four lanes.
    static inline __m128 splatX(__m128 v)
    {
        const __m128 broadcast = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
        return broadcast;
    }
    // Broadcast lane 1 (y) of v into all four lanes.
    static inline __m128 splatY(__m128 v)
    {
        const __m128 broadcast = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
        return broadcast;
    }
    // Broadcast lane 2 (z) of v into all four lanes.
    static inline __m128 splatZ(__m128 v)
    {
        const __m128 broadcast = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
        return broadcast;
    }
    // Lane-wise single-precision sum: result[i] = l[i] + r[i].
    static inline __m128 add(__m128 l, __m128 r)
    {
        const __m128 sum = _mm_add_ps(l, r);
        return sum;
    }
    // Lane-wise single-precision difference: result[i] = l[i] - r[i].
    static inline __m128 sub(__m128 l, __m128 r)
    {
        const __m128 difference = _mm_sub_ps(l, r);
        return difference;
    }
    // Lane-wise single-precision product: result[i] = l[i] * r[i].
    static inline __m128 mul(__m128 l, __m128 r)
    {
        const __m128 product = _mm_mul_ps(l, r);
        return product;
    }
    // Lane-wise bitwise AND of the raw bits of two float vectors. Shade4Triangles
    // uses it with a comparison mask from less() to zero out selected lanes.
    // NOTE(review): `and` is an alternative token for `&&` in standard C++, so a
    // conforming compiler rejects this definition. Renaming it also requires
    // updating the call site in Shade4Triangles.
    static inline __m128 and(__m128 l, __m128 r) { return _mm_and_ps(l, r); }
    // Lane-wise l < r comparison. Each result lane is all-ones bits where the
    // comparison holds and all-zero bits otherwise.
    static inline __m128 less(__m128 l, __m128 r)
    {
        const __m128 mask = _mm_cmplt_ps(l, r);
        return mask;
    }
    // Four dot products at once: lane i of the result is
    // l.x[i]*r.x[i] + l.y[i]*r.y[i] + l.z[i]*r.z[i], summed in that order.
    static inline __m128 dot(const FourVec3s &l, const FourVec3s &r)
    {
        const __m128 xx = _mm_mul_ps(l.x, r.x);
        const __m128 yy = _mm_mul_ps(l.y, r.y);
        const __m128 zz = _mm_mul_ps(l.z, r.z);
        return _mm_add_ps(_mm_add_ps(xx, yy), zz);
    }

    // Zero-extend bytes 0..3 of iv into four 32-bit lanes.
    // Together with unpackG (bytes 4..7) and unpackB (bytes 8..11) this expects
    // a planar layout RRRR GGGG BBBB xxxx, not interleaved RGBA pixels.
    static inline __m128i unpackR(__m128i iv)
    {
        const __m128i zero = _mm_setzero_si128();
        // bytes 0..7 -> eight 16-bit words, then words 0..3 -> four 32-bit ints
        return _mm_unpacklo_epi16(_mm_unpacklo_epi8(iv, zero), zero);
    }
    // Zero-extend bytes 4..7 of iv into four 32-bit lanes.
    static inline __m128i unpackG(__m128i iv)
    {
        const __m128i zero = _mm_setzero_si128();
        // bytes 0..7 -> eight 16-bit words, then words 4..7 -> four 32-bit ints
        return _mm_unpackhi_epi16(_mm_unpacklo_epi8(iv, zero), zero);
    }
    // Zero-extend bytes 8..11 of iv into four 32-bit lanes.
    static inline __m128i unpackB(__m128i iv)
    {
        const __m128i zero = _mm_setzero_si128();
        // bytes 8..15 -> eight 16-bit words, then words 0..3 -> four 32-bit ints
        return _mm_unpacklo_epi16(_mm_unpackhi_epi8(iv, zero), zero);
    }
    // Convert four signed 32-bit integers to four single-precision floats.
    static inline __m128 intsToFloats(__m128i iv)
    {
        const __m128 asFloats = _mm_cvtepi32_ps(iv);
        return asFloats;
    }
    // Convert four floats to signed 32-bit integers, truncating toward zero.
    static inline __m128i floatToInts(__m128 fv)
    {
        const __m128i truncated = _mm_cvttps_epi32(fv);
        return truncated;
    }
    // Pack four vectors of 32-bit integers into one vector of sixteen unsigned
    // bytes, clamping every value to 0..255. Output bytes 0..3 come from r,
    // 4..7 from g, 8..11 from b and 12..15 from a.
    static inline __m128i packAndSaturate32To8(__m128i r ,__m128i g, __m128i b, __m128i a)
    {
        // 32 -> 16 bit with signed saturation keeps negatives and large values intact.
        const __m128i rg = _mm_packs_epi32(r, g);
        const __m128i ba = _mm_packs_epi32(b, a);
        // 16 -> 8 bit must saturate as UNSIGNED; the signed variant
        // (_mm_packs_epi16) would clamp every channel above 127 down to 127.
        return _mm_packus_epi16(rg, ba);
    }


    // Scale each of the four vectors in v to unit length by dividing every
    // component by the vector's Euclidean length. There is no guard for a
    // zero-length vector: such a lane is divided by zero.
    static inline FourVec3s normalizeFourVec3s(const FourVec3s &v) {
        const __m128 xx = mul(v.x, v.x);
        const __m128 yy = mul(v.y, v.y);
        const __m128 zz = mul(v.z, v.z);
        const __m128 length = _mm_sqrt_ps(add(add(xx, yy), zz));

        FourVec3s unit;
        unit.x = _mm_div_ps(v.x, length);
        unit.y = _mm_div_ps(v.y, length);
        unit.z = _mm_div_ps(v.z, length);
        return unit;
    }

    __m128i Shade4Triangles(const FourTris &tris) {
    __m128 x1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)),
    splatX(modelRight)), // (*triangle).a.x - modelPos.x)*modelRight.x +
    mul(sub(tris.a.y, splatY(modelPos)),
    splatY(modelRight))), // ((*triangle).a.y - modelPos.y)*modelRight.y +
    mul(sub(tris.a.z, splatZ(modelPos)),
    splatZ(modelRight))), // ((*triangle).a.z - modelPos.z)*modelRight.z) +
    splatX(modelPos));
    // modelPos.x
    __m128 y1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)), splatX(modelUp)),
    mul(sub(tris.a.y, splatY(modelPos)), splatY(modelUp))),
    mul(sub(tris.a.z, splatZ(modelPos)), splatZ(modelUp))),
    splatY(modelPos));
    __m128 z1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)), splatX(modelDir)),
    mul(sub(tris.a.y, splatY(modelPos)), splatY(modelDir))),
    mul(sub(tris.a.z, splatZ(modelPos)), splatZ(modelDir))),
    splatZ(modelPos));
    __m128 x2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)),
    splatX(modelRight)),
    mul(sub(tris.b.y, splatY(modelPos)),
    splatY(modelRight))),
    mul(sub(tris.b.z, splatZ(modelPos)),
    splatZ(modelRight))),
    splatX(modelPos));
    __m128 y2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)), splatX(modelUp)),
    mul(sub(tris.b.y, splatY(modelPos)), splatY(modelUp))),
    mul(sub(tris.b.z, splatZ(modelPos)), splatZ(modelUp))),
    splatY(modelPos));
    __m128 z2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)), splatX(modelDir)),
    mul(sub(tris.b.y, splatY(modelPos)), splatY(modelDir))),
    mul(sub(tris.b.z, splatZ(modelPos)), splatZ(modelDir))),
    splatZ(modelPos));
    __m128 x3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)),
    splatX(modelRight)),
    mul(sub(tris.c.y, splatY(modelPos)),
    splatY(modelRight))),
    mul(sub(tris.c.z, splatZ(modelPos)),
    splatZ(modelRight))),
    splatX(modelPos));
    __m128 y3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)), splatX(modelUp)),
    mul(sub(tris.c.y, splatY(modelPos)), splatY(modelUp))),
    mul(sub(tris.c.z, splatZ(modelPos)), splatZ(modelUp))),
    splatY(modelPos));
    __m128 z3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)), splatX(modelDir)),
    mul(sub(tris.c.y, splatY(modelPos)), splatY(modelDir))),
    mul(sub(tris.c.z, splatZ(modelPos)), splatZ(modelDir))),
    splatZ(modelPos));

    FourVec3s normal;
    normal.x = sub( mul(sub(y1,y1),sub(z3,z2)), mul(sub(z2,z1),sub(y3,y2)) );
    normal.y = sub( mul(sub(z2,z1),sub(x3,x2)), mul(sub(x2,x1),sub(z3,z2)) );
    normal.z = sub( mul(sub(x2,x1),sub(y3,y2)), mul(sub(y2,y1),sub(x3,x2)) );
    normal = normalizeFourVec3s(normal);

    __m128 s1234 = dot(normal, lightDirs);
    s1234 = and(s1234, less(_mm_setzero_ps(), s1234));

    __m128 l = add(_mm_set_ps1(0.1f), add(add( mul(s1234,lightColors.x),
    mul(s1234,lightColors.y)), mul(s1234,lightColors.z)));

    __m128i r = floatToInts(mul(l,intsToFloats(unpackR(tris.colors))
    ));
    __m128i g = floatToInts(mul(l,intsToFloats(unpackG(tris.colors))
    ));
    __m128i b = floatToInts(mul(l,intsToFloats(unpackB(tris.colors))
    ));

    return packAndSaturate32To8(r,g,b,_mm_setzero_si128());
    }

    Aczkolwiek czeka mnie z tym mała „rozkminka”, bo nie wszystko jest jasne.

Podziel się

Poleć ten post znajomemu poleć

Wydrukuj ten post drukuj


Następne wpisy z tego wątku

Najnowsze wątki z tej grupy


Najnowsze wątki

Szukaj w grupach

Eksperci egospodarka.pl

1 1 1

Wpisz nazwę miasta, dla którego chcesz znaleźć jednostkę ZUS.

Wzory dokumentów

Bezpłatne wzory dokumentów i formularzy.
Wyszukaj i pobierz za darmo: