eGospodarka.pl
eGospodarka.pl poleca

eGospodarka.pl › Grupy › pl.comp.programming › cpu shading by sse intrinsics › Re: cpu shading by sse intrinsics
  • X-Received: by 10.140.36.232 with SMTP id p95mr16870qgp.6.1404038782537; Sun, 29 Jun
    2014 03:46:22 -0700 (PDT)
    X-Received: by 10.140.36.232 with SMTP id p95mr16870qgp.6.1404038782537; Sun, 29 Jun
    2014 03:46:22 -0700 (PDT)
    Path: news-archive.icm.edu.pl!agh.edu.pl!news.agh.edu.pl!newsfeed2.atman.pl!newsfeed.
    atman.pl!news.nask.pl!news.nask.org.pl!newsfeed.pionier.net.pl!news.glorb.com!h
    n18no18364207igb.0!news-out.google.com!a8ni0qaq.1!nntp.google.com!i13no3028965q
    ae.1!postnews.google.com!glegroupsg2000goo.googlegroups.com!not-for-mail
    Newsgroups: pl.comp.programming
    Date: Sun, 29 Jun 2014 03:46:22 -0700 (PDT)
    In-Reply-To: <4...@g...com>
    Complaints-To: g...@g...com
    Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=78.30.123.209;
    posting-account=Sb6m8goAAABbWsBL7gouk3bfLsuxwMgN
    NNTP-Posting-Host: 78.30.123.209
    References: <4...@g...com>
    User-Agent: G2/1.0
    MIME-Version: 1.0
    Message-ID: <3...@g...com>
    Subject: Re: cpu shading by sse intrinsics
    From: firr <p...@g...com>
    Injection-Date: Sun, 29 Jun 2014 10:46:22 +0000
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: quoted-printable
    Xref: news-archive.icm.edu.pl pl.comp.programming:206079
    [ ukryj nagłówki ]

    Uzyskałem przez zapytanie na innym forum taki kod:


    #include <pmmintrin.h>

    // Four 3-component vectors stored component-wise: x, y and z each hold one
    // component for all four vectors, so lane i of x/y/z together is vector i.
    struct FourVec3s {
        __m128 x, y, z;
    };

    // Four triangles: a, b and c each hold one corner of all four triangles;
    // colors is a single 128-bit block of packed 8-bit color values.
    struct FourTris {
        FourVec3s a, b, c;
        __m128i colors;
    };

    // Light directions in transposed (component-wise) form: each row is one
    // component, each column is one lane. dot() combines lane i of these rows
    // with lane i of its other argument.
    // NOTE(review): the columns are not unit length (column 1 has z = 20.3),
    // so the dot products are presumably not plain cosines -- confirm intent.
    static FourVec3s lightDirs = {
        /* x */ { 0.2,  0.5, -0.5, -0.5},
        /* y */ {-1.6, -0.7, -0.3,  1.3},
        /* z */ {-1.7, 20.3, -0.6,  0.6}
    };

    // Per-lane light weights, stored in the same transposed form. The shader
    // multiplies each row by the per-lane light term and sums the three rows.
    static FourVec3s lightColors = {
        /* x */ {0.4,   0.4145, 0.584,   0.41  },
        /* y */ {0.414, 0.451,  0.51414, 0.44  },
        /* z */ {0.515, 0.543,  0.43,    0.3414}
    };

    // Model basis vectors and position, one vector per register with the
    // components in lanes 0..2 (x, y, z). The initial values form an identity
    // basis; modelPos starts at (0, 0, 0) with lane 3 set to 1.
    static __m128 modelRight = {1.0f, 0.0f, 0.0f, 0.0f};
    static __m128 modelUp    = {0.0f, 1.0f, 0.0f, 0.0f};
    static __m128 modelDir   = {0.0f, 0.0f, 1.0f, 0.0f};
    static __m128 modelPos   = {0.0f, 0.0f, 0.0f, 1.0f};


    // Broadcast helpers: copy one lane of v into all four lanes of the result.
    // The shuffle immediate packs four 2-bit lane selectors, so 0x00, 0x55 and
    // 0xAA select lane 0, lane 1 and lane 2 everywhere, respectively.

    // Broadcasts lane 0 (x) of v.
    static inline __m128 splatX(__m128 v)
    {
        return _mm_shuffle_ps(v, v, 0x00);
    }

    // Broadcasts lane 1 (y) of v.
    static inline __m128 splatY(__m128 v)
    {
        return _mm_shuffle_ps(v, v, 0x55);
    }

    // Broadcasts lane 2 (z) of v.
    static inline __m128 splatZ(__m128 v)
    {
        return _mm_shuffle_ps(v, v, 0xAA);
    }
    // Lane-wise single-precision arithmetic on four packed floats.

    // Returns l + r per lane.
    static inline __m128 add(__m128 l, __m128 r)
    {
        const __m128 sum = _mm_add_ps(l, r);
        return sum;
    }

    // Returns l - r per lane.
    static inline __m128 sub(__m128 l, __m128 r)
    {
        const __m128 difference = _mm_sub_ps(l, r);
        return difference;
    }

    // Returns l * r per lane.
    static inline __m128 mul(__m128 l, __m128 r)
    {
        const __m128 product = _mm_mul_ps(l, r);
        return product;
    }
    // Bitwise AND of the raw 128 bits of l and r; combined with a comparison
    // mask it keeps the lanes where the mask is set and zeroes the others.
    // NOTE(review): `and` is an alternative token for `&&` in standard C++, so
    // this name only compiles where alternative tokens are not keywords (C
    // without <iso646.h>, or a compiler mode that disables them). Confirm the
    // intended toolchain before renaming; Shade4Triangles calls it by name.
    static inline __m128 and(__m128 l, __m128 r)
    {
        const __m128 masked = _mm_and_ps(l, r);
        return masked;
    }

    // Lane-wise l < r: each result lane is all-ones bits where the comparison
    // holds and all-zero bits where it does not.
    static inline __m128 less(__m128 l, __m128 r)
    {
        const __m128 mask = _mm_cmplt_ps(l, r);
        return mask;
    }
    // Four dot products at once: lane i of the result is
    // l.x[i]*r.x[i] + l.y[i]*r.y[i] + l.z[i]*r.z[i], summed in that order.
    static inline __m128 dot(const FourVec3s &l, const FourVec3s &r)
    {
        const __m128 xx = mul(l.x, r.x);
        const __m128 yy = mul(l.y, r.y);
        const __m128 zz = mul(l.z, r.z);
        return add(add(xx, yy), zz);
    }

    // Widen four packed 8-bit values to four zero-extended 32-bit integers by
    // interleaving with zero bytes twice (8 -> 16 -> 32 bits).
    //   unpackR reads bytes 0..3, unpackG bytes 4..7, unpackB bytes 8..11.
    // The 16 input bytes are therefore consumed as planar RRRR GGGG BBBB xxxx
    // (one byte per lane and channel), not as interleaved RGBA pixels; bytes
    // 12..15 are not read by any of the three helpers.

    // Returns bytes 0..3 of iv as 32-bit integers in lanes 0..3.
    static inline __m128i unpackR(__m128i iv) {
        const __m128i zero = _mm_setzero_si128();
        return _mm_unpacklo_epi16(_mm_unpacklo_epi8(iv, zero), zero);
    }

    // Returns bytes 4..7 of iv as 32-bit integers in lanes 0..3.
    static inline __m128i unpackG(__m128i iv) {
        const __m128i zero = _mm_setzero_si128();
        return _mm_unpackhi_epi16(_mm_unpacklo_epi8(iv, zero), zero);
    }

    // Returns bytes 8..11 of iv as 32-bit integers in lanes 0..3.
    static inline __m128i unpackB(__m128i iv) {
        const __m128i zero = _mm_setzero_si128();
        return _mm_unpacklo_epi16(_mm_unpackhi_epi8(iv, zero), zero);
    }
    // Converts four signed 32-bit integers to four single-precision floats.
    static inline __m128 intsToFloats(__m128i iv)
    {
        const __m128 asFloats = _mm_cvtepi32_ps(iv);
        return asFloats;
    }

    // Converts four floats to signed 32-bit integers, truncating toward zero
    // (the "tt" conversion, as opposed to the rounding one).
    static inline __m128i floatToInts(__m128 fv)
    {
        const __m128i asInts = _mm_cvttps_epi32(fv);
        return asInts;
    }
    // Narrows four registers of 32-bit integers to one register of 16 unsigned
    // bytes, clamping every value to 0..255.
    // Output byte order: r lanes 0..3, g lanes 0..3, b lanes 0..3, a lanes 0..3.
    //
    // The 32 -> 16 step uses signed saturation, which keeps negative values
    // negative; the 16 -> 8 step must use *unsigned* saturation
    // (_mm_packus_epi16). The signed variant (_mm_packs_epi16) clamps to
    // -128..127 and would cap every channel brighter than 127.
    static inline __m128i packAndSaturate32To8(__m128i r ,__m128i g, __m128i b, __m128i a) {
        const __m128i rg16 = _mm_packs_epi32(r, g);
        const __m128i ba16 = _mm_packs_epi32(b, a);
        return _mm_packus_epi16(rg16, ba16);
    }


    // Divides each of the four vectors in v by its own Euclidean length and
    // returns the result. No zero-length check is made: a lane whose length
    // is zero divides 0 by 0.
    static inline FourVec3s normalizeFourVec3s(const FourVec3s &v) {
        const __m128 lengthSquared = dot(v, v);  // x*x + y*y + z*z per lane
        const __m128 len = _mm_sqrt_ps(lengthSquared);

        FourVec3s unit;
        unit.x = _mm_div_ps(v.x, len);
        unit.y = _mm_div_ps(v.y, len);
        unit.z = _mm_div_ps(v.z, len);
        return unit;
    }

    __m128i Shade4Triangles(const FourTris &tris) {
    __m128 x1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)),
    splatX(modelRight)), // (*triangle).a.x - modelPos.x)*modelRight.x +
    mul(sub(tris.a.y, splatY(modelPos)),
    splatY(modelRight))), // ((*triangle).a.y - modelPos.y)*modelRight.y +
    mul(sub(tris.a.z, splatZ(modelPos)),
    splatZ(modelRight))), // ((*triangle).a.z - modelPos.z)*modelRight.z) +
    splatX(modelPos));
    // modelPos.x
    __m128 y1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)), splatX(modelUp)),
    mul(sub(tris.a.y, splatY(modelPos)), splatY(modelUp))),
    mul(sub(tris.a.z, splatZ(modelPos)), splatZ(modelUp))),
    splatY(modelPos));
    __m128 z1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)), splatX(modelDir)),
    mul(sub(tris.a.y, splatY(modelPos)), splatY(modelDir))),
    mul(sub(tris.a.z, splatZ(modelPos)), splatZ(modelDir))),
    splatZ(modelPos));
    __m128 x2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)),
    splatX(modelRight)),
    mul(sub(tris.b.y, splatY(modelPos)),
    splatY(modelRight))),
    mul(sub(tris.b.z, splatZ(modelPos)),
    splatZ(modelRight))),
    splatX(modelPos));
    __m128 y2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)), splatX(modelUp)),
    mul(sub(tris.b.y, splatY(modelPos)), splatY(modelUp))),
    mul(sub(tris.b.z, splatZ(modelPos)), splatZ(modelUp))),
    splatY(modelPos));
    __m128 z2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)), splatX(modelDir)),
    mul(sub(tris.b.y, splatY(modelPos)), splatY(modelDir))),
    mul(sub(tris.b.z, splatZ(modelPos)), splatZ(modelDir))),
    splatZ(modelPos));
    __m128 x3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)),
    splatX(modelRight)),
    mul(sub(tris.c.y, splatY(modelPos)),
    splatY(modelRight))),
    mul(sub(tris.c.z, splatZ(modelPos)),
    splatZ(modelRight))),
    splatX(modelPos));
    __m128 y3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)), splatX(modelUp)),
    mul(sub(tris.c.y, splatY(modelPos)), splatY(modelUp))),
    mul(sub(tris.c.z, splatZ(modelPos)), splatZ(modelUp))),
    splatY(modelPos));
    __m128 z3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)), splatX(modelDir)),
    mul(sub(tris.c.y, splatY(modelPos)), splatY(modelDir))),
    mul(sub(tris.c.z, splatZ(modelPos)), splatZ(modelDir))),
    splatZ(modelPos));

    FourVec3s normal;
    normal.x = sub( mul(sub(y1,y1),sub(z3,z2)), mul(sub(z2,z1),sub(y3,y2)) );
    normal.y = sub( mul(sub(z2,z1),sub(x3,x2)), mul(sub(x2,x1),sub(z3,z2)) );
    normal.z = sub( mul(sub(x2,x1),sub(y3,y2)), mul(sub(y2,y1),sub(x3,x2)) );
    normal = normalizeFourVec3s(normal);

    __m128 s1234 = dot(normal, lightDirs);
    s1234 = and(s1234, less(_mm_setzero_ps(), s1234));

    __m128 l = add(_mm_set_ps1(0.1f), add(add( mul(s1234,lightColors.x),
    mul(s1234,lightColors.y)), mul(s1234,lightColors.z)));

    __m128i r = floatToInts(mul(l,intsToFloats(unpackR(tris.colors))
    ));
    __m128i g = floatToInts(mul(l,intsToFloats(unpackG(tris.colors))
    ));
    __m128i b = floatToInts(mul(l,intsToFloats(unpackB(tris.colors))
    ));

    return packAndSaturate32To8(r,g,b,_mm_setzero_si128());
    }

    Aczkolwiek czeka mnie z tym mała 'rozkminka', bo nie wszystko jest jasne.

Podziel się

Poleć ten post znajomemu poleć

Wydrukuj ten post drukuj


Następne wpisy z tego wątku

Najnowsze wątki z tej grupy


Najnowsze wątki

Szukaj w grupach

Eksperci egospodarka.pl

1 1 1

Wpisz nazwę miasta, dla którego chcesz znaleźć jednostkę ZUS.

Wzory dokumentów

Bezpłatne wzory dokumentów i formularzy.
Wyszukaj i pobierz za darmo: