-
X-Received: by 10.140.36.232 with SMTP id p95mr16870qgp.6.1404038782537; Sun, 29 Jun
2014 03:46:22 -0700 (PDT)
X-Received: by 10.140.36.232 with SMTP id p95mr16870qgp.6.1404038782537; Sun, 29 Jun
2014 03:46:22 -0700 (PDT)
Path: news-archive.icm.edu.pl!agh.edu.pl!news.agh.edu.pl!newsfeed2.atman.pl!newsfeed.
atman.pl!news.nask.pl!news.nask.org.pl!newsfeed.pionier.net.pl!news.glorb.com!h
n18no18364207igb.0!news-out.google.com!a8ni0qaq.1!nntp.google.com!i13no3028965q
ae.1!postnews.google.com!glegroupsg2000goo.googlegroups.com!not-for-mail
Newsgroups: pl.comp.programming
Date: Sun, 29 Jun 2014 03:46:22 -0700 (PDT)
In-Reply-To: <4...@g...com>
Complaints-To: g...@g...com
Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=78.30.123.209;
posting-account=Sb6m8goAAABbWsBL7gouk3bfLsuxwMgN
NNTP-Posting-Host: 78.30.123.209
References: <4...@g...com>
User-Agent: G2/1.0
MIME-Version: 1.0
Message-ID: <3...@g...com>
Subject: Re: cpu shading by sse intrinsics
From: firr <p...@g...com>
Injection-Date: Sun, 29 Jun 2014 10:46:22 +0000
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: quoted-printable
Xref: news-archive.icm.edu.pl pl.comp.programming:206079
[ ukryj nagłówki ] Uzyskałem przez zapytanie na innym forum taki kod:
#include <pmmintrin.h>
// Four 3-component vectors stored component-wise: each member is one SSE
// register holding that component for all four vectors.
struct FourVec3s {
    __m128 x;
    __m128 y;
    __m128 z;
};
// Four triangles processed together: `a`, `b` and `c` each hold one corner of
// all four triangles, `colors` is a 128-bit packed colour word that is fed to
// unpackR/unpackG/unpackB.
struct FourTris {
    FourVec3s a;
    FourVec3s b;
    FourVec3s c;
    __m128i colors;
};
// Light directions in transposed (component-wise) layout: the first row
// initialises .x, the second .y and the third .z, four values per row.
static FourVec3s lightDirs = {
    {  0.2,  0.5, -0.5, -0.5 },   // .x
    { -1.6, -0.7, -0.3,  1.3 },   // .y
    { -1.7, 20.3, -0.6,  0.6 }    // .z
};
// Light colour weights in the same transposed layout as lightDirs: one row per
// member (.x, .y, .z), four values per row.
static FourVec3s lightColors = {
    { .4,   .4145, .584,   .41   },   // .x
    { .414, .451,  .51414, .44   },   // .y
    { .515, .543,  .43,    .3414 }    // .z
};
// Model basis vectors and position, one (x, y, z, w) tuple per register.
// Only lanes 0..2 are read through splatX/splatY/splatZ in this file.
static __m128 modelRight = { 1.0f, 0.0f, 0.0f, 0.0f };
static __m128 modelUp    = { 0.0f, 1.0f, 0.0f, 0.0f };
static __m128 modelDir   = { 0.0f, 0.0f, 1.0f, 0.0f };
static __m128 modelPos   = { 0.0f, 0.0f, 0.0f, 1.0f };
// Broadcasts lane 0 of v into all four lanes.
static inline __m128 splatX(__m128 v)
{
    const __m128 broadcast = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
    return broadcast;
}
// Broadcasts lane 1 of v into all four lanes.
static inline __m128 splatY(__m128 v)
{
    const __m128 broadcast = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
    return broadcast;
}
// Broadcasts lane 2 of v into all four lanes.
static inline __m128 splatZ(__m128 v)
{
    const __m128 broadcast = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
    return broadcast;
}
// Lane-wise single-precision sum l + r.
static inline __m128 add(__m128 l, __m128 r)
{
    const __m128 sum = _mm_add_ps(l, r);
    return sum;
}
// Lane-wise single-precision difference l - r.
static inline __m128 sub(__m128 l, __m128 r)
{
    const __m128 difference = _mm_sub_ps(l, r);
    return difference;
}
// Lane-wise single-precision product l * r.
static inline __m128 mul(__m128 l, __m128 r)
{
    const __m128 product = _mm_mul_ps(l, r);
    return product;
}
// Lane-wise bitwise AND of the two registers.
// NOTE(review): `and` is an alternative token for `&&` in standard C++ (and a
// macro in C once <iso646.h> is included), so this definition only compiles
// where that spelling is not reserved. The name is kept because other code in
// this file calls it; consider renaming it together with its callers.
static inline __m128 and(__m128 l, __m128 r)
{
    const __m128 masked = _mm_and_ps(l, r);
    return masked;
}
// Lane-wise l < r; a lane is all-ones where the comparison holds and all-zero
// otherwise.
static inline __m128 less(__m128 l, __m128 r)
{
    const __m128 mask = _mm_cmplt_ps(l, r);
    return mask;
}
// Lane-wise dot product of four vector pairs: (l.x*r.x + l.y*r.y) + l.z*r.z.
static inline __m128 dot(const FourVec3s &l, const FourVec3s &r)
{
    const __m128 xx = mul(l.x, r.x);
    const __m128 yy = mul(l.y, r.y);
    const __m128 zz = mul(l.z, r.z);
    return add(add(xx, yy), zz);
}
// Zero-extends bytes 0..3 of iv into four 32-bit lanes.
// NOTE(review): the original comment described the input as interleaved
// "RgbaRgbaRgbaRgba", but this widening only yields four red values if the
// bytes are stored planar (RRRR GGGG BBBB AAAA) - confirm the layout with the
// code that fills FourTris::colors.
static inline __m128i unpackR(__m128i iv)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i low16 = _mm_unpacklo_epi8(iv, zero);   // bytes 0..7 -> eight 16-bit values
    return _mm_unpacklo_epi16(low16, zero);              // values 0..3 -> four 32-bit values
}
// Zero-extends bytes 4..7 of iv into four 32-bit lanes.
// NOTE(review): yields four green values only for a planar
// RRRR GGGG BBBB AAAA byte layout - confirm against the producer of the data.
static inline __m128i unpackG(__m128i iv)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i low16 = _mm_unpacklo_epi8(iv, zero);   // bytes 0..7 -> eight 16-bit values
    return _mm_unpackhi_epi16(low16, zero);              // values 4..7 -> four 32-bit values
}
// Zero-extends bytes 8..11 of iv into four 32-bit lanes.
// NOTE(review): yields four blue values only for a planar
// RRRR GGGG BBBB AAAA byte layout - confirm against the producer of the data.
static inline __m128i unpackB(__m128i iv)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i high16 = _mm_unpackhi_epi8(iv, zero);  // bytes 8..15 -> eight 16-bit values
    return _mm_unpacklo_epi16(high16, zero);             // values 0..3 -> four 32-bit values
}
// Converts four signed 32-bit integers to single-precision floats.
static inline __m128 intsToFloats(__m128i iv)
{
    const __m128 asFloats = _mm_cvtepi32_ps(iv);
    return asFloats;
}
// Converts four floats to signed 32-bit integers, truncating toward zero.
static inline __m128i floatToInts(__m128 fv)
{
    const __m128i truncated = _mm_cvttps_epi32(fv);
    return truncated;
}
// Narrows four registers of 32-bit integers to sixteen unsigned bytes, clamped
// to 0..255. Output byte order is r0..r3, g0..g3, b0..b3, a0..a3.
//
// The final step uses unsigned saturation (_mm_packus_epi16). The signed
// variant (_mm_packs_epi16) clamps to -128..127, which turned every channel
// value above 127 into 127.
static inline __m128i packAndSaturate32To8(__m128i r ,__m128i g, __m128i b, __m128i
a)
{
    const __m128i rg16 = _mm_packs_epi32(r, g);   // 32 -> 16 bit, signed saturation
    const __m128i ba16 = _mm_packs_epi32(b, a);
    return _mm_packus_epi16(rg16, ba16);          // 16 -> 8 bit, unsigned saturation
}
// Divides each of the four vectors in v by its Euclidean length.
// No guard against zero length: such a lane divides by zero.
static inline FourVec3s normalizeFourVec3s(const FourVec3s &v) {
    const __m128 xx = mul(v.x, v.x);
    const __m128 yy = mul(v.y, v.y);
    const __m128 zz = mul(v.z, v.z);
    const __m128 length = _mm_sqrt_ps(add(add(xx, yy), zz));

    FourVec3s unit;
    unit.x = _mm_div_ps(v.x, length);
    unit.y = _mm_div_ps(v.y, length);
    unit.z = _mm_div_ps(v.z, length);
    return unit;
}
__m128i Shade4Triangles(const FourTris &tris) {
__m128 x1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)),
splatX(modelRight)), // (*triangle).a.x - modelPos.x)*modelRight.x +
mul(sub(tris.a.y, splatY(modelPos)),
splatY(modelRight))), // ((*triangle).a.y - modelPos.y)*modelRight.y +
mul(sub(tris.a.z, splatZ(modelPos)),
splatZ(modelRight))), // ((*triangle).a.z - modelPos.z)*modelRight.z) +
splatX(modelPos));
// modelPos.x
__m128 y1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)), splatX(modelUp)),
mul(sub(tris.a.y, splatY(modelPos)), splatY(modelUp))),
mul(sub(tris.a.z, splatZ(modelPos)), splatZ(modelUp))),
splatY(modelPos));
__m128 z1 = add(add(add( mul(sub(tris.a.x, splatX(modelPos)), splatX(modelDir)),
mul(sub(tris.a.y, splatY(modelPos)), splatY(modelDir))),
mul(sub(tris.a.z, splatZ(modelPos)), splatZ(modelDir))),
splatZ(modelPos));
__m128 x2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)),
splatX(modelRight)),
mul(sub(tris.b.y, splatY(modelPos)),
splatY(modelRight))),
mul(sub(tris.b.z, splatZ(modelPos)),
splatZ(modelRight))),
splatX(modelPos));
__m128 y2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)), splatX(modelUp)),
mul(sub(tris.b.y, splatY(modelPos)), splatY(modelUp))),
mul(sub(tris.b.z, splatZ(modelPos)), splatZ(modelUp))),
splatY(modelPos));
__m128 z2 = add(add(add( mul(sub(tris.b.x, splatX(modelPos)), splatX(modelDir)),
mul(sub(tris.b.y, splatY(modelPos)), splatY(modelDir))),
mul(sub(tris.b.z, splatZ(modelPos)), splatZ(modelDir))),
splatZ(modelPos));
__m128 x3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)),
splatX(modelRight)),
mul(sub(tris.c.y, splatY(modelPos)),
splatY(modelRight))),
mul(sub(tris.c.z, splatZ(modelPos)),
splatZ(modelRight))),
splatX(modelPos));
__m128 y3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)), splatX(modelUp)),
mul(sub(tris.c.y, splatY(modelPos)), splatY(modelUp))),
mul(sub(tris.c.z, splatZ(modelPos)), splatZ(modelUp))),
splatY(modelPos));
__m128 z3 = add(add(add( mul(sub(tris.c.x, splatX(modelPos)), splatX(modelDir)),
mul(sub(tris.c.y, splatY(modelPos)), splatY(modelDir))),
mul(sub(tris.c.z, splatZ(modelPos)), splatZ(modelDir))),
splatZ(modelPos));
FourVec3s normal;
normal.x = sub( mul(sub(y1,y1),sub(z3,z2)), mul(sub(z2,z1),sub(y3,y2)) );
normal.y = sub( mul(sub(z2,z1),sub(x3,x2)), mul(sub(x2,x1),sub(z3,z2)) );
normal.z = sub( mul(sub(x2,x1),sub(y3,y2)), mul(sub(y2,y1),sub(x3,x2)) );
normal = normalizeFourVec3s(normal);
__m128 s1234 = dot(normal, lightDirs);
s1234 = and(s1234, less(_mm_setzero_ps(), s1234));
__m128 l = add(_mm_set_ps1(0.1f), add(add( mul(s1234,lightColors.x),
mul(s1234,lightColors.y)), mul(s1234,lightColors.z)));
__m128i r = floatToInts(mul(l,intsToFloats(unpackR(tris.colors))
));
__m128i g = floatToInts(mul(l,intsToFloats(unpackG(tris.colors))
));
__m128i b = floatToInts(mul(l,intsToFloats(unpackB(tris.colors))
));
return packAndSaturate32To8(r,g,b,_mm_setzero_si128());
}
aczkolwiek czeka mnie z tym mała 'rozkminka', bo nie wszystko jest jasne
Następne wpisy z tego wątku
- 30.06.14 10:42 Wojciech Muła
- 30.06.14 12:16 firr
- 30.06.14 12:27 firr
- 02.07.14 01:20 firr
- 02.07.14 18:12 Edek
- 02.07.14 20:13 firr
Najnowsze wątki z tej grupy
- Arch. Prog. Nieuprzywilejowanych w pełnej wer. na nowej s. WWW energokod.pl
- 7. Raport Totaliztyczny: Sprawa Qt Group wer. 424
- TCL - problem z escape ostatniego \ w nawiasach {}
- Nauka i Praca Programisty C++ w III Rzeczy (pospolitej)
- testy-wyd-sort - Podsumowanie
- Tworzenie Programów Nieuprzywilejowanych Opartych Na Wtyczkach
- Do czego nadaje się QDockWidget z bibl. Qt?
- Bibl. Qt jest sztucznie ograniczona - jest nieprzydatna do celów komercyjnych
- Co sciaga kretynow
- AEiC 2024 - Ada-Europe conference - Deadlines Approaching
- Jakie są dobre zasady programowania programów opartych na wtyczkach?
- sprawdzanie słów kluczowych dot. zła
- Re: W czym sie teraz pisze programy??
- Re: (PDF) Surgical Pathology of Non-neoplastic Gastrointestinal Diseases by Lizhi Zhang
- CfC 28th Ada-Europe Int. Conf. Reliable Software Technologies
Najnowsze wątki
- 2024-12-30 Nowy Outlander PHEV w PL
- 2024-12-30 Warszawa => Key Account Manager <=
- 2024-12-30 Katowice => Key Account Manager (ERP) <=
- 2024-12-28 Śmiechu KOOOOOOPA ;-)
- 2024-12-29 Pomiar amplitudy w zegarku mechanicznym
- 2024-12-28 Antyradar
- 2024-12-28 Deweloper przegral w sadzie musi zwrócic pieniądze Posypia sie kolejne pozwy?
- 2024-12-28 Warszawa => Full Stack .Net Engineer <=
- 2024-12-28 Warszawa => Sales Assistant <=
- 2024-12-28 Warszawa => Programista Full Stack .Net <=
- 2024-12-28 Warszawa => Full Stack web developer (obszar .Net Core, Angular6+) <=
- 2024-12-28 Katowice => Head of Virtualization Platform Management and Operating S
- 2024-12-28 Błonie => Analityk Systemów Informatycznych (TMS SPEED) <=
- 2024-12-28 Warszawa => Senior Frontend Developer (React + React Native) <=
- 2024-12-28 Żerniki => Employer Branding Specialist <=