aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/newview/llviewerjointmesh.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r--linden/indra/newview/llviewerjointmesh.cpp824
1 files changed, 149 insertions, 675 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp
index 96cdb88..3070cb4 100644
--- a/linden/indra/newview/llviewerjointmesh.cpp
+++ b/linden/indra/newview/llviewerjointmesh.cpp
@@ -38,7 +38,6 @@
38#include "llfasttimer.h" 38#include "llfasttimer.h"
39 39
40#include "llagent.h" 40#include "llagent.h"
41#include "llagparray.h"
42#include "llbox.h" 41#include "llbox.h"
43#include "lldrawable.h" 42#include "lldrawable.h"
44#include "lldrawpoolavatar.h" 43#include "lldrawpoolavatar.h"
@@ -62,6 +61,10 @@ extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB;
62#endif 61#endif
63extern BOOL gRenderForSelect; 62extern BOOL gRenderForSelect;
64 63
64static LLPointer<LLVertexBuffer> sRenderBuffer = NULL;
65static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX |
66 LLVertexBuffer::MAP_NORMAL |
67 LLVertexBuffer::MAP_TEXCOORD;
65LLMatrix4 gBlendMat; 68LLMatrix4 gBlendMat;
66 69
67//----------------------------------------------------------------------------- 70//-----------------------------------------------------------------------------
@@ -394,11 +397,11 @@ void LLViewerJointMesh::setupJoint(LLViewerJoint* current_joint)
394 } 397 }
395 398
396 // depth-first traversal 399 // depth-first traversal
397 for (LLJoint *child_joint = current_joint->mChildren.getFirstData(); 400 for (LLJoint::child_list_t::iterator iter = current_joint->mChildren.begin();
398 child_joint; 401 iter != current_joint->mChildren.end(); ++iter)
399 child_joint = current_joint->mChildren.getNextData())
400 { 402 {
401 setupJoint((LLViewerJoint*)child_joint); 403 LLViewerJoint* child_joint = (LLViewerJoint*)(*iter);
404 setupJoint(child_joint);
402 } 405 }
403} 406}
404 407
@@ -431,7 +434,7 @@ void LLViewerJointMesh::uploadJointMatrices()
431 434
432 if (hardware_skinning) 435 if (hardware_skinning)
433 { 436 {
434 joint_mat *= gCamera->getModelview(); 437 joint_mat *= LLDrawPoolAvatar::getModelView();
435 } 438 }
436 gJointMat[joint_num] = joint_mat; 439 gJointMat[joint_num] = joint_mat;
437 gJointRot[joint_num] = joint_mat.getMat3(); 440 gJointRot[joint_num] = joint_mat.getMat3();
@@ -532,620 +535,39 @@ int compare_int(const void *a, const void *b)
532 else return 0; 535 else return 0;
533} 536}
534 537
535#if LL_WINDOWS || (LL_DARWIN && __i386__) // SSE optimizations in avatar code 538void llDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices)
536
537#if LL_DARWIN
538#include <xmmintrin.h>
539
540 // On Windows, this class is defined in fvec.h. I've only reproduced the parts of it we use here for now.
541 #pragma pack(push,16) /* Must ensure class & union 16-B aligned */
542 class F32vec4
543 {
544 protected:
545 __m128 vec;
546 public:
547
548 /* Constructors: __m128, 4 floats, 1 float */
549 F32vec4() {}
550
551 /* initialize 4 SP FP with __m128 data type */
552 F32vec4(__m128 m) { vec = m;}
553
554 /* Explicitly initialize each of 4 SP FPs with same float */
555 explicit F32vec4(float f) { vec = _mm_set_ps1(f); }
556 };
557 #pragma pack(pop) /* 16-B aligned */
558
559
560#endif
561
562void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output,
563 LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights)
564{ 539{
565 F32 last_weight = F32_MAX; 540 if (end-start+1 > (U32) gGLManager.mGLMaxVertexRange ||
566 LLMatrix4 *blend_mat = &gBlendMat; 541 count > gGLManager.mGLMaxIndexRange)
567
568 for (S32 index = vert_offset; index < vert_offset + vert_count; index++)
569 { 542 {
570 F32 w = weights [index]; // register copy of weight 543 glDrawElements(mode,count,type,indices);
571 F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever
572 F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned
573
574 if (w == last_weight)
575 {
576 // load input and output vertices, and last blended matrix
577 __asm {
578 mov esi, vin
579 mov edi, vout
580
581 mov edx, blend_mat
582 movaps xmm4, [edx]
583 movaps xmm5, [edx+0x10]
584 movaps xmm6, [edx+0x20]
585 movaps xmm7, [edx+0x30]
586 }
587 }
588 else
589 {
590 last_weight = w;
591 S32 joint = llfloor(w);
592 w -= joint;
593
594 LLMatrix4 *m0 = &(gJointMat[joint+1]);
595 LLMatrix4 *m1 = &(gJointMat[joint+0]);
596
597 // some initial code to load Matrix 0 into SSE registers
598 __asm {
599 mov esi, vin
600 mov edi, vout
601
602 //matrix2
603 mov edx, m0
604 movaps xmm4, [edx]
605 movaps xmm5, [edx+0x10]
606 movaps xmm6, [edx+0x20]
607 movaps xmm7, [edx+0x30]
608 };
609
610 // if w == 1.0f, we don't need to blend.
611 // but since we do the trick of blending the matrices, here, if w != 1.0,
612 // we load Matrix 1 into the other 4 SSE registers and blend both matrices
613 // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w)
614
615 if (w != 1.0f)
616 {
617 F32vec4 weight(w);
618
619 __asm { // do blending of matrices instead of verts and normals -- faster
620 mov edx, m1
621 movaps xmm0, [edx]
622 movaps xmm1, [edx+0x10]
623 movaps xmm2, [edx+0x20]
624 movaps xmm3, [edx+0x30]
625
626 subps xmm4, xmm0 // do blend for each matrix column
627 subps xmm5, xmm1 // diff, then multiply weight and re-add
628 subps xmm6, xmm2
629 subps xmm7, xmm3
630
631 mulps xmm4, weight
632 mulps xmm5, weight
633 mulps xmm6, weight
634 mulps xmm7, weight
635
636 addps xmm4, xmm0
637 addps xmm5, xmm1
638 addps xmm6, xmm2
639 addps xmm7, xmm3
640 };
641 }
642
643 __asm {
644 // save off blended matrix
645 mov edx, blend_mat;
646 movaps [edx], xmm4;
647 movaps [edx+0x10], xmm5;
648 movaps [edx+0x20], xmm6;
649 movaps [edx+0x30], xmm7;
650 }
651 }
652
653 // now, we have either a blended matrix in xmm4-7 or the original Matrix 0
654 // we then multiply each vertex and normal by this one matrix.
655
656 // For SSE2, we would try to keep the original two matrices in other registers
657 // and avoid reloading them. However, they should ramain in L1 cache in the
658 // current case.
659
660 // One possible optimization would be to sort the vertices by weight instead
661 // of just index (we still want to uniqify). If we note when two or more vertices
662 // share the same weight, we can avoid doing the middle SSE code above and just
663 // re-use the blended matrix for those vertices
664
665
666 // now, we do the actual vertex blending
667 __asm {
668 // load Vertex into xmm0.
669 movaps xmm0, [esi] // change aps to ups when input is no longer 16-baligned
670 movaps xmm1, xmm0 // copy vector into xmm0 through xmm2 (x,y,z)
671 movaps xmm2, xmm0
672 shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // clone vertex (x) across vector
673 shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); // clone vertex (y) across vector
674 shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); // same for Z
675 mulps xmm0, xmm4 // do the actual matrix multipication for r0
676 mulps xmm1, xmm5 // for r1
677 mulps xmm2, xmm6 // for r2
678 addps xmm0, xmm1 // accumulate
679 addps xmm0, xmm2 // accumulate
680 addps xmm0, xmm7 // add in the row 4 which holds the x,y,z translation. assumes w=1 (vertex-w, not weight)
681
682 movaps [edi], xmm0 // store aligned in output array
683
684 // load Normal into xmm0.
685 movaps xmm0, [esi + 0x10] // change aps to ups when input no longer 16-byte aligned
686 movaps xmm1, xmm0 //
687 movaps xmm2, xmm0
688 shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // since UV sits between vertex and normal, normal starts at element 1, not 0
689 shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1);
690 shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2);
691 mulps xmm0, xmm4 // multiply by matrix
692 mulps xmm1, xmm5 // multiply
693 mulps xmm2, xmm6 // multiply
694 addps xmm0, xmm1 // accumulate
695 addps xmm0, xmm2 // accumulate. note: do not add translation component to normals, save time too
696 movaps [edi + 0x10], xmm0 // store aligned
697 }
698
699 *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot.
700 }
701}
702
703#elif LL_LINUX
704
705void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output,
706 LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights)
707{
708 assert(0);
709}
710
711#elif LL_DARWIN
712// AltiVec versions of the same...
713
714static inline vector float loadAlign(int offset, vector float *addr)
715{
716 vector float in0 = vec_ld(offset, addr);
717 vector float in1 = vec_ld(offset + 16, addr);
718 vector unsigned char perm = vec_lvsl(0, (unsigned char*)addr);
719
720 return(vec_perm(in0, in1, perm));
721}
722
723static inline void storeAlign(vector float v, int offset, vector float *addr)
724{
725 vector float in0 = vec_ld(offset, addr);
726 vector float in1 = vec_ld(offset + 16, addr);
727 vector unsigned char perm = vec_lvsr(0, (unsigned char *)addr);
728 vector float temp = vec_perm(v, v, perm);
729 vector unsigned char mask = (vector unsigned char)vec_cmpgt(perm, vec_splat_u8(15));
730
731 in0 = vec_sel(in0, temp, (vector unsigned int)mask);
732 in1 = vec_sel(temp, in1, (vector unsigned int)mask);
733
734 vec_st(in0, offset, addr);
735 vec_st(in1, offset + 16, addr);
736}
737
738void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output,
739 LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights)
740{
741 F32 last_weight = F32_MAX;
742// LLMatrix4 &blend_mat = gBlendMat;
743
744 vector float matrix0_0, matrix0_1, matrix0_2, matrix0_3;
745 vector unsigned char out0perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F );
746// vector unsigned char out1perm = (vector unsigned char) ( 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B );
747 vector unsigned char out1perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F );
748
749 vector float zero = (vector float)vec_splat_u32(0);
750
751 for (U32 index = vert_offset; index < vert_offset + vert_count; index++)
752 {
753 F32 w = weights [index]; // register copy of weight
754 F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever
755 F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned
756
757 // MBW -- XXX -- If this isn't the case, this code gets more complicated.
758 if(0x0000000F & (U32)vin)
759 {
760 llerrs << "blend_SSE_batch: input not 16-byte aligned!" << llendl;
761 }
762 if(0x0000000F & (U32)vout)
763 {
764 llerrs << "blend_SSE_batch: output not 16-byte aligned!" << llendl;
765 }
766// if(0x0000000F & (U32)&(blend_mat.mMatrix))
767// {
768// llerrs << "blend_SSE_batch: blend_mat not 16-byte aligned!" << llendl;
769// }
770
771 if (w == last_weight)
772 {
773 // load last blended matrix
774 // Still loaded from last time through the loop.
775// matrix0_0 = vec_ld(0x00, (vector float*)&(blend_mat.mMatrix));
776// matrix0_1 = vec_ld(0x10, (vector float*)&(blend_mat.mMatrix));
777// matrix0_2 = vec_ld(0x20, (vector float*)&(blend_mat.mMatrix));
778// matrix0_3 = vec_ld(0x30, (vector float*)&(blend_mat.mMatrix));
779 }
780 else
781 {
782 last_weight = w;
783 S32 joint = llfloor(w);
784 w -= joint;
785
786 LLMatrix4 &m0 = gJointMat[joint+1];
787 LLMatrix4 &m1 = gJointMat[joint+0];
788
789 // load Matrix 0 into vector registers
790 matrix0_0 = vec_ld(0x00, (vector float*)&(m0.mMatrix));
791 matrix0_1 = vec_ld(0x10, (vector float*)&(m0.mMatrix));
792 matrix0_2 = vec_ld(0x20, (vector float*)&(m0.mMatrix));
793 matrix0_3 = vec_ld(0x30, (vector float*)&(m0.mMatrix));
794
795 // if w == 1.0f, we don't need to blend.
796 // but since we do the trick of blending the matrices, here, if w != 1.0,
797 // we load Matrix 1 into the other 4 SSE registers and blend both matrices
798 // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w)
799
800 if (w != 1.0f)
801 {
802 vector float matrix1_0, matrix1_1, matrix1_2, matrix1_3;
803
804 // This loads the weight somewhere in the vector register
805 vector float weight = vec_lde(0, &(w));
806 // and this splats it to all elements.
807 weight = vec_splat(vec_perm(weight, weight, vec_lvsl(0, &(w))), 0);
808
809 // do blending of matrices instead of verts and normals -- faster
810 matrix1_0 = vec_ld(0x00, (vector float*)&(m1.mMatrix));
811 matrix1_1 = vec_ld(0x10, (vector float*)&(m1.mMatrix));
812 matrix1_2 = vec_ld(0x20, (vector float*)&(m1.mMatrix));
813 matrix1_3 = vec_ld(0x30, (vector float*)&(m1.mMatrix));
814
815 // m0[col] = ((m0[col] - m1[col]) * weight) + m1[col];
816 matrix0_0 = vec_madd(vec_sub(matrix0_0, matrix1_0), weight, matrix1_0);
817 matrix0_1 = vec_madd(vec_sub(matrix0_1, matrix1_1), weight, matrix1_1);
818 matrix0_2 = vec_madd(vec_sub(matrix0_2, matrix1_2), weight, matrix1_2);
819 matrix0_3 = vec_madd(vec_sub(matrix0_3, matrix1_3), weight, matrix1_3);
820 }
821
822 // save off blended matrix
823// vec_st(matrix0_0, 0x00, (vector float*)&(blend_mat.mMatrix));
824// vec_st(matrix0_1, 0x10, (vector float*)&(blend_mat.mMatrix));
825// vec_st(matrix0_2, 0x20, (vector float*)&(blend_mat.mMatrix));
826// vec_st(matrix0_3, 0x30, (vector float*)&(blend_mat.mMatrix));
827 }
828
829 // now, we have either a blended matrix in matrix0_0-3 or the original Matrix 0
830 // we then multiply each vertex and normal by this one matrix.
831
832 // For SSE2, we would try to keep the original two matrices in other registers
833 // and avoid reloading them. However, they should ramain in L1 cache in the
834 // current case.
835
836 // One possible optimization would be to sort the vertices by weight instead
837 // of just index (we still want to uniqify). If we note when two or more vertices
838 // share the same weight, we can avoid doing the middle SSE code above and just
839 // re-use the blended matrix for those vertices
840
841
842 // now, we do the actual vertex blending
843
844 vector float in0 = vec_ld(AVATAR_OFFSET_POS, (vector float*)vin);
845 vector float in1 = vec_ld(AVATAR_OFFSET_NORMAL, (vector float*)vin);
846
847 // Matrix multiply vertex
848 vector float out0 = vec_madd
849 (
850 vec_splat(in0, 0),
851 matrix0_0,
852 vec_madd
853 (
854 vec_splat(in0, 1),
855 matrix0_1,
856 vec_madd
857 (
858 vec_splat(in0, 2),
859 matrix0_2,
860 matrix0_3
861 )
862 )
863 );
864
865 // Matrix multiply normal
866 vector float out1 = vec_madd
867 (
868 vec_splat(in1, 0),
869 matrix0_0,
870 vec_madd
871 (
872 vec_splat(in1, 1),
873 matrix0_1,
874 vec_madd
875 (
876 vec_splat(in1, 2),
877 matrix0_2,
878 // no translation for normals
879 (vector float)vec_splat_u32(0)
880 )
881 )
882 );
883
884 // indexed store
885 vec_stl(vec_perm(in0, out0, out0perm), AVATAR_OFFSET_POS, (vector float*)vout); // Pos
886 vec_stl(vec_perm(in1, out1, out1perm), AVATAR_OFFSET_NORMAL, (vector float*)vout); // Norm
887 *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot.
888 }
889}
890
891#endif
892
893
894void llDrawElementsBatchBlend(const U32 vert_offset, const U32 vert_count, LLFace *face, const S32 index_count, const U32 *indices)
895{
896 U8* gAGPVertices = gPipeline.bufferGetScratchMemory();
897
898 if (gAGPVertices)
899 {
900 LLStrider<LLVector3> vertices;
901 LLStrider<LLVector3> normals;
902 LLStrider<LLVector2> tcoords0;
903 LLStrider<F32> weights;
904
905 LLStrider<LLVector3> o_vertices;
906 LLStrider<LLVector3> o_normals;
907 LLStrider<LLVector2> o_texcoords0;
908
909
910 LLStrider<LLVector3> binormals;
911 LLStrider<LLVector2> o_texcoords1;
912 // get the source vertices from the draw pool. We index these ourselves, as there was
913 // no guarantee the indices for a single jointmesh were contigious
914
915 LLDrawPool *pool = face->getPool();
916 pool->getVertexStrider (vertices, 0);
917 pool->getTexCoordStrider (tcoords0, 0, 0);
918 pool->getNormalStrider (normals, 0);
919 pool->getBinormalStrider (binormals, 0);
920 pool->getVertexWeightStrider(weights, 0);
921
922 // load the addresses of the output striders
923 o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES);
924 o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES);
925 o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES);
926 o_texcoords1= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX1); o_texcoords1.setStride(AVATAR_VERTEX_BYTES);
927
928#if !LL_LINUX // !!! *TODO: do the linux implementation
929 if (gGLManager.mSoftwareBlendSSE)
930 {
931 // do SSE blend without binormals or extra texcoords
932 blend_SSE_32_32_batch(vert_offset, vert_count, (float*)gAGPVertices,
933 vertices, tcoords0, normals, weights);
934 }
935 else // fully backwards compatible software blending, no SSE
936#endif
937 {
938 LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1;
939 F32 last_weight = F32_MAX;
940 LLMatrix3 gBlendRotMat;
941
942 {
943 for (U32 index=vert_offset; index < vert_offset + vert_count; index++)
944 {
945 // blend by first matrix
946 F32 w = weights [index];
947
948 if (w != last_weight)
949 {
950 last_weight = w;
951
952 S32 joint = llfloor(w);
953 w -= joint;
954
955 LLMatrix4 &m0 = gJointMat[joint+1];
956 LLMatrix4 &m1 = gJointMat[joint+0];
957 LLMatrix3 &n0 = gJointRot[joint+1];
958 LLMatrix3 &n1 = gJointRot[joint+0];
959
960 if (w == 1.0f)
961 {
962 gBlendMat = m0;
963 gBlendRotMat = n0;
964 }
965 else
966 {
967 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
968 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
969 gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
970
971 gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
972 gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
973 gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
974
975 gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
976 gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
977 gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
978
979 gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
980 gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
981 gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
982
983 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
984 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
985 gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
986
987 gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
988 gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
989 gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
990
991 gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
992 gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
993 gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
994 }
995 }
996
997 // write result
998 o_vertices [index] = vertices[index] * gBlendMat;
999 o_normals [index] = normals [index] * gBlendRotMat;
1000 o_texcoords0[index] = tcoords0[index];
1001
1002 /*
1003 // Verification code. Leave this here. It's useful for keeping the SSE and non-SSE versions in sync.
1004 LLVector3 temp;
1005 temp = tpos0;
1006 if( (o_vertices[index] - temp).magVecSquared() > 0.001f )
1007 {
1008 llerrs << "V SSE: " << o_vertices[index] << " v. " << temp << llendl;
1009 }
1010
1011 temp = tnorm0;
1012 if( (o_normals[index] - temp).magVecSquared() > 0.001f )
1013 {
1014 llerrs << "N SSE: " << o_normals[index] << " v. " << temp << llendl;
1015 }
1016
1017 if( (o_texcoords0[index] - tcoords0[index]).magVecSquared() > 0.001f )
1018 {
1019 llerrs << "T0 SSE: " << o_texcoords0[index] << " v. " << tcoords0[index] << llendl;
1020 }
1021 */
1022 }
1023 }
1024 }
1025
1026#if LL_DARWIN
1027 // *HACK* *CHOKE* *PUKE*
1028 // No way does this belong here.
1029 glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * vert_count, gAGPVertices + (AVATAR_VERTEX_BYTES * vert_offset));
1030#endif
1031 glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); // draw it!
1032 } 544 }
1033 else 545 else
1034 { 546 {
1035 glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); 547 glDrawRangeElements(mode,start,end,count,type,indices);
1036 } 548 }
1037} 549}
1038 550
1039
1040
1041//--------------------------------------------------------------------
1042// DrawElements
1043
1044// works just like glDrawElements, except it assumes GL_TRIANGLES and GL_UNSIGNED_INT indices
1045
1046// why? because the destination buffer may not be the AGP buffer and the eyes do not use blending
1047// separate the eyes into their own drawpools and this code goes away.
1048
1049//--------------------------------------------------------------------
1050
1051void llDrawElements(const S32 count, const U32 *indices, LLFace *face)
1052{
1053 U8* gAGPVertices = gPipeline.bufferGetScratchMemory();
1054
1055 if (gAGPVertices)
1056 {
1057#if LL_DARWIN
1058 U32 minIndex = indices[0];
1059 U32 maxIndex = indices[0];
1060#endif
1061 {
1062 LLStrider<LLVector3> vertices;
1063 LLStrider<LLVector3> normals;
1064 LLStrider<LLVector2> tcoords;
1065 LLStrider<F32> weights;
1066
1067 LLStrider<LLVector3> o_vertices;
1068 LLStrider<LLVector3> o_normals;
1069 LLStrider<LLVector2> o_texcoords0;
1070
1071 LLDrawPool *pool = face->getPool();
1072 pool->getVertexStrider (vertices,0);
1073 pool->getNormalStrider (normals, 0);
1074 pool->getTexCoordStrider (tcoords, 0);
1075
1076 o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES);
1077 o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES);
1078 o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES);
1079
1080 for (S32 i=0; i < count; i++)
1081 {
1082 U32 index = indices[i];
1083
1084 o_vertices [index] = vertices[index];
1085 o_normals [index] = normals [index];
1086 o_texcoords0[index] = tcoords [index];
1087
1088#if LL_DARWIN
1089 maxIndex = llmax(index, maxIndex);
1090 minIndex = llmin(index, minIndex);
1091#endif
1092 }
1093 }
1094
1095#if LL_DARWIN
1096 // *HACK* *CHOKE* *PUKE*
1097 // No way does this belong here.
1098 glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * (maxIndex + 1 - minIndex), gAGPVertices + (AVATAR_VERTEX_BYTES * minIndex));
1099#endif
1100
1101 glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices);
1102 }
1103 else
1104 {
1105 glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices);
1106 }
1107}
1108
1109
1110//-------------------------------------------------------------------- 551//--------------------------------------------------------------------
1111// LLViewerJointMesh::drawShape() 552// LLViewerJointMesh::drawShape()
1112//-------------------------------------------------------------------- 553//--------------------------------------------------------------------
1113U32 LLViewerJointMesh::drawShape( F32 pixelArea ) 554U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass)
1114{ 555{
1115 if (!mValid || !mVisible) return 0; 556 if (!mValid || !mMesh || !mFace || !mVisible ||
1116 557 mFace->mVertexBuffer.isNull() ||
1117 U32 triangle_count = 0; 558 mMesh->getNumFaces() == 0)
1118
1119 //----------------------------------------------------------------
1120 // if no mesh bail out now
1121 //----------------------------------------------------------------
1122 if ( !mMesh || !mFace)
1123 { 559 {
1124 return 0; 560 return 0;
1125 } 561 }
1126 562
1127 //---------------------------------------------------------------- 563 U32 triangle_count = 0;
1128 // if we have no faces, bail out now
1129 //----------------------------------------------------------------
1130 if ( mMesh->getNumFaces() == 0 )
1131 {
1132 return 0;
1133 }
1134 564
1135 stop_glerror(); 565 stop_glerror();
1136 566
1137 //---------------------------------------------------------------- 567 //----------------------------------------------------------------
1138 // setup current color 568 // setup current color
1139 //---------------------------------------------------------------- 569 //----------------------------------------------------------------
1140 if (gRenderForSelect) 570 if (!gRenderForSelect)
1141 {
1142 S32 name = mFace->getDrawable() ? mFace->getDrawable()->getVObj()->mGLName : 0;
1143 LLColor4U color((U8)(name >> 16), (U8)(name >> 8), (U8)name, 0xff);
1144 LLColor4 color_float(color);
1145
1146 glColor4f(color_float.mV[0], color_float.mV[1], color_float.mV[2], 1.f);
1147 }
1148 else
1149 { 571 {
1150 if ((mFace->getPool()->getVertexShaderLevel() > 0)) 572 if ((mFace->getPool()->getVertexShaderLevel() > 0))
1151 { 573 {
@@ -1169,7 +591,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1169 591
1170 stop_glerror(); 592 stop_glerror();
1171 593
1172// LLGLSSpecular specular(mSpecular, gRenderForSelect ? 0.0f : mShiny);
1173 LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0)); 594 LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0));
1174 595
1175 LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0); 596 LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0);
@@ -1179,11 +600,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1179 //---------------------------------------------------------------- 600 //----------------------------------------------------------------
1180 llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive 601 llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive
1181 602
1182 //GLuint test_image_name = 0;
1183
1184 //
1185 LLGLState force_alpha_test(GL_ALPHA_TEST, isTransparent());
1186
1187 if (mTestImageName) 603 if (mTestImageName)
1188 { 604 {
1189 LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D ); 605 LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D );
@@ -1236,11 +652,12 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1236 gImageList.getImage(IMG_DEFAULT_AVATAR)->bind(); 652 gImageList.getImage(IMG_DEFAULT_AVATAR)->bind();
1237 } 653 }
1238 654
655 LLGLDisable tex(gRenderForSelect && !isTransparent() ? GL_TEXTURE_2D : 0);
656
1239 if (gRenderForSelect) 657 if (gRenderForSelect)
1240 { 658 {
1241 if (isTransparent()) 659 if (isTransparent())
1242 { 660 {
1243 //gGLSObjectSelectDepthAlpha.set();
1244 glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB); 661 glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB);
1245 glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE); 662 glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE);
1246 glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE); 663 glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE);
@@ -1251,19 +668,14 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1251 glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1 668 glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1
1252 glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); 669 glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA);
1253 } 670 }
1254 else
1255 {
1256 //gGLSObjectSelectDepth.set();
1257 }
1258 } 671 }
1259 else 672 else
1260 { 673 {
1261 //---------------------------------------------------------------- 674 //----------------------------------------------------------------
1262 // by default, backface culling is enabled 675 // by default, backface culling is enabled
1263 //---------------------------------------------------------------- 676 //----------------------------------------------------------------
1264 if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) 677 /*if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER)
1265 { 678 {
1266 //LLGLSPipelineAvatar gls_pipeline_avatar;
1267 LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); 679 LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D );
1268 680
1269 glClientActiveTextureARB(GL_TEXTURE0_ARB); 681 glClientActiveTextureARB(GL_TEXTURE0_ARB);
@@ -1303,7 +715,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1303 } 715 }
1304 else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER) 716 else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER)
1305 { 717 {
1306 //gGLSPipelineAvatarAlphaPass1.set();
1307 glAlphaFunc(GL_GREATER, 0.1f); 718 glAlphaFunc(GL_GREATER, 0.1f);
1308 LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); 719 LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D );
1309 720
@@ -1334,81 +745,48 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1334 745
1335 glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); 746 glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE);
1336 glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); 747 glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA);
1337 } 748 }*/
1338 else if ( isTransparent())
1339 {
1340 //gGLSNoCullFaces.set();
1341 }
1342 else
1343 {
1344 //gGLSCullFaces.set();
1345 }
1346 } 749 }
1347 750
1348 if (mMesh->hasWeights()) 751 mFace->mVertexBuffer->setBuffer(sRenderMask);
1349 {
1350 uploadJointMatrices();
1351 752
753 U32 start = mMesh->mFaceVertexOffset;
754 U32 end = start + mMesh->mFaceVertexCount - 1;
755 U32 count = mMesh->mFaceIndexCount;
756 U32* indicesp = ((U32*) mFace->mVertexBuffer->getIndicesPointer()) + mMesh->mFaceIndexOffset;
1352 757
758 if (mMesh->hasWeights())
759 {
1353 if ((mFace->getPool()->getVertexShaderLevel() > 0)) 760 if ((mFace->getPool()->getVertexShaderLevel() > 0))
1354 { 761 {
1355 glMatrixMode(GL_MODELVIEW); 762 if (first_pass)
1356 glPushMatrix(); 763 {
1357 glLoadIdentity(); 764 uploadJointMatrices();
1358 765 }
1359 glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); 766 llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp);
1360
1361 glPopMatrix();
1362 } 767 }
1363 else 768 else
1364 { 769 {
1365 if (mFace->getGeomIndex() < 0) 770 llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp);
1366 {
1367 llerrs << "Invalid geometry index in LLViewerJointMesh::drawShape() " << mFace->getGeomIndex() << llendl;
1368 }
1369
1370 if ((S32)(mMesh->mFaceVertexOffset + mMesh->mFaceVertexCount) > mFace->getGeomCount())
1371 {
1372 ((LLVOAvatar*)mFace->getDrawable()->getVObj())->mRoot.dump();
1373 llerrs << "Rendering outside of vertex bounds with mesh " << mName << " at pixel area " << pixelArea << llendl;
1374 }
1375 llDrawElementsBatchBlend(mMesh->mFaceVertexOffset, mMesh->mFaceVertexCount,
1376 mFace, mMesh->mFaceIndexCount, mMesh->getIndices());
1377 } 771 }
1378
1379 } 772 }
1380 else 773 else
1381 { 774 {
1382 glPushMatrix(); 775 glPushMatrix();
1383 LLMatrix4 jointToWorld = getWorldMatrix(); 776 LLMatrix4 jointToWorld = getWorldMatrix();
1384 jointToWorld *= gCamera->getModelview(); 777 glMultMatrixf((GLfloat*)jointToWorld.mMatrix);
1385 glLoadMatrixf((GLfloat*)jointToWorld.mMatrix); 778 llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp);
1386
1387 if ((mFace->getPool()->getVertexShaderLevel() > 0))
1388 {
1389 glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices());
1390 }
1391 else // this else clause handles non-weighted vertices. llDrawElements just copies and draws
1392 {
1393 llDrawElements(mMesh->mFaceIndexCount, mMesh->getIndices(), mFace);
1394 }
1395
1396 glPopMatrix(); 779 glPopMatrix();
1397 } 780 }
1398 781
1399 triangle_count += mMesh->mFaceIndexCount; 782 triangle_count += mMesh->mFaceIndexCount;
1400 783
1401 if (gRenderForSelect)
1402 {
1403 glColor4fv(mColor.mV);
1404 }
1405
1406 if (mTestImageName) 784 if (mTestImageName)
1407 { 785 {
1408 glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); 786 glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
1409 } 787 }
1410 788
1411 if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) 789 /*if (sRenderPass != AVATAR_RENDER_PASS_SINGLE)
1412 { 790 {
1413 LLImageGL::unbindTexture(1, GL_TEXTURE_2D); 791 LLImageGL::unbindTexture(1, GL_TEXTURE_2D);
1414 glActiveTextureARB(GL_TEXTURE1_ARB); 792 glActiveTextureARB(GL_TEXTURE1_ARB);
@@ -1421,7 +799,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1421 799
1422 glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE); 800 glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE);
1423 glAlphaFunc(GL_GREATER, 0.01f); 801 glAlphaFunc(GL_GREATER, 0.01f);
1424 } 802 }*/
1425 803
1426 if (mTexture.notNull()) { 804 if (mTexture.notNull()) {
1427 if (!mTexture->getClampS()) { 805 if (!mTexture->getClampS()) {
@@ -1438,19 +816,20 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
1438//----------------------------------------------------------------------------- 816//-----------------------------------------------------------------------------
1439// updateFaceSizes() 817// updateFaceSizes()
1440//----------------------------------------------------------------------------- 818//-----------------------------------------------------------------------------
1441void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) 819void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area)
1442{ 820{
1443 // Do a pre-alloc pass to determine sizes of data. 821 // Do a pre-alloc pass to determine sizes of data.
1444 if (mMesh && mValid) 822 if (mMesh && mValid)
1445 { 823 {
1446 mMesh->mFaceVertexOffset = num_vertices; 824 mMesh->mFaceVertexOffset = num_vertices;
1447 mMesh->mFaceVertexCount = mMesh->getNumVertices(); 825 mMesh->mFaceVertexCount = mMesh->getNumVertices();
826 mMesh->mFaceIndexOffset = num_indices;
827 mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices;
828
1448 mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount; 829 mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount;
1449 num_vertices += mMesh->getNumVertices();
1450 830
1451 mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; 831 num_vertices += mMesh->getNumVertices();
1452 832 num_indices += mMesh->mFaceIndexCount;
1453 mMesh->getSharedData()->genIndices(mMesh->mFaceVertexOffset);
1454 } 833 }
1455} 834}
1456 835
@@ -1460,9 +839,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area)
1460void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) 839void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind)
1461{ 840{
1462 U32 i; 841 U32 i;
1463 842
1464 if (!mValid) return;
1465
1466 mFace = face; 843 mFace = face;
1467 844
1468 LLStrider<LLVector3> verticesp; 845 LLStrider<LLVector3> verticesp;
@@ -1471,13 +848,15 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
1471 LLStrider<LLVector2> tex_coordsp; 848 LLStrider<LLVector2> tex_coordsp;
1472 LLStrider<F32> vertex_weightsp; 849 LLStrider<F32> vertex_weightsp;
1473 LLStrider<LLVector4> clothing_weightsp; 850 LLStrider<LLVector4> clothing_weightsp;
851 LLStrider<U32> indicesp;
1474 852
1475 // Copy data into the faces from the polymesh data. 853 // Copy data into the faces from the polymesh data.
1476 if (mMesh) 854 if (mMesh && mValid)
1477 { 855 {
1478 if (mMesh->getNumVertices()) 856 if (mMesh->getNumVertices())
1479 { 857 {
1480 S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); 858 S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp);
859 face->mVertexBuffer->getIndexStrider(indicesp);
1481 860
1482 if (-1 == index) 861 if (-1 == index)
1483 { 862 {
@@ -1493,11 +872,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
1493 vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i); 872 vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i);
1494 if (damp_wind) 873 if (damp_wind)
1495 { 874 {
1496 clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(0,0,0,0); 875 clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0);
1497 } 876 }
1498 else 877 else
1499 { 878 {
1500 clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(*(mMesh->getClothingWeights() + i)); 879 clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i));
880 }
881 }
882
883 for (S32 i = 0; i < mMesh->getNumFaces(); i++)
884 {
885 for (U32 j = 0; j < 3; j++)
886 {
887 U32 k = i*3+j+mMesh->mFaceIndexOffset;
888 indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset;
1501 } 889 }
1502 } 890 }
1503 } 891 }
@@ -1514,6 +902,92 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate)
1514 return (valid != activate); 902 return (valid != activate);
1515} 903}
1516 904
905void LLViewerJointMesh::updateGeometry()
906{
907 if (mValid && mMesh && mFace &&
908 mMesh->hasWeights() &&
909 mFace->mVertexBuffer.notNull() &&
910 gPipeline.getVertexShaderLevel(LLPipeline::SHADER_AVATAR) == 0)
911 {
912 uploadJointMatrices();
913 LLStrider<LLVector3> o_vertices;
914 LLStrider<LLVector3> o_normals;
915
916 //get vertex and normal striders
917 LLVertexBuffer *buffer = mFace->mVertexBuffer;
918 buffer->getVertexStrider(o_vertices, 0);
919 buffer->getNormalStrider(o_normals, 0);
920
921 {
922 LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1;
923 F32 last_weight = F32_MAX;
924 LLMatrix3 gBlendRotMat;
925
926
927 for (U32 index= 0; index < mMesh->getNumVertices(); index++)
928 {
929 // blend by first matrix
930 F32 w = mMesh->getWeights()[index];
931
932 if (w != last_weight)
933 {
934 last_weight = w;
935
936 S32 joint = llfloor(w);
937 w -= joint;
938
939 LLMatrix4 &m0 = gJointMat[joint+1];
940 LLMatrix4 &m1 = gJointMat[joint+0];
941 LLMatrix3 &n0 = gJointRot[joint+1];
942 LLMatrix3 &n1 = gJointRot[joint+0];
943
944 if (w == 1.0f)
945 {
946 gBlendMat = m0;
947 gBlendRotMat = n0;
948 }
949 else
950 {
951 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
952 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
953 gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
954
955 gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
956 gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
957 gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
958
959 gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
960 gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
961 gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
962
963 gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
964 gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
965 gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
966
967 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
968 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
969 gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
970
971 gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
972 gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
973 gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
974
975 gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
976 gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
977 gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
978 }
979 }
980
981 // write result
982 U32 bidx = index + mMesh->mFaceVertexOffset;
983
984 o_vertices[bidx] = mMesh->getCoords()[index] * gBlendMat;
985 o_normals[bidx] = mMesh->getNormals()[index] * gBlendRotMat;
986 }
987 }
988 }
989}
990
1517void LLViewerJointMesh::dump() 991void LLViewerJointMesh::dump()
1518{ 992{
1519 if (mValid) 993 if (mValid)