diff options
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | linden/indra/newview/llviewerjointmesh.cpp | 824 |
1 files changed, 149 insertions, 675 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp index 96cdb88..3070cb4 100644 --- a/linden/indra/newview/llviewerjointmesh.cpp +++ b/linden/indra/newview/llviewerjointmesh.cpp | |||
@@ -38,7 +38,6 @@ | |||
38 | #include "llfasttimer.h" | 38 | #include "llfasttimer.h" |
39 | 39 | ||
40 | #include "llagent.h" | 40 | #include "llagent.h" |
41 | #include "llagparray.h" | ||
42 | #include "llbox.h" | 41 | #include "llbox.h" |
43 | #include "lldrawable.h" | 42 | #include "lldrawable.h" |
44 | #include "lldrawpoolavatar.h" | 43 | #include "lldrawpoolavatar.h" |
@@ -62,6 +61,10 @@ extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; | |||
62 | #endif | 61 | #endif |
63 | extern BOOL gRenderForSelect; | 62 | extern BOOL gRenderForSelect; |
64 | 63 | ||
64 | static LLPointer<LLVertexBuffer> sRenderBuffer = NULL; | ||
65 | static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | | ||
66 | LLVertexBuffer::MAP_NORMAL | | ||
67 | LLVertexBuffer::MAP_TEXCOORD; | ||
65 | LLMatrix4 gBlendMat; | 68 | LLMatrix4 gBlendMat; |
66 | 69 | ||
67 | //----------------------------------------------------------------------------- | 70 | //----------------------------------------------------------------------------- |
@@ -394,11 +397,11 @@ void LLViewerJointMesh::setupJoint(LLViewerJoint* current_joint) | |||
394 | } | 397 | } |
395 | 398 | ||
396 | // depth-first traversal | 399 | // depth-first traversal |
397 | for (LLJoint *child_joint = current_joint->mChildren.getFirstData(); | 400 | for (LLJoint::child_list_t::iterator iter = current_joint->mChildren.begin(); |
398 | child_joint; | 401 | iter != current_joint->mChildren.end(); ++iter) |
399 | child_joint = current_joint->mChildren.getNextData()) | ||
400 | { | 402 | { |
401 | setupJoint((LLViewerJoint*)child_joint); | 403 | LLViewerJoint* child_joint = (LLViewerJoint*)(*iter); |
404 | setupJoint(child_joint); | ||
402 | } | 405 | } |
403 | } | 406 | } |
404 | 407 | ||
@@ -431,7 +434,7 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
431 | 434 | ||
432 | if (hardware_skinning) | 435 | if (hardware_skinning) |
433 | { | 436 | { |
434 | joint_mat *= gCamera->getModelview(); | 437 | joint_mat *= LLDrawPoolAvatar::getModelView(); |
435 | } | 438 | } |
436 | gJointMat[joint_num] = joint_mat; | 439 | gJointMat[joint_num] = joint_mat; |
437 | gJointRot[joint_num] = joint_mat.getMat3(); | 440 | gJointRot[joint_num] = joint_mat.getMat3(); |
@@ -532,620 +535,39 @@ int compare_int(const void *a, const void *b) | |||
532 | else return 0; | 535 | else return 0; |
533 | } | 536 | } |
534 | 537 | ||
535 | #if LL_WINDOWS || (LL_DARWIN && __i386__) // SSE optimizations in avatar code | 538 | void llDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) |
536 | |||
537 | #if LL_DARWIN | ||
538 | #include <xmmintrin.h> | ||
539 | |||
540 | // On Windows, this class is defined in fvec.h. I've only reproduced the parts of it we use here for now. | ||
541 | #pragma pack(push,16) /* Must ensure class & union 16-B aligned */ | ||
542 | class F32vec4 | ||
543 | { | ||
544 | protected: | ||
545 | __m128 vec; | ||
546 | public: | ||
547 | |||
548 | /* Constructors: __m128, 4 floats, 1 float */ | ||
549 | F32vec4() {} | ||
550 | |||
551 | /* initialize 4 SP FP with __m128 data type */ | ||
552 | F32vec4(__m128 m) { vec = m;} | ||
553 | |||
554 | /* Explicitly initialize each of 4 SP FPs with same float */ | ||
555 | explicit F32vec4(float f) { vec = _mm_set_ps1(f); } | ||
556 | }; | ||
557 | #pragma pack(pop) /* 16-B aligned */ | ||
558 | |||
559 | |||
560 | #endif | ||
561 | |||
562 | void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, | ||
563 | LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) | ||
564 | { | 539 | { |
565 | F32 last_weight = F32_MAX; | 540 | if (end-start+1 > (U32) gGLManager.mGLMaxVertexRange || |
566 | LLMatrix4 *blend_mat = &gBlendMat; | 541 | count > gGLManager.mGLMaxIndexRange) |
567 | |||
568 | for (S32 index = vert_offset; index < vert_offset + vert_count; index++) | ||
569 | { | 542 | { |
570 | F32 w = weights [index]; // register copy of weight | 543 | glDrawElements(mode,count,type,indices); |
571 | F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever | ||
572 | F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned | ||
573 | |||
574 | if (w == last_weight) | ||
575 | { | ||
576 | // load input and output vertices, and last blended matrix | ||
577 | __asm { | ||
578 | mov esi, vin | ||
579 | mov edi, vout | ||
580 | |||
581 | mov edx, blend_mat | ||
582 | movaps xmm4, [edx] | ||
583 | movaps xmm5, [edx+0x10] | ||
584 | movaps xmm6, [edx+0x20] | ||
585 | movaps xmm7, [edx+0x30] | ||
586 | } | ||
587 | } | ||
588 | else | ||
589 | { | ||
590 | last_weight = w; | ||
591 | S32 joint = llfloor(w); | ||
592 | w -= joint; | ||
593 | |||
594 | LLMatrix4 *m0 = &(gJointMat[joint+1]); | ||
595 | LLMatrix4 *m1 = &(gJointMat[joint+0]); | ||
596 | |||
597 | // some initial code to load Matrix 0 into SSE registers | ||
598 | __asm { | ||
599 | mov esi, vin | ||
600 | mov edi, vout | ||
601 | |||
602 | //matrix2 | ||
603 | mov edx, m0 | ||
604 | movaps xmm4, [edx] | ||
605 | movaps xmm5, [edx+0x10] | ||
606 | movaps xmm6, [edx+0x20] | ||
607 | movaps xmm7, [edx+0x30] | ||
608 | }; | ||
609 | |||
610 | // if w == 1.0f, we don't need to blend. | ||
611 | // but since we do the trick of blending the matrices, here, if w != 1.0, | ||
612 | // we load Matrix 1 into the other 4 SSE registers and blend both matrices | ||
613 | // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w) | ||
614 | |||
615 | if (w != 1.0f) | ||
616 | { | ||
617 | F32vec4 weight(w); | ||
618 | |||
619 | __asm { // do blending of matrices instead of verts and normals -- faster | ||
620 | mov edx, m1 | ||
621 | movaps xmm0, [edx] | ||
622 | movaps xmm1, [edx+0x10] | ||
623 | movaps xmm2, [edx+0x20] | ||
624 | movaps xmm3, [edx+0x30] | ||
625 | |||
626 | subps xmm4, xmm0 // do blend for each matrix column | ||
627 | subps xmm5, xmm1 // diff, then multiply weight and re-add | ||
628 | subps xmm6, xmm2 | ||
629 | subps xmm7, xmm3 | ||
630 | |||
631 | mulps xmm4, weight | ||
632 | mulps xmm5, weight | ||
633 | mulps xmm6, weight | ||
634 | mulps xmm7, weight | ||
635 | |||
636 | addps xmm4, xmm0 | ||
637 | addps xmm5, xmm1 | ||
638 | addps xmm6, xmm2 | ||
639 | addps xmm7, xmm3 | ||
640 | }; | ||
641 | } | ||
642 | |||
643 | __asm { | ||
644 | // save off blended matrix | ||
645 | mov edx, blend_mat; | ||
646 | movaps [edx], xmm4; | ||
647 | movaps [edx+0x10], xmm5; | ||
648 | movaps [edx+0x20], xmm6; | ||
649 | movaps [edx+0x30], xmm7; | ||
650 | } | ||
651 | } | ||
652 | |||
653 | // now, we have either a blended matrix in xmm4-7 or the original Matrix 0 | ||
654 | // we then multiply each vertex and normal by this one matrix. | ||
655 | |||
656 | // For SSE2, we would try to keep the original two matrices in other registers | ||
657 | // and avoid reloading them. However, they should ramain in L1 cache in the | ||
658 | // current case. | ||
659 | |||
660 | // One possible optimization would be to sort the vertices by weight instead | ||
661 | // of just index (we still want to uniqify). If we note when two or more vertices | ||
662 | // share the same weight, we can avoid doing the middle SSE code above and just | ||
663 | // re-use the blended matrix for those vertices | ||
664 | |||
665 | |||
666 | // now, we do the actual vertex blending | ||
667 | __asm { | ||
668 | // load Vertex into xmm0. | ||
669 | movaps xmm0, [esi] // change aps to ups when input is no longer 16-baligned | ||
670 | movaps xmm1, xmm0 // copy vector into xmm0 through xmm2 (x,y,z) | ||
671 | movaps xmm2, xmm0 | ||
672 | shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // clone vertex (x) across vector | ||
673 | shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); // clone vertex (y) across vector | ||
674 | shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); // same for Z | ||
675 | mulps xmm0, xmm4 // do the actual matrix multipication for r0 | ||
676 | mulps xmm1, xmm5 // for r1 | ||
677 | mulps xmm2, xmm6 // for r2 | ||
678 | addps xmm0, xmm1 // accumulate | ||
679 | addps xmm0, xmm2 // accumulate | ||
680 | addps xmm0, xmm7 // add in the row 4 which holds the x,y,z translation. assumes w=1 (vertex-w, not weight) | ||
681 | |||
682 | movaps [edi], xmm0 // store aligned in output array | ||
683 | |||
684 | // load Normal into xmm0. | ||
685 | movaps xmm0, [esi + 0x10] // change aps to ups when input no longer 16-byte aligned | ||
686 | movaps xmm1, xmm0 // | ||
687 | movaps xmm2, xmm0 | ||
688 | shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // since UV sits between vertex and normal, normal starts at element 1, not 0 | ||
689 | shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); | ||
690 | shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); | ||
691 | mulps xmm0, xmm4 // multiply by matrix | ||
692 | mulps xmm1, xmm5 // multiply | ||
693 | mulps xmm2, xmm6 // multiply | ||
694 | addps xmm0, xmm1 // accumulate | ||
695 | addps xmm0, xmm2 // accumulate. note: do not add translation component to normals, save time too | ||
696 | movaps [edi + 0x10], xmm0 // store aligned | ||
697 | } | ||
698 | |||
699 | *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot. | ||
700 | } | ||
701 | } | ||
702 | |||
703 | #elif LL_LINUX | ||
704 | |||
705 | void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, | ||
706 | LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) | ||
707 | { | ||
708 | assert(0); | ||
709 | } | ||
710 | |||
711 | #elif LL_DARWIN | ||
712 | // AltiVec versions of the same... | ||
713 | |||
714 | static inline vector float loadAlign(int offset, vector float *addr) | ||
715 | { | ||
716 | vector float in0 = vec_ld(offset, addr); | ||
717 | vector float in1 = vec_ld(offset + 16, addr); | ||
718 | vector unsigned char perm = vec_lvsl(0, (unsigned char*)addr); | ||
719 | |||
720 | return(vec_perm(in0, in1, perm)); | ||
721 | } | ||
722 | |||
723 | static inline void storeAlign(vector float v, int offset, vector float *addr) | ||
724 | { | ||
725 | vector float in0 = vec_ld(offset, addr); | ||
726 | vector float in1 = vec_ld(offset + 16, addr); | ||
727 | vector unsigned char perm = vec_lvsr(0, (unsigned char *)addr); | ||
728 | vector float temp = vec_perm(v, v, perm); | ||
729 | vector unsigned char mask = (vector unsigned char)vec_cmpgt(perm, vec_splat_u8(15)); | ||
730 | |||
731 | in0 = vec_sel(in0, temp, (vector unsigned int)mask); | ||
732 | in1 = vec_sel(temp, in1, (vector unsigned int)mask); | ||
733 | |||
734 | vec_st(in0, offset, addr); | ||
735 | vec_st(in1, offset + 16, addr); | ||
736 | } | ||
737 | |||
738 | void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, | ||
739 | LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) | ||
740 | { | ||
741 | F32 last_weight = F32_MAX; | ||
742 | // LLMatrix4 &blend_mat = gBlendMat; | ||
743 | |||
744 | vector float matrix0_0, matrix0_1, matrix0_2, matrix0_3; | ||
745 | vector unsigned char out0perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F ); | ||
746 | // vector unsigned char out1perm = (vector unsigned char) ( 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B ); | ||
747 | vector unsigned char out1perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F ); | ||
748 | |||
749 | vector float zero = (vector float)vec_splat_u32(0); | ||
750 | |||
751 | for (U32 index = vert_offset; index < vert_offset + vert_count; index++) | ||
752 | { | ||
753 | F32 w = weights [index]; // register copy of weight | ||
754 | F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever | ||
755 | F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned | ||
756 | |||
757 | // MBW -- XXX -- If this isn't the case, this code gets more complicated. | ||
758 | if(0x0000000F & (U32)vin) | ||
759 | { | ||
760 | llerrs << "blend_SSE_batch: input not 16-byte aligned!" << llendl; | ||
761 | } | ||
762 | if(0x0000000F & (U32)vout) | ||
763 | { | ||
764 | llerrs << "blend_SSE_batch: output not 16-byte aligned!" << llendl; | ||
765 | } | ||
766 | // if(0x0000000F & (U32)&(blend_mat.mMatrix)) | ||
767 | // { | ||
768 | // llerrs << "blend_SSE_batch: blend_mat not 16-byte aligned!" << llendl; | ||
769 | // } | ||
770 | |||
771 | if (w == last_weight) | ||
772 | { | ||
773 | // load last blended matrix | ||
774 | // Still loaded from last time through the loop. | ||
775 | // matrix0_0 = vec_ld(0x00, (vector float*)&(blend_mat.mMatrix)); | ||
776 | // matrix0_1 = vec_ld(0x10, (vector float*)&(blend_mat.mMatrix)); | ||
777 | // matrix0_2 = vec_ld(0x20, (vector float*)&(blend_mat.mMatrix)); | ||
778 | // matrix0_3 = vec_ld(0x30, (vector float*)&(blend_mat.mMatrix)); | ||
779 | } | ||
780 | else | ||
781 | { | ||
782 | last_weight = w; | ||
783 | S32 joint = llfloor(w); | ||
784 | w -= joint; | ||
785 | |||
786 | LLMatrix4 &m0 = gJointMat[joint+1]; | ||
787 | LLMatrix4 &m1 = gJointMat[joint+0]; | ||
788 | |||
789 | // load Matrix 0 into vector registers | ||
790 | matrix0_0 = vec_ld(0x00, (vector float*)&(m0.mMatrix)); | ||
791 | matrix0_1 = vec_ld(0x10, (vector float*)&(m0.mMatrix)); | ||
792 | matrix0_2 = vec_ld(0x20, (vector float*)&(m0.mMatrix)); | ||
793 | matrix0_3 = vec_ld(0x30, (vector float*)&(m0.mMatrix)); | ||
794 | |||
795 | // if w == 1.0f, we don't need to blend. | ||
796 | // but since we do the trick of blending the matrices, here, if w != 1.0, | ||
797 | // we load Matrix 1 into the other 4 SSE registers and blend both matrices | ||
798 | // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w) | ||
799 | |||
800 | if (w != 1.0f) | ||
801 | { | ||
802 | vector float matrix1_0, matrix1_1, matrix1_2, matrix1_3; | ||
803 | |||
804 | // This loads the weight somewhere in the vector register | ||
805 | vector float weight = vec_lde(0, &(w)); | ||
806 | // and this splats it to all elements. | ||
807 | weight = vec_splat(vec_perm(weight, weight, vec_lvsl(0, &(w))), 0); | ||
808 | |||
809 | // do blending of matrices instead of verts and normals -- faster | ||
810 | matrix1_0 = vec_ld(0x00, (vector float*)&(m1.mMatrix)); | ||
811 | matrix1_1 = vec_ld(0x10, (vector float*)&(m1.mMatrix)); | ||
812 | matrix1_2 = vec_ld(0x20, (vector float*)&(m1.mMatrix)); | ||
813 | matrix1_3 = vec_ld(0x30, (vector float*)&(m1.mMatrix)); | ||
814 | |||
815 | // m0[col] = ((m0[col] - m1[col]) * weight) + m1[col]; | ||
816 | matrix0_0 = vec_madd(vec_sub(matrix0_0, matrix1_0), weight, matrix1_0); | ||
817 | matrix0_1 = vec_madd(vec_sub(matrix0_1, matrix1_1), weight, matrix1_1); | ||
818 | matrix0_2 = vec_madd(vec_sub(matrix0_2, matrix1_2), weight, matrix1_2); | ||
819 | matrix0_3 = vec_madd(vec_sub(matrix0_3, matrix1_3), weight, matrix1_3); | ||
820 | } | ||
821 | |||
822 | // save off blended matrix | ||
823 | // vec_st(matrix0_0, 0x00, (vector float*)&(blend_mat.mMatrix)); | ||
824 | // vec_st(matrix0_1, 0x10, (vector float*)&(blend_mat.mMatrix)); | ||
825 | // vec_st(matrix0_2, 0x20, (vector float*)&(blend_mat.mMatrix)); | ||
826 | // vec_st(matrix0_3, 0x30, (vector float*)&(blend_mat.mMatrix)); | ||
827 | } | ||
828 | |||
829 | // now, we have either a blended matrix in matrix0_0-3 or the original Matrix 0 | ||
830 | // we then multiply each vertex and normal by this one matrix. | ||
831 | |||
832 | // For SSE2, we would try to keep the original two matrices in other registers | ||
833 | // and avoid reloading them. However, they should ramain in L1 cache in the | ||
834 | // current case. | ||
835 | |||
836 | // One possible optimization would be to sort the vertices by weight instead | ||
837 | // of just index (we still want to uniqify). If we note when two or more vertices | ||
838 | // share the same weight, we can avoid doing the middle SSE code above and just | ||
839 | // re-use the blended matrix for those vertices | ||
840 | |||
841 | |||
842 | // now, we do the actual vertex blending | ||
843 | |||
844 | vector float in0 = vec_ld(AVATAR_OFFSET_POS, (vector float*)vin); | ||
845 | vector float in1 = vec_ld(AVATAR_OFFSET_NORMAL, (vector float*)vin); | ||
846 | |||
847 | // Matrix multiply vertex | ||
848 | vector float out0 = vec_madd | ||
849 | ( | ||
850 | vec_splat(in0, 0), | ||
851 | matrix0_0, | ||
852 | vec_madd | ||
853 | ( | ||
854 | vec_splat(in0, 1), | ||
855 | matrix0_1, | ||
856 | vec_madd | ||
857 | ( | ||
858 | vec_splat(in0, 2), | ||
859 | matrix0_2, | ||
860 | matrix0_3 | ||
861 | ) | ||
862 | ) | ||
863 | ); | ||
864 | |||
865 | // Matrix multiply normal | ||
866 | vector float out1 = vec_madd | ||
867 | ( | ||
868 | vec_splat(in1, 0), | ||
869 | matrix0_0, | ||
870 | vec_madd | ||
871 | ( | ||
872 | vec_splat(in1, 1), | ||
873 | matrix0_1, | ||
874 | vec_madd | ||
875 | ( | ||
876 | vec_splat(in1, 2), | ||
877 | matrix0_2, | ||
878 | // no translation for normals | ||
879 | (vector float)vec_splat_u32(0) | ||
880 | ) | ||
881 | ) | ||
882 | ); | ||
883 | |||
884 | // indexed store | ||
885 | vec_stl(vec_perm(in0, out0, out0perm), AVATAR_OFFSET_POS, (vector float*)vout); // Pos | ||
886 | vec_stl(vec_perm(in1, out1, out1perm), AVATAR_OFFSET_NORMAL, (vector float*)vout); // Norm | ||
887 | *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot. | ||
888 | } | ||
889 | } | ||
890 | |||
891 | #endif | ||
892 | |||
893 | |||
894 | void llDrawElementsBatchBlend(const U32 vert_offset, const U32 vert_count, LLFace *face, const S32 index_count, const U32 *indices) | ||
895 | { | ||
896 | U8* gAGPVertices = gPipeline.bufferGetScratchMemory(); | ||
897 | |||
898 | if (gAGPVertices) | ||
899 | { | ||
900 | LLStrider<LLVector3> vertices; | ||
901 | LLStrider<LLVector3> normals; | ||
902 | LLStrider<LLVector2> tcoords0; | ||
903 | LLStrider<F32> weights; | ||
904 | |||
905 | LLStrider<LLVector3> o_vertices; | ||
906 | LLStrider<LLVector3> o_normals; | ||
907 | LLStrider<LLVector2> o_texcoords0; | ||
908 | |||
909 | |||
910 | LLStrider<LLVector3> binormals; | ||
911 | LLStrider<LLVector2> o_texcoords1; | ||
912 | // get the source vertices from the draw pool. We index these ourselves, as there was | ||
913 | // no guarantee the indices for a single jointmesh were contigious | ||
914 | |||
915 | LLDrawPool *pool = face->getPool(); | ||
916 | pool->getVertexStrider (vertices, 0); | ||
917 | pool->getTexCoordStrider (tcoords0, 0, 0); | ||
918 | pool->getNormalStrider (normals, 0); | ||
919 | pool->getBinormalStrider (binormals, 0); | ||
920 | pool->getVertexWeightStrider(weights, 0); | ||
921 | |||
922 | // load the addresses of the output striders | ||
923 | o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES); | ||
924 | o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES); | ||
925 | o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES); | ||
926 | o_texcoords1= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX1); o_texcoords1.setStride(AVATAR_VERTEX_BYTES); | ||
927 | |||
928 | #if !LL_LINUX // !!! *TODO: do the linux implementation | ||
929 | if (gGLManager.mSoftwareBlendSSE) | ||
930 | { | ||
931 | // do SSE blend without binormals or extra texcoords | ||
932 | blend_SSE_32_32_batch(vert_offset, vert_count, (float*)gAGPVertices, | ||
933 | vertices, tcoords0, normals, weights); | ||
934 | } | ||
935 | else // fully backwards compatible software blending, no SSE | ||
936 | #endif | ||
937 | { | ||
938 | LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1; | ||
939 | F32 last_weight = F32_MAX; | ||
940 | LLMatrix3 gBlendRotMat; | ||
941 | |||
942 | { | ||
943 | for (U32 index=vert_offset; index < vert_offset + vert_count; index++) | ||
944 | { | ||
945 | // blend by first matrix | ||
946 | F32 w = weights [index]; | ||
947 | |||
948 | if (w != last_weight) | ||
949 | { | ||
950 | last_weight = w; | ||
951 | |||
952 | S32 joint = llfloor(w); | ||
953 | w -= joint; | ||
954 | |||
955 | LLMatrix4 &m0 = gJointMat[joint+1]; | ||
956 | LLMatrix4 &m1 = gJointMat[joint+0]; | ||
957 | LLMatrix3 &n0 = gJointRot[joint+1]; | ||
958 | LLMatrix3 &n1 = gJointRot[joint+0]; | ||
959 | |||
960 | if (w == 1.0f) | ||
961 | { | ||
962 | gBlendMat = m0; | ||
963 | gBlendRotMat = n0; | ||
964 | } | ||
965 | else | ||
966 | { | ||
967 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); | ||
968 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); | ||
969 | gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); | ||
970 | |||
971 | gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); | ||
972 | gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); | ||
973 | gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); | ||
974 | |||
975 | gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); | ||
976 | gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); | ||
977 | gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); | ||
978 | |||
979 | gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); | ||
980 | gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); | ||
981 | gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); | ||
982 | |||
983 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); | ||
984 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); | ||
985 | gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); | ||
986 | |||
987 | gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); | ||
988 | gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); | ||
989 | gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); | ||
990 | |||
991 | gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); | ||
992 | gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); | ||
993 | gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); | ||
994 | } | ||
995 | } | ||
996 | |||
997 | // write result | ||
998 | o_vertices [index] = vertices[index] * gBlendMat; | ||
999 | o_normals [index] = normals [index] * gBlendRotMat; | ||
1000 | o_texcoords0[index] = tcoords0[index]; | ||
1001 | |||
1002 | /* | ||
1003 | // Verification code. Leave this here. It's useful for keeping the SSE and non-SSE versions in sync. | ||
1004 | LLVector3 temp; | ||
1005 | temp = tpos0; | ||
1006 | if( (o_vertices[index] - temp).magVecSquared() > 0.001f ) | ||
1007 | { | ||
1008 | llerrs << "V SSE: " << o_vertices[index] << " v. " << temp << llendl; | ||
1009 | } | ||
1010 | |||
1011 | temp = tnorm0; | ||
1012 | if( (o_normals[index] - temp).magVecSquared() > 0.001f ) | ||
1013 | { | ||
1014 | llerrs << "N SSE: " << o_normals[index] << " v. " << temp << llendl; | ||
1015 | } | ||
1016 | |||
1017 | if( (o_texcoords0[index] - tcoords0[index]).magVecSquared() > 0.001f ) | ||
1018 | { | ||
1019 | llerrs << "T0 SSE: " << o_texcoords0[index] << " v. " << tcoords0[index] << llendl; | ||
1020 | } | ||
1021 | */ | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | |||
1026 | #if LL_DARWIN | ||
1027 | // *HACK* *CHOKE* *PUKE* | ||
1028 | // No way does this belong here. | ||
1029 | glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * vert_count, gAGPVertices + (AVATAR_VERTEX_BYTES * vert_offset)); | ||
1030 | #endif | ||
1031 | glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); // draw it! | ||
1032 | } | 544 | } |
1033 | else | 545 | else |
1034 | { | 546 | { |
1035 | glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); | 547 | glDrawRangeElements(mode,start,end,count,type,indices); |
1036 | } | 548 | } |
1037 | } | 549 | } |
1038 | 550 | ||
1039 | |||
1040 | |||
1041 | //-------------------------------------------------------------------- | ||
1042 | // DrawElements | ||
1043 | |||
1044 | // works just like glDrawElements, except it assumes GL_TRIANGLES and GL_UNSIGNED_INT indices | ||
1045 | |||
1046 | // why? because the destination buffer may not be the AGP buffer and the eyes do not use blending | ||
1047 | // separate the eyes into their own drawpools and this code goes away. | ||
1048 | |||
1049 | //-------------------------------------------------------------------- | ||
1050 | |||
1051 | void llDrawElements(const S32 count, const U32 *indices, LLFace *face) | ||
1052 | { | ||
1053 | U8* gAGPVertices = gPipeline.bufferGetScratchMemory(); | ||
1054 | |||
1055 | if (gAGPVertices) | ||
1056 | { | ||
1057 | #if LL_DARWIN | ||
1058 | U32 minIndex = indices[0]; | ||
1059 | U32 maxIndex = indices[0]; | ||
1060 | #endif | ||
1061 | { | ||
1062 | LLStrider<LLVector3> vertices; | ||
1063 | LLStrider<LLVector3> normals; | ||
1064 | LLStrider<LLVector2> tcoords; | ||
1065 | LLStrider<F32> weights; | ||
1066 | |||
1067 | LLStrider<LLVector3> o_vertices; | ||
1068 | LLStrider<LLVector3> o_normals; | ||
1069 | LLStrider<LLVector2> o_texcoords0; | ||
1070 | |||
1071 | LLDrawPool *pool = face->getPool(); | ||
1072 | pool->getVertexStrider (vertices,0); | ||
1073 | pool->getNormalStrider (normals, 0); | ||
1074 | pool->getTexCoordStrider (tcoords, 0); | ||
1075 | |||
1076 | o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES); | ||
1077 | o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES); | ||
1078 | o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES); | ||
1079 | |||
1080 | for (S32 i=0; i < count; i++) | ||
1081 | { | ||
1082 | U32 index = indices[i]; | ||
1083 | |||
1084 | o_vertices [index] = vertices[index]; | ||
1085 | o_normals [index] = normals [index]; | ||
1086 | o_texcoords0[index] = tcoords [index]; | ||
1087 | |||
1088 | #if LL_DARWIN | ||
1089 | maxIndex = llmax(index, maxIndex); | ||
1090 | minIndex = llmin(index, minIndex); | ||
1091 | #endif | ||
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | #if LL_DARWIN | ||
1096 | // *HACK* *CHOKE* *PUKE* | ||
1097 | // No way does this belong here. | ||
1098 | glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * (maxIndex + 1 - minIndex), gAGPVertices + (AVATAR_VERTEX_BYTES * minIndex)); | ||
1099 | #endif | ||
1100 | |||
1101 | glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices); | ||
1102 | } | ||
1103 | else | ||
1104 | { | ||
1105 | glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices); | ||
1106 | } | ||
1107 | } | ||
1108 | |||
1109 | |||
1110 | //-------------------------------------------------------------------- | 551 | //-------------------------------------------------------------------- |
1111 | // LLViewerJointMesh::drawShape() | 552 | // LLViewerJointMesh::drawShape() |
1112 | //-------------------------------------------------------------------- | 553 | //-------------------------------------------------------------------- |
1113 | U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | 554 | U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass) |
1114 | { | 555 | { |
1115 | if (!mValid || !mVisible) return 0; | 556 | if (!mValid || !mMesh || !mFace || !mVisible || |
1116 | 557 | mFace->mVertexBuffer.isNull() || | |
1117 | U32 triangle_count = 0; | 558 | mMesh->getNumFaces() == 0) |
1118 | |||
1119 | //---------------------------------------------------------------- | ||
1120 | // if no mesh bail out now | ||
1121 | //---------------------------------------------------------------- | ||
1122 | if ( !mMesh || !mFace) | ||
1123 | { | 559 | { |
1124 | return 0; | 560 | return 0; |
1125 | } | 561 | } |
1126 | 562 | ||
1127 | //---------------------------------------------------------------- | 563 | U32 triangle_count = 0; |
1128 | // if we have no faces, bail out now | ||
1129 | //---------------------------------------------------------------- | ||
1130 | if ( mMesh->getNumFaces() == 0 ) | ||
1131 | { | ||
1132 | return 0; | ||
1133 | } | ||
1134 | 564 | ||
1135 | stop_glerror(); | 565 | stop_glerror(); |
1136 | 566 | ||
1137 | //---------------------------------------------------------------- | 567 | //---------------------------------------------------------------- |
1138 | // setup current color | 568 | // setup current color |
1139 | //---------------------------------------------------------------- | 569 | //---------------------------------------------------------------- |
1140 | if (gRenderForSelect) | 570 | if (!gRenderForSelect) |
1141 | { | ||
1142 | S32 name = mFace->getDrawable() ? mFace->getDrawable()->getVObj()->mGLName : 0; | ||
1143 | LLColor4U color((U8)(name >> 16), (U8)(name >> 8), (U8)name, 0xff); | ||
1144 | LLColor4 color_float(color); | ||
1145 | |||
1146 | glColor4f(color_float.mV[0], color_float.mV[1], color_float.mV[2], 1.f); | ||
1147 | } | ||
1148 | else | ||
1149 | { | 571 | { |
1150 | if ((mFace->getPool()->getVertexShaderLevel() > 0)) | 572 | if ((mFace->getPool()->getVertexShaderLevel() > 0)) |
1151 | { | 573 | { |
@@ -1169,7 +591,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1169 | 591 | ||
1170 | stop_glerror(); | 592 | stop_glerror(); |
1171 | 593 | ||
1172 | // LLGLSSpecular specular(mSpecular, gRenderForSelect ? 0.0f : mShiny); | ||
1173 | LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0)); | 594 | LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0)); |
1174 | 595 | ||
1175 | LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0); | 596 | LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0); |
@@ -1179,11 +600,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1179 | //---------------------------------------------------------------- | 600 | //---------------------------------------------------------------- |
1180 | llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive | 601 | llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive |
1181 | 602 | ||
1182 | //GLuint test_image_name = 0; | ||
1183 | |||
1184 | // | ||
1185 | LLGLState force_alpha_test(GL_ALPHA_TEST, isTransparent()); | ||
1186 | |||
1187 | if (mTestImageName) | 603 | if (mTestImageName) |
1188 | { | 604 | { |
1189 | LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D ); | 605 | LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D ); |
@@ -1236,11 +652,12 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1236 | gImageList.getImage(IMG_DEFAULT_AVATAR)->bind(); | 652 | gImageList.getImage(IMG_DEFAULT_AVATAR)->bind(); |
1237 | } | 653 | } |
1238 | 654 | ||
655 | LLGLDisable tex(gRenderForSelect && !isTransparent() ? GL_TEXTURE_2D : 0); | ||
656 | |||
1239 | if (gRenderForSelect) | 657 | if (gRenderForSelect) |
1240 | { | 658 | { |
1241 | if (isTransparent()) | 659 | if (isTransparent()) |
1242 | { | 660 | { |
1243 | //gGLSObjectSelectDepthAlpha.set(); | ||
1244 | glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB); | 661 | glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB); |
1245 | glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE); | 662 | glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE); |
1246 | glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE); | 663 | glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE); |
@@ -1251,19 +668,14 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1251 | glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1 | 668 | glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1 |
1252 | glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); | 669 | glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); |
1253 | } | 670 | } |
1254 | else | ||
1255 | { | ||
1256 | //gGLSObjectSelectDepth.set(); | ||
1257 | } | ||
1258 | } | 671 | } |
1259 | else | 672 | else |
1260 | { | 673 | { |
1261 | //---------------------------------------------------------------- | 674 | //---------------------------------------------------------------- |
1262 | // by default, backface culling is enabled | 675 | // by default, backface culling is enabled |
1263 | //---------------------------------------------------------------- | 676 | //---------------------------------------------------------------- |
1264 | if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) | 677 | /*if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) |
1265 | { | 678 | { |
1266 | //LLGLSPipelineAvatar gls_pipeline_avatar; | ||
1267 | LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); | 679 | LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); |
1268 | 680 | ||
1269 | glClientActiveTextureARB(GL_TEXTURE0_ARB); | 681 | glClientActiveTextureARB(GL_TEXTURE0_ARB); |
@@ -1303,7 +715,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1303 | } | 715 | } |
1304 | else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER) | 716 | else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER) |
1305 | { | 717 | { |
1306 | //gGLSPipelineAvatarAlphaPass1.set(); | ||
1307 | glAlphaFunc(GL_GREATER, 0.1f); | 718 | glAlphaFunc(GL_GREATER, 0.1f); |
1308 | LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); | 719 | LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); |
1309 | 720 | ||
@@ -1334,81 +745,48 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1334 | 745 | ||
1335 | glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); | 746 | glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); |
1336 | glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); | 747 | glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); |
1337 | } | 748 | }*/ |
1338 | else if ( isTransparent()) | ||
1339 | { | ||
1340 | //gGLSNoCullFaces.set(); | ||
1341 | } | ||
1342 | else | ||
1343 | { | ||
1344 | //gGLSCullFaces.set(); | ||
1345 | } | ||
1346 | } | 749 | } |
1347 | 750 | ||
1348 | if (mMesh->hasWeights()) | 751 | mFace->mVertexBuffer->setBuffer(sRenderMask); |
1349 | { | ||
1350 | uploadJointMatrices(); | ||
1351 | 752 | ||
753 | U32 start = mMesh->mFaceVertexOffset; | ||
754 | U32 end = start + mMesh->mFaceVertexCount - 1; | ||
755 | U32 count = mMesh->mFaceIndexCount; | ||
756 | U32* indicesp = ((U32*) mFace->mVertexBuffer->getIndicesPointer()) + mMesh->mFaceIndexOffset; | ||
1352 | 757 | ||
758 | if (mMesh->hasWeights()) | ||
759 | { | ||
1353 | if ((mFace->getPool()->getVertexShaderLevel() > 0)) | 760 | if ((mFace->getPool()->getVertexShaderLevel() > 0)) |
1354 | { | 761 | { |
1355 | glMatrixMode(GL_MODELVIEW); | 762 | if (first_pass) |
1356 | glPushMatrix(); | 763 | { |
1357 | glLoadIdentity(); | 764 | uploadJointMatrices(); |
1358 | 765 | } | |
1359 | glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); | 766 | llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); |
1360 | |||
1361 | glPopMatrix(); | ||
1362 | } | 767 | } |
1363 | else | 768 | else |
1364 | { | 769 | { |
1365 | if (mFace->getGeomIndex() < 0) | 770 | llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); |
1366 | { | ||
1367 | llerrs << "Invalid geometry index in LLViewerJointMesh::drawShape() " << mFace->getGeomIndex() << llendl; | ||
1368 | } | ||
1369 | |||
1370 | if ((S32)(mMesh->mFaceVertexOffset + mMesh->mFaceVertexCount) > mFace->getGeomCount()) | ||
1371 | { | ||
1372 | ((LLVOAvatar*)mFace->getDrawable()->getVObj())->mRoot.dump(); | ||
1373 | llerrs << "Rendering outside of vertex bounds with mesh " << mName << " at pixel area " << pixelArea << llendl; | ||
1374 | } | ||
1375 | llDrawElementsBatchBlend(mMesh->mFaceVertexOffset, mMesh->mFaceVertexCount, | ||
1376 | mFace, mMesh->mFaceIndexCount, mMesh->getIndices()); | ||
1377 | } | 771 | } |
1378 | |||
1379 | } | 772 | } |
1380 | else | 773 | else |
1381 | { | 774 | { |
1382 | glPushMatrix(); | 775 | glPushMatrix(); |
1383 | LLMatrix4 jointToWorld = getWorldMatrix(); | 776 | LLMatrix4 jointToWorld = getWorldMatrix(); |
1384 | jointToWorld *= gCamera->getModelview(); | 777 | glMultMatrixf((GLfloat*)jointToWorld.mMatrix); |
1385 | glLoadMatrixf((GLfloat*)jointToWorld.mMatrix); | 778 | llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); |
1386 | |||
1387 | if ((mFace->getPool()->getVertexShaderLevel() > 0)) | ||
1388 | { | ||
1389 | glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); | ||
1390 | } | ||
1391 | else // this else clause handles non-weighted vertices. llDrawElements just copies and draws | ||
1392 | { | ||
1393 | llDrawElements(mMesh->mFaceIndexCount, mMesh->getIndices(), mFace); | ||
1394 | } | ||
1395 | |||
1396 | glPopMatrix(); | 779 | glPopMatrix(); |
1397 | } | 780 | } |
1398 | 781 | ||
1399 | triangle_count += mMesh->mFaceIndexCount; | 782 | triangle_count += mMesh->mFaceIndexCount; |
1400 | 783 | ||
1401 | if (gRenderForSelect) | ||
1402 | { | ||
1403 | glColor4fv(mColor.mV); | ||
1404 | } | ||
1405 | |||
1406 | if (mTestImageName) | 784 | if (mTestImageName) |
1407 | { | 785 | { |
1408 | glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); | 786 | glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); |
1409 | } | 787 | } |
1410 | 788 | ||
1411 | if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) | 789 | /*if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) |
1412 | { | 790 | { |
1413 | LLImageGL::unbindTexture(1, GL_TEXTURE_2D); | 791 | LLImageGL::unbindTexture(1, GL_TEXTURE_2D); |
1414 | glActiveTextureARB(GL_TEXTURE1_ARB); | 792 | glActiveTextureARB(GL_TEXTURE1_ARB); |
@@ -1421,7 +799,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1421 | 799 | ||
1422 | glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE); | 800 | glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE); |
1423 | glAlphaFunc(GL_GREATER, 0.01f); | 801 | glAlphaFunc(GL_GREATER, 0.01f); |
1424 | } | 802 | }*/ |
1425 | 803 | ||
1426 | if (mTexture.notNull()) { | 804 | if (mTexture.notNull()) { |
1427 | if (!mTexture->getClampS()) { | 805 | if (!mTexture->getClampS()) { |
@@ -1438,19 +816,20 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) | |||
1438 | //----------------------------------------------------------------------------- | 816 | //----------------------------------------------------------------------------- |
1439 | // updateFaceSizes() | 817 | // updateFaceSizes() |
1440 | //----------------------------------------------------------------------------- | 818 | //----------------------------------------------------------------------------- |
1441 | void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) | 819 | void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area) |
1442 | { | 820 | { |
1443 | // Do a pre-alloc pass to determine sizes of data. | 821 | // Do a pre-alloc pass to determine sizes of data. |
1444 | if (mMesh && mValid) | 822 | if (mMesh && mValid) |
1445 | { | 823 | { |
1446 | mMesh->mFaceVertexOffset = num_vertices; | 824 | mMesh->mFaceVertexOffset = num_vertices; |
1447 | mMesh->mFaceVertexCount = mMesh->getNumVertices(); | 825 | mMesh->mFaceVertexCount = mMesh->getNumVertices(); |
826 | mMesh->mFaceIndexOffset = num_indices; | ||
827 | mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; | ||
828 | |||
1448 | mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount; | 829 | mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount; |
1449 | num_vertices += mMesh->getNumVertices(); | ||
1450 | 830 | ||
1451 | mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; | 831 | num_vertices += mMesh->getNumVertices(); |
1452 | 832 | num_indices += mMesh->mFaceIndexCount; | |
1453 | mMesh->getSharedData()->genIndices(mMesh->mFaceVertexOffset); | ||
1454 | } | 833 | } |
1455 | } | 834 | } |
1456 | 835 | ||
@@ -1460,9 +839,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) | |||
1460 | void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) | 839 | void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) |
1461 | { | 840 | { |
1462 | U32 i; | 841 | U32 i; |
1463 | 842 | ||
1464 | if (!mValid) return; | ||
1465 | |||
1466 | mFace = face; | 843 | mFace = face; |
1467 | 844 | ||
1468 | LLStrider<LLVector3> verticesp; | 845 | LLStrider<LLVector3> verticesp; |
@@ -1471,13 +848,15 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w | |||
1471 | LLStrider<LLVector2> tex_coordsp; | 848 | LLStrider<LLVector2> tex_coordsp; |
1472 | LLStrider<F32> vertex_weightsp; | 849 | LLStrider<F32> vertex_weightsp; |
1473 | LLStrider<LLVector4> clothing_weightsp; | 850 | LLStrider<LLVector4> clothing_weightsp; |
851 | LLStrider<U32> indicesp; | ||
1474 | 852 | ||
1475 | // Copy data into the faces from the polymesh data. | 853 | // Copy data into the faces from the polymesh data. |
1476 | if (mMesh) | 854 | if (mMesh && mValid) |
1477 | { | 855 | { |
1478 | if (mMesh->getNumVertices()) | 856 | if (mMesh->getNumVertices()) |
1479 | { | 857 | { |
1480 | S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); | 858 | S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); |
859 | face->mVertexBuffer->getIndexStrider(indicesp); | ||
1481 | 860 | ||
1482 | if (-1 == index) | 861 | if (-1 == index) |
1483 | { | 862 | { |
@@ -1493,11 +872,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w | |||
1493 | vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i); | 872 | vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i); |
1494 | if (damp_wind) | 873 | if (damp_wind) |
1495 | { | 874 | { |
1496 | clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(0,0,0,0); | 875 | clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0); |
1497 | } | 876 | } |
1498 | else | 877 | else |
1499 | { | 878 | { |
1500 | clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(*(mMesh->getClothingWeights() + i)); | 879 | clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i)); |
880 | } | ||
881 | } | ||
882 | |||
883 | for (S32 i = 0; i < mMesh->getNumFaces(); i++) | ||
884 | { | ||
885 | for (U32 j = 0; j < 3; j++) | ||
886 | { | ||
887 | U32 k = i*3+j+mMesh->mFaceIndexOffset; | ||
888 | indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset; | ||
1501 | } | 889 | } |
1502 | } | 890 | } |
1503 | } | 891 | } |
@@ -1514,6 +902,92 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) | |||
1514 | return (valid != activate); | 902 | return (valid != activate); |
1515 | } | 903 | } |
1516 | 904 | ||
905 | void LLViewerJointMesh::updateGeometry() | ||
906 | { | ||
907 | if (mValid && mMesh && mFace && | ||
908 | mMesh->hasWeights() && | ||
909 | mFace->mVertexBuffer.notNull() && | ||
910 | gPipeline.getVertexShaderLevel(LLPipeline::SHADER_AVATAR) == 0) | ||
911 | { | ||
912 | uploadJointMatrices(); | ||
913 | LLStrider<LLVector3> o_vertices; | ||
914 | LLStrider<LLVector3> o_normals; | ||
915 | |||
916 | //get vertex and normal striders | ||
917 | LLVertexBuffer *buffer = mFace->mVertexBuffer; | ||
918 | buffer->getVertexStrider(o_vertices, 0); | ||
919 | buffer->getNormalStrider(o_normals, 0); | ||
920 | |||
921 | { | ||
922 | LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1; | ||
923 | F32 last_weight = F32_MAX; | ||
924 | LLMatrix3 gBlendRotMat; | ||
925 | |||
926 | |||
927 | for (U32 index= 0; index < mMesh->getNumVertices(); index++) | ||
928 | { | ||
929 | // blend by first matrix | ||
930 | F32 w = mMesh->getWeights()[index]; | ||
931 | |||
932 | if (w != last_weight) | ||
933 | { | ||
934 | last_weight = w; | ||
935 | |||
936 | S32 joint = llfloor(w); | ||
937 | w -= joint; | ||
938 | |||
939 | LLMatrix4 &m0 = gJointMat[joint+1]; | ||
940 | LLMatrix4 &m1 = gJointMat[joint+0]; | ||
941 | LLMatrix3 &n0 = gJointRot[joint+1]; | ||
942 | LLMatrix3 &n1 = gJointRot[joint+0]; | ||
943 | |||
944 | if (w == 1.0f) | ||
945 | { | ||
946 | gBlendMat = m0; | ||
947 | gBlendRotMat = n0; | ||
948 | } | ||
949 | else | ||
950 | { | ||
951 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); | ||
952 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); | ||
953 | gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); | ||
954 | |||
955 | gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); | ||
956 | gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); | ||
957 | gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); | ||
958 | |||
959 | gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); | ||
960 | gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); | ||
961 | gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); | ||
962 | |||
963 | gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); | ||
964 | gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); | ||
965 | gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); | ||
966 | |||
967 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); | ||
968 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); | ||
969 | gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); | ||
970 | |||
971 | gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); | ||
972 | gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); | ||
973 | gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); | ||
974 | |||
975 | gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); | ||
976 | gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); | ||
977 | gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); | ||
978 | } | ||
979 | } | ||
980 | |||
981 | // write result | ||
982 | U32 bidx = index + mMesh->mFaceVertexOffset; | ||
983 | |||
984 | o_vertices[bidx] = mMesh->getCoords()[index] * gBlendMat; | ||
985 | o_normals[bidx] = mMesh->getNormals()[index] * gBlendRotMat; | ||
986 | } | ||
987 | } | ||
988 | } | ||
989 | } | ||
990 | |||
1517 | void LLViewerJointMesh::dump() | 991 | void LLViewerJointMesh::dump() |
1518 | { | 992 | { |
1519 | if (mValid) | 993 | if (mValid) |