about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/linden/indra/newview/llviewerjointmesh.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- linden/indra/newview/llviewerjointmesh.cpp 214
1 files changed, 33 insertions, 181 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp
index 642fa7b..c76990c 100644
--- a/linden/indra/newview/llviewerjointmesh.cpp
+++ b/linden/indra/newview/llviewerjointmesh.cpp
@@ -31,11 +31,14 @@
31//----------------------------------------------------------------------------- 31//-----------------------------------------------------------------------------
32#include "llviewerprecompiledheaders.h" 32#include "llviewerprecompiledheaders.h"
33 33
34#if LL_WINDOWS // For Intel vector classes
35 #include "fvec.h"
36#endif
37
34#include "imageids.h" 38#include "imageids.h"
35#include "llfasttimer.h" 39#include "llfasttimer.h"
36 40
37#include "llagent.h" 41#include "llagent.h"
38#include "llapr.h"
39#include "llbox.h" 42#include "llbox.h"
40#include "lldrawable.h" 43#include "lldrawable.h"
41#include "lldrawpoolavatar.h" 44#include "lldrawpoolavatar.h"
@@ -46,18 +49,12 @@
46#include "llglheaders.h" 49#include "llglheaders.h"
47#include "lltexlayer.h" 50#include "lltexlayer.h"
48#include "llviewercamera.h" 51#include "llviewercamera.h"
49#include "llviewercontrol.h"
50#include "llviewerimagelist.h" 52#include "llviewerimagelist.h"
51#include "llviewerjointmesh.h" 53#include "llviewerjointmesh.h"
52#include "llvoavatar.h" 54#include "llvoavatar.h"
53#include "llsky.h" 55#include "llsky.h"
54#include "pipeline.h" 56#include "pipeline.h"
55#include "llglslshader.h" 57#include "llglslshader.h"
56#include "llmath.h"
57#include "v4math.h"
58#include "m3math.h"
59#include "m4math.h"
60
61 58
62#if !LL_DARWIN && !LL_LINUX 59#if !LL_DARWIN && !LL_LINUX
63extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; 60extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB;
@@ -71,7 +68,6 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX |
71 LLVertexBuffer::MAP_NORMAL | 68 LLVertexBuffer::MAP_NORMAL |
72 LLVertexBuffer::MAP_TEXCOORD; 69 LLVertexBuffer::MAP_TEXCOORD;
73 70
74
75//----------------------------------------------------------------------------- 71//-----------------------------------------------------------------------------
76//----------------------------------------------------------------------------- 72//-----------------------------------------------------------------------------
77// LLViewerJointMesh::LLSkinJoint 73// LLViewerJointMesh::LLSkinJoint
@@ -124,7 +120,6 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint)
124 return TRUE; 120 return TRUE;
125} 121}
126 122
127
128//----------------------------------------------------------------------------- 123//-----------------------------------------------------------------------------
129//----------------------------------------------------------------------------- 124//-----------------------------------------------------------------------------
130// LLViewerJointMesh 125// LLViewerJointMesh
@@ -419,9 +414,9 @@ const S32 NUM_AXES = 3;
419// rotation Z 0-n 414// rotation Z 0-n
420// pivot parent 0-n -- child = n+1 415// pivot parent 0-n -- child = n+1
421 416
422static LLMatrix4 gJointMatUnaligned[32]; 417static LLMatrix4 gJointMat[32];
423static LLMatrix3 gJointRotUnaligned[32]; 418static LLMatrix3 gJointRot[32];
424static LLVector4 gJointPivot[32]; 419static LLVector4 gJointPivot[32];
425 420
426//----------------------------------------------------------------------------- 421//-----------------------------------------------------------------------------
427// uploadJointMatrices() 422// uploadJointMatrices()
@@ -442,8 +437,8 @@ void LLViewerJointMesh::uploadJointMatrices()
442 { 437 {
443 joint_mat *= LLDrawPoolAvatar::getModelView(); 438 joint_mat *= LLDrawPoolAvatar::getModelView();
444 } 439 }
445 gJointMatUnaligned[joint_num] = joint_mat; 440 gJointMat[joint_num] = joint_mat;
446 gJointRotUnaligned[joint_num] = joint_mat.getMat3(); 441 gJointRot[joint_num] = joint_mat.getMat3();
447 } 442 }
448 443
449 BOOL last_pivot_uploaded = FALSE; 444 BOOL last_pivot_uploaded = FALSE;
@@ -480,8 +475,8 @@ void LLViewerJointMesh::uploadJointMatrices()
480 { 475 {
481 LLVector3 pivot; 476 LLVector3 pivot;
482 pivot = LLVector3(gJointPivot[i]); 477 pivot = LLVector3(gJointPivot[i]);
483 pivot = pivot * gJointRotUnaligned[i]; 478 pivot = pivot * gJointRot[i];
484 gJointMatUnaligned[i].translate(pivot); 479 gJointMat[i].translate(pivot);
485 } 480 }
486 481
487 // upload matrices 482 // upload matrices
@@ -492,11 +487,11 @@ void LLViewerJointMesh::uploadJointMatrices()
492 487
493 for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) 488 for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++)
494 { 489 {
495 gJointMatUnaligned[joint_num].transpose(); 490 gJointMat[joint_num].transpose();
496 491
497 for (S32 axis = 0; axis < NUM_AXES; axis++) 492 for (S32 axis = 0; axis < NUM_AXES; axis++)
498 { 493 {
499 F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis]; 494 F32* vector = gJointMat[joint_num].mMatrix[axis];
500 //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); 495 //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector);
501 U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; 496 U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num;
502 memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); 497 memcpy(mat+offset*4, vector, sizeof(GLfloat)*4);
@@ -908,9 +903,21 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate)
908 return (valid != activate); 903 return (valid != activate);
909} 904}
910 905
911// static 906
912void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) 907void LLViewerJointMesh::updateGeometry()
913{ 908{
909 if (!(mValid
910 && mMesh
911 && mFace
912 && mMesh->hasWeights()
913 && mFace->mVertexBuffer.notNull()
914 && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
915 {
916 return;
917 }
918
919 uploadJointMatrices();
920
914 LLStrider<LLVector3> o_vertices; 921 LLStrider<LLVector3> o_vertices;
915 LLStrider<LLVector3> o_normals; 922 LLStrider<LLVector3> o_normals;
916 923
@@ -951,9 +958,9 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
951 // No lerp required in this case. 958 // No lerp required in this case.
952 if (w == 1.0f) 959 if (w == 1.0f)
953 { 960 {
954 gBlendMat = gJointMatUnaligned[joint+1]; 961 gBlendMat = gJointMat[joint+1];
955 o_vertices[bidx] = coords[index] * gBlendMat; 962 o_vertices[bidx] = coords[index] * gBlendMat;
956 gBlendRotMat = gJointRotUnaligned[joint+1]; 963 gBlendRotMat = gJointRot[joint+1];
957 o_normals[bidx] = normals[index] * gBlendRotMat; 964 o_normals[bidx] = normals[index] * gBlendRotMat;
958 continue; 965 continue;
959 } 966 }
@@ -961,8 +968,8 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
961 // Try to keep all the accesses to the matrix data as close 968 // Try to keep all the accesses to the matrix data as close
962 // together as possible. This function is a hot spot on the 969 // together as possible. This function is a hot spot on the
963 // Mac. JC 970 // Mac. JC
964 LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; 971 LLMatrix4 &m0 = gJointMat[joint+1];
965 LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; 972 LLMatrix4 &m1 = gJointMat[joint+0];
966 973
967 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); 974 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
968 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); 975 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
@@ -982,8 +989,8 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
982 989
983 o_vertices[bidx] = coords[index] * gBlendMat; 990 o_vertices[bidx] = coords[index] * gBlendMat;
984 991
985 LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; 992 LLMatrix3 &n0 = gJointRot[joint+1];
986 LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; 993 LLMatrix3 &n1 = gJointRot[joint+0];
987 994
988 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); 995 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
989 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); 996 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
@@ -1001,161 +1008,6 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
1001 } 1008 }
1002} 1009}
1003 1010
1004const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow
1005const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK;
1006static bool sUpdateGeometryCallPointer = false;
1007static F64 sUpdateGeometryGlobalTime = 0.0 ;
1008static F64 sUpdateGeometryElapsedTime = 0.0 ;
1009static F64 sUpdateGeometryElapsedTimeOff = 0.0 ;
1010static F64 sUpdateGeometryElapsedTimeOn = 0.0 ;
1011static F64 sUpdateGeometryRunAvgOff[10];
1012static F64 sUpdateGeometryRunAvgOn[10];
1013static U32 sUpdateGeometryRunCount = 0 ;
1014static U32 sUpdateGeometryCalls = 0 ;
1015static U32 sUpdateGeometryLastProcessor = 0 ;
1016void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh);
1017
1018void LLViewerJointMesh::updateGeometry()
1019{
1020 extern BOOL gVectorizePerfTest;
1021 extern U32 gVectorizeProcessor;
1022
1023 if (!(mValid
1024 && mMesh
1025 && mFace
1026 && mMesh->hasWeights()
1027 && mFace->mVertexBuffer.notNull()
1028 && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
1029 {
1030 return;
1031 }
1032
1033 if (!gVectorizePerfTest)
1034 {
1035 // Once we've measured performance, just run the specified
1036 // code version.
1037 if(sUpdateGeometryFunc == updateGeometryOriginal)
1038 uploadJointMatrices();
1039 sUpdateGeometryFunc(mFace, mMesh);
1040 }
1041 else
1042 {
1043 // At startup, measure the amount of time in skinning and choose
1044 // the fastest one.
1045 LLTimer ug_timer ;
1046
1047 if (sUpdateGeometryCallPointer)
1048 {
1049 if(sUpdateGeometryFunc == updateGeometryOriginal)
1050 uploadJointMatrices();
1051 // call accelerated version for this processor
1052 sUpdateGeometryFunc(mFace, mMesh);
1053 }
1054 else
1055 {
1056 uploadJointMatrices();
1057 updateGeometryOriginal(mFace, mMesh);
1058 }
1059
1060 sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64();
1061 ++sUpdateGeometryCalls;
1062 if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW))
1063 {
1064 F64 time_since_app_start = ug_timer.getElapsedSeconds();
1065 if(sUpdateGeometryGlobalTime == 0.0
1066 || sUpdateGeometryLastProcessor != gVectorizeProcessor)
1067 {
1068 sUpdateGeometryGlobalTime = time_since_app_start;
1069 sUpdateGeometryElapsedTime = 0;
1070 sUpdateGeometryCalls = 0;
1071 sUpdateGeometryRunCount = 0;
1072 sUpdateGeometryLastProcessor = gVectorizeProcessor;
1073 sUpdateGeometryCallPointer = false;
1074 return;
1075 }
1076 F64 percent_time_in_function =
1077 ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ;
1078 sUpdateGeometryGlobalTime = time_since_app_start;
1079 if (!sUpdateGeometryCallPointer)
1080 {
1081 // First set of run data is with vectorization off.
1082 sUpdateGeometryCallPointer = true;
1083 llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
1084 << "vectorize off " << percent_time_in_function
1085 << "% of time with "
1086 << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
1087 << " seconds per call "
1088 << llendl;
1089 sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function;
1090 sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime;
1091 sUpdateGeometryCalls = 0;
1092 }
1093 else
1094 {
1095 // Second set of run data is with vectorization on.
1096 sUpdateGeometryCallPointer = false;
1097 llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
1098 << "VEC on " << percent_time_in_function
1099 << "% of time with "
1100 << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
1101 << " seconds per call "
1102 << llendl;
1103 sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ;
1104 sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime;
1105
1106 sUpdateGeometryCalls = 0;
1107 sUpdateGeometryRunCount++;
1108 F64 a = 0.0, b = 0.0;
1109 for(U32 i = 0; i<sUpdateGeometryRunCount; i++)
1110 {
1111 a += sUpdateGeometryRunAvgOff[i];
1112 b += sUpdateGeometryRunAvgOn[i];
1113 }
1114 a /= sUpdateGeometryRunCount;
1115 b /= sUpdateGeometryRunCount;
1116 F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn;
1117 llinfos << "run averages (" << (F64)sUpdateGeometryRunCount
1118 << "/10) vectorize off " << a
1119 << "% : vectorize type " << gVectorizeProcessor
1120 << " " << b
1121 << "% : performance boost "
1122 << perf_boost * 100.0
1123 << "%"
1124 << llendl ;
1125 if(sUpdateGeometryRunCount == 10)
1126 {
1127 // In case user runs test again, force reset of data on
1128 // next run.
1129 sUpdateGeometryGlobalTime = 0.0;
1130
1131 // We have data now on which version is faster. Switch to that
1132 // code and save the data for next run.
1133 gVectorizePerfTest = FALSE;
1134 gSavedSettings.setBOOL("VectorizePerfTest", FALSE);
1135
1136 if (perf_boost > 0.0)
1137 {
1138 llinfos << "Vectorization improves avatar skinning performance, "
1139 << "keeping on for future runs."
1140 << llendl;
1141 gSavedSettings.setBOOL("VectorizeSkin", TRUE);
1142 }
1143 else
1144 {
1145 // SIMD decreases performance, fall back to original code
1146 llinfos << "Vectorization decreases avatar skinning performance, "
1147 << "switching back to original code."
1148 << llendl;
1149
1150 gSavedSettings.setBOOL("VectorizeSkin", FALSE);
1151 }
1152 }
1153 }
1154 sUpdateGeometryElapsedTime = 0.0f;
1155 }
1156 }
1157}
1158
1159void LLViewerJointMesh::dump() 1011void LLViewerJointMesh::dump()
1160{ 1012{
1161 if (mValid) 1013 if (mValid)