aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/newview/llviewerjointmesh.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r--linden/indra/newview/llviewerjointmesh.cpp214
1 files changed, 181 insertions, 33 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp
index c76990c..642fa7b 100644
--- a/linden/indra/newview/llviewerjointmesh.cpp
+++ b/linden/indra/newview/llviewerjointmesh.cpp
@@ -31,14 +31,11 @@
31//----------------------------------------------------------------------------- 31//-----------------------------------------------------------------------------
32#include "llviewerprecompiledheaders.h" 32#include "llviewerprecompiledheaders.h"
33 33
34#if LL_WINDOWS // For Intel vector classes
35 #include "fvec.h"
36#endif
37
38#include "imageids.h" 34#include "imageids.h"
39#include "llfasttimer.h" 35#include "llfasttimer.h"
40 36
41#include "llagent.h" 37#include "llagent.h"
38#include "llapr.h"
42#include "llbox.h" 39#include "llbox.h"
43#include "lldrawable.h" 40#include "lldrawable.h"
44#include "lldrawpoolavatar.h" 41#include "lldrawpoolavatar.h"
@@ -49,12 +46,18 @@
49#include "llglheaders.h" 46#include "llglheaders.h"
50#include "lltexlayer.h" 47#include "lltexlayer.h"
51#include "llviewercamera.h" 48#include "llviewercamera.h"
49#include "llviewercontrol.h"
52#include "llviewerimagelist.h" 50#include "llviewerimagelist.h"
53#include "llviewerjointmesh.h" 51#include "llviewerjointmesh.h"
54#include "llvoavatar.h" 52#include "llvoavatar.h"
55#include "llsky.h" 53#include "llsky.h"
56#include "pipeline.h" 54#include "pipeline.h"
57#include "llglslshader.h" 55#include "llglslshader.h"
56#include "llmath.h"
57#include "v4math.h"
58#include "m3math.h"
59#include "m4math.h"
60
58 61
59#if !LL_DARWIN && !LL_LINUX 62#if !LL_DARWIN && !LL_LINUX
60extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; 63extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB;
@@ -68,6 +71,7 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX |
68 LLVertexBuffer::MAP_NORMAL | 71 LLVertexBuffer::MAP_NORMAL |
69 LLVertexBuffer::MAP_TEXCOORD; 72 LLVertexBuffer::MAP_TEXCOORD;
70 73
74
71//----------------------------------------------------------------------------- 75//-----------------------------------------------------------------------------
72//----------------------------------------------------------------------------- 76//-----------------------------------------------------------------------------
73// LLViewerJointMesh::LLSkinJoint 77// LLViewerJointMesh::LLSkinJoint
@@ -120,6 +124,7 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint)
120 return TRUE; 124 return TRUE;
121} 125}
122 126
127
123//----------------------------------------------------------------------------- 128//-----------------------------------------------------------------------------
124//----------------------------------------------------------------------------- 129//-----------------------------------------------------------------------------
125// LLViewerJointMesh 130// LLViewerJointMesh
@@ -414,9 +419,9 @@ const S32 NUM_AXES = 3;
414// rotation Z 0-n 419// rotation Z 0-n
415// pivot parent 0-n -- child = n+1 420// pivot parent 0-n -- child = n+1
416 421
417static LLMatrix4 gJointMat[32]; 422static LLMatrix4 gJointMatUnaligned[32];
418static LLMatrix3 gJointRot[32]; 423static LLMatrix3 gJointRotUnaligned[32];
419static LLVector4 gJointPivot[32]; 424static LLVector4 gJointPivot[32];
420 425
421//----------------------------------------------------------------------------- 426//-----------------------------------------------------------------------------
422// uploadJointMatrices() 427// uploadJointMatrices()
@@ -437,8 +442,8 @@ void LLViewerJointMesh::uploadJointMatrices()
437 { 442 {
438 joint_mat *= LLDrawPoolAvatar::getModelView(); 443 joint_mat *= LLDrawPoolAvatar::getModelView();
439 } 444 }
440 gJointMat[joint_num] = joint_mat; 445 gJointMatUnaligned[joint_num] = joint_mat;
441 gJointRot[joint_num] = joint_mat.getMat3(); 446 gJointRotUnaligned[joint_num] = joint_mat.getMat3();
442 } 447 }
443 448
444 BOOL last_pivot_uploaded = FALSE; 449 BOOL last_pivot_uploaded = FALSE;
@@ -475,8 +480,8 @@ void LLViewerJointMesh::uploadJointMatrices()
475 { 480 {
476 LLVector3 pivot; 481 LLVector3 pivot;
477 pivot = LLVector3(gJointPivot[i]); 482 pivot = LLVector3(gJointPivot[i]);
478 pivot = pivot * gJointRot[i]; 483 pivot = pivot * gJointRotUnaligned[i];
479 gJointMat[i].translate(pivot); 484 gJointMatUnaligned[i].translate(pivot);
480 } 485 }
481 486
482 // upload matrices 487 // upload matrices
@@ -487,11 +492,11 @@ void LLViewerJointMesh::uploadJointMatrices()
487 492
488 for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) 493 for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++)
489 { 494 {
490 gJointMat[joint_num].transpose(); 495 gJointMatUnaligned[joint_num].transpose();
491 496
492 for (S32 axis = 0; axis < NUM_AXES; axis++) 497 for (S32 axis = 0; axis < NUM_AXES; axis++)
493 { 498 {
494 F32* vector = gJointMat[joint_num].mMatrix[axis]; 499 F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis];
495 //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); 500 //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector);
496 U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; 501 U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num;
497 memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); 502 memcpy(mat+offset*4, vector, sizeof(GLfloat)*4);
@@ -903,21 +908,9 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate)
903 return (valid != activate); 908 return (valid != activate);
904} 909}
905 910
906 911// static
907void LLViewerJointMesh::updateGeometry() 912void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
908{ 913{
909 if (!(mValid
910 && mMesh
911 && mFace
912 && mMesh->hasWeights()
913 && mFace->mVertexBuffer.notNull()
914 && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
915 {
916 return;
917 }
918
919 uploadJointMatrices();
920
921 LLStrider<LLVector3> o_vertices; 914 LLStrider<LLVector3> o_vertices;
922 LLStrider<LLVector3> o_normals; 915 LLStrider<LLVector3> o_normals;
923 916
@@ -958,9 +951,9 @@ void LLViewerJointMesh::updateGeometry()
958 // No lerp required in this case. 951 // No lerp required in this case.
959 if (w == 1.0f) 952 if (w == 1.0f)
960 { 953 {
961 gBlendMat = gJointMat[joint+1]; 954 gBlendMat = gJointMatUnaligned[joint+1];
962 o_vertices[bidx] = coords[index] * gBlendMat; 955 o_vertices[bidx] = coords[index] * gBlendMat;
963 gBlendRotMat = gJointRot[joint+1]; 956 gBlendRotMat = gJointRotUnaligned[joint+1];
964 o_normals[bidx] = normals[index] * gBlendRotMat; 957 o_normals[bidx] = normals[index] * gBlendRotMat;
965 continue; 958 continue;
966 } 959 }
@@ -968,8 +961,8 @@ void LLViewerJointMesh::updateGeometry()
968 // Try to keep all the accesses to the matrix data as close 961 // Try to keep all the accesses to the matrix data as close
969 // together as possible. This function is a hot spot on the 962 // together as possible. This function is a hot spot on the
970 // Mac. JC 963 // Mac. JC
971 LLMatrix4 &m0 = gJointMat[joint+1]; 964 LLMatrix4 &m0 = gJointMatUnaligned[joint+1];
972 LLMatrix4 &m1 = gJointMat[joint+0]; 965 LLMatrix4 &m1 = gJointMatUnaligned[joint+0];
973 966
974 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); 967 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
975 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); 968 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
@@ -989,8 +982,8 @@ void LLViewerJointMesh::updateGeometry()
989 982
990 o_vertices[bidx] = coords[index] * gBlendMat; 983 o_vertices[bidx] = coords[index] * gBlendMat;
991 984
992 LLMatrix3 &n0 = gJointRot[joint+1]; 985 LLMatrix3 &n0 = gJointRotUnaligned[joint+1];
993 LLMatrix3 &n1 = gJointRot[joint+0]; 986 LLMatrix3 &n1 = gJointRotUnaligned[joint+0];
994 987
995 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); 988 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
996 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); 989 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
@@ -1008,6 +1001,161 @@ void LLViewerJointMesh::updateGeometry()
1008 } 1001 }
1009} 1002}
1010 1003
// File-scope state used by LLViewerJointMesh::updateGeometry() while it
// profiles the original skinning routine against the routine installed in
// sUpdateGeometryFunc.
//
// A measurement batch ends when any bit outside UPDATE_GEOMETRY_CALL_MASK
// becomes set in sUpdateGeometryCalls.
1004const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow
1005const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK;
// false: the current batch runs updateGeometryOriginal();
// true: the current batch runs sUpdateGeometryFunc.
1006static bool sUpdateGeometryCallPointer = false;
// Clock reading taken at the end of the previous batch; 0.0 forces a reset
// of the measurement state at the end of the next batch.
1007static F64 sUpdateGeometryGlobalTime = 0.0 ;
// Time accumulated inside the skinning call during the current batch.
1008static F64 sUpdateGeometryElapsedTime = 0.0 ;
// Running totals of per-batch elapsed time for the "off" (original) and
// "on" (sUpdateGeometryFunc) batches; used to compute the reported boost.
1009static F64 sUpdateGeometryElapsedTimeOff = 0.0 ;
1010static F64 sUpdateGeometryElapsedTimeOn = 0.0 ;
// Per-run "percent of wall time spent skinning" samples, indexed by
// sUpdateGeometryRunCount. The test stops once 10 runs have been recorded.
1011static F64 sUpdateGeometryRunAvgOff[10];
1012static F64 sUpdateGeometryRunAvgOn[10];
// Number of completed off/on run pairs.
1013static U32 sUpdateGeometryRunCount = 0 ;
// Number of skinning calls made in the current batch.
1014static U32 sUpdateGeometryCalls = 0 ;
// Value of gVectorizeProcessor seen when the measurement state was last reset.
1015static U32 sUpdateGeometryLastProcessor = 0 ;
// Skinning routine invoked by updateGeometry().
// NOTE(review): no assignment is visible in this part of the file, so as a
// static it starts out null — presumably installed elsewhere before the
// first updateGeometry() call; confirm.
1016void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh);
1017
//-----------------------------------------------------------------------------
// updateGeometry()
//
// Skins this mesh by dispatching to a skinning routine, and optionally
// profiles the original routine against sUpdateGeometryFunc.
//
// Does nothing unless the mesh is valid, has a mesh and a face, the mesh has
// weights, the face has a vertex buffer, and the avatar vertex shader level
// is 0.
//
// When gVectorizePerfTest is false, sUpdateGeometryFunc is called directly.
// When it is true, batches of calls alternate between
// updateGeometryOriginal() and sUpdateGeometryFunc, and the time spent in
// each is logged. After 10 off/on run pairs the test turns itself off and
// stores the outcome in the "VectorizePerfTest" and "VectorizeSkin" settings.
//
// uploadJointMatrices() is only called here ahead of updateGeometryOriginal().
// NOTE(review): presumably other sUpdateGeometryFunc targets prepare their
// own joint matrices — confirm against those implementations.
//-----------------------------------------------------------------------------
1018void LLViewerJointMesh::updateGeometry()
1019{
1020 extern BOOL gVectorizePerfTest;
1021 extern U32 gVectorizeProcessor;
1022
1023 if (!(mValid
1024 && mMesh
1025 && mFace
1026 && mMesh->hasWeights()
1027 && mFace->mVertexBuffer.notNull()
1028 && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
1029 {
1030 return;
1031 }
1032
1033 if (!gVectorizePerfTest)
1034 {
1035 // Once we've measured performance, just run the specified
1036 // code version.
 // NOTE(review): sUpdateGeometryFunc is not null-checked before the call.
1037 if(sUpdateGeometryFunc == updateGeometryOriginal)
1038 uploadJointMatrices();
1039 sUpdateGeometryFunc(mFace, mMesh);
1040 }
1041 else
1042 {
1043 // At startup, measure the amount of time in skinning and choose
1044 // the fastest one.
1045 LLTimer ug_timer ;
1046
1047 if (sUpdateGeometryCallPointer)
1048 {
1049 if(sUpdateGeometryFunc == updateGeometryOriginal)
1050 uploadJointMatrices();
1051 // call accelerated version for this processor
1052 sUpdateGeometryFunc(mFace, mMesh);
1053 }
1054 else
1055 {
1056 uploadJointMatrices();
1057 updateGeometryOriginal(mFace, mMesh);
1058 }
1059
 // Add this call's duration to the current batch and count the call.
1060 sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64();
1061 ++sUpdateGeometryCalls;
 // True once the call counter has a bit set above UPDATE_GEOMETRY_CALL_MASK,
 // i.e. the batch is complete.
1062 if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW))
1063 {
 // NOTE(review): read from the timer created above, yet named as time
 // since app start — presumably getElapsedSeconds() reports an
 // app-wide clock; verify against LLTimer.
1064 F64 time_since_app_start = ug_timer.getElapsedSeconds();
 // First batch ever, a re-run after a finished test, or a change of
 // processor type: discard this batch and restart with the original code.
 // NOTE(review): sUpdateGeometryElapsedTimeOff/On are not cleared here, so
 // the "performance boost" computed below appears to include totals from
 // earlier measurement sessions — confirm whether that is intended.
1065 if(sUpdateGeometryGlobalTime == 0.0
1066 || sUpdateGeometryLastProcessor != gVectorizeProcessor)
1067 {
1068 sUpdateGeometryGlobalTime = time_since_app_start;
1069 sUpdateGeometryElapsedTime = 0;
1070 sUpdateGeometryCalls = 0;
1071 sUpdateGeometryRunCount = 0;
1072 sUpdateGeometryLastProcessor = gVectorizeProcessor;
1073 sUpdateGeometryCallPointer = false;
1074 return;
1075 }
 // Share of the time elapsed since the previous batch ended that was spent
 // inside the skinning call, as a percentage.
 // NOTE(review): no guard against a zero denominator.
1076 F64 percent_time_in_function =
1077 ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ;
1078 sUpdateGeometryGlobalTime = time_since_app_start;
1079 if (!sUpdateGeometryCallPointer)
1080 {
1081 // First set of run data is with vectorization off.
1082 sUpdateGeometryCallPointer = true;
1083 llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
1084 << "vectorize off " << percent_time_in_function
1085 << "% of time with "
1086 << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
1087 << " seconds per call "
1088 << llendl;
1089 sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function;
1090 sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime;
1091 sUpdateGeometryCalls = 0;
1092 }
1093 else
1094 {
1095 // Second set of run data is with vectorization on.
1096 sUpdateGeometryCallPointer = false;
1097 llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
1098 << "VEC on " << percent_time_in_function
1099 << "% of time with "
1100 << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
1101 << " seconds per call "
1102 << llendl;
1103 sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ;
1104 sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime;
1105
1106 sUpdateGeometryCalls = 0;
1107 sUpdateGeometryRunCount++;
 // Average the per-run percentages recorded so far for each variant.
1108 F64 a = 0.0, b = 0.0;
1109 for(U32 i = 0; i<sUpdateGeometryRunCount; i++)
1110 {
1111 a += sUpdateGeometryRunAvgOff[i];
1112 b += sUpdateGeometryRunAvgOn[i];
1113 }
1114 a /= sUpdateGeometryRunCount;
1115 b /= sUpdateGeometryRunCount;
 // Relative difference of the cumulative off and on totals; positive when
 // the "on" batches took less total time.
 // NOTE(review): divides by sUpdateGeometryElapsedTimeOn without a zero check.
1116 F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn;
1117 llinfos << "run averages (" << (F64)sUpdateGeometryRunCount
1118 << "/10) vectorize off " << a
1119 << "% : vectorize type " << gVectorizeProcessor
1120 << " " << b
1121 << "% : performance boost "
1122 << perf_boost * 100.0
1123 << "%"
1124 << llendl ;
1125 if(sUpdateGeometryRunCount == 10)
1126 {
1127 // In case user runs test again, force reset of data on
1128 // next run.
1129 sUpdateGeometryGlobalTime = 0.0;
1130
1131 // We have data now on which version is faster. Switch to that
1132 // code and save the data for next run.
 // NOTE(review): only the settings are updated here; sUpdateGeometryFunc
 // itself is not reassigned in this function — presumably something else
 // reacts to "VectorizeSkin"; confirm.
1133 gVectorizePerfTest = FALSE;
1134 gSavedSettings.setBOOL("VectorizePerfTest", FALSE);
1135
1136 if (perf_boost > 0.0)
1137 {
1138 llinfos << "Vectorization improves avatar skinning performance, "
1139 << "keeping on for future runs."
1140 << llendl;
1141 gSavedSettings.setBOOL("VectorizeSkin", TRUE);
1142 }
1143 else
1144 {
1145 // SIMD decreases performance, fall back to original code
1146 llinfos << "Vectorization decreases avatar skinning performance, "
1147 << "switching back to original code."
1148 << llendl;
1149
1150 gSavedSettings.setBOOL("VectorizeSkin", FALSE);
1151 }
1152 }
1153 }
 // Start the next batch with a clean per-batch accumulator.
1154 sUpdateGeometryElapsedTime = 0.0f;
1155 }
1156 }
1157}
1158
1011void LLViewerJointMesh::dump() 1159void LLViewerJointMesh::dump()
1012{ 1160{
1013 if (mValid) 1161 if (mValid)