aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/newview/llviewerjointmesh.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r--linden/indra/newview/llviewerjointmesh.cpp215
1 files changed, 181 insertions, 34 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp
index c76990c..09fd015 100644
--- a/linden/indra/newview/llviewerjointmesh.cpp
+++ b/linden/indra/newview/llviewerjointmesh.cpp
@@ -31,14 +31,11 @@
31//----------------------------------------------------------------------------- 31//-----------------------------------------------------------------------------
32#include "llviewerprecompiledheaders.h" 32#include "llviewerprecompiledheaders.h"
33 33
34#if LL_WINDOWS // For Intel vector classes
35 #include "fvec.h"
36#endif
37
38#include "imageids.h" 34#include "imageids.h"
39#include "llfasttimer.h" 35#include "llfasttimer.h"
40 36
41#include "llagent.h" 37#include "llagent.h"
38#include "llapr.h"
42#include "llbox.h" 39#include "llbox.h"
43#include "lldrawable.h" 40#include "lldrawable.h"
44#include "lldrawpoolavatar.h" 41#include "lldrawpoolavatar.h"
@@ -49,14 +46,19 @@
49#include "llglheaders.h" 46#include "llglheaders.h"
50#include "lltexlayer.h" 47#include "lltexlayer.h"
51#include "llviewercamera.h" 48#include "llviewercamera.h"
49#include "llviewercontrol.h"
52#include "llviewerimagelist.h" 50#include "llviewerimagelist.h"
53#include "llviewerjointmesh.h" 51#include "llviewerjointmesh.h"
54#include "llvoavatar.h" 52#include "llvoavatar.h"
55#include "llsky.h" 53#include "llsky.h"
56#include "pipeline.h" 54#include "pipeline.h"
57#include "llglslshader.h" 55#include "llglslshader.h"
56#include "llmath.h"
57#include "v4math.h"
58#include "m3math.h"
59#include "m4math.h"
58 60
59#if !LL_DARWIN && !LL_LINUX 61#if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS
60extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; 62extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB;
61extern PFNGLWEIGHTFVARBPROC glWeightfvARB; 63extern PFNGLWEIGHTFVARBPROC glWeightfvARB;
62extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; 64extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB;
@@ -68,6 +70,7 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX |
68 LLVertexBuffer::MAP_NORMAL | 70 LLVertexBuffer::MAP_NORMAL |
69 LLVertexBuffer::MAP_TEXCOORD; 71 LLVertexBuffer::MAP_TEXCOORD;
70 72
73
71//----------------------------------------------------------------------------- 74//-----------------------------------------------------------------------------
72//----------------------------------------------------------------------------- 75//-----------------------------------------------------------------------------
73// LLViewerJointMesh::LLSkinJoint 76// LLViewerJointMesh::LLSkinJoint
@@ -120,6 +123,7 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint)
120 return TRUE; 123 return TRUE;
121} 124}
122 125
126
123//----------------------------------------------------------------------------- 127//-----------------------------------------------------------------------------
124//----------------------------------------------------------------------------- 128//-----------------------------------------------------------------------------
125// LLViewerJointMesh 129// LLViewerJointMesh
@@ -414,9 +418,9 @@ const S32 NUM_AXES = 3;
414// rotation Z 0-n 418// rotation Z 0-n
415// pivot parent 0-n -- child = n+1 419// pivot parent 0-n -- child = n+1
416 420
417static LLMatrix4 gJointMat[32]; 421static LLMatrix4 gJointMatUnaligned[32];
418static LLMatrix3 gJointRot[32]; 422static LLMatrix3 gJointRotUnaligned[32];
419static LLVector4 gJointPivot[32]; 423static LLVector4 gJointPivot[32];
420 424
421//----------------------------------------------------------------------------- 425//-----------------------------------------------------------------------------
422// uploadJointMatrices() 426// uploadJointMatrices()
@@ -437,8 +441,8 @@ void LLViewerJointMesh::uploadJointMatrices()
437 { 441 {
438 joint_mat *= LLDrawPoolAvatar::getModelView(); 442 joint_mat *= LLDrawPoolAvatar::getModelView();
439 } 443 }
440 gJointMat[joint_num] = joint_mat; 444 gJointMatUnaligned[joint_num] = joint_mat;
441 gJointRot[joint_num] = joint_mat.getMat3(); 445 gJointRotUnaligned[joint_num] = joint_mat.getMat3();
442 } 446 }
443 447
444 BOOL last_pivot_uploaded = FALSE; 448 BOOL last_pivot_uploaded = FALSE;
@@ -475,8 +479,8 @@ void LLViewerJointMesh::uploadJointMatrices()
475 { 479 {
476 LLVector3 pivot; 480 LLVector3 pivot;
477 pivot = LLVector3(gJointPivot[i]); 481 pivot = LLVector3(gJointPivot[i]);
478 pivot = pivot * gJointRot[i]; 482 pivot = pivot * gJointRotUnaligned[i];
479 gJointMat[i].translate(pivot); 483 gJointMatUnaligned[i].translate(pivot);
480 } 484 }
481 485
482 // upload matrices 486 // upload matrices
@@ -487,11 +491,11 @@ void LLViewerJointMesh::uploadJointMatrices()
487 491
488 for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) 492 for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++)
489 { 493 {
490 gJointMat[joint_num].transpose(); 494 gJointMatUnaligned[joint_num].transpose();
491 495
492 for (S32 axis = 0; axis < NUM_AXES; axis++) 496 for (S32 axis = 0; axis < NUM_AXES; axis++)
493 { 497 {
494 F32* vector = gJointMat[joint_num].mMatrix[axis]; 498 F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis];
495 //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); 499 //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector);
496 U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; 500 U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num;
497 memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); 501 memcpy(mat+offset*4, vector, sizeof(GLfloat)*4);
@@ -903,21 +907,9 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate)
903 return (valid != activate); 907 return (valid != activate);
904} 908}
905 909
906 910// static
907void LLViewerJointMesh::updateGeometry() 911void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
908{ 912{
909 if (!(mValid
910 && mMesh
911 && mFace
912 && mMesh->hasWeights()
913 && mFace->mVertexBuffer.notNull()
914 && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
915 {
916 return;
917 }
918
919 uploadJointMatrices();
920
921 LLStrider<LLVector3> o_vertices; 913 LLStrider<LLVector3> o_vertices;
922 LLStrider<LLVector3> o_normals; 914 LLStrider<LLVector3> o_normals;
923 915
@@ -958,9 +950,9 @@ void LLViewerJointMesh::updateGeometry()
958 // No lerp required in this case. 950 // No lerp required in this case.
959 if (w == 1.0f) 951 if (w == 1.0f)
960 { 952 {
961 gBlendMat = gJointMat[joint+1]; 953 gBlendMat = gJointMatUnaligned[joint+1];
962 o_vertices[bidx] = coords[index] * gBlendMat; 954 o_vertices[bidx] = coords[index] * gBlendMat;
963 gBlendRotMat = gJointRot[joint+1]; 955 gBlendRotMat = gJointRotUnaligned[joint+1];
964 o_normals[bidx] = normals[index] * gBlendRotMat; 956 o_normals[bidx] = normals[index] * gBlendRotMat;
965 continue; 957 continue;
966 } 958 }
@@ -968,8 +960,8 @@ void LLViewerJointMesh::updateGeometry()
968 // Try to keep all the accesses to the matrix data as close 960 // Try to keep all the accesses to the matrix data as close
969 // together as possible. This function is a hot spot on the 961 // together as possible. This function is a hot spot on the
970 // Mac. JC 962 // Mac. JC
971 LLMatrix4 &m0 = gJointMat[joint+1]; 963 LLMatrix4 &m0 = gJointMatUnaligned[joint+1];
972 LLMatrix4 &m1 = gJointMat[joint+0]; 964 LLMatrix4 &m1 = gJointMatUnaligned[joint+0];
973 965
974 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); 966 gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
975 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); 967 gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
@@ -989,8 +981,8 @@ void LLViewerJointMesh::updateGeometry()
989 981
990 o_vertices[bidx] = coords[index] * gBlendMat; 982 o_vertices[bidx] = coords[index] * gBlendMat;
991 983
992 LLMatrix3 &n0 = gJointRot[joint+1]; 984 LLMatrix3 &n0 = gJointRotUnaligned[joint+1];
993 LLMatrix3 &n1 = gJointRot[joint+0]; 985 LLMatrix3 &n1 = gJointRotUnaligned[joint+0];
994 986
995 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); 987 gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
996 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); 988 gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
@@ -1008,6 +1000,161 @@ void LLViewerJointMesh::updateGeometry()
1008 } 1000 }
1009} 1001}
1010 1002
1003const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow
1004const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK;
1005static bool sUpdateGeometryCallPointer = false;
1006static F64 sUpdateGeometryGlobalTime = 0.0 ;
1007static F64 sUpdateGeometryElapsedTime = 0.0 ;
1008static F64 sUpdateGeometryElapsedTimeOff = 0.0 ;
1009static F64 sUpdateGeometryElapsedTimeOn = 0.0 ;
1010static F64 sUpdateGeometryRunAvgOff[10];
1011static F64 sUpdateGeometryRunAvgOn[10];
1012static U32 sUpdateGeometryRunCount = 0 ;
1013static U32 sUpdateGeometryCalls = 0 ;
1014static U32 sUpdateGeometryLastProcessor = 0 ;
1015void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh);
1016
1017void LLViewerJointMesh::updateGeometry()
1018{
1019 extern BOOL gVectorizePerfTest;
1020 extern U32 gVectorizeProcessor;
1021
1022 if (!(mValid
1023 && mMesh
1024 && mFace
1025 && mMesh->hasWeights()
1026 && mFace->mVertexBuffer.notNull()
1027 && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
1028 {
1029 return;
1030 }
1031
1032 if (!gVectorizePerfTest)
1033 {
1034 // Once we've measured performance, just run the specified
1035 // code version.
1036 if(sUpdateGeometryFunc == updateGeometryOriginal)
1037 uploadJointMatrices();
1038 sUpdateGeometryFunc(mFace, mMesh);
1039 }
1040 else
1041 {
1042 // At startup, measure the amount of time in skinning and choose
1043 // the fastest one.
1044 LLTimer ug_timer ;
1045
1046 if (sUpdateGeometryCallPointer)
1047 {
1048 if(sUpdateGeometryFunc == updateGeometryOriginal)
1049 uploadJointMatrices();
1050 // call accelerated version for this processor
1051 sUpdateGeometryFunc(mFace, mMesh);
1052 }
1053 else
1054 {
1055 uploadJointMatrices();
1056 updateGeometryOriginal(mFace, mMesh);
1057 }
1058
1059 sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64();
1060 ++sUpdateGeometryCalls;
1061 if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW))
1062 {
1063 F64 time_since_app_start = ug_timer.getElapsedSeconds();
1064 if(sUpdateGeometryGlobalTime == 0.0
1065 || sUpdateGeometryLastProcessor != gVectorizeProcessor)
1066 {
1067 sUpdateGeometryGlobalTime = time_since_app_start;
1068 sUpdateGeometryElapsedTime = 0;
1069 sUpdateGeometryCalls = 0;
1070 sUpdateGeometryRunCount = 0;
1071 sUpdateGeometryLastProcessor = gVectorizeProcessor;
1072 sUpdateGeometryCallPointer = false;
1073 return;
1074 }
1075 F64 percent_time_in_function =
1076 ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ;
1077 sUpdateGeometryGlobalTime = time_since_app_start;
1078 if (!sUpdateGeometryCallPointer)
1079 {
1080 // First set of run data is with vectorization off.
1081 sUpdateGeometryCallPointer = true;
1082 llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
1083 << "vectorize off " << percent_time_in_function
1084 << "% of time with "
1085 << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
1086 << " seconds per call "
1087 << llendl;
1088 sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function;
1089 sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime;
1090 sUpdateGeometryCalls = 0;
1091 }
1092 else
1093 {
1094 // Second set of run data is with vectorization on.
1095 sUpdateGeometryCallPointer = false;
1096 llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
1097 << "VEC on " << percent_time_in_function
1098 << "% of time with "
1099 << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
1100 << " seconds per call "
1101 << llendl;
1102 sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ;
1103 sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime;
1104
1105 sUpdateGeometryCalls = 0;
1106 sUpdateGeometryRunCount++;
1107 F64 a = 0.0, b = 0.0;
1108 for(U32 i = 0; i<sUpdateGeometryRunCount; i++)
1109 {
1110 a += sUpdateGeometryRunAvgOff[i];
1111 b += sUpdateGeometryRunAvgOn[i];
1112 }
1113 a /= sUpdateGeometryRunCount;
1114 b /= sUpdateGeometryRunCount;
1115 F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn;
1116 llinfos << "run averages (" << (F64)sUpdateGeometryRunCount
1117 << "/10) vectorize off " << a
1118 << "% : vectorize type " << gVectorizeProcessor
1119 << " " << b
1120 << "% : performance boost "
1121 << perf_boost * 100.0
1122 << "%"
1123 << llendl ;
1124 if(sUpdateGeometryRunCount == 10)
1125 {
1126 // In case user runs test again, force reset of data on
1127 // next run.
1128 sUpdateGeometryGlobalTime = 0.0;
1129
1130 // We have data now on which version is faster. Switch to that
1131 // code and save the data for next run.
1132 gVectorizePerfTest = FALSE;
1133 gSavedSettings.setBOOL("VectorizePerfTest", FALSE);
1134
1135 if (perf_boost > 0.0)
1136 {
1137 llinfos << "Vectorization improves avatar skinning performance, "
1138 << "keeping on for future runs."
1139 << llendl;
1140 gSavedSettings.setBOOL("VectorizeSkin", TRUE);
1141 }
1142 else
1143 {
1144 // SIMD decreases performance, fall back to original code
1145 llinfos << "Vectorization decreases avatar skinning performance, "
1146 << "switching back to original code."
1147 << llendl;
1148
1149 gSavedSettings.setBOOL("VectorizeSkin", FALSE);
1150 }
1151 }
1152 }
1153 sUpdateGeometryElapsedTime = 0.0f;
1154 }
1155 }
1156}
1157
1011void LLViewerJointMesh::dump() 1158void LLViewerJointMesh::dump()
1012{ 1159{
1013 if (mValid) 1160 if (mValid)