diff options
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | linden/indra/newview/llviewerjointmesh.cpp | 214 |
1 files changed, 181 insertions, 33 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp index c76990c..642fa7b 100644 --- a/linden/indra/newview/llviewerjointmesh.cpp +++ b/linden/indra/newview/llviewerjointmesh.cpp | |||
@@ -31,14 +31,11 @@ | |||
31 | //----------------------------------------------------------------------------- | 31 | //----------------------------------------------------------------------------- |
32 | #include "llviewerprecompiledheaders.h" | 32 | #include "llviewerprecompiledheaders.h" |
33 | 33 | ||
34 | #if LL_WINDOWS // For Intel vector classes | ||
35 | #include "fvec.h" | ||
36 | #endif | ||
37 | |||
38 | #include "imageids.h" | 34 | #include "imageids.h" |
39 | #include "llfasttimer.h" | 35 | #include "llfasttimer.h" |
40 | 36 | ||
41 | #include "llagent.h" | 37 | #include "llagent.h" |
38 | #include "llapr.h" | ||
42 | #include "llbox.h" | 39 | #include "llbox.h" |
43 | #include "lldrawable.h" | 40 | #include "lldrawable.h" |
44 | #include "lldrawpoolavatar.h" | 41 | #include "lldrawpoolavatar.h" |
@@ -49,12 +46,18 @@ | |||
49 | #include "llglheaders.h" | 46 | #include "llglheaders.h" |
50 | #include "lltexlayer.h" | 47 | #include "lltexlayer.h" |
51 | #include "llviewercamera.h" | 48 | #include "llviewercamera.h" |
49 | #include "llviewercontrol.h" | ||
52 | #include "llviewerimagelist.h" | 50 | #include "llviewerimagelist.h" |
53 | #include "llviewerjointmesh.h" | 51 | #include "llviewerjointmesh.h" |
54 | #include "llvoavatar.h" | 52 | #include "llvoavatar.h" |
55 | #include "llsky.h" | 53 | #include "llsky.h" |
56 | #include "pipeline.h" | 54 | #include "pipeline.h" |
57 | #include "llglslshader.h" | 55 | #include "llglslshader.h" |
56 | #include "llmath.h" | ||
57 | #include "v4math.h" | ||
58 | #include "m3math.h" | ||
59 | #include "m4math.h" | ||
60 | |||
58 | 61 | ||
59 | #if !LL_DARWIN && !LL_LINUX | 62 | #if !LL_DARWIN && !LL_LINUX |
60 | extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; | 63 | extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; |
@@ -68,6 +71,7 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | | |||
68 | LLVertexBuffer::MAP_NORMAL | | 71 | LLVertexBuffer::MAP_NORMAL | |
69 | LLVertexBuffer::MAP_TEXCOORD; | 72 | LLVertexBuffer::MAP_TEXCOORD; |
70 | 73 | ||
74 | |||
71 | //----------------------------------------------------------------------------- | 75 | //----------------------------------------------------------------------------- |
72 | //----------------------------------------------------------------------------- | 76 | //----------------------------------------------------------------------------- |
73 | // LLViewerJointMesh::LLSkinJoint | 77 | // LLViewerJointMesh::LLSkinJoint |
@@ -120,6 +124,7 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint) | |||
120 | return TRUE; | 124 | return TRUE; |
121 | } | 125 | } |
122 | 126 | ||
127 | |||
123 | //----------------------------------------------------------------------------- | 128 | //----------------------------------------------------------------------------- |
124 | //----------------------------------------------------------------------------- | 129 | //----------------------------------------------------------------------------- |
125 | // LLViewerJointMesh | 130 | // LLViewerJointMesh |
@@ -414,9 +419,9 @@ const S32 NUM_AXES = 3; | |||
414 | // rotation Z 0-n | 419 | // rotation Z 0-n |
415 | // pivot parent 0-n -- child = n+1 | 420 | // pivot parent 0-n -- child = n+1 |
416 | 421 | ||
// Scratch tables shared by the skinning code in this file, each sized for 32
// joint slots. uploadJointMatrices() stores a joint's matrix in
// gJointMatUnaligned and that matrix's getMat3() rotation part in
// gJointRotUnaligned, and later translates matrices by the matching
// gJointPivot entry after rotating it through gJointRotUnaligned.
// NOTE(review): the "Unaligned" suffix presumably distinguishes these from
// aligned copies used by a vectorized skinning path -- confirm.
417 | static LLMatrix4 gJointMat[32]; | 422 | static LLMatrix4 gJointMatUnaligned[32]; |
418 | static LLMatrix3 gJointRot[32]; | 423 | static LLMatrix3 gJointRotUnaligned[32]; |
419 | static LLVector4 gJointPivot[32]; | 424 | static LLVector4 gJointPivot[32]; |
420 | 425 | ||
421 | //----------------------------------------------------------------------------- | 426 | //----------------------------------------------------------------------------- |
422 | // uploadJointMatrices() | 427 | // uploadJointMatrices() |
@@ -437,8 +442,8 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
437 | { | 442 | { |
438 | joint_mat *= LLDrawPoolAvatar::getModelView(); | 443 | joint_mat *= LLDrawPoolAvatar::getModelView(); |
439 | } | 444 | } |
440 | gJointMat[joint_num] = joint_mat; | 445 | gJointMatUnaligned[joint_num] = joint_mat; |
441 | gJointRot[joint_num] = joint_mat.getMat3(); | 446 | gJointRotUnaligned[joint_num] = joint_mat.getMat3(); |
442 | } | 447 | } |
443 | 448 | ||
444 | BOOL last_pivot_uploaded = FALSE; | 449 | BOOL last_pivot_uploaded = FALSE; |
@@ -475,8 +480,8 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
475 | { | 480 | { |
476 | LLVector3 pivot; | 481 | LLVector3 pivot; |
477 | pivot = LLVector3(gJointPivot[i]); | 482 | pivot = LLVector3(gJointPivot[i]); |
478 | pivot = pivot * gJointRot[i]; | 483 | pivot = pivot * gJointRotUnaligned[i]; |
479 | gJointMat[i].translate(pivot); | 484 | gJointMatUnaligned[i].translate(pivot); |
480 | } | 485 | } |
481 | 486 | ||
482 | // upload matrices | 487 | // upload matrices |
@@ -487,11 +492,11 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
487 | 492 | ||
488 | for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) | 493 | for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) |
489 | { | 494 | { |
490 | gJointMat[joint_num].transpose(); | 495 | gJointMatUnaligned[joint_num].transpose(); |
491 | 496 | ||
492 | for (S32 axis = 0; axis < NUM_AXES; axis++) | 497 | for (S32 axis = 0; axis < NUM_AXES; axis++) |
493 | { | 498 | { |
494 | F32* vector = gJointMat[joint_num].mMatrix[axis]; | 499 | F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis]; |
495 | //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); | 500 | //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); |
496 | U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; | 501 | U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; |
497 | memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); | 502 | memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); |
@@ -903,21 +908,9 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) | |||
903 | return (valid != activate); | 908 | return (valid != activate); |
904 | } | 909 | } |
905 | 910 | ||
906 | 911 | // static | |
907 | void LLViewerJointMesh::updateGeometry() | 912 | void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) |
908 | { | 913 | { |
909 | if (!(mValid | ||
910 | && mMesh | ||
911 | && mFace | ||
912 | && mMesh->hasWeights() | ||
913 | && mFace->mVertexBuffer.notNull() | ||
914 | && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) | ||
915 | { | ||
916 | return; | ||
917 | } | ||
918 | |||
919 | uploadJointMatrices(); | ||
920 | |||
921 | LLStrider<LLVector3> o_vertices; | 914 | LLStrider<LLVector3> o_vertices; |
922 | LLStrider<LLVector3> o_normals; | 915 | LLStrider<LLVector3> o_normals; |
923 | 916 | ||
@@ -958,9 +951,9 @@ void LLViewerJointMesh::updateGeometry() | |||
958 | // No lerp required in this case. | 951 | // No lerp required in this case. |
959 | if (w == 1.0f) | 952 | if (w == 1.0f) |
960 | { | 953 | { |
961 | gBlendMat = gJointMat[joint+1]; | 954 | gBlendMat = gJointMatUnaligned[joint+1]; |
962 | o_vertices[bidx] = coords[index] * gBlendMat; | 955 | o_vertices[bidx] = coords[index] * gBlendMat; |
963 | gBlendRotMat = gJointRot[joint+1]; | 956 | gBlendRotMat = gJointRotUnaligned[joint+1]; |
964 | o_normals[bidx] = normals[index] * gBlendRotMat; | 957 | o_normals[bidx] = normals[index] * gBlendRotMat; |
965 | continue; | 958 | continue; |
966 | } | 959 | } |
@@ -968,8 +961,8 @@ void LLViewerJointMesh::updateGeometry() | |||
968 | // Try to keep all the accesses to the matrix data as close | 961 | // Try to keep all the accesses to the matrix data as close |
969 | // together as possible. This function is a hot spot on the | 962 | // together as possible. This function is a hot spot on the |
970 | // Mac. JC | 963 | // Mac. JC |
971 | LLMatrix4 &m0 = gJointMat[joint+1]; | 964 | LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; |
972 | LLMatrix4 &m1 = gJointMat[joint+0]; | 965 | LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; |
973 | 966 | ||
974 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); | 967 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); |
975 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); | 968 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); |
@@ -989,8 +982,8 @@ void LLViewerJointMesh::updateGeometry() | |||
989 | 982 | ||
990 | o_vertices[bidx] = coords[index] * gBlendMat; | 983 | o_vertices[bidx] = coords[index] * gBlendMat; |
991 | 984 | ||
992 | LLMatrix3 &n0 = gJointRot[joint+1]; | 985 | LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; |
993 | LLMatrix3 &n1 = gJointRot[joint+0]; | 986 | LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; |
994 | 987 | ||
995 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); | 988 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); |
996 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); | 989 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); |
@@ -1008,6 +1001,161 @@ void LLViewerJointMesh::updateGeometry() | |||
1008 | } | 1001 | } |
1009 | } | 1002 | } |
1010 | 1003 | ||
// Bookkeeping for the skinning performance test run by updateGeometry().
// A batch ends once sUpdateGeometryCalls has any bit set outside
// UPDATE_GEOMETRY_CALL_MASK, i.e. when the counter reaches 0x2000 calls.
1004 | const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow | ||
1005 | const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK; | ||
// false: the current batch times updateGeometryOriginal();
// true: the current batch times sUpdateGeometryFunc. Flipped at each batch end.
1006 | static bool sUpdateGeometryCallPointer = false; | ||
// Clock reading taken at the previous batch boundary; 0.0 forces
// updateGeometry() to discard the batch and reset its counters.
1007 | static F64 sUpdateGeometryGlobalTime = 0.0 ; | ||
// Time spent inside the skinning call during the current batch.
1008 | static F64 sUpdateGeometryElapsedTime = 0.0 ; | ||
// Running totals of sUpdateGeometryElapsedTime over the "off" (original) and
// "on" (sUpdateGeometryFunc) batches; their ratio yields the reported boost.
1009 | static F64 sUpdateGeometryElapsedTimeOff = 0.0 ; | ||
1010 | static F64 sUpdateGeometryElapsedTimeOn = 0.0 ; | ||
// Percent-of-time figure recorded per run for each mode; updateGeometry()
// ends the test once ten runs have been stored.
1011 | static F64 sUpdateGeometryRunAvgOff[10]; | ||
1012 | static F64 sUpdateGeometryRunAvgOn[10]; | ||
// Number of completed off/on batch pairs.
1013 | static U32 sUpdateGeometryRunCount = 0 ; | ||
// Calls counted in the current batch.
1014 | static U32 sUpdateGeometryCalls = 0 ; | ||
// Value of gVectorizeProcessor when the counters were last reset; a mismatch
// restarts the measurement.
1015 | static U32 sUpdateGeometryLastProcessor = 0 ; | ||
// Definition of the static member holding the skinning routine that
// updateGeometry() dispatches to.
1016 | void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh); | ||
1017 | |||
1018 | void LLViewerJointMesh::updateGeometry() | ||
1019 | { | ||
1020 | extern BOOL gVectorizePerfTest; | ||
1021 | extern U32 gVectorizeProcessor; | ||
1022 | |||
1023 | if (!(mValid | ||
1024 | && mMesh | ||
1025 | && mFace | ||
1026 | && mMesh->hasWeights() | ||
1027 | && mFace->mVertexBuffer.notNull() | ||
1028 | && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) | ||
1029 | { | ||
1030 | return; | ||
1031 | } | ||
1032 | |||
1033 | if (!gVectorizePerfTest) | ||
1034 | { | ||
1035 | // Once we've measured performance, just run the specified | ||
1036 | // code version. | ||
1037 | if(sUpdateGeometryFunc == updateGeometryOriginal) | ||
1038 | uploadJointMatrices(); | ||
1039 | sUpdateGeometryFunc(mFace, mMesh); | ||
1040 | } | ||
1041 | else | ||
1042 | { | ||
1043 | // At startup, measure the amount of time in skinning and choose | ||
1044 | // the fastest one. | ||
1045 | LLTimer ug_timer ; | ||
1046 | |||
1047 | if (sUpdateGeometryCallPointer) | ||
1048 | { | ||
1049 | if(sUpdateGeometryFunc == updateGeometryOriginal) | ||
1050 | uploadJointMatrices(); | ||
1051 | // call accelerated version for this processor | ||
1052 | sUpdateGeometryFunc(mFace, mMesh); | ||
1053 | } | ||
1054 | else | ||
1055 | { | ||
1056 | uploadJointMatrices(); | ||
1057 | updateGeometryOriginal(mFace, mMesh); | ||
1058 | } | ||
1059 | |||
1060 | sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64(); | ||
1061 | ++sUpdateGeometryCalls; | ||
1062 | if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW)) | ||
1063 | { | ||
1064 | F64 time_since_app_start = ug_timer.getElapsedSeconds(); | ||
1065 | if(sUpdateGeometryGlobalTime == 0.0 | ||
1066 | || sUpdateGeometryLastProcessor != gVectorizeProcessor) | ||
1067 | { | ||
1068 | sUpdateGeometryGlobalTime = time_since_app_start; | ||
1069 | sUpdateGeometryElapsedTime = 0; | ||
1070 | sUpdateGeometryCalls = 0; | ||
1071 | sUpdateGeometryRunCount = 0; | ||
1072 | sUpdateGeometryLastProcessor = gVectorizeProcessor; | ||
1073 | sUpdateGeometryCallPointer = false; | ||
1074 | return; | ||
1075 | } | ||
1076 | F64 percent_time_in_function = | ||
1077 | ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ; | ||
1078 | sUpdateGeometryGlobalTime = time_since_app_start; | ||
1079 | if (!sUpdateGeometryCallPointer) | ||
1080 | { | ||
1081 | // First set of run data is with vectorization off. | ||
1082 | sUpdateGeometryCallPointer = true; | ||
1083 | llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " | ||
1084 | << "vectorize off " << percent_time_in_function | ||
1085 | << "% of time with " | ||
1086 | << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) | ||
1087 | << " seconds per call " | ||
1088 | << llendl; | ||
1089 | sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function; | ||
1090 | sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime; | ||
1091 | sUpdateGeometryCalls = 0; | ||
1092 | } | ||
1093 | else | ||
1094 | { | ||
1095 | // Second set of run data is with vectorization on. | ||
1096 | sUpdateGeometryCallPointer = false; | ||
1097 | llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " | ||
1098 | << "VEC on " << percent_time_in_function | ||
1099 | << "% of time with " | ||
1100 | << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) | ||
1101 | << " seconds per call " | ||
1102 | << llendl; | ||
1103 | sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ; | ||
1104 | sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime; | ||
1105 | |||
1106 | sUpdateGeometryCalls = 0; | ||
1107 | sUpdateGeometryRunCount++; | ||
1108 | F64 a = 0.0, b = 0.0; | ||
1109 | for(U32 i = 0; i<sUpdateGeometryRunCount; i++) | ||
1110 | { | ||
1111 | a += sUpdateGeometryRunAvgOff[i]; | ||
1112 | b += sUpdateGeometryRunAvgOn[i]; | ||
1113 | } | ||
1114 | a /= sUpdateGeometryRunCount; | ||
1115 | b /= sUpdateGeometryRunCount; | ||
1116 | F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn; | ||
1117 | llinfos << "run averages (" << (F64)sUpdateGeometryRunCount | ||
1118 | << "/10) vectorize off " << a | ||
1119 | << "% : vectorize type " << gVectorizeProcessor | ||
1120 | << " " << b | ||
1121 | << "% : performance boost " | ||
1122 | << perf_boost * 100.0 | ||
1123 | << "%" | ||
1124 | << llendl ; | ||
1125 | if(sUpdateGeometryRunCount == 10) | ||
1126 | { | ||
1127 | // In case user runs test again, force reset of data on | ||
1128 | // next run. | ||
1129 | sUpdateGeometryGlobalTime = 0.0; | ||
1130 | |||
1131 | // We have data now on which version is faster. Switch to that | ||
1132 | // code and save the data for next run. | ||
1133 | gVectorizePerfTest = FALSE; | ||
1134 | gSavedSettings.setBOOL("VectorizePerfTest", FALSE); | ||
1135 | |||
1136 | if (perf_boost > 0.0) | ||
1137 | { | ||
1138 | llinfos << "Vectorization improves avatar skinning performance, " | ||
1139 | << "keeping on for future runs." | ||
1140 | << llendl; | ||
1141 | gSavedSettings.setBOOL("VectorizeSkin", TRUE); | ||
1142 | } | ||
1143 | else | ||
1144 | { | ||
1145 | // SIMD decreases performance, fall back to original code | ||
1146 | llinfos << "Vectorization decreases avatar skinning performance, " | ||
1147 | << "switching back to original code." | ||
1148 | << llendl; | ||
1149 | |||
1150 | gSavedSettings.setBOOL("VectorizeSkin", FALSE); | ||
1151 | } | ||
1152 | } | ||
1153 | } | ||
1154 | sUpdateGeometryElapsedTime = 0.0f; | ||
1155 | } | ||
1156 | } | ||
1157 | } | ||
1158 | |||
1011 | void LLViewerJointMesh::dump() | 1159 | void LLViewerJointMesh::dump() |
1012 | { | 1160 | { |
1013 | if (mValid) | 1161 | if (mValid) |