diff options
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | linden/indra/newview/llviewerjointmesh.cpp | 215 |
1 files changed, 181 insertions, 34 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp index c76990c..09fd015 100644 --- a/linden/indra/newview/llviewerjointmesh.cpp +++ b/linden/indra/newview/llviewerjointmesh.cpp | |||
@@ -31,14 +31,11 @@ | |||
31 | //----------------------------------------------------------------------------- | 31 | //----------------------------------------------------------------------------- |
32 | #include "llviewerprecompiledheaders.h" | 32 | #include "llviewerprecompiledheaders.h" |
33 | 33 | ||
34 | #if LL_WINDOWS // For Intel vector classes | ||
35 | #include "fvec.h" | ||
36 | #endif | ||
37 | |||
38 | #include "imageids.h" | 34 | #include "imageids.h" |
39 | #include "llfasttimer.h" | 35 | #include "llfasttimer.h" |
40 | 36 | ||
41 | #include "llagent.h" | 37 | #include "llagent.h" |
38 | #include "llapr.h" | ||
42 | #include "llbox.h" | 39 | #include "llbox.h" |
43 | #include "lldrawable.h" | 40 | #include "lldrawable.h" |
44 | #include "lldrawpoolavatar.h" | 41 | #include "lldrawpoolavatar.h" |
@@ -49,14 +46,19 @@ | |||
49 | #include "llglheaders.h" | 46 | #include "llglheaders.h" |
50 | #include "lltexlayer.h" | 47 | #include "lltexlayer.h" |
51 | #include "llviewercamera.h" | 48 | #include "llviewercamera.h" |
49 | #include "llviewercontrol.h" | ||
52 | #include "llviewerimagelist.h" | 50 | #include "llviewerimagelist.h" |
53 | #include "llviewerjointmesh.h" | 51 | #include "llviewerjointmesh.h" |
54 | #include "llvoavatar.h" | 52 | #include "llvoavatar.h" |
55 | #include "llsky.h" | 53 | #include "llsky.h" |
56 | #include "pipeline.h" | 54 | #include "pipeline.h" |
57 | #include "llglslshader.h" | 55 | #include "llglslshader.h" |
56 | #include "llmath.h" | ||
57 | #include "v4math.h" | ||
58 | #include "m3math.h" | ||
59 | #include "m4math.h" | ||
58 | 60 | ||
59 | #if !LL_DARWIN && !LL_LINUX | 61 | #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS |
60 | extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; | 62 | extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; |
61 | extern PFNGLWEIGHTFVARBPROC glWeightfvARB; | 63 | extern PFNGLWEIGHTFVARBPROC glWeightfvARB; |
62 | extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; | 64 | extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; |
@@ -68,6 +70,7 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | | |||
68 | LLVertexBuffer::MAP_NORMAL | | 70 | LLVertexBuffer::MAP_NORMAL | |
69 | LLVertexBuffer::MAP_TEXCOORD; | 71 | LLVertexBuffer::MAP_TEXCOORD; |
70 | 72 | ||
73 | |||
71 | //----------------------------------------------------------------------------- | 74 | //----------------------------------------------------------------------------- |
72 | //----------------------------------------------------------------------------- | 75 | //----------------------------------------------------------------------------- |
73 | // LLViewerJointMesh::LLSkinJoint | 76 | // LLViewerJointMesh::LLSkinJoint |
@@ -120,6 +123,7 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint) | |||
120 | return TRUE; | 123 | return TRUE; |
121 | } | 124 | } |
122 | 125 | ||
126 | |||
123 | //----------------------------------------------------------------------------- | 127 | //----------------------------------------------------------------------------- |
124 | //----------------------------------------------------------------------------- | 128 | //----------------------------------------------------------------------------- |
125 | // LLViewerJointMesh | 129 | // LLViewerJointMesh |
@@ -414,9 +418,9 @@ const S32 NUM_AXES = 3; | |||
414 | // rotation Z 0-n | 418 | // rotation Z 0-n |
415 | // pivot parent 0-n -- child = n+1 | 419 | // pivot parent 0-n -- child = n+1 |
416 | 420 | ||
417 | static LLMatrix4 gJointMat[32]; | 421 | static LLMatrix4 gJointMatUnaligned[32]; |
418 | static LLMatrix3 gJointRot[32]; | 422 | static LLMatrix3 gJointRotUnaligned[32]; |
419 | static LLVector4 gJointPivot[32]; | 423 | static LLVector4 gJointPivot[32]; |
420 | 424 | ||
421 | //----------------------------------------------------------------------------- | 425 | //----------------------------------------------------------------------------- |
422 | // uploadJointMatrices() | 426 | // uploadJointMatrices() |
@@ -437,8 +441,8 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
437 | { | 441 | { |
438 | joint_mat *= LLDrawPoolAvatar::getModelView(); | 442 | joint_mat *= LLDrawPoolAvatar::getModelView(); |
439 | } | 443 | } |
440 | gJointMat[joint_num] = joint_mat; | 444 | gJointMatUnaligned[joint_num] = joint_mat; |
441 | gJointRot[joint_num] = joint_mat.getMat3(); | 445 | gJointRotUnaligned[joint_num] = joint_mat.getMat3(); |
442 | } | 446 | } |
443 | 447 | ||
444 | BOOL last_pivot_uploaded = FALSE; | 448 | BOOL last_pivot_uploaded = FALSE; |
@@ -475,8 +479,8 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
475 | { | 479 | { |
476 | LLVector3 pivot; | 480 | LLVector3 pivot; |
477 | pivot = LLVector3(gJointPivot[i]); | 481 | pivot = LLVector3(gJointPivot[i]); |
478 | pivot = pivot * gJointRot[i]; | 482 | pivot = pivot * gJointRotUnaligned[i]; |
479 | gJointMat[i].translate(pivot); | 483 | gJointMatUnaligned[i].translate(pivot); |
480 | } | 484 | } |
481 | 485 | ||
482 | // upload matrices | 486 | // upload matrices |
@@ -487,11 +491,11 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
487 | 491 | ||
488 | for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) | 492 | for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) |
489 | { | 493 | { |
490 | gJointMat[joint_num].transpose(); | 494 | gJointMatUnaligned[joint_num].transpose(); |
491 | 495 | ||
492 | for (S32 axis = 0; axis < NUM_AXES; axis++) | 496 | for (S32 axis = 0; axis < NUM_AXES; axis++) |
493 | { | 497 | { |
494 | F32* vector = gJointMat[joint_num].mMatrix[axis]; | 498 | F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis]; |
495 | //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); | 499 | //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); |
496 | U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; | 500 | U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; |
497 | memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); | 501 | memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); |
@@ -903,21 +907,9 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) | |||
903 | return (valid != activate); | 907 | return (valid != activate); |
904 | } | 908 | } |
905 | 909 | ||
906 | 910 | // static | |
907 | void LLViewerJointMesh::updateGeometry() | 911 | void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) |
908 | { | 912 | { |
909 | if (!(mValid | ||
910 | && mMesh | ||
911 | && mFace | ||
912 | && mMesh->hasWeights() | ||
913 | && mFace->mVertexBuffer.notNull() | ||
914 | && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) | ||
915 | { | ||
916 | return; | ||
917 | } | ||
918 | |||
919 | uploadJointMatrices(); | ||
920 | |||
921 | LLStrider<LLVector3> o_vertices; | 913 | LLStrider<LLVector3> o_vertices; |
922 | LLStrider<LLVector3> o_normals; | 914 | LLStrider<LLVector3> o_normals; |
923 | 915 | ||
@@ -958,9 +950,9 @@ void LLViewerJointMesh::updateGeometry() | |||
958 | // No lerp required in this case. | 950 | // No lerp required in this case. |
959 | if (w == 1.0f) | 951 | if (w == 1.0f) |
960 | { | 952 | { |
961 | gBlendMat = gJointMat[joint+1]; | 953 | gBlendMat = gJointMatUnaligned[joint+1]; |
962 | o_vertices[bidx] = coords[index] * gBlendMat; | 954 | o_vertices[bidx] = coords[index] * gBlendMat; |
963 | gBlendRotMat = gJointRot[joint+1]; | 955 | gBlendRotMat = gJointRotUnaligned[joint+1]; |
964 | o_normals[bidx] = normals[index] * gBlendRotMat; | 956 | o_normals[bidx] = normals[index] * gBlendRotMat; |
965 | continue; | 957 | continue; |
966 | } | 958 | } |
@@ -968,8 +960,8 @@ void LLViewerJointMesh::updateGeometry() | |||
968 | // Try to keep all the accesses to the matrix data as close | 960 | // Try to keep all the accesses to the matrix data as close |
969 | // together as possible. This function is a hot spot on the | 961 | // together as possible. This function is a hot spot on the |
970 | // Mac. JC | 962 | // Mac. JC |
971 | LLMatrix4 &m0 = gJointMat[joint+1]; | 963 | LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; |
972 | LLMatrix4 &m1 = gJointMat[joint+0]; | 964 | LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; |
973 | 965 | ||
974 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); | 966 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); |
975 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); | 967 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); |
@@ -989,8 +981,8 @@ void LLViewerJointMesh::updateGeometry() | |||
989 | 981 | ||
990 | o_vertices[bidx] = coords[index] * gBlendMat; | 982 | o_vertices[bidx] = coords[index] * gBlendMat; |
991 | 983 | ||
992 | LLMatrix3 &n0 = gJointRot[joint+1]; | 984 | LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; |
993 | LLMatrix3 &n1 = gJointRot[joint+0]; | 985 | LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; |
994 | 986 | ||
995 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); | 987 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); |
996 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); | 988 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); |
@@ -1008,6 +1000,161 @@ void LLViewerJointMesh::updateGeometry() | |||
1008 | } | 1000 | } |
1009 | } | 1001 | } |
1010 | 1002 | ||
1003 | const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow | ||
1004 | const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK; | ||
1005 | static bool sUpdateGeometryCallPointer = false; | ||
1006 | static F64 sUpdateGeometryGlobalTime = 0.0 ; | ||
1007 | static F64 sUpdateGeometryElapsedTime = 0.0 ; | ||
1008 | static F64 sUpdateGeometryElapsedTimeOff = 0.0 ; | ||
1009 | static F64 sUpdateGeometryElapsedTimeOn = 0.0 ; | ||
1010 | static F64 sUpdateGeometryRunAvgOff[10]; | ||
1011 | static F64 sUpdateGeometryRunAvgOn[10]; | ||
1012 | static U32 sUpdateGeometryRunCount = 0 ; | ||
1013 | static U32 sUpdateGeometryCalls = 0 ; | ||
1014 | static U32 sUpdateGeometryLastProcessor = 0 ; | ||
1015 | void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh); | ||
1016 | |||
1017 | void LLViewerJointMesh::updateGeometry() | ||
1018 | { | ||
1019 | extern BOOL gVectorizePerfTest; | ||
1020 | extern U32 gVectorizeProcessor; | ||
1021 | |||
1022 | if (!(mValid | ||
1023 | && mMesh | ||
1024 | && mFace | ||
1025 | && mMesh->hasWeights() | ||
1026 | && mFace->mVertexBuffer.notNull() | ||
1027 | && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) | ||
1028 | { | ||
1029 | return; | ||
1030 | } | ||
1031 | |||
1032 | if (!gVectorizePerfTest) | ||
1033 | { | ||
1034 | // Once we've measured performance, just run the specified | ||
1035 | // code version. | ||
1036 | if(sUpdateGeometryFunc == updateGeometryOriginal) | ||
1037 | uploadJointMatrices(); | ||
1038 | sUpdateGeometryFunc(mFace, mMesh); | ||
1039 | } | ||
1040 | else | ||
1041 | { | ||
1042 | // At startup, measure the amount of time in skinning and choose | ||
1043 | // the fastest one. | ||
1044 | LLTimer ug_timer ; | ||
1045 | |||
1046 | if (sUpdateGeometryCallPointer) | ||
1047 | { | ||
1048 | if(sUpdateGeometryFunc == updateGeometryOriginal) | ||
1049 | uploadJointMatrices(); | ||
1050 | // call accelerated version for this processor | ||
1051 | sUpdateGeometryFunc(mFace, mMesh); | ||
1052 | } | ||
1053 | else | ||
1054 | { | ||
1055 | uploadJointMatrices(); | ||
1056 | updateGeometryOriginal(mFace, mMesh); | ||
1057 | } | ||
1058 | |||
1059 | sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64(); | ||
1060 | ++sUpdateGeometryCalls; | ||
1061 | if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW)) | ||
1062 | { | ||
1063 | F64 time_since_app_start = ug_timer.getElapsedSeconds(); | ||
1064 | if(sUpdateGeometryGlobalTime == 0.0 | ||
1065 | || sUpdateGeometryLastProcessor != gVectorizeProcessor) | ||
1066 | { | ||
1067 | sUpdateGeometryGlobalTime = time_since_app_start; | ||
1068 | sUpdateGeometryElapsedTime = 0; | ||
1069 | sUpdateGeometryCalls = 0; | ||
1070 | sUpdateGeometryRunCount = 0; | ||
1071 | sUpdateGeometryLastProcessor = gVectorizeProcessor; | ||
1072 | sUpdateGeometryCallPointer = false; | ||
1073 | return; | ||
1074 | } | ||
1075 | F64 percent_time_in_function = | ||
1076 | ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ; | ||
1077 | sUpdateGeometryGlobalTime = time_since_app_start; | ||
1078 | if (!sUpdateGeometryCallPointer) | ||
1079 | { | ||
1080 | // First set of run data is with vectorization off. | ||
1081 | sUpdateGeometryCallPointer = true; | ||
1082 | llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " | ||
1083 | << "vectorize off " << percent_time_in_function | ||
1084 | << "% of time with " | ||
1085 | << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) | ||
1086 | << " seconds per call " | ||
1087 | << llendl; | ||
1088 | sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function; | ||
1089 | sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime; | ||
1090 | sUpdateGeometryCalls = 0; | ||
1091 | } | ||
1092 | else | ||
1093 | { | ||
1094 | // Second set of run data is with vectorization on. | ||
1095 | sUpdateGeometryCallPointer = false; | ||
1096 | llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " | ||
1097 | << "VEC on " << percent_time_in_function | ||
1098 | << "% of time with " | ||
1099 | << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) | ||
1100 | << " seconds per call " | ||
1101 | << llendl; | ||
1102 | sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ; | ||
1103 | sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime; | ||
1104 | |||
1105 | sUpdateGeometryCalls = 0; | ||
1106 | sUpdateGeometryRunCount++; | ||
1107 | F64 a = 0.0, b = 0.0; | ||
1108 | for(U32 i = 0; i<sUpdateGeometryRunCount; i++) | ||
1109 | { | ||
1110 | a += sUpdateGeometryRunAvgOff[i]; | ||
1111 | b += sUpdateGeometryRunAvgOn[i]; | ||
1112 | } | ||
1113 | a /= sUpdateGeometryRunCount; | ||
1114 | b /= sUpdateGeometryRunCount; | ||
1115 | F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn; | ||
1116 | llinfos << "run averages (" << (F64)sUpdateGeometryRunCount | ||
1117 | << "/10) vectorize off " << a | ||
1118 | << "% : vectorize type " << gVectorizeProcessor | ||
1119 | << " " << b | ||
1120 | << "% : performance boost " | ||
1121 | << perf_boost * 100.0 | ||
1122 | << "%" | ||
1123 | << llendl ; | ||
1124 | if(sUpdateGeometryRunCount == 10) | ||
1125 | { | ||
1126 | // In case user runs test again, force reset of data on | ||
1127 | // next run. | ||
1128 | sUpdateGeometryGlobalTime = 0.0; | ||
1129 | |||
1130 | // We have data now on which version is faster. Switch to that | ||
1131 | // code and save the data for next run. | ||
1132 | gVectorizePerfTest = FALSE; | ||
1133 | gSavedSettings.setBOOL("VectorizePerfTest", FALSE); | ||
1134 | |||
1135 | if (perf_boost > 0.0) | ||
1136 | { | ||
1137 | llinfos << "Vectorization improves avatar skinning performance, " | ||
1138 | << "keeping on for future runs." | ||
1139 | << llendl; | ||
1140 | gSavedSettings.setBOOL("VectorizeSkin", TRUE); | ||
1141 | } | ||
1142 | else | ||
1143 | { | ||
1144 | // SIMD decreases performance, fall back to original code | ||
1145 | llinfos << "Vectorization decreases avatar skinning performance, " | ||
1146 | << "switching back to original code." | ||
1147 | << llendl; | ||
1148 | |||
1149 | gSavedSettings.setBOOL("VectorizeSkin", FALSE); | ||
1150 | } | ||
1151 | } | ||
1152 | } | ||
1153 | sUpdateGeometryElapsedTime = 0.0f; | ||
1154 | } | ||
1155 | } | ||
1156 | } | ||
1157 | |||
1011 | void LLViewerJointMesh::dump() | 1158 | void LLViewerJointMesh::dump() |
1012 | { | 1159 | { |
1013 | if (mValid) | 1160 | if (mValid) |