diff options
Diffstat (limited to 'linden/indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | linden/indra/newview/llviewerjointmesh.cpp | 214 |
1 files changed, 33 insertions, 181 deletions
diff --git a/linden/indra/newview/llviewerjointmesh.cpp b/linden/indra/newview/llviewerjointmesh.cpp index 642fa7b..c76990c 100644 --- a/linden/indra/newview/llviewerjointmesh.cpp +++ b/linden/indra/newview/llviewerjointmesh.cpp | |||
@@ -31,11 +31,14 @@ | |||
31 | //----------------------------------------------------------------------------- | 31 | //----------------------------------------------------------------------------- |
32 | #include "llviewerprecompiledheaders.h" | 32 | #include "llviewerprecompiledheaders.h" |
33 | 33 | ||
34 | #if LL_WINDOWS // For Intel vector classes | ||
35 | #include "fvec.h" | ||
36 | #endif | ||
37 | |||
34 | #include "imageids.h" | 38 | #include "imageids.h" |
35 | #include "llfasttimer.h" | 39 | #include "llfasttimer.h" |
36 | 40 | ||
37 | #include "llagent.h" | 41 | #include "llagent.h" |
38 | #include "llapr.h" | ||
39 | #include "llbox.h" | 42 | #include "llbox.h" |
40 | #include "lldrawable.h" | 43 | #include "lldrawable.h" |
41 | #include "lldrawpoolavatar.h" | 44 | #include "lldrawpoolavatar.h" |
@@ -46,18 +49,12 @@ | |||
46 | #include "llglheaders.h" | 49 | #include "llglheaders.h" |
47 | #include "lltexlayer.h" | 50 | #include "lltexlayer.h" |
48 | #include "llviewercamera.h" | 51 | #include "llviewercamera.h" |
49 | #include "llviewercontrol.h" | ||
50 | #include "llviewerimagelist.h" | 52 | #include "llviewerimagelist.h" |
51 | #include "llviewerjointmesh.h" | 53 | #include "llviewerjointmesh.h" |
52 | #include "llvoavatar.h" | 54 | #include "llvoavatar.h" |
53 | #include "llsky.h" | 55 | #include "llsky.h" |
54 | #include "pipeline.h" | 56 | #include "pipeline.h" |
55 | #include "llglslshader.h" | 57 | #include "llglslshader.h" |
56 | #include "llmath.h" | ||
57 | #include "v4math.h" | ||
58 | #include "m3math.h" | ||
59 | #include "m4math.h" | ||
60 | |||
61 | 58 | ||
62 | #if !LL_DARWIN && !LL_LINUX | 59 | #if !LL_DARWIN && !LL_LINUX |
63 | extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; | 60 | extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; |
@@ -71,7 +68,6 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | | |||
71 | LLVertexBuffer::MAP_NORMAL | | 68 | LLVertexBuffer::MAP_NORMAL | |
72 | LLVertexBuffer::MAP_TEXCOORD; | 69 | LLVertexBuffer::MAP_TEXCOORD; |
73 | 70 | ||
74 | |||
75 | //----------------------------------------------------------------------------- | 71 | //----------------------------------------------------------------------------- |
76 | //----------------------------------------------------------------------------- | 72 | //----------------------------------------------------------------------------- |
77 | // LLViewerJointMesh::LLSkinJoint | 73 | // LLViewerJointMesh::LLSkinJoint |
@@ -124,7 +120,6 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint) | |||
124 | return TRUE; | 120 | return TRUE; |
125 | } | 121 | } |
126 | 122 | ||
127 | |||
128 | //----------------------------------------------------------------------------- | 123 | //----------------------------------------------------------------------------- |
129 | //----------------------------------------------------------------------------- | 124 | //----------------------------------------------------------------------------- |
130 | // LLViewerJointMesh | 125 | // LLViewerJointMesh |
@@ -419,9 +414,9 @@ const S32 NUM_AXES = 3; | |||
419 | // rotation Z 0-n | 414 | // rotation Z 0-n |
420 | // pivot parent 0-n -- child = n+1 | 415 | // pivot parent 0-n -- child = n+1 |
421 | 416 | ||
422 | static LLMatrix4 gJointMatUnaligned[32]; | 417 | static LLMatrix4 gJointMat[32]; |
423 | static LLMatrix3 gJointRotUnaligned[32]; | 418 | static LLMatrix3 gJointRot[32]; |
424 | static LLVector4 gJointPivot[32]; | 419 | static LLVector4 gJointPivot[32]; |
425 | 420 | ||
426 | //----------------------------------------------------------------------------- | 421 | //----------------------------------------------------------------------------- |
427 | // uploadJointMatrices() | 422 | // uploadJointMatrices() |
@@ -442,8 +437,8 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
442 | { | 437 | { |
443 | joint_mat *= LLDrawPoolAvatar::getModelView(); | 438 | joint_mat *= LLDrawPoolAvatar::getModelView(); |
444 | } | 439 | } |
445 | gJointMatUnaligned[joint_num] = joint_mat; | 440 | gJointMat[joint_num] = joint_mat; |
446 | gJointRotUnaligned[joint_num] = joint_mat.getMat3(); | 441 | gJointRot[joint_num] = joint_mat.getMat3(); |
447 | } | 442 | } |
448 | 443 | ||
449 | BOOL last_pivot_uploaded = FALSE; | 444 | BOOL last_pivot_uploaded = FALSE; |
@@ -480,8 +475,8 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
480 | { | 475 | { |
481 | LLVector3 pivot; | 476 | LLVector3 pivot; |
482 | pivot = LLVector3(gJointPivot[i]); | 477 | pivot = LLVector3(gJointPivot[i]); |
483 | pivot = pivot * gJointRotUnaligned[i]; | 478 | pivot = pivot * gJointRot[i]; |
484 | gJointMatUnaligned[i].translate(pivot); | 479 | gJointMat[i].translate(pivot); |
485 | } | 480 | } |
486 | 481 | ||
487 | // upload matrices | 482 | // upload matrices |
@@ -492,11 +487,11 @@ void LLViewerJointMesh::uploadJointMatrices() | |||
492 | 487 | ||
493 | for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) | 488 | for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) |
494 | { | 489 | { |
495 | gJointMatUnaligned[joint_num].transpose(); | 490 | gJointMat[joint_num].transpose(); |
496 | 491 | ||
497 | for (S32 axis = 0; axis < NUM_AXES; axis++) | 492 | for (S32 axis = 0; axis < NUM_AXES; axis++) |
498 | { | 493 | { |
499 | F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis]; | 494 | F32* vector = gJointMat[joint_num].mMatrix[axis]; |
500 | //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); | 495 | //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); |
501 | U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; | 496 | U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; |
502 | memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); | 497 | memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); |
@@ -908,9 +903,21 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) | |||
908 | return (valid != activate); | 903 | return (valid != activate); |
909 | } | 904 | } |
910 | 905 | ||
911 | // static | 906 | |
912 | void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) | 907 | void LLViewerJointMesh::updateGeometry() |
913 | { | 908 | { |
909 | if (!(mValid | ||
910 | && mMesh | ||
911 | && mFace | ||
912 | && mMesh->hasWeights() | ||
913 | && mFace->mVertexBuffer.notNull() | ||
914 | && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) | ||
915 | { | ||
916 | return; | ||
917 | } | ||
918 | |||
919 | uploadJointMatrices(); | ||
920 | |||
914 | LLStrider<LLVector3> o_vertices; | 921 | LLStrider<LLVector3> o_vertices; |
915 | LLStrider<LLVector3> o_normals; | 922 | LLStrider<LLVector3> o_normals; |
916 | 923 | ||
@@ -951,9 +958,9 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) | |||
951 | // No lerp required in this case. | 958 | // No lerp required in this case. |
952 | if (w == 1.0f) | 959 | if (w == 1.0f) |
953 | { | 960 | { |
954 | gBlendMat = gJointMatUnaligned[joint+1]; | 961 | gBlendMat = gJointMat[joint+1]; |
955 | o_vertices[bidx] = coords[index] * gBlendMat; | 962 | o_vertices[bidx] = coords[index] * gBlendMat; |
956 | gBlendRotMat = gJointRotUnaligned[joint+1]; | 963 | gBlendRotMat = gJointRot[joint+1]; |
957 | o_normals[bidx] = normals[index] * gBlendRotMat; | 964 | o_normals[bidx] = normals[index] * gBlendRotMat; |
958 | continue; | 965 | continue; |
959 | } | 966 | } |
@@ -961,8 +968,8 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) | |||
961 | // Try to keep all the accesses to the matrix data as close | 968 | // Try to keep all the accesses to the matrix data as close |
962 | // together as possible. This function is a hot spot on the | 969 | // together as possible. This function is a hot spot on the |
963 | // Mac. JC | 970 | // Mac. JC |
964 | LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; | 971 | LLMatrix4 &m0 = gJointMat[joint+1]; |
965 | LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; | 972 | LLMatrix4 &m1 = gJointMat[joint+0]; |
966 | 973 | ||
967 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); | 974 | gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); |
968 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); | 975 | gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); |
@@ -982,8 +989,8 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) | |||
982 | 989 | ||
983 | o_vertices[bidx] = coords[index] * gBlendMat; | 990 | o_vertices[bidx] = coords[index] * gBlendMat; |
984 | 991 | ||
985 | LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; | 992 | LLMatrix3 &n0 = gJointRot[joint+1]; |
986 | LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; | 993 | LLMatrix3 &n1 = gJointRot[joint+0]; |
987 | 994 | ||
988 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); | 995 | gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); |
989 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); | 996 | gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); |
@@ -1001,161 +1008,6 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) | |||
1001 | } | 1008 | } |
1002 | } | 1009 | } |
1003 | 1010 | ||
1004 | const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow | ||
1005 | const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK; | ||
1006 | static bool sUpdateGeometryCallPointer = false; | ||
1007 | static F64 sUpdateGeometryGlobalTime = 0.0 ; | ||
1008 | static F64 sUpdateGeometryElapsedTime = 0.0 ; | ||
1009 | static F64 sUpdateGeometryElapsedTimeOff = 0.0 ; | ||
1010 | static F64 sUpdateGeometryElapsedTimeOn = 0.0 ; | ||
1011 | static F64 sUpdateGeometryRunAvgOff[10]; | ||
1012 | static F64 sUpdateGeometryRunAvgOn[10]; | ||
1013 | static U32 sUpdateGeometryRunCount = 0 ; | ||
1014 | static U32 sUpdateGeometryCalls = 0 ; | ||
1015 | static U32 sUpdateGeometryLastProcessor = 0 ; | ||
1016 | void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh); | ||
1017 | |||
1018 | void LLViewerJointMesh::updateGeometry() | ||
1019 | { | ||
1020 | extern BOOL gVectorizePerfTest; | ||
1021 | extern U32 gVectorizeProcessor; | ||
1022 | |||
1023 | if (!(mValid | ||
1024 | && mMesh | ||
1025 | && mFace | ||
1026 | && mMesh->hasWeights() | ||
1027 | && mFace->mVertexBuffer.notNull() | ||
1028 | && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) | ||
1029 | { | ||
1030 | return; | ||
1031 | } | ||
1032 | |||
1033 | if (!gVectorizePerfTest) | ||
1034 | { | ||
1035 | // Once we've measured performance, just run the specified | ||
1036 | // code version. | ||
1037 | if(sUpdateGeometryFunc == updateGeometryOriginal) | ||
1038 | uploadJointMatrices(); | ||
1039 | sUpdateGeometryFunc(mFace, mMesh); | ||
1040 | } | ||
1041 | else | ||
1042 | { | ||
1043 | // At startup, measure the amount of time in skinning and choose | ||
1044 | // the fastest one. | ||
1045 | LLTimer ug_timer ; | ||
1046 | |||
1047 | if (sUpdateGeometryCallPointer) | ||
1048 | { | ||
1049 | if(sUpdateGeometryFunc == updateGeometryOriginal) | ||
1050 | uploadJointMatrices(); | ||
1051 | // call accelerated version for this processor | ||
1052 | sUpdateGeometryFunc(mFace, mMesh); | ||
1053 | } | ||
1054 | else | ||
1055 | { | ||
1056 | uploadJointMatrices(); | ||
1057 | updateGeometryOriginal(mFace, mMesh); | ||
1058 | } | ||
1059 | |||
1060 | sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64(); | ||
1061 | ++sUpdateGeometryCalls; | ||
1062 | if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW)) | ||
1063 | { | ||
1064 | F64 time_since_app_start = ug_timer.getElapsedSeconds(); | ||
1065 | if(sUpdateGeometryGlobalTime == 0.0 | ||
1066 | || sUpdateGeometryLastProcessor != gVectorizeProcessor) | ||
1067 | { | ||
1068 | sUpdateGeometryGlobalTime = time_since_app_start; | ||
1069 | sUpdateGeometryElapsedTime = 0; | ||
1070 | sUpdateGeometryCalls = 0; | ||
1071 | sUpdateGeometryRunCount = 0; | ||
1072 | sUpdateGeometryLastProcessor = gVectorizeProcessor; | ||
1073 | sUpdateGeometryCallPointer = false; | ||
1074 | return; | ||
1075 | } | ||
1076 | F64 percent_time_in_function = | ||
1077 | ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ; | ||
1078 | sUpdateGeometryGlobalTime = time_since_app_start; | ||
1079 | if (!sUpdateGeometryCallPointer) | ||
1080 | { | ||
1081 | // First set of run data is with vectorization off. | ||
1082 | sUpdateGeometryCallPointer = true; | ||
1083 | llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " | ||
1084 | << "vectorize off " << percent_time_in_function | ||
1085 | << "% of time with " | ||
1086 | << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) | ||
1087 | << " seconds per call " | ||
1088 | << llendl; | ||
1089 | sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function; | ||
1090 | sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime; | ||
1091 | sUpdateGeometryCalls = 0; | ||
1092 | } | ||
1093 | else | ||
1094 | { | ||
1095 | // Second set of run data is with vectorization on. | ||
1096 | sUpdateGeometryCallPointer = false; | ||
1097 | llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " | ||
1098 | << "VEC on " << percent_time_in_function | ||
1099 | << "% of time with " | ||
1100 | << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) | ||
1101 | << " seconds per call " | ||
1102 | << llendl; | ||
1103 | sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ; | ||
1104 | sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime; | ||
1105 | |||
1106 | sUpdateGeometryCalls = 0; | ||
1107 | sUpdateGeometryRunCount++; | ||
1108 | F64 a = 0.0, b = 0.0; | ||
1109 | for(U32 i = 0; i<sUpdateGeometryRunCount; i++) | ||
1110 | { | ||
1111 | a += sUpdateGeometryRunAvgOff[i]; | ||
1112 | b += sUpdateGeometryRunAvgOn[i]; | ||
1113 | } | ||
1114 | a /= sUpdateGeometryRunCount; | ||
1115 | b /= sUpdateGeometryRunCount; | ||
1116 | F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn; | ||
1117 | llinfos << "run averages (" << (F64)sUpdateGeometryRunCount | ||
1118 | << "/10) vectorize off " << a | ||
1119 | << "% : vectorize type " << gVectorizeProcessor | ||
1120 | << " " << b | ||
1121 | << "% : performance boost " | ||
1122 | << perf_boost * 100.0 | ||
1123 | << "%" | ||
1124 | << llendl ; | ||
1125 | if(sUpdateGeometryRunCount == 10) | ||
1126 | { | ||
1127 | // In case user runs test again, force reset of data on | ||
1128 | // next run. | ||
1129 | sUpdateGeometryGlobalTime = 0.0; | ||
1130 | |||
1131 | // We have data now on which version is faster. Switch to that | ||
1132 | // code and save the data for next run. | ||
1133 | gVectorizePerfTest = FALSE; | ||
1134 | gSavedSettings.setBOOL("VectorizePerfTest", FALSE); | ||
1135 | |||
1136 | if (perf_boost > 0.0) | ||
1137 | { | ||
1138 | llinfos << "Vectorization improves avatar skinning performance, " | ||
1139 | << "keeping on for future runs." | ||
1140 | << llendl; | ||
1141 | gSavedSettings.setBOOL("VectorizeSkin", TRUE); | ||
1142 | } | ||
1143 | else | ||
1144 | { | ||
1145 | // SIMD decreases performance, fall back to original code | ||
1146 | llinfos << "Vectorization decreases avatar skinning performance, " | ||
1147 | << "switching back to original code." | ||
1148 | << llendl; | ||
1149 | |||
1150 | gSavedSettings.setBOOL("VectorizeSkin", FALSE); | ||
1151 | } | ||
1152 | } | ||
1153 | } | ||
1154 | sUpdateGeometryElapsedTime = 0.0f; | ||
1155 | } | ||
1156 | } | ||
1157 | } | ||
1158 | |||
1159 | void LLViewerJointMesh::dump() | 1011 | void LLViewerJointMesh::dump() |
1160 | { | 1012 | { |
1161 | if (mValid) | 1013 | if (mValid) |