diff --git a/.all-contributorsrc b/.all-contributorsrc index 9b31cc331..5cfef4953 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -184,6 +184,33 @@ "contributions": [ "translation" ] + }, + { + "login": "animeojisan", + "name": "animeojisan", + "avatar_url": "https://avatars.githubusercontent.com/u/132756551?v=4", + "profile": "https://github.com/animeojisan", + "contributions": [ + "translation" + ] + }, + { + "login": "MuscularPuky", + "name": "MuscularPuky", + "avatar_url": "https://avatars.githubusercontent.com/u/93962018?v=4", + "profile": "https://github.com/MuscularPuky", + "contributions": [ + "translation" + ] + }, + { + "login": "Zoommod", + "name": "Zoommod", + "avatar_url": "https://avatars.githubusercontent.com/u/71239440?v=4", + "profile": "https://github.com/Zoommod", + "contributions": [ + "translation" + ] } ], "contributorsPerLine": 7, @@ -192,5 +219,6 @@ "repoType": "github", "repoHost": "https://github.com", "skipCi": true, - "commitConvention": "angular" + "commitConvention": "angular", + "commitType": "docs" } diff --git a/.conan/.gitignore b/.conan/.gitignore new file mode 100644 index 000000000..a8b05297c --- /dev/null +++ b/.conan/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!Directory.Build.props diff --git a/.conan/Directory.Build.props b/.conan/Directory.Build.props new file mode 100644 index 000000000..3f0d118b8 --- /dev/null +++ b/.conan/Directory.Build.props @@ -0,0 +1,5 @@ + + + + + diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 37bdaf78a..738671c0c 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -10,4 +10,4 @@ liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry -custom: ['https://github.com/Blinue/Magpie/blob/dev/FUNDING.md'] +custom: ['https://github.com/Blinue/Magpie/blob/main/FUNDING.md'] diff 
--git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01daeb3ec..e33a972d7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: uses: actions/cache@v3 with: path: ./.conan/data - key: ${{ runner.os }}-conan-${{ hashFiles('src/conanfile.txt') }} + key: ${{ runner.os }}-conan-${{ hashFiles('src/**/conanfile.txt') }} - name: Restore NuGet packages run: nuget restore diff --git a/.gitignore b/.gitignore index 330fc34d1..8b6d0fa54 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ *.user *.userosscache *.sln.docstates -.conan/ my_*/ # User-specific files (MonoDevelop/Xamarin Studio) diff --git a/Directory.Build.props b/Directory.Build.props index 85f33d0fd..f800a05ef 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -1,34 +1,24 @@ - - - - + + + + - - MultiThreadedDebug + stdcpp20 + true + + false + true + true + Level4 + + 4251 + + + + + %(AdditionalOptions) /await:strict /utf-8 /Zc:__cplusplus /volatile:iso /fp:contract - - - %(IgnoreSpecificDefaultLibraries);libucrtd.lib - %(AdditionalOptions) /defaultlib:ucrtd.lib - - - - - - MultiThreaded - - - - %(IgnoreSpecificDefaultLibraries);libucrt.lib - %(AdditionalOptions) /defaultlib:ucrt.lib - diff --git a/FUNDING.md b/FUNDING.md index d58789234..86207bac4 100644 --- a/FUNDING.md +++ b/FUNDING.md @@ -1,3 +1,9 @@ 捐助可以让我更专注于开源项目的开发和维护。感谢你的支持! 
-![赞赏码](./img/赞赏码.png) +赞赏码 + +赞赏码 + +Or + +Buy Me A Coffee diff --git a/HybridCRT.props b/HybridCRT.props new file mode 100644 index 000000000..1549406a2 --- /dev/null +++ b/HybridCRT.props @@ -0,0 +1,27 @@ + + + + + + + + + MultiThreadedDebug + + + + %(IgnoreSpecificDefaultLibraries);libucrtd.lib + %(AdditionalOptions) /defaultlib:ucrtd.lib + + + + + + MultiThreaded + + + %(IgnoreSpecificDefaultLibraries);libucrt.lib + %(AdditionalOptions) /defaultlib:ucrt.lib + + + diff --git a/Magpie.sln b/Magpie.sln index ea5543c2f..e07e2db8e 100644 --- a/Magpie.sln +++ b/Magpie.sln @@ -18,8 +18,8 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{00AB63C3-0CD3-4944-B8E6-58C86138618D}" ProjectSection(SolutionItems) = preProject .editorconfig = .editorconfig - src\conanfile.py = src\conanfile.py Directory.Build.props = Directory.Build.props + HybridCRT.props = HybridCRT.props src\Solution.props = src\Solution.props EndProjectSection EndProject diff --git a/README.md b/README.md index 144f489f1..a98f7d329 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,11 @@ Thanks go to these wonderful people: Serdar Sağlam
Serdar Sağlam

🌍 Andrus Diaz German
Andrus Diaz German

🌍 Kefir2105
Kefir2105

🌍 + animeojisan
animeojisan

🌍 + MuscularPuky
MuscularPuky

🌍 + + + Zoommod
Zoommod

🌍 diff --git a/README_ZH.md b/README_ZH.md index 2f5f4d5b2..bc28485bd 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -104,6 +104,11 @@ Magpie 是一个轻量级的窗口缩放工具,内置了多种高效的缩放 Serdar Sağlam
Serdar Sağlam

🌍 Andrus Diaz German
Andrus Diaz German

🌍 Kefir2105
Kefir2105

🌍 + animeojisan
animeojisan

🌍 + MuscularPuky
MuscularPuky

🌍 + + + Zoommod
Zoommod

🌍 diff --git a/img/Main window.png b/img/Main window.png index 84b6b118e..7c4a997f8 100644 Binary files a/img/Main window.png and b/img/Main window.png differ diff --git "a/img/\344\270\273\347\252\227\345\217\243.png" "b/img/\344\270\273\347\252\227\345\217\243.png" index f97dcc2c0..a26c96306 100644 Binary files "a/img/\344\270\273\347\252\227\345\217\243.png" and "b/img/\344\270\273\347\252\227\345\217\243.png" differ diff --git a/src/CONAN_INSTALL/ConanInstall.bat b/src/CONAN_INSTALL/ConanInstall.bat index 2aad4c3f3..701fdf780 100644 --- a/src/CONAN_INSTALL/ConanInstall.bat +++ b/src/CONAN_INSTALL/ConanInstall.bat @@ -9,14 +9,22 @@ conan config set storage.path=%CD%\..\..\.conan\data IF %1 == Debug ( IF %2 == x64 ( - conan install ..\conanfile.py --install-folder ..\..\.conan\x64\Debug --build=outdated -s build_type=Debug -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MTd --update + conan install ..\Magpie\conanfile.txt --install-folder ..\..\.conan\x64\Debug\Magpie --build=outdated -s build_type=Debug -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MTd --update + conan install ..\Magpie.Core\conanfile.txt --install-folder ..\..\.conan\x64\Debug\Magpie.Core --build=outdated -s build_type=Debug -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MTd --update + conan install ..\Magpie.App\conanfile.txt --install-folder ..\..\.conan\x64\Debug\Magpie.App --build=outdated -s build_type=Debug -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MTd --update ) ELSE ( - conan install ..\conanfile.py --install-folder ..\..\.conan\ARM64\Debug --build=outdated -s build_type=Debug -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MTd --update + conan install ..\Magpie\conanfile.txt --install-folder ..\..\.conan\ARM64\Debug\Magpie --build=outdated -s build_type=Debug -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MTd --update + conan install ..\Magpie.Core\conanfile.txt --install-folder ..\..\.conan\ARM64\Debug\Magpie.Core 
--build=outdated -s build_type=Debug -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MTd --update + conan install ..\Magpie.App\conanfile.txt --install-folder ..\..\.conan\ARM64\Debug\Magpie.App --build=outdated -s build_type=Debug -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MTd --update ) ) ELSE ( IF %2 == x64 ( - conan install ..\conanfile.py --install-folder ..\..\.conan\x64\Release --build=outdated -s build_type=Release -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MT --update + conan install ..\Magpie\conanfile.txt --install-folder ..\..\.conan\x64\Release\Magpie --build=outdated -s build_type=Release -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MT --update + conan install ..\Magpie.Core\conanfile.txt --install-folder ..\..\.conan\x64\Release\Magpie.Core --build=outdated -s build_type=Release -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MT --update + conan install ..\Magpie.App\conanfile.txt --install-folder ..\..\.conan\x64\Release\Magpie.App --build=outdated -s build_type=Release -s arch=x86_64 -s compiler.version=17 -s compiler.runtime=MT --update ) ELSE ( - conan install ..\conanfile.py --install-folder ..\..\.conan\ARM64\Release --build=outdated -s build_type=Release -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MT --update + conan install ..\Magpie\conanfile.txt --install-folder ..\..\.conan\ARM64\Release\Magpie --build=outdated -s build_type=Release -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MT --update + conan install ..\Magpie.Core\conanfile.txt --install-folder ..\..\.conan\ARM64\Release\Magpie.Core --build=outdated -s build_type=Release -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MT --update + conan install ..\Magpie.App\conanfile.txt --install-folder ..\..\.conan\ARM64\Release\Magpie.App --build=outdated -s build_type=Release -s arch=armv8 -s compiler.version=17 -s compiler.runtime=MT --update ) ) diff --git 
a/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl new file mode 100644 index 000000000..88982a215 --- /dev/null +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl @@ -0,0 +1,2173 @@ +// Anime4K_Restore_CNN_Soft_UL +// Ported from https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_Soft_UL.glsl + +//!MAGPIE EFFECT +//!VERSION 3 +//!OUTPUT_WIDTH INPUT_WIDTH +//!OUTPUT_HEIGHT INPUT_HEIGHT +//!SORT_NAME Anime4K_Restore_Soft_3 + + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex7; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex8; + +//!PASS 1 +//!DESC Conv-4x3x3x3 +//!IN INPUT +//!OUT tex1, tex2, tex3 +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 + +void Pass1(uint2 blockStart, uint3 threadId) { + uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + + uint i, j; + + float3 src[4][4]; + [unroll] + for (i = 0; i <= 2; i += 2) { + [unroll] + for (j = 0; j <= 2; j += 2) { + float2 tpos = (gxy + uint2(i, j)) * 
inputPt; + const float4 sr = INPUT.GatherRed(sam, tpos); + const float4 sg = INPUT.GatherGreen(sam, tpos); + const float4 sb = INPUT.GatherBlue(sam, tpos); + + // w z + // x y + src[i][j] = float3(sr.w, sg.w, sb.w); + src[i][j + 1] = float3(sr.x, sg.x, sb.x); + src[i + 1][j] = float3(sr.z, sg.z, sb.z); + src[i + 1][j + 1] = float3(sr.y, sg.y, sb.y); + } + } + + [unroll] + for (i = 1; i <= 2; ++i) { + [unroll] + for (j = 1; j <= 2; ++j) { + uint2 destPos = gxy + uint2(i - 1, j - 1); + + if (i != 1 || j != 1) { + if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) { + continue; + } + } + + float4 target1 = mul(src[i - 1][j - 1], float3x4(-0.23234928, -0.070085905, 0.0040122913, 0.21575761, -0.25936925, -0.20185155, 0.022299573, 0.2812235, -0.11045535, -0.11106335, -0.12113332, -0.49919847)); + target1 += mul(src[i - 1][j], float3x4(-0.48585954, -0.058959674, 0.11114158, -0.1971666, -0.24872562, 0.2667282, -0.107163996, 0.12475151, -0.027792914, -0.06700173, -0.10966316, 0.09399147)); + target1 += mul(src[i - 1][j + 1], float3x4(-0.16666615, -0.15644506, 0.048309084, 0.19122206, -0.1522582, 0.15417537, -0.23017146, 0.09460856, 0.074704535, 0.2168164, 0.2077189, -0.29264635)); + target1 += mul(src[i][j - 1], float3x4(0.3167284, -0.20522436, -0.050071932, -0.036290437, 0.20206359, 0.012589764, -0.1251284, -0.2911492, -0.0006390347, -0.09853893, 0.14406726, 0.33612582)); + target1 += mul(src[i][j], float3x4(0.13786903, 0.51342535, -0.44004235, -0.23918492, 0.5614157, 0.011565876, 0.5419984, -0.15937872, -0.075360805, 0.018496322, 0.12582661, 0.40117717)); + target1 += mul(src[i][j + 1], float3x4(0.19644158, 0.12697817, 0.15092115, 0.1963961, -0.03395398, -0.17465135, -0.04086773, 0.09187623, 0.18238129, -0.0063141263, -0.26402372, -0.28761682)); + target1 += mul(src[i + 1][j - 1], float3x4(-0.010849395, 0.15082607, 0.095264904, -0.038952388, -0.1121466, 0.21590506, 0.029462064, -0.65400773, 0.18295552, 0.2425088, 0.121624336, 0.7189011)); + target1 += mul(src[i + 
1][j], float3x4(0.17197245, -0.04397748, 0.18232836, -0.04471754, 0.071163684, -0.20590816, 0.39706057, -0.5452873, -0.11754515, 0.006909551, 0.018450081, 0.5686299)); + target1 += mul(src[i + 1][j + 1], float3x4(0.077441245, -0.25645187, -0.19979256, -0.010363122, -0.04312338, -0.08810754, -0.059999906, 0.38630447, -0.11017497, -0.16309647, 0.026156282, -0.35432625)); + target1 += float4(-0.03509807, 0.029998481, -0.08691994, -0.017055636); + + float4 target2 = mul(src[i - 1][j - 1], float3x4(-0.34123975, 0.06927292, 0.12252625, 0.1038146, 0.15979475, 0.24436772, -0.016088272, -0.22664197, 0.16932374, 0.10719134, -0.16895153, 0.100098394)); + target2 += mul(src[i - 1][j], float3x4(0.11094869, -0.1379463, -0.53625333, -0.42690855, 0.12101115, -0.004709155, 0.6293494, 0.4763549, 0.030926082, -0.20099613, 0.39174548, 0.31219363)); + target2 += mul(src[i - 1][j + 1], float3x4(0.08731028, -0.010540878, 0.0757335, -0.1466203, -0.23115048, -0.17813745, 0.17698573, 0.18787299, 0.16219892, 0.10475756, -0.23984352, 0.025724094)); + target2 += mul(src[i][j - 1], float3x4(0.27665043, 0.4118298, -0.08762915, -0.07885308, 0.05053698, 0.28148478, -0.005842398, 0.15139125, -0.3791668, 0.24871133, 0.18160823, -0.10384939)); + target2 += mul(src[i][j], float3x4(-0.3206045, -0.22038852, -0.3038138, -0.0482595, -0.26852164, -0.23278148, 0.30639926, 0.2578657, -0.3874695, 0.06441954, 0.00026220892, 0.04361178)); + target2 += mul(src[i][j + 1], float3x4(-0.17908047, -0.0900835, 0.00652168, -0.038639892, 0.1520494, -0.13204975, -0.020355739, 0.26766944, 0.021308672, -0.31918222, 0.050667368, 0.10367864)); + target2 += mul(src[i + 1][j - 1], float3x4(-0.112388864, 0.053321466, 0.2691917, 0.26902813, 0.010105532, 0.24898581, -0.13757521, -0.10214595, 0.23615286, -0.09560994, -0.15046176, -0.08853913)); + target2 += mul(src[i + 1][j], float3x4(-0.36796987, -0.2124952, -0.07535088, 0.13065732, -0.21852261, 0.06934692, -0.013749303, -0.44900006, 0.3352218, 0.090437174, 0.08993535, 
-0.3050165)); + target2 += mul(src[i + 1][j + 1], float3x4(0.11873657, 0.13483031, 0.22352207, 0.23666611, 0.18977334, -0.32066482, -0.31396368, -0.5615055, -0.14588253, 0.0121516865, 0.0614425, -0.079611346)); + target2 += float4(0.6537504, 0.07195351, -0.38729003, -0.0374416); + + float4 target3 = mul(src[i - 1][j - 1], float3x4(0.16112354, 0.3756035, 0.09619928, 0.17283864, 0.054338567, -0.061197184, -0.10173672, -0.032733057, -0.111913994, -0.28940153, -0.062114924, 0.20520677)); + target3 += mul(src[i - 1][j], float3x4(0.3500745, 0.467141, -0.101748556, 0.43384346, 0.06712478, -0.43235737, 0.014446082, -0.12634972, -0.07507498, 0.025314584, 0.22664048, 0.22121347)); + target3 += mul(src[i - 1][j + 1], float3x4(-0.089320965, 0.319314, -0.06869195, -0.2465581, 0.449762, -0.38919032, 0.1562217, 0.05368933, 0.20758076, 0.0659555, -0.109858744, -0.114917934)); + target3 += mul(src[i][j - 1], float3x4(-0.07451217, 0.2239877, -0.009071173, 0.21869898, 0.042301223, 0.13635477, -0.20052543, 0.26130545, -0.051627826, -0.3429969, 0.093028575, -0.35710186)); + target3 += mul(src[i][j], float3x4(-0.16129561, -0.31247056, -0.123016216, 0.2122524, -0.2972285, 0.2718142, -0.17284301, 0.44368207, -0.032497104, 0.18240568, -0.28283152, -0.10045272)); + target3 += mul(src[i][j + 1], float3x4(0.15945031, -0.6797371, 0.3974546, 0.24741851, -0.1340806, 0.41666976, 0.27850744, -0.21406768, 0.096567124, 0.23366652, 0.15648519, -0.07626781)); + target3 += mul(src[i + 1][j - 1], float3x4(-0.053246673, 0.14282355, -0.114118166, -0.3172004, -0.18055372, -0.3400759, -0.19622837, 0.076828666, 0.29225305, 0.14866155, 0.07959014, -0.041400358)); + target3 += mul(src[i + 1][j], float3x4(-0.25331625, -0.14193451, 0.04879846, -0.077393495, 0.0104558095, 0.37905747, -0.07880302, -0.09453499, -0.1426901, -0.19738746, -0.28036812, 0.03675319)); + target3 += mul(src[i + 1][j + 1], float3x4(-0.08954212, -0.47161737, -0.12388452, -0.08005436, 0.04682568, 0.048485547, 0.31411946, -0.31375095, 
-0.22892538, 0.16906887, 0.16802602, 0.18711087)); + target3 += float4(-0.04453386, 0.06632044, 0.061607827, -0.19856223); + + tex1[destPos] = target1; + tex2[destPos] = target2; + tex3[destPos] = target3; + } + } +} + +//!PASS 2 +//!DESC Conv-4x3x3x24 +//!IN tex1, tex2, tex3 +//!OUT tex4, tex5, tex6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass2(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, 
-inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(-0.09962672, -0.09808486, 0.14167309, 0.050132442, 0.10861549, -0.03472704, -0.13705672, -0.029933043, 0.09841877, 0.07278074, 
-0.017292077, -0.027848938, 0.07552298, 0.076578915, -0.023463586, 0.052939452)); + target1 += mul(b1, float4x4(0.0010984733, -0.17330085, -0.08229318, -0.2175911, 0.08144593, 0.059445348, -0.15086831, -0.10372944, 0.117648594, -0.12558225, -0.11103407, -0.0701386, -0.05065664, -0.07396901, -0.11938091, 0.039866682)); + target1 += mul(c1, float4x4(0.15428792, 0.23440446, 0.21962269, 0.2650896, -0.03476033, 0.15719226, -0.12486064, 0.2167058, -0.023046771, -0.20562397, 0.10107006, -0.01569021, 0.16730824, -0.01259593, 0.053364236, -0.04500823)); + target1 += mul(d1, float4x4(0.030429626, -0.13110232, 0.057990804, 0.011675255, -0.05295247, -0.15326303, 0.22707884, -0.07973966, 0.0439027, -0.13198115, 0.07837125, -0.07131822, 0.05269012, -0.2104038, 0.048907652, -0.020645073)); + target1 += mul(e1, float4x4(0.0031781355, -0.021097122, -0.26952672, -0.3207644, 0.08375256, -0.14136748, 0.18542029, 0.15215854, 0.091964215, 0.26967737, 0.0587766, -0.07700872, 0.16575423, 0.35469708, -0.0051588053, -0.0006740279)); + target1 += mul(f1, float4x4(-0.08884001, 0.14041351, 0.17474355, 0.4161406, 0.023943432, 0.003970282, 0.29985484, 0.10266973, -0.25273883, -0.14029191, 0.11345857, 0.31820163, -0.38953283, 0.2583901, 0.009964725, 0.058217626)); + target1 += mul(g1, float4x4(-0.2032424, -0.07082582, 0.1580928, -0.048965808, 0.2141858, -0.041104354, -0.034682848, -0.15914723, 0.04790725, -0.024282899, 0.07099358, 0.16498338, -0.112657525, -0.0616071, -0.008030092, -0.016227499)); + target1 += mul(h1, float4x4(0.061936792, 0.21455337, 0.48054412, -0.04828425, 0.010028972, 0.11099989, 0.095458575, -0.19660684, 0.0425463, 0.11828354, -0.124904655, -0.17428195, 0.011525431, -0.124187276, -0.04230918, -0.035113487)); + target1 += mul(i1, float4x4(0.31514692, 0.08103313, 0.11659174, 0.08965401, -0.1970772, 0.14856051, -0.1938787, -0.16033082, -0.18799798, 0.030507786, -0.16664562, -0.13656873, 0.17780142, -0.25997472, 0.026064966, 0.011898)); + target1 += mul(a2, 
float4x4(-0.057235047, -0.27046695, -0.010699785, 0.049249526, 0.047039963, -0.077151395, -0.14362605, 0.06164646, 0.114476524, -0.17911421, -0.08053587, 0.11165565, -0.09624257, 0.025738657, -0.103865884, -0.03431851)); + target1 += mul(b2, float4x4(0.14757289, -0.011688203, 0.13329901, -0.117047496, 0.0012821602, 0.1926134, -0.20751058, -0.07072285, 0.010413468, 0.056632243, 0.115734495, -0.02967846, -0.03047392, -0.21189988, -0.0011950757, -0.19957498)); + target1 += mul(c2, float4x4(-0.08746177, -0.12079578, 0.04276918, -0.005454131, 0.11490783, -0.12847133, -0.09437031, -0.30269814, -0.21966903, -0.19212759, 0.02010421, -0.041956432, 0.10274604, -0.29135153, -0.05896102, -0.23609753)); + target1 += mul(d2, float4x4(0.041169632, -0.1239918, 0.11654365, 0.12085256, -0.16491309, 0.16958053, 0.08106695, 0.017548209, 0.005812545, 0.18601535, -0.26115587, 0.06350569, -0.05120703, 0.288068, -0.10665016, 0.14517978)); + target1 += mul(e2, float4x4(-0.031448353, 0.10505269, -0.11342215, 0.066149935, -0.11060372, 0.023158634, 0.112362646, -0.12653005, 0.10593459, 0.16429284, 0.105653964, 0.057039205, 0.43216446, 0.40089405, -0.13454677, 0.10088736)); + target1 += mul(f2, float4x4(0.006024284, -0.085603446, -0.03500259, -0.12583484, 0.037410516, -0.162403, -0.16079305, -0.40704638, -0.02878602, -0.05373755, 0.22466864, -0.18264142, 0.006703932, -0.2611284, 0.12246666, -0.09028182)); + target1 += mul(g2, float4x4(-0.060709704, -0.10833455, 0.057897534, 0.13747421, 0.023012483, 0.037656587, 0.14315368, -0.016442677, 0.047911663, -0.0069572264, 0.044352237, 0.3486672, -0.21061146, 0.09642802, 0.05590367, -0.060553044)); + target1 += mul(h2, float4x4(0.04283378, 0.24103515, -0.13148557, 0.010205976, -0.043310534, 0.10729743, 0.038866118, 0.18446185, -0.01657694, 0.1901015, 0.07020068, 0.12353552, 0.038972974, 0.23214848, -0.25911716, -0.019023877)); + target1 += mul(i2, float4x4(0.12810664, 0.2588679, -0.01086673, -0.028045006, 0.19610372, -0.096308656, 0.0042522033, 
0.13961965, 0.11584688, 0.04171374, -0.22028726, -0.24815048, 0.18253902, -0.2803496, 0.04638075, 0.036636963)); + target1 += mul(a3, float4x4(-0.063880704, -0.1977201, -0.053342164, -0.066917926, -0.11009935, 0.17052847, 0.04694616, 0.07041865, 0.0104053635, 0.17147705, 0.14641339, 0.02914492, 0.02223927, -0.15581869, 0.0073570404, -0.092718706)); + target1 += mul(b3, float4x4(-0.11074706, 0.09035497, 0.041304804, -0.05657743, 0.02258131, 0.15751973, -0.08892718, 0.09498991, -0.062650494, 0.1528085, 0.08637203, 0.015458079, 0.080385685, 0.0014520894, -0.1777884, -0.022080136)); + target1 += mul(c3, float4x4(-0.12261772, 0.14604463, -0.30844545, -0.038277622, -0.03465457, -0.14419939, 0.08843366, -0.24528691, 0.08627054, 0.022934042, 0.065465, 0.08992177, 0.13908626, 0.29170883, 0.18499602, 0.44779378)); + target1 += mul(d3, float4x4(0.2403803, -0.034265775, 0.061548065, -0.2871231, 0.06244344, 0.55960923, 0.10674182, -0.099105835, 0.067223154, -0.016005594, -0.18609367, 0.068283536, 0.16862819, -0.35648894, 0.15355636, -0.21434662)); + target1 += mul(e3, float4x4(-0.1928663, -0.08712358, 0.010059887, 0.041675188, 0.028285503, 0.27573827, -0.13980475, 0.020420022, 0.08173396, -0.18047802, 0.14453442, 0.1705434, 0.032467145, -0.25624174, -0.091417946, -0.1830734)); + target1 += mul(f3, float4x4(-0.07378673, 0.0082734935, -0.0031403562, -0.09405621, -0.04572997, -0.47891915, 0.022257643, -0.18141934, -0.15467338, -0.080856316, 0.22424543, 0.1328784, -0.011105831, 0.012753231, -0.18666203, 0.29024994)); + target1 += mul(g3, float4x4(-0.014239724, 0.17424577, 0.04347437, -0.07241822, -0.0043192226, -0.15224636, -0.12850569, -0.07176244, -0.024936391, 0.1081912, -0.0634437, -0.17714879, 0.06807449, 0.036505345, 0.1765435, -0.06827722)); + target1 += mul(h3, float4x4(-0.10896065, -0.113828, -0.044186924, 0.083636716, 0.00946172, -0.096768014, 0.1477472, 0.28581375, 0.09928998, -0.03573682, -0.0877059, -0.07456346, -0.094931394, -0.29481927, 0.035076067, -0.030719504)); + 
target1 += mul(i3, float4x4(0.06879136, -0.0013524323, -0.015930668, 0.011338774, 0.27078402, -0.036486305, 0.07307458, -0.03654178, -0.1821915, -0.19957519, 0.047258675, -0.012780178, -0.23570615, 0.23241185, -0.049822707, -0.004932543)); + target1 += mul(na1, float4x4(0.059442203, 0.123758584, -0.0120902015, -0.035207815, -0.3852069, 0.02184997, 0.17941254, -0.060605425, -0.071601346, -0.07984123, -0.043631997, 0.050046816, 0.100848526, -0.1991431, 0.012486262, -0.12679099)); + target1 += mul(nb1, float4x4(-0.10241958, 0.14548102, 0.17390133, 0.11916023, -0.124270104, -0.016538827, 0.14511214, -0.11671281, -0.21087177, -0.06974753, 0.012906925, 0.13859452, -0.08547768, 0.1567956, -0.2022433, 0.038497575)); + target1 += mul(nc1, float4x4(0.07510719, -0.12558976, 0.27779973, 0.07905847, -0.005560809, -0.13164681, 0.0026637863, -0.42023313, 0.30791378, 0.0674288, 0.16762452, 0.03776929, 0.054378655, 0.12892224, 0.14568421, 0.057358757)); + target1 += mul(nd1, float4x4(-0.055342264, 0.17539698, -0.07691367, -0.016426053, -0.10654331, 0.12799862, 0.08000128, -0.026672266, -0.09276648, 0.08326771, -0.07549073, 0.09110558, 0.025476933, 0.23758717, -0.08576679, 0.05389538)); + target1 += mul(ne1, float4x4(0.13494995, -0.058528826, 0.0859778, 0.36369404, 0.20959967, 0.04463818, -0.10268673, -0.17128421, 0.12091434, -0.23517689, -0.006012021, -0.13097133, 0.07197561, -0.16344362, 0.10873641, 0.08921942)); + target1 += mul(nf1, float4x4(0.021762112, -0.003690478, 0.36574113, 0.008322902, 0.19321395, 0.04774496, -0.22579306, -0.19404013, 0.06938985, 0.15104407, -0.046889, -0.117904656, -0.14408903, -0.18670367, 0.16157444, -0.103656925)); + target1 += mul(ng1, float4x4(0.10242334, -0.055725146, -0.21333602, 0.010575543, -0.23961566, 0.0044566356, 0.39897293, 0.08584577, -0.23019423, 0.2032861, -0.18542935, -0.1764838, -0.13681203, -0.07769402, 0.03816189, 0.007777049)); + target1 += mul(nh1, float4x4(-0.028709, -0.16470426, -0.212036, 0.03143696, 0.27199176, -0.17678891, 
0.23327425, 0.12954381, -0.020772377, -0.17467533, 0.13100848, 0.2351719, 0.097517245, 0.050158583, -0.002071869, 0.04241593)); + target1 += mul(ni1, float4x4(0.07411962, -0.08748965, 0.07468962, -0.22070734, 0.40794817, -0.088459395, 0.32936516, -0.032707095, 0.37608266, 0.027920008, 0.07734025, 0.08530036, 0.10898109, 0.22703189, -0.20785971, -0.06495064)); + target1 += mul(na2, float4x4(0.29293463, 0.16721301, -0.12183638, -0.03948546, 0.01529436, 0.078094184, -0.025749328, -0.006153496, -0.094414495, 0.22237025, 0.028131692, -0.060007866, 0.034187492, -0.116286926, 0.06509088, -0.048549082)); + target1 += mul(nb2, float4x4(0.008423889, -0.3957667, -0.049811136, 0.14082848, 0.09263845, -0.16698493, -0.025629787, 0.015054379, 0.028197043, 0.068465285, -0.08725762, 0.036668878, -0.062005505, 0.0764588, -0.054699335, -0.003840703)); + target1 += mul(nc2, float4x4(0.043419074, -0.20948833, -0.14390363, -0.17659377, -0.065787576, 0.06486438, -0.19382884, 0.08338218, 0.13709012, 0.21116447, -0.24534407, 0.20671941, -0.13327736, 0.2553412, -0.03380571, 0.2106275)); + target1 += mul(nd2, float4x4(0.32056695, -0.28739846, -0.008697179, -0.3094155, -0.12655911, -0.22508456, 0.046275456, -0.13609526, -0.056746602, -0.13714787, 0.006273007, -0.15033242, 0.19861896, -0.19801322, 0.008556289, 0.053491425)); + target1 += mul(ne2, float4x4(0.018890936, -0.7917244, -0.014075563, -0.1700778, -0.039983913, 0.028458029, -0.1522347, -0.08251537, -0.013377933, -0.3029727, 0.1349085, -0.16240561, -0.20748827, -0.46068287, 0.00913134, 0.030452987)); + target1 += mul(nf2, float4x4(-0.05005734, -0.2148053, 0.032070015, 0.14438215, 0.31232053, 0.1401732, -0.26635718, 0.19424468, 0.07584618, 0.10555894, 0.01795741, 0.31067818, 0.054555204, 0.2563484, -0.14635237, -0.10759128)); + target1 += mul(ng2, float4x4(0.23083898, -0.32226348, 0.19888338, -0.38176686, 0.050134797, -0.0015203251, 0.112237535, 0.14811106, 0.2174096, -0.24344379, -0.13310412, -0.42385107, 0.050850198, -0.27200532, 
-0.052719057, 0.009228699)); + target1 += mul(nh2, float4x4(-0.053870313, -0.47212356, 0.085255414, -0.014404558, -0.06817252, -0.0973503, 0.1635136, -0.0033316084, -0.037195005, -0.48788953, 0.08273281, -0.097501226, 0.0600793, -0.21372889, 0.03384461, 0.017936382)); + target1 += mul(ni2, float4x4(-0.3313351, -0.45776972, 0.0009931794, 0.11343333, 0.033024788, 0.046712194, 0.04782013, 0.064249486, -0.22282073, 0.12655938, 0.19051406, 0.31040603, -0.07731221, 0.17658137, -0.103276245, -0.06792484)); + target1 += mul(na3, float4x4(0.14607549, 0.1872639, -0.093263544, 0.09774117, -0.11698756, -0.067545414, 0.0023156274, -0.18209848, 0.03853313, -0.2223309, 0.12031081, 0.042545635, -0.034479424, 0.124472, 0.06731187, 0.12712644)); + target1 += mul(nb3, float4x4(-0.07627082, -0.17452952, -0.33548403, -0.18450926, 0.18033943, -0.12326704, 0.019632008, 0.07248642, -0.16483006, -0.18913946, 0.19646043, 0.40187582, -0.13083674, 0.08671764, 0.15356278, 0.0077914116)); + target1 += mul(nc3, float4x4(-0.13629752, -0.13993968, 0.2731425, -0.041057866, -0.118738905, 0.21209033, -0.051054828, 0.31168184, -0.16392295, 0.010364939, 0.0857728, 0.024030814, -0.07311749, -0.24349305, -0.20305401, -0.43344042)); + target1 += mul(nd3, float4x4(0.14196202, -0.04678858, 0.0077786436, 0.072588876, 0.048406214, -0.812405, 0.08031392, -0.1540258, 0.11032359, -0.06004812, -0.32815942, 0.09877014, -0.16591738, 0.4435054, -0.20656855, 0.22537513)); + target1 += mul(ne3, float4x4(0.09432511, 0.19597436, -0.08628448, -0.21871169, -0.16537306, -0.32272846, 0.13009092, 0.010715842, 0.26118267, 0.22872354, 0.19176646, 0.107038476, 0.1611875, 0.08846044, 0.15163514, 0.008047941)); + target1 += mul(nf3, float4x4(-0.07396799, -0.03825365, 0.093083926, 0.051318448, 0.2838576, 0.5694332, -0.10403076, 0.19238624, 0.11968883, 0.11856581, -0.119746156, -0.082536116, 0.076429665, -0.02471431, 0.11962365, -0.17637646)); + target1 += mul(ng3, float4x4(0.07824961, -0.16634372, 0.027028812, -0.074860476, 
-0.14161688, 0.23795755, 0.02944209, 0.17723913, -0.30600172, -0.23468062, -0.12452985, -0.020646518, -0.0397737, 0.021050548, -0.17934813, 0.13230623)); + target1 += mul(nh3, float4x4(0.0424831, 0.106492884, -0.03483414, -0.017710585, 0.22700353, 0.20349082, -0.10986577, -0.3389828, -0.21730238, -0.00039746048, 0.07059067, 0.102562755, 0.30204043, 0.21475948, -0.0162173, -0.017118886)); + target1 += mul(ni3, float4x4(-0.22430925, -0.014225937, 0.094149694, -0.018336432, 0.17596604, 0.14860786, 0.05728594, 0.04178837, 0.1751472, 0.23511195, 0.020594316, 0.11539313, 0.12581828, -0.15684246, 0.02905791, -0.11784082)); + target1 += float4(-0.17880301, 0.20980668, -0.013683405, -0.015587634); + + float4 target2 = mul(a1, float4x4(-0.02899521, -0.05649066, -0.026947228, 0.048783254, -0.14916636, 0.24028979, 0.044600923, -0.045931537, -0.1705095, -0.27147427, 0.16703783, 0.058726057, 0.0032612043, 0.083603844, -0.25704128, 0.13329254)); + target2 += mul(b1, float4x4(-0.1979236, -0.01025661, -0.019716073, 0.108358726, 0.043820046, 0.1919281, -0.21771714, -0.1133059, -0.061171446, -0.0882054, -0.120655626, -0.11155759, -0.07786948, 0.011810883, 0.14344923, -0.26561305)); + target2 += mul(c1, float4x4(0.1894701, 0.0239954, -0.119104624, 0.0081936605, -0.090172075, 0.16750605, 0.07118662, -0.068179235, 0.11522273, 0.02271562, 0.09519474, -0.28372973, 0.0015472358, 0.026579062, 0.117233984, -0.4856576)); + target2 += mul(d1, float4x4(-0.14819643, -0.31534502, -0.13870765, -0.01054195, -0.19450842, 0.10115552, 0.15510698, 0.003614742, -0.07340832, -0.20358734, -0.12068221, 0.1708203, -0.04059514, 0.05221531, 0.1185381, 0.0068877796)); + target2 += mul(e1, float4x4(-0.2649358, 0.2787165, 0.026068278, 0.05054382, 0.042817205, -0.13016234, 0.0052052587, 0.0671692, 0.10290017, 0.06727616, -0.025898565, -0.03125075, 0.1502351, -0.17578806, -0.07915442, -0.20580369)); + target2 += mul(f1, float4x4(0.01980342, 0.07163837, -0.10456945, 0.06892928, -0.00022086082, -0.122014746, 
-0.11635255, -0.050526325, 0.11869723, 0.07118713, 0.10652823, -0.21519308, -0.048316743, -0.09710376, 0.006049279, -0.15725243)); + target2 += mul(g1, float4x4(0.17198269, -0.04094963, -0.16597614, -0.022672966, -0.021484226, -0.07138965, 0.067678355, 0.010858899, -0.13862544, 0.06384301, -0.03991444, 0.22539167, -0.005830964, -0.093598455, 0.10466667, 0.19629909)); + target2 += mul(h1, float4x4(0.040208396, -0.0077782017, 0.026934639, -0.08231454, 0.122154236, -0.20185019, -0.04921797, 0.113472804, 0.025262907, 0.30940935, -0.0067619407, 0.011076865, -0.037738938, 0.22040449, -0.091454595, 0.08720387)); + target2 += mul(i1, float4x4(-0.19777842, 0.15188776, -0.112971924, 0.06551624, 0.21511264, -0.12696353, 0.05734954, 0.038562097, 0.09721514, 0.12184754, 0.098125674, 0.093547106, 0.04148773, -0.007749207, -0.097304046, 0.11741999)); + target2 += mul(a2, float4x4(-0.05388486, -0.15493694, -0.11779907, -0.063636035, 0.1683663, -0.19863462, 0.079785384, 0.002344284, 0.07419801, -0.18906172, 0.042702213, -0.106039785, -0.11761329, -0.34240028, 0.20399906, 0.19486815)); + target2 += mul(b2, float4x4(-0.0214746, 0.024925156, 0.071194954, 0.06452649, -0.10890589, -0.08571906, 0.13291912, -0.0013396982, 0.01863436, -0.20824501, 0.054323934, -0.23967488, 0.07283552, -0.28291726, 0.23057762, 0.121263705)); + target2 += mul(c2, float4x4(0.05597139, 0.07066334, 0.06768875, 0.01599472, -0.00039986568, -0.0053987154, 0.040123407, -0.100022465, -0.013812261, 0.050008554, 0.18786328, 0.0004141, 0.09763033, -0.2487105, 0.11663139, 0.05165497)); + target2 += mul(d2, float4x4(0.17904039, -0.31834564, -0.0737966, -0.061444905, -0.2252082, 0.00895136, 0.11486605, -0.0037112157, -0.07636511, -0.3503888, -0.04990528, -0.030310752, 0.068686, -0.3136087, -0.004038447, 0.12475536)); + target2 += mul(e2, float4x4(0.011218902, 0.16498409, -0.19213067, -0.3376179, -0.40268928, 0.009434513, -0.10950616, 0.1186675, -0.11379568, 0.23032996, -0.26904815, 0.30311096, 0.017041026, 0.39546305, 
-0.2145057, 0.20220405)); + target2 += mul(f2, float4x4(-0.116564326, 0.16520524, 0.25099444, -0.044852093, 0.04109138, -0.104986876, 0.09234278, -0.077715285, 0.046688464, 0.4072821, 0.021245886, -0.054421954, -0.12993707, 0.13713494, -0.12306372, 0.0076773493)); + target2 += mul(g2, float4x4(0.0022927783, -0.16100088, 0.0092022745, 0.043600008, -0.012064794, 0.14346212, 0.056605842, 0.04922658, 0.21234164, -0.36939904, -0.35937238, -0.0076974165, -0.033846013, -0.197686, 0.045169946, 0.05321761)); + target2 += mul(h2, float4x4(-0.12022473, 0.027450195, -0.070633, 0.010465206, -0.23977374, 0.008031643, -0.07748358, -0.12202592, -0.21730833, 0.0059398045, 0.40769234, -1.1242622, -0.06625515, 0.3264613, -0.07954283, 0.09583801)); + target2 += mul(i2, float4x4(0.008234909, 0.18505827, -0.1273086, 0.23858553, -0.00791922, 0.0122221485, -0.11842601, -0.038017634, 0.03933724, 0.2956, -0.01691444, 0.17929354, 0.015529619, 0.19893076, -0.16288021, 0.05490817)); + target2 += mul(a3, float4x4(-0.011399029, 0.10798575, 0.046656217, 0.032565042, 0.0119628515, -0.0011125325, 0.31439918, 0.09300187, -0.010849873, -0.060744617, 0.18471423, 0.15607913, -0.045522973, 0.16699308, -0.0722109, -0.024475403)); + target2 += mul(b3, float4x4(-0.082331106, -0.07089719, 0.1347553, -0.19314262, 0.0032955715, -0.24533619, 0.013174161, 0.15500104, 0.029693194, 0.040375546, -0.0059178416, 0.1092399, -0.112020314, 0.1500148, -0.22925867, -0.019879973)); + target2 += mul(c3, float4x4(0.1417249, 0.11215587, -0.26791674, 0.14707097, 0.040649403, -0.016661948, 0.15412898, -0.080876425, 0.035228007, 0.047104783, 0.06574109, -0.029853644, 0.05876159, 0.22823593, -0.19034219, 0.03162234)); + target2 += mul(d3, float4x4(0.2600437, 0.044771086, 0.014325027, 0.163108, 0.060724687, 0.09108473, -0.20747156, 0.0039435104, 0.18791565, -0.11700223, -0.0055135386, -0.024981469, -0.19696075, 0.11015166, -0.004077458, 0.011203278)); + target2 += mul(e3, float4x4(-0.05348392, 0.11058947, -0.11913848, 0.06359096, 
-0.13427798, -0.096259184, -0.122564375, 0.16873421, -0.021777656, 0.026404127, -0.19412898, -0.04525696, -0.089521095, -0.04556723, -0.14436369, 0.030330338)); + target2 += mul(f3, float4x4(-0.077864684, -0.0033614987, -0.053482026, -0.15834975, -0.12657848, 0.16701786, 0.040268235, -0.14463072, 0.01926974, -0.15924485, -0.011547801, -0.18185836, 0.030286407, -0.29259017, -0.0077412864, 0.037985537)); + target2 += mul(g3, float4x4(0.07485037, 0.19659927, 0.020025307, 0.10442409, -0.19772562, 0.4431493, -0.06422206, -0.045304112, -0.094377324, -0.04861216, 0.0023215367, 0.16513753, -0.1303532, -0.068101294, 0.017007684, 0.097332835)); + target2 += mul(h3, float4x4(-0.004584652, -0.2661271, 0.0063034855, 0.041456066, 0.11529073, 0.19888161, -0.24943323, -0.054349367, -0.010328835, 0.22214927, -0.20700802, -0.05599532, 0.24972723, -0.08987443, 0.20708983, -0.13030328)); + target2 += mul(i3, float4x4(0.10159776, 0.047147173, 0.1411316, -0.18355304, 0.07658331, -0.037969157, -0.074841976, 0.09781788, 0.06575143, 0.03210521, 0.058850992, -0.19939986, 0.11218086, -0.10744168, 0.14622156, 0.12941957)); + target2 += mul(na1, float4x4(0.13577162, 0.10681536, 0.08791653, -0.060445737, -0.19715475, -0.13252279, -0.036850456, -0.009957216, 0.1860376, 0.3743373, -0.14414039, 0.044343796, -0.05038453, -0.034720805, 0.17924316, 0.012001023)); + target2 += mul(nb1, float4x4(0.007108988, -0.09904293, -0.112725444, 0.031813867, -0.044795662, -0.14910372, 0.1680855, 0.32826513, 0.13105088, 0.11438789, -0.08039976, -1.1030464, 0.020364072, 0.19394659, 0.016075639, -0.22101837)); + target2 += mul(nc1, float4x4(-0.100025505, -0.06350414, 0.06775572, -0.07832278, 0.093700364, -0.15951614, 0.36111444, -0.20566626, -0.1011544, -0.047608454, -0.07719231, -0.71597475, 0.0048773736, 0.012542508, -0.26781914, -0.3445289)); + target2 += mul(nd1, float4x4(-0.050355583, 0.3859359, 0.08021888, 0.0031537602, 0.18742213, -0.30617613, -0.27419865, 0.18862267, -0.0011417761, 0.19679208, 0.06357993, 
-0.11287149, 0.11817958, -0.040369175, -0.055818953, 0.114691235)); + target2 += mul(ne1, float4x4(-0.24919917, -0.1840669, -0.47709405, 0.020121656, -0.09533757, 0.23901173, -0.08210879, -0.22835779, 0.023564098, -0.1592999, 0.005221987, -0.54973453, -0.039800424, 0.19367874, -0.10306205, -0.21813862)); + target2 += mul(nf1, float4x4(0.13417694, -0.06470136, -0.07049462, -0.052072115, -0.017625665, 0.108188346, 0.13198936, 0.1975063, -0.22973076, -0.28760132, -0.12961891, -0.08713851, -0.028337657, -0.35775787, 0.33782268, -0.282777)); + target2 += mul(ng1, float4x4(-0.0796041, 0.16454107, -0.026372116, 0.0788071, 0.044841573, 0.15395795, -0.011288428, -0.03305742, 0.15754524, -0.0043833177, 0.12766863, -0.11310043, -0.023906957, 0.03451837, -0.083479226, 0.03029468)); + target2 += mul(nh1, float4x4(-0.38791308, -0.120497175, -0.39432266, -0.016802365, 0.031366616, 0.20532085, -0.032990657, -0.004515397, -0.1540265, -0.2327063, 0.088945866, 0.11997355, 0.02506493, -0.11495644, 0.0847286, 0.0048163645)); + target2 += mul(ni1, float4x4(-0.03319572, -0.26717946, -0.13605991, -0.10878451, 0.19831704, 0.04036457, -0.056414742, 0.15083815, -0.1640081, -0.25487527, -0.096472785, 0.05001906, -0.01256949, 0.07090488, -0.0888089, 0.24414414)); + target2 += mul(na2, float4x4(-0.10947188, 0.07678741, -0.03716733, 0.10074092, -0.09684673, 0.19135101, 0.06687582, -0.03416071, -0.02605864, 0.18258773, 0.029176971, 0.14626507, 0.16892125, 0.26836056, -0.16163802, 0.0044406173)); + target2 += mul(nb2, float4x4(0.07490834, -0.16595219, 0.06855593, -0.31601232, 0.2051958, 0.12370633, 0.053092375, -0.09280303, -0.041799355, -0.02180234, -0.0647632, 0.12765023, -0.02619668, 0.35134858, 0.025718898, -0.03524767)); + target2 += mul(nc2, float4x4(0.051487356, -0.10184706, -0.058444723, 0.23035292, -0.03384644, -0.02926101, 0.24579355, 0.11463481, 0.00077921426, 0.0036189032, -0.04137187, 0.039233316, -0.11595721, 0.012141703, -0.19241674, 0.18287377)); + target2 += mul(nd2, 
float4x4(-0.0331477, 0.11774921, 0.1500689, 0.21751022, -0.0391579, -0.026443282, -0.23405433, 0.10924835, 0.010694821, 0.46834385, -0.06344277, -0.027459502, 0.02805852, 0.063863516, -0.052119188, -0.010459627)); + target2 += mul(ne2, float4x4(-0.19480526, -0.08907801, 0.13466452, -0.58980346, -0.18432151, 0.0025959515, -0.093561575, 0.21850146, -0.025087524, -0.112469815, 0.06425045, -0.017907271, 0.06015287, 0.23375069, 0.046780836, -0.124416254)); + target2 += mul(nf2, float4x4(0.20069234, 0.073735476, -0.20799713, 0.11896709, -0.08604335, -0.030489137, -0.19158117, 0.07545736, 0.1417471, -0.2885722, -0.04138416, 0.16751918, -0.039241627, -0.29653955, 0.06402645, 0.08477943)); + target2 += mul(ng2, float4x4(0.13838394, 0.17045505, 0.18386857, -0.06769848, 0.019191446, -0.10590481, 0.14499927, 0.005293376, -0.022189254, 0.45613396, 0.31436417, -0.23309496, 0.085356414, -0.12509619, -0.32398435, -0.06535322)); + target2 += mul(nh2, float4x4(0.20557566, 0.23378044, 0.16096559, 0.3109223, -0.13988405, -0.056287576, 0.15235564, 0.14485452, 0.025657065, -0.19962808, 0.12487959, -0.53206867, 0.17598459, 0.0012244214, -0.09263318, -0.048799008)); + target2 += mul(ni2, float4x4(0.031034216, -0.43335876, 0.15115865, -0.22912477, 0.039661117, -0.066167325, -0.0039048253, 0.108036794, -0.07157209, -0.42531285, -0.22807248, -0.070778824, -0.1216781, -0.20621637, 0.09195537, -0.0026917474)); + target2 += mul(na3, float4x4(0.11940706, -0.13485508, 0.026604721, -0.100989655, -0.14618637, 0.45079112, -0.111106694, 0.23393573, 0.21399105, 0.049563177, -0.10910516, -0.21594371, 0.030558927, -0.17320083, 0.012688248, 0.02913788)); + target2 += mul(nb3, float4x4(0.052507173, 0.13555464, 0.15568505, -0.13439007, 0.15468787, 0.20109199, 0.09981344, -0.022377115, 0.16711195, 0.1921043, -0.0457788, 0.11962697, 0.12201352, -0.15822104, 0.14560209, 0.11319004)); + target2 += mul(nc3, float4x4(-0.10677749, -0.037526496, 0.05529873, 0.0014219015, -0.07003492, 0.11616926, -0.2047762, 
0.053331498, -0.029710975, 0.099788256, 0.016773999, -0.05440333, -0.07308938, -0.1613098, 0.11157061, -0.06163726)); + target2 += mul(nd3, float4x4(0.084668584, -0.024195379, 0.10567495, 0.018839711, 0.20675091, 0.064051956, 0.16356891, 0.0763972, 0.04519446, 0.04648411, -0.26651385, -0.32033405, 0.19019292, -0.03760131, 0.057477303, 0.039011493)); + target2 += mul(ne3, float4x4(0.10785335, -0.005846821, 0.106043994, -0.029447608, -0.17944743, -0.055760577, -0.061553795, -0.0897441, 0.30305168, -0.07138199, -0.038286258, 0.31980807, 0.08745091, 0.08931471, 0.19994807, -0.19448686)); + target2 += mul(nf3, float4x4(0.05872038, 0.019705178, -0.057756446, 0.032349724, 0.1162347, -0.1494079, 0.04883473, 0.06775521, 0.06246929, 0.18094592, 0.019297523, 0.22078563, -0.10864955, 0.024548724, -0.09518366, -0.049131762)); + target2 += mul(ng3, float4x4(0.083531916, -0.22589867, 0.15678734, -0.15247858, 0.037808564, -0.3915128, 0.023039397, -0.11101649, -0.024950527, 0.15221989, 0.02177459, -0.0052792793, -0.006660954, 0.103587925, -0.069532864, -0.036814045)); + target2 += mul(nh3, float4x4(0.042244066, 0.08479697, -0.057882927, 0.036821585, -0.12734346, -0.30277002, 0.17587237, 0.08462706, 0.03041879, -0.07751665, 0.41255432, -0.15170433, -0.094225794, 0.09409663, -0.03903985, -0.17728558)); + target2 += mul(ni3, float4x4(-0.07648597, -0.021105368, -0.13569473, 0.11226781, 0.0024825619, 0.10949022, -0.033650707, -0.01084071, -0.27865705, -0.050442215, -0.026282378, 0.07449441, -0.033618845, 0.20228988, 0.10323669, -0.2785842)); + target2 += float4(0.07964101, -0.050712653, 0.11978818, 0.122745104); + + float4 target3 = mul(a1, float4x4(-0.043845546, -0.051818844, 0.15098971, -0.029770624, 0.105532385, -0.017158495, 0.007995025, 0.01310204, 0.046253394, 0.054963812, -0.07156648, -0.026536593, -0.034585387, -0.03867656, -0.026378985, -0.0503513)); + target3 += mul(b1, float4x4(0.19067752, 0.077902906, 0.07043644, 0.093124524, -0.088099405, 0.05687826, 0.07339772, 0.25220734, 
-0.024105951, 0.047068372, -0.15396254, 0.0024811088, 0.05398644, 0.114431, 0.104937814, -0.084533244)); + target3 += mul(c1, float4x4(-0.06216834, -0.09104735, 0.030077647, -0.109212935, -0.03391817, 0.14209917, 0.06807519, 0.086794056, 0.13323791, -0.16663639, 0.18892457, 0.18872325, 0.098952405, -0.009112314, 0.16958214, 0.14279945)); + target3 += mul(d1, float4x4(-0.07209618, 0.10666213, 0.10406824, -0.10080884, -0.061229795, -0.070260175, 0.0544128, -0.16189453, -0.07493434, 0.25146472, -0.10089679, -0.16500695, -0.05206539, -0.10650778, 0.08510133, -0.12274426)); + target3 += mul(e1, float4x4(0.06154247, -0.2779647, -0.39013094, 0.19112335, 0.21914953, 0.174526, -0.2582261, 0.028989773, 0.12516306, 0.024158027, -0.06397669, -0.027443565, 0.01338054, 0.11226658, -0.18691953, 0.03941122)); + target3 += mul(f1, float4x4(0.06017567, 0.064941615, -0.16408192, 0.14018805, -0.022913788, -0.005578652, 0.056423694, -0.12357743, 0.053335212, -0.10533416, 0.0336598, 0.13383694, 0.13861552, 0.13800743, 0.048778858, 0.20749462)); + target3 += mul(g1, float4x4(-0.19730464, -0.07471736, -0.08532417, 0.22114716, -0.0655994, 0.014833043, 0.069433905, 0.0126395365, -0.115397535, 0.16183057, -0.0020461925, -0.08379374, 0.066027366, 0.046908997, -0.04298647, -0.039427415)); + target3 += mul(h1, float4x4(-0.40682083, -0.051349834, -0.058064308, -0.59165514, 0.07769667, -0.0061552664, 0.09866719, 0.09064238, -0.10091702, -0.08237763, -0.0896345, -0.06889367, 0.029332574, -0.067278475, 0.032268777, 0.08217916)); + target3 += mul(i1, float4x4(-0.16198236, 0.14663215, -0.19844484, 0.12605388, 0.11090156, 0.017791988, -0.058779463, 0.041371945, -0.22293547, -0.015482557, 0.2293464, 0.094193965, -0.26855227, -0.21347573, -0.09075141, -0.13876276)); + target3 += mul(a2, float4x4(-0.06498589, 0.100892216, -0.13253035, 0.15685925, 0.16823533, 0.16493973, 0.07777784, -0.07706127, 0.050116807, -0.01523585, -0.01661001, 0.020355182, 0.103539385, -0.17436443, 0.15487072, -0.037921157)); + 
target3 += mul(b2, float4x4(0.03836789, -0.15199225, 0.11784638, -0.04646745, 0.044564333, 0.22369106, 0.14419034, -0.010723647, -0.0027908115, -0.15769437, 0.14674728, 0.21630915, -0.15577918, 0.083906, -0.076731354, 0.09644861)); + target3 += mul(c2, float4x4(0.057972, -0.085704334, -0.044770416, -0.08455327, -0.096369885, 0.17715664, 0.0931527, 0.08611585, 0.082069114, -0.21235153, 0.056143392, -0.09681458, -0.15192977, -0.11773526, 0.085406475, -0.054963436)); + target3 += mul(d2, float4x4(0.04377759, 0.14948493, 0.14370604, 0.121995315, -0.034767535, -0.19136979, 0.20502615, -0.19230005, -0.010331832, 0.24712276, 0.08443175, -0.02108672, -0.05402554, -0.073491514, -0.01772348, -0.04717817)); + target3 += mul(e2, float4x4(-0.0859936, 0.12129631, 0.4917177, 0.014785702, -0.017697783, 0.20519169, 0.193045, -0.32276052, 0.052729923, 0.20259547, -0.23248449, 0.027868863, 0.06924204, -0.0680142, -0.1510381, -0.0858641)); + target3 += mul(f2, float4x4(0.0042993063, -0.001888591, -0.19050622, -0.1974649, 0.0028959673, -0.056935344, -0.15306468, -0.037034288, -0.005013645, -0.49978206, -0.2860419, -0.24230668, -0.21625051, 0.124884024, -0.018598353, 0.018011522)); + target3 += mul(g2, float4x4(-0.00059534056, 0.09819056, -0.10073479, -0.0036862926, 0.018240096, -0.068672635, -0.040024363, -0.002400606, 0.12492032, 0.6830032, -0.103963815, -0.20350884, -0.0731358, 0.122847795, -0.04129241, 0.027231846)); + target3 += mul(h2, float4x4(0.14632931, 0.056954373, 0.10602974, 0.06899008, 0.028749242, 0.16360262, -0.2776957, -0.13795078, 0.2955775, 0.07387963, 0.18735075, 0.37977517, -0.0032196203, -0.0368105, 0.0007467509, -0.048191283)); + target3 += mul(i2, float4x4(-0.08740623, -0.14123341, -0.16725405, -0.035077587, 0.16800366, 0.10287269, -0.02063956, -0.02751512, -0.22882652, -0.19836405, -0.07881451, -0.036120445, -0.03052641, -0.19137034, 0.02006256, -0.0003630293)); + target3 += mul(a3, float4x4(0.21042292, 0.07082529, 0.10551431, -0.17735177, 0.1211633, -0.07301316, 
0.08914643, 0.027641036, 0.0716893, 0.009513582, 0.06489754, -0.11262447, -0.006487075, 0.042482372, 0.040942963, 0.026485842)); + target3 += mul(b3, float4x4(0.07677389, 0.017061912, 0.080698825, -0.02926673, 0.030129844, 0.08797221, -0.042393677, 0.040378265, 0.14051779, -0.01150974, -0.09838748, -0.084651664, 0.13157506, 0.15760668, 0.13706487, 0.017946318)); + target3 += mul(c3, float4x4(0.21381795, 0.108781934, 0.12417435, 0.04925163, 0.05298279, -0.1352583, 0.085234426, -0.03526282, -0.024876006, 0.0025064421, 0.07016869, 0.084552824, 0.064173326, -0.05621783, 0.0711457, -0.025467668)); + target3 += mul(d3, float4x4(-0.05810587, 0.0134641845, -0.038737856, 0.07663204, 0.121298246, -0.13257936, 0.004325269, -0.036193457, -0.29106387, -0.106322676, -0.23442906, 0.2862568, -0.18702938, 0.0030504123, -0.037212595, -0.2611213)); + target3 += mul(e3, float4x4(0.024120133, 0.07321953, 0.038489927, -0.04196367, -0.07796083, 0.33956012, -0.13922311, -0.05377065, -0.070829384, -0.10083194, 0.239536, -0.05901714, 0.26581895, -0.3095538, -0.2922295, 0.052582845)); + target3 += mul(f3, float4x4(0.02742305, -0.018496662, -0.094728574, 0.06404221, -0.041348618, -0.25715774, -0.1643205, 0.13505833, 0.043563902, -0.12633435, -0.101704225, -0.06851076, -0.10801949, -0.07229803, -0.042177804, 0.15722917)); + target3 += mul(g3, float4x4(-0.1890737, 0.086372465, 0.19611897, 0.11635388, -0.27176055, 0.113715895, -0.090014786, 0.028875142, -0.054593917, 0.030705186, 0.1435633, 0.061870232, -0.11143878, 0.09881344, 0.097813986, -0.21929547)); + target3 += mul(h3, float4x4(0.04700684, 0.042240005, -0.27370077, -0.10867852, -0.06256984, -0.08165931, 0.14414817, -0.046392858, 0.06402001, -0.18298607, -0.20697436, -0.035047896, 0.104348354, 0.21140936, 0.08119135, 0.11215284)); + target3 += mul(i3, float4x4(-0.15503405, -0.0058879172, 0.06903078, 0.10739542, -0.047215104, 0.05061763, -0.1265464, -0.13796777, 0.050830897, -0.06356833, 0.10470089, 0.061785046, -0.054734606, 0.069204785, 
0.22219127, 0.14431196)); + target3 += mul(na1, float4x4(0.0035822908, -0.041718304, -0.06449883, 0.107891634, 0.11240286, 0.2773934, 0.018296933, 0.17229447, -0.038918763, -0.015615794, 0.013606009, -0.15145436, -0.038385842, -0.075797774, 0.074630134, -0.115841195)); + target3 += mul(nb1, float4x4(-0.35196853, -0.055269916, -0.10619746, 0.036240876, 0.027898792, 0.16981332, -0.08743389, -0.11659183, 0.21521945, 0.14624144, 0.3709361, 0.35440952, 0.05083335, -0.027957644, -0.14189775, 0.041765563)); + target3 += mul(nc1, float4x4(-0.012040415, 0.03733818, 0.0028794291, 0.085560195, -0.003578092, -2.0037096e-05, 0.018441873, -0.048575614, 0.16403939, 0.26586646, -0.23535033, -0.195904, 0.09343384, 0.16844647, 0.090654954, 0.20447001)); + target3 += mul(nd1, float4x4(-0.039211, 0.023288574, -0.11278111, 0.24733941, 0.030935412, 0.028505033, -0.054287612, 0.1626191, -0.013604053, -0.40332177, -0.12607175, 0.062430628, 0.020255104, -0.034459837, -0.02045024, 0.13066867)); + target3 += mul(ne1, float4x4(-0.109611966, 0.036982346, 0.24648234, -0.10601368, -0.046704277, 0.09159354, -0.051051375, 0.27708438, -0.27565628, 0.3181145, 0.0352402, 0.11326822, 0.08464163, 0.0037447016, -0.11625815, -0.27881616)); + target3 += mul(nf1, float4x4(-0.17009212, -0.14643735, 0.05730069, -0.19120802, 0.06845526, 0.10674906, -0.28353846, -0.12647904, 0.015396511, 0.097950876, 0.009746547, 0.031028407, -0.05640266, -0.04813061, 0.1215167, 0.013483247)); + target3 += mul(ng1, float4x4(-0.015532973, 0.06836607, -0.15256128, 0.016466603, 0.22348233, 0.13754332, -0.032162182, 0.33556822, 0.17382346, -0.2763521, 0.060414087, 0.0027655934, 0.031628147, 0.08716705, 0.015910214, 0.0672223)); + target3 += mul(nh1, float4x4(0.4342632, -0.067446776, -0.36212516, 0.027729288, 0.18695018, -0.026150677, -0.048804305, 0.03894249, 0.08076834, -0.024184678, -0.039985072, 0.019538054, -0.12608467, -0.114978395, 0.08024422, -0.009467871)); + target3 += mul(ni1, float4x4(-0.12950122, -0.04900754, 
0.007479547, 0.005553716, -0.011067856, 0.15695909, 0.15179226, 0.13305564, 0.109665506, -0.071129486, -0.29301268, -0.19721518, -0.014072068, 0.110164836, -0.10445084, -0.07427861)); + target3 += mul(na2, float4x4(0.056494176, 0.10441701, 0.1473454, -0.10962488, -0.024387872, -0.10661404, 0.023665238, -0.014857965, -0.11904774, 0.028333792, -0.018734593, 0.041431252, -0.051380955, 0.08761405, 0.025005583, 0.27504325)); + target3 += mul(nb2, float4x4(0.12111209, 0.09115707, -0.12130387, 0.037170578, 0.17773823, 0.11543872, -0.0981619, -0.009393771, -0.072751574, 0.12490967, -0.050705448, -0.21641576, -0.0032860835, -0.017348124, -0.039524093, -0.22634275)); + target3 += mul(nc2, float4x4(-0.026149368, -0.0345828, 0.024678709, 0.073074006, 0.075326554, 0.07688483, -0.06151585, -0.0006315397, -0.11916223, 0.09640916, -0.03452899, 0.0711575, 0.10298667, 0.14983572, -0.029672628, 0.060187414)); + target3 += mul(nd2, float4x4(0.061185572, 0.025581252, 0.05371412, -0.30638546, 0.064506106, 0.22312112, -0.12822428, 0.050079864, 0.007665535, -0.270618, -0.1205649, 0.066014335, -0.10095298, 0.14537272, 0.07578119, -0.102102645)); + target3 += mul(ne2, float4x4(0.24163178, -0.14042771, -0.28968832, 0.32306322, -0.08210339, -0.089168124, -0.029958146, 0.23500884, -0.045208763, -0.076190665, -0.048189905, -0.062144633, -0.2209541, -0.118137404, -0.10013809, -0.2633339)); + target3 += mul(nf2, float4x4(-0.043336965, -0.14818442, 0.3353549, 0.37338758, -0.097953044, 0.08346902, 0.2809552, -0.15042788, 0.052860767, 0.3296333, 0.1520426, 0.013095576, 0.06748028, -0.18191148, 0.1262768, 0.1454165)); + target3 += mul(ng2, float4x4(0.020386793, -0.05559494, 0.0923228, -0.101281434, 0.07294861, -0.013454893, 0.14446425, -0.18820941, 0.03512501, -0.3100584, 0.07824563, 0.039452225, -0.31067702, -0.0059947846, -0.022850258, -0.03394584)); + target3 += mul(nh2, float4x4(-0.2551513, 0.07006202, 0.10514115, -0.07164224, -0.15870212, 0.058055036, 0.05213708, -0.14221531, 0.18606052, 
0.121992745, 0.005545236, 0.20166458, -0.51196563, 0.13145791, -0.07664502, -0.102140725)); + target3 += mul(ni2, float4x4(0.013922251, -0.055376403, 0.32802138, 0.13208407, 0.013657613, 0.10752059, 0.036252435, 0.1592283, 0.013641419, 0.09172557, -0.047022454, -0.06487285, -0.010537236, 0.043602772, -0.018355483, 0.061706495)); + target3 += mul(na3, float4x4(0.034295138, 0.0290897, -0.055937063, 0.030905105, -0.049568217, 0.23283507, -0.09925937, 0.06541922, -0.19225466, -0.37406424, -0.0044630794, 0.12548251, -0.003204782, -0.033718586, -0.12822233, -0.06512161)); + target3 += mul(nb3, float4x4(0.04231634, 0.033866994, -0.060438603, 0.053806484, -0.043768402, -0.09377961, 0.053774644, -0.05314562, 0.08742594, -0.3595988, 0.05714237, -0.026023258, -0.14470316, -0.17429292, -0.05919939, -0.05714775)); + target3 += mul(nc3, float4x4(-0.035541177, -0.15197758, -0.03248727, 0.055882126, 0.03910343, 0.14273937, -0.16545315, -0.019183658, 0.067014545, -0.010861471, -0.23015557, -0.3174752, -0.0895981, 0.05603517, -0.10421314, 0.03543782)); + target3 += mul(nd3, float4x4(0.052712325, 0.15568605, 0.13511989, -0.035405457, 0.09660214, 0.0010066679, 0.0041616405, 0.3261607, -0.07167953, -0.3432988, 0.37812582, 0.08591545, 0.17927478, -0.08654189, 0.076707125, 0.14279753)); + target3 += mul(ne3, float4x4(-0.056844193, 0.16529651, -0.06650483, -0.08292316, -0.02760633, -0.22888668, -0.19214903, -0.08840017, -0.23843671, -0.6793711, -0.33102167, 0.0064898706, -0.29774654, 0.37099698, 0.42785385, 0.025804019)); + target3 += mul(nf3, float4x4(-0.11744241, -0.057497155, 0.18884729, 0.024753813, -0.0062507484, 0.33419883, 0.120441675, -0.25218838, -0.042276263, 0.08504629, -0.033582047, 0.07008096, -0.058578875, 0.0392345, 0.11335631, -0.15865934)); + target3 += mul(ng3, float4x4(-0.04641351, -0.0370654, -0.08322972, -0.11589779, 0.09985797, -0.0747252, 0.0050210473, -0.0737313, 0.34289247, -0.08783692, 0.13673791, 0.05667411, 0.058139045, -0.17664829, -0.16574872, 0.020792067)); 
+ target3 += mul(nh3, float4x4(-0.17315285, 0.061304655, 0.23295666, 0.004587563, 0.025884068, -0.20429865, -0.17807725, 0.04610146, -0.16748384, 0.03548062, 0.36901402, 0.040421892, 0.0732819, -0.06323222, 0.17438933, 0.10541013)); + target3 += mul(ni3, float4x4(0.11953197, -0.041181084, -0.05777039, -0.0713763, -0.07250408, 0.00030710385, -0.12310962, 0.05047857, 0.07764678, 0.048569802, -0.07179031, -0.13407484, 0.18644087, -0.08796725, 0.09215986, 0.03264275)); + target3 += float4(0.08639024, -0.11024204, -0.0076959864, 0.053946566); + + tex4[gxy] = target1; + tex5[gxy] = target2; + tex6[gxy] = target3; +} + +//!PASS 3 +//!DESC Conv-4x3x3x24 +//!IN tex4, tex5, tex6 +//!OUT tex1, tex2, tex3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass3(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex4.SampleLevel(sam, pos, 0); + float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = 
max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex5.SampleLevel(sam, pos, 0); + float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex6.SampleLevel(sam, pos, 0); + float4 f3 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = 
max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.2369839, -0.0792359, -0.12919348, 0.002247716, 0.04581234, 0.119436085, -0.039395507, -0.035233624, -0.031238249, 0.068567455, 0.021003028, -0.07353918, -0.12103854, -0.21112324, -0.0063801156, -0.04487009)); + target1 += mul(b1, float4x4(0.0683294, 0.062320776, -0.024078269, 0.08904798, 0.026528858, -0.041699078, -0.07854327, -0.14078824, 0.060052495, -0.12898798, -0.010206991, -0.10815312, -0.07348112, -0.09190296, 0.16384035, 0.11615318)); + target1 += mul(c1, float4x4(0.073954284, 0.11315491, -0.08271167, 0.012718058, -0.079351336, -0.12847738, 0.16898601, 0.057100534, -0.007783043, -0.046511702, -0.031176837, 0.09832856, 0.04629018, -0.11481637, 0.27974957, -0.008512578)); + target1 += mul(d1, float4x4(-0.11174049, -0.06978879, -0.0026527392, 0.09206777, -0.052967362, 0.04242691, -0.028125865, -0.006913773, -0.105203055, 0.012300771, 0.073976465, 0.0597795, 0.12224533, -0.15938343, -0.04735274, -0.13670483)); + target1 += mul(e1, float4x4(0.069210574, -0.18154296, -0.179752, 0.030308926, 0.21821375, -0.17105243, 0.002948972, 0.1510472, -0.07507222, 0.05799302, 0.22358851, -0.1593742, -0.14097035, -0.14883585, -0.10766054, -0.04192339)); + target1 += mul(f1, float4x4(0.04092946, -0.056620143, -0.08841022, 0.0820261, 0.12114886, -0.046587184, -0.24642876, 0.20291825, -0.021399742, 0.075130075, 0.08025963, -0.0004831952, -0.20216052, 0.063063756, -0.14950794, -0.016591785)); + target1 += mul(g1, float4x4(0.001037612, -0.12479094, -0.064145386, 0.03701432, -0.09794906, -0.02047066, -0.0064438935, 0.054445606, 0.017312052, -0.010994496, -0.043534316, -0.03507283, -0.2881326, 0.056422662, 0.45392624, -0.14301568)); + target1 += mul(h1, float4x4(0.02659516, -0.12523884, -0.045878954, 
0.0401728, -0.026269691, 0.23919468, -0.05373766, 0.22576872, 0.15472023, -0.06473123, -0.16314703, -0.007313837, 0.06282956, -0.12448595, 0.32412103, -0.1669555)); + target1 += mul(i1, float4x4(0.10851828, -0.0019357264, -0.042929318, 0.087208286, 0.08521072, -0.015302626, -0.045136105, -0.07599174, -0.020620871, -0.08058013, 0.04687409, 0.07679515, 0.02748689, -0.04049585, 0.031744577, -0.08941878)); + target1 += mul(a2, float4x4(-0.053986546, 0.2688435, -0.057546657, -0.11350552, -0.081904754, -0.09276461, -0.13561548, -0.11588968, 0.04355686, -0.29325503, -0.018699612, -0.06769227, -0.015948739, 0.04491891, -0.046178948, 0.02711675)); + target1 += mul(b2, float4x4(-0.18972659, 0.27545497, -0.034131754, -0.09609413, 0.068409085, 0.13449967, -0.13105616, 0.028345212, -0.035266094, -0.065575354, -0.031779382, -0.14933869, 0.05228527, 0.09356076, -0.047118377, -0.020071832)); + target1 += mul(c2, float4x4(-0.17382587, 0.15029867, -0.00600536, -0.035180923, 0.025643297, 0.010418448, 0.14726849, -0.05890341, -0.053652834, 0.048409678, 0.2806725, -0.08192519, -0.06738357, 0.07469718, 0.06771393, -0.042583536)); + target1 += mul(d2, float4x4(0.011517158, 0.09972045, 0.06578792, -0.12352661, 0.05922438, -0.16663863, -0.006771989, -0.038835894, -0.02194692, -0.13857606, 0.023138417, -0.05360372, 0.014272163, 0.08904743, -0.04252727, 0.103002235)); + target1 += mul(e2, float4x4(-0.008667266, 0.19219917, -0.07475974, -0.2816411, -0.33488217, 0.039849013, 0.017313587, 0.08000436, 0.15055846, 0.015432909, 0.32798117, 0.009342251, -0.23739037, -0.28346112, -0.030122897, -0.18473577)); + target1 += mul(f2, float4x4(0.22614895, 0.13032585, -0.2176673, -0.3387019, 0.019557813, 0.17496689, 0.030887462, 0.17172079, -0.10533174, 0.0032622286, -0.13369057, -0.039323095, -0.0008841287, 0.121519946, 0.067216426, 0.03257707)); + target1 += mul(g2, float4x4(-0.0429636, 0.13093638, 0.12012435, -0.034646116, 0.0488735, 0.08784733, 0.03349143, -0.09357028, -0.06089799, 0.022837836, 
0.16202758, 0.096765295, 0.009665008, -0.10780318, -0.07340907, 0.018662468)); + target1 += mul(h2, float4x4(0.0484555, 0.05852715, -0.11502228, -0.2250242, 0.00487918, -0.018516708, -0.024522817, -0.09146677, 0.0006642944, 0.17241697, 0.38606182, -0.23263825, 0.110663734, 0.11034593, -0.0056327246, 0.051475164)); + target1 += mul(i2, float4x4(-0.014929107, 0.09463201, 0.009869103, -0.17499818, 0.028962199, 0.23815866, 0.060768303, 0.13828199, 0.12261715, 0.096965745, -0.024608571, -0.24542965, 0.025484774, -0.0014874635, -0.009807938, 0.0007101552)); + target1 += mul(a3, float4x4(-0.101674154, 0.032412667, -0.10450873, -0.00022480187, 0.024635756, -0.1357198, -0.05327909, -0.036563605, 0.07561588, 0.009124707, -0.13368087, 0.042969264, -0.043317486, -0.1518712, -0.008810181, 0.030755859)); + target1 += mul(b3, float4x4(0.1406038, 0.036187246, -0.06288465, 0.013666562, -0.22509198, 0.054938264, 0.03374708, 0.036942195, -0.054834712, 0.08038173, -0.012174669, -0.05048155, 0.04105839, -0.13010618, 0.029987235, 0.029830217)); + target1 += mul(c3, float4x4(0.13428736, -0.145587, -0.09359362, 0.08647307, -0.1721466, 0.14161868, 0.06169795, -0.020108147, -0.082708314, -0.0009893128, 0.061197698, 0.015552345, 0.19280085, 0.045152925, -0.13817257, 0.08140578)); + target1 += mul(d3, float4x4(0.11750963, 0.0146443285, -0.026884248, -0.0006429066, -0.008400631, -0.043018907, -0.07913679, -0.14783737, -0.032443974, -0.08028971, -0.08927282, 0.00809941, 0.0124223465, 0.041715536, -0.06587267, 0.13605455)); + target1 += mul(e3, float4x4(0.29818505, 0.20918716, -0.13256323, 0.23988591, -0.38704476, -0.05851411, -0.004705456, 0.10221165, -0.08329328, 0.12643409, -0.23133238, 0.036488805, 0.21748522, -0.095220506, -0.012000105, -0.0032247186)); + target1 += mul(f3, float4x4(-0.119828835, -0.016386732, 0.06939514, 0.08491721, -0.017447483, -0.10812376, -0.015384033, -0.0137153845, -0.14978316, 0.032878425, 0.120704606, 0.07987688, 0.10143365, 0.16894275, -0.09816831, -0.029983638)); 
+ target1 += mul(g3, float4x4(0.004197231, -0.004475635, 0.02442438, -0.08062267, -0.13645843, -0.063362874, -0.13470308, -4.8972346e-05, 0.04937739, 0.025885701, -0.0626489, 0.06272147, 0.040682197, -0.037275683, -0.07711889, -0.03401893)); + target1 += mul(h3, float4x4(-0.07601782, -0.044119228, 0.12799697, -0.04923261, -0.07554412, -0.13866402, -0.039769165, 0.0750738, 0.028331043, 0.22329865, -0.078985184, 0.21741354, 0.08896384, 0.02745735, -0.11954973, -0.030984413)); + target1 += mul(i3, float4x4(0.088372685, -0.04094657, 0.030890986, 0.011887401, 0.101060346, 0.036795005, -0.02541599, 0.11929074, 0.0042294776, -0.09067195, -0.13775113, 0.051152255, -0.011856665, -0.01186073, -0.014405341, -0.06443953)); + target1 += mul(na1, float4x4(0.1990754, -0.13920973, -0.24694741, 0.20978624, 0.00096705626, -0.09906306, -0.031113537, 0.09064841, -0.005332781, 0.06942478, 0.027275847, 0.14482562, 0.10915609, 0.15485178, 0.09100627, 0.08800073)); + target1 += mul(nb1, float4x4(0.067276604, -0.15296488, -0.10655601, 0.1007172, 0.06399946, 0.11820019, -0.012255674, -0.04701397, 0.005157013, 0.14800015, -0.005829729, 0.058462787, -0.0034304103, -0.0022002284, 0.088455915, -0.09076621)); + target1 += mul(nc1, float4x4(-0.0045863236, -0.13443832, -0.02036122, 0.067712225, -0.09286585, 0.15505461, -0.03191861, 0.062198598, -0.014097363, -0.06486533, -0.013725968, 0.09863627, 0.004106804, -0.11001409, -0.1489799, 0.012900801)); + target1 += mul(nd1, float4x4(0.11722181, 0.024655748, 0.028080126, 0.034889475, -0.02211666, 0.10347594, 0.19828199, -0.052708372, -0.23978107, 0.11193546, 0.015817301, -0.060378563, 0.05506628, 0.017437497, 0.17592382, 0.1566574)); + target1 += mul(ne1, float4x4(0.011318326, -0.19983633, 0.0542877, -0.08868874, 0.059281945, -0.08321469, -0.45549735, 0.41699305, 0.18566287, -0.22530322, -0.08444872, -0.04485004, -0.13312897, 0.025137378, 0.4283649, -0.22263475)); + target1 += mul(nf1, float4x4(0.10148392, 0.12450337, -0.032773893, 0.03742288, 
0.0059106606, -0.17406113, -0.083701774, -0.010221676, 0.16314605, -0.22251254, -0.13263722, 0.09496533, -0.0020611945, 0.10998006, 0.23540293, 0.12287761)); + target1 += mul(ng1, float4x4(-0.01097223, 0.043488838, 0.028565591, 0.057649106, 0.04069052, -0.015125962, -0.033889383, -0.039301567, -0.28547964, 0.16771436, 0.064779356, 0.17768629, 0.0977948, -0.12978803, 0.1248975, 0.076509014)); + target1 += mul(nh1, float4x4(-0.014799843, -0.11454738, 0.0072981194, 0.06956252, -0.119126685, -0.054390237, 0.20148608, 0.055611208, -0.33772695, 0.02875631, 0.15688069, 0.07648471, 0.17330919, -0.10749096, -0.00058184325, -0.16302843)); + target1 += mul(ni1, float4x4(0.025022479, -0.0510169, -0.054967374, -0.18119891, 0.072380155, 0.13645615, -0.029061519, -0.09392558, 0.0020073708, 0.10373002, 0.08769151, 0.1467629, -0.032814845, -0.22622965, 0.062578805, 0.15869768)); + target1 += mul(na2, float4x4(0.08132352, -0.057824034, -0.049706902, -0.021799462, 0.027207939, 0.055137623, 0.13588108, -0.06595749, -0.10212913, 0.03328737, 0.07568671, 0.04425169, -0.056393128, 0.08096936, 0.049417946, -0.03110039)); + target1 += mul(nb2, float4x4(0.16936453, -0.03750322, -0.041140877, -0.08652042, -0.029363338, -0.07450129, 0.102560416, -0.23950958, -0.13059175, 0.21066219, 0.10126263, 0.043688625, 0.12293311, -0.02102107, -0.01415126, -0.08114574)); + target1 += mul(nc2, float4x4(0.13357115, 0.25130415, -0.008012242, -0.022129368, -0.04116201, 0.19364384, -0.0755634, -0.021590892, 0.014902855, -0.16364469, -0.15113516, 0.021274269, 0.002715793, -0.082595, -0.023225293, -0.0023291293)); + target1 += mul(nd2, float4x4(-0.047352426, 0.047768887, -0.027633572, -0.048747484, 0.002366812, 0.2123351, -0.03785716, -0.06169537, 0.05152527, -0.097918324, -0.09970387, -0.10696893, 0.14201112, 0.048251197, 0.020989964, -0.12759319)); + target1 += mul(ne2, float4x4(-0.11691897, 0.11003735, 0.1787839, 0.035897207, -0.068546794, 0.18663177, -0.11768889, 0.0046620993, -0.076647416, -0.008958245, 
0.055827506, -0.095377706, 0.051213227, -0.2821711, 0.013320494, 0.1563779)); + target1 += mul(nf2, float4x4(-0.08324576, 0.3131121, 0.21894962, 0.013974257, -0.05526049, 0.032233212, 0.05284564, -0.2475858, 0.13031252, -0.08124232, -0.010205146, -0.057937223, 0.11874465, -0.013862318, 0.0052336063, 0.04949605)); + target1 += mul(ng2, float4x4(0.14994349, -0.03296414, -0.23602034, -0.0033256228, 0.008873702, -0.010388283, -0.035780232, 0.011833461, 0.117081955, -0.038984414, 0.074017905, 0.033703547, -0.024258457, 0.09559132, 0.02495569, -0.040010694)); + target1 += mul(nh2, float4x4(-0.0048430585, 0.17926253, 0.008713498, 0.10879202, 0.019645652, 0.029483858, -0.047485687, -0.042396937, -0.029273199, -0.2432983, -0.1250007, -0.024952445, -0.060036886, 0.014986906, -0.014428253, 0.03334825)); + target1 += mul(ni2, float4x4(0.11731086, 0.20593153, -0.10197385, -0.011249018, -0.10738923, -0.074847564, -0.006172099, -0.18687822, -0.097578146, -0.07579803, 0.05764291, 0.10152833, -0.14840044, 0.035003513, 0.023365693, 0.04386252)); + target1 += mul(na3, float4x4(0.2394935, -0.151495, -0.004142306, -0.084381334, -0.06817076, 0.04995128, 0.07523575, -0.019087847, 0.04900443, 0.03855287, -0.047666, -0.010728584, -0.041862275, 0.0092430115, 0.18933049, 0.001247498)); + target1 += mul(nb3, float4x4(-0.26478, -0.108964734, 0.07654512, -0.18083075, 0.087697916, -0.1985272, 0.12003646, 0.088157, -0.11911801, 0.10562385, 0.08664133, 0.04456427, -0.105021, 0.18528733, 0.034151975, -0.15520982)); + target1 += mul(nc3, float4x4(0.017519012, -0.012286436, 0.10177459, 0.038459957, -0.22457904, -0.05511256, 0.15413229, 0.1507701, 0.08257404, 0.034750186, -0.15717988, -0.030795097, -0.07657355, -0.33403704, -0.0053621423, -0.06624692)); + target1 += mul(nd3, float4x4(0.10030682, -0.052044563, -0.049402863, 0.09053447, -0.13081445, 0.0141896, 0.042153686, -0.010219266, -0.06850381, 0.03529716, 0.16374019, 0.06750858, 0.09204821, 0.053093266, -0.024561154, 0.018893644)); + target1 += 
mul(ne3, float4x4(-0.21870598, 0.32735768, -0.037454635, -0.062546894, 0.048824597, 0.006229873, 0.0879531, 0.0010694796, 0.1268415, -0.3329151, 0.18059574, 0.027663317, 0.06451952, 0.2059446, -0.14739716, 0.0425968)); + target1 += mul(nf3, float4x4(-0.02567249, 0.18261379, 0.0078112325, 0.13831526, 0.022516627, 0.18176961, 0.022643182, 0.06482983, 0.32458714, 0.1415256, -0.40462464, -0.24058491, -0.1555331, -0.058481682, 0.08041805, 0.068204984)); + target1 += mul(ng3, float4x4(0.08099861, -0.042113766, -0.012603856, -0.027247382, -0.09505534, 0.013861726, 0.16544205, -0.034136306, 0.013128467, 0.022156378, 0.021391893, -0.087280534, -0.18957394, -0.072840415, 0.1942784, -0.04479766)); + target1 += mul(nh3, float4x4(0.13244309, 0.23072438, -0.10388544, 0.055465538, -0.06797261, 0.0813476, 0.03605633, -0.002648387, 0.04333517, 0.1233629, 0.004186724, -0.068296656, -0.076496966, -0.13608767, 0.13116132, -0.067895085)); + target1 += mul(ni3, float4x4(-0.05193536, -0.057465453, 0.05165806, -0.092361026, -0.21779, -0.08789043, 0.056987524, -0.06524499, 0.02767333, 0.19836798, 0.104195744, -0.091015235, -0.10806183, -0.24305776, 0.12348048, 0.17889297)); + target1 += float4(0.0026275632, -0.111531265, -0.027438803, 0.048715387); + + float4 target2 = mul(a1, float4x4(0.0007129529, -0.23268181, -0.055581614, -0.19489531, -0.119524784, 0.16052821, 0.08242202, 0.1274113, 0.06528547, 0.11359341, -0.13980822, -0.04566708, -0.03624654, -0.08533644, -0.14554873, -0.14973463)); + target2 += mul(b1, float4x4(-0.010712782, 0.09223229, -0.06977767, 0.031998634, 0.2417462, -0.08404255, -0.067694396, -0.031915385, -0.08493046, -0.12639172, -0.12919787, 0.009066012, 0.027782273, -0.2951646, -0.1300083, -0.0673188)); + target2 += mul(c1, float4x4(0.1325964, -0.051963683, 0.13291354, 0.02579481, -0.103561625, -0.041789595, 0.040783167, 0.047240548, -0.06668069, 0.020328876, 0.08887853, -0.02963949, -0.11168412, 0.1557154, -0.076105356, -0.1504038)); + target2 += mul(d1, 
float4x4(-0.10317256, 0.07854648, 0.16037096, -0.0379184, 0.13046049, -0.024218671, 0.0822899, 0.08198137, 0.0012042717, -0.25853133, 0.046963938, -0.009453239, 0.09634527, -0.009770066, -0.12853295, -0.041695565)); + target2 += mul(e1, float4x4(0.11125126, 0.09055589, 0.014031054, -0.02255056, -0.10394986, 0.10815357, -0.15813628, -0.01853368, 0.012419031, 0.0020822953, -0.010447686, -0.026241936, -0.03541712, 0.076329805, 0.20895265, 0.003645337)); + target2 += mul(f1, float4x4(-0.12773241, -0.09765568, -0.14337096, -0.065751396, -0.0084745465, -0.052546956, -0.08200752, -0.08708897, -0.032195702, -0.036496297, 0.17860867, -0.068227254, 0.13200605, -0.13811241, -0.050324995, 0.16204447)); + target2 += mul(g1, float4x4(-0.014216644, 0.057588127, -0.044320818, 0.062128264, -0.020399947, -0.05649115, -0.11319402, -0.038921937, 0.036813796, 0.5067311, 0.22060235, -0.0055661057, 0.23151882, 0.0050073536, 0.12176585, 0.0038464004)); + target2 += mul(h1, float4x4(-0.0759528, 0.07477981, -0.06292785, -0.050053917, -0.06312128, -0.21425541, 0.0067035304, -0.06986801, 0.10586866, -0.12749328, -0.097493485, -0.003508852, 0.111684315, 0.18951331, -0.012068376, 0.036257178)); + target2 += mul(i1, float4x4(-0.15544677, 0.047360703, -0.059747778, 0.0026973744, -0.00072011014, 0.15553303, 0.10704341, -0.02808549, -0.09962682, -0.044461366, -0.014757942, -0.06257519, 0.13504705, 0.030818086, -0.047969542, -0.12272446)); + target2 += mul(a2, float4x4(0.02756638, 0.03870099, -0.078585416, -0.049957782, -0.16714093, -0.020673685, -0.0029932198, 0.08303188, 0.09362902, -0.32569888, -0.02152779, -0.039258134, -0.0024254394, -0.05215952, 0.103006296, -0.05561939)); + target2 += mul(b2, float4x4(0.11232395, -0.4204378, -0.02948307, 0.058709357, 0.10122942, -0.01815637, 0.029027436, 0.045725007, -0.0019202912, -0.20451765, -0.06804741, -0.018427953, 0.026046682, -0.02693389, -0.1603317, -0.11198625)); + target2 += mul(c2, float4x4(0.24319492, 0.114851095, -0.13692874, 0.07721465, 
0.020316923, -0.08134961, 0.07356765, -0.054053787, -0.01942671, 0.22095704, 0.00965335, 0.018760502, 0.015964821, 0.086102456, 0.01024545, 0.043060217)); + target2 += mul(d2, float4x4(0.3332833, -0.03617076, -0.06354161, 0.095067084, 0.20085002, -0.07980238, 0.042980768, 0.016795967, -0.09440837, -0.18057466, -0.062128007, -0.22770254, 0.03636945, 0.0749142, 0.0034359195, -0.024630694)); + target2 += mul(e2, float4x4(0.18430449, -0.036511928, -0.053284973, 0.023835842, 0.23871118, 0.05792267, -0.0846795, -0.20196451, 0.03506874, 0.22829485, -0.28377455, -0.11413547, -0.10833865, 0.09104711, -0.13071612, 0.17202353)); + target2 += mul(f2, float4x4(0.19165954, 0.22479524, -0.19884257, 0.08072162, -0.07574742, 0.13766298, -0.25755826, 0.084687516, -0.080061525, 0.25205615, -0.12677447, 0.08576974, 0.02831567, -0.009467821, 0.1970242, 0.20168954)); + target2 += mul(g2, float4x4(0.0927734, 0.17610501, 0.14182864, 0.18800513, 0.05701441, 0.15469678, 0.11420199, -0.15377665, -0.08189125, -0.30660027, 0.033272292, -0.11340498, -0.08969095, 0.016946664, 0.03424574, -0.007572548)); + target2 += mul(h2, float4x4(0.23636094, 0.15679167, 0.070221, 0.11989854, -0.18536362, 0.06250143, -0.086411804, 0.0099315215, -0.13320905, 0.2642356, 0.22141577, -0.009068583, -0.06783877, 0.16432028, 0.06672474, -0.051250096)); + target2 += mul(i2, float4x4(-0.22000717, 0.15731241, 0.13043061, -0.042806733, 0.0031978998, 0.0668276, 0.08608138, 0.10850058, 0.22485662, -0.121448815, -0.014875905, -0.082832925, 0.056386247, -0.29444495, -0.05680645, -0.015010734)); + target2 += mul(a3, float4x4(0.014549664, -0.0069613485, 0.11311649, 0.05610812, 0.04279884, -0.1020982, -0.03904751, -0.17636296, -0.05201923, 0.14244251, -0.059024896, -0.09463292, -0.09491209, -0.022265568, -0.0002296264, 0.03899329)); + target2 += mul(b3, float4x4(0.048777632, 0.052673753, 0.13282603, 0.1795813, -0.028372066, 0.10603009, -0.4148765, -0.02000411, 0.053786337, -0.11523432, -0.31676108, -0.03830518, 0.022093901, 
0.013758008, 0.106954776, -0.028646056)); + target2 += mul(c3, float4x4(-0.06699817, -0.1724271, -0.036506936, 0.1153328, 0.015884517, -0.008503094, 0.028359545, -0.012168917, 0.030682955, 0.03541267, -0.03814948, -0.01124931, -0.05933562, -0.014424095, 0.027945189, -0.08810283)); + target2 += mul(d3, float4x4(0.013294456, 0.19495966, 0.067234084, 0.15800472, 0.051711556, 0.17711255, 0.1140798, 0.10137737, -0.039499275, -0.04602223, -0.07446666, 0.0012073858, -0.08343905, -0.049277645, -0.078486815, -0.14566717)); + target2 += mul(e3, float4x4(-0.09936533, 0.039390396, 0.13288753, 0.1920324, 0.13764949, -0.05153866, 0.06799814, 0.22350872, 0.27779356, -0.02206339, 0.19484605, -0.07821554, -0.07797821, 0.12577902, -0.084113464, 0.02873002)); + target2 += mul(f3, float4x4(-0.10784442, -0.25804177, 0.1306632, 0.0046842564, 0.13917917, -0.03910364, 0.06410272, 0.019373003, -0.03459362, 0.080056466, 0.12915988, 0.14360592, 0.19040298, -0.0023102893, -0.04890759, -0.22537242)); + target2 += mul(g3, float4x4(0.056570116, 0.13121127, -0.069638334, 0.11919738, -0.04740792, -0.16621193, -0.118925, 0.044869807, -0.010641902, -0.051522024, -0.057623643, 0.017528418, -0.07562933, 0.058253985, 0.05989836, 0.032996327)); + target2 += mul(h3, float4x4(0.091301516, 0.08428476, -0.16445327, 0.11784904, -0.07030389, 0.022161584, -0.02548798, -0.08254805, -0.04188322, 0.24900444, 0.078174226, 0.20630752, -0.05519587, -0.10978986, 0.015350538, -0.12161702)); + target2 += mul(i3, float4x4(-0.095735274, -0.10423386, -0.036254395, 0.10522458, -0.022615599, 0.085539706, -0.096113354, -0.23468721, 0.050746538, -0.31889522, -0.061264757, 0.11150476, -0.007024875, -0.11553085, -0.019223234, -0.23692535)); + target2 += mul(na1, float4x4(-0.08454392, 0.21670897, -0.15095642, -0.060052566, 0.045126446, -0.030535553, -0.057765372, -0.027783932, -0.20350753, -0.2959993, 0.28601378, 0.028859718, 0.071787685, -0.027895963, -0.04723786, -0.10217129)); + target2 += mul(nb1, float4x4(-0.012522398, 
-0.23370479, -0.019732006, -0.052036785, -0.33242345, 0.02026433, 0.26734874, 0.044760924, -0.09205539, 0.0888652, 0.27825877, -0.08912795, 0.019177845, 0.123587854, -0.10933388, -0.046620987)); + target2 += mul(nc1, float4x4(-0.059484433, 0.107038036, -0.021947065, 0.03293247, 0.16987476, -0.02623603, -0.019537413, -0.02559007, -0.010399871, -0.028635733, -0.10141786, -0.10065662, -0.09635094, -0.107081525, 0.0060942136, 0.00018589811)); + target2 += mul(nd1, float4x4(0.063847266, -0.07454534, -0.1174812, -0.14199455, -0.044613797, -0.081642054, 0.035214093, 0.009284773, -0.00707006, 0.28477952, -0.03298465, 0.074021146, -0.04033067, 0.17765698, 0.1553138, 0.08380522)); + target2 += mul(ne1, float4x4(0.17025755, -0.118484, -0.21803714, -0.28715235, -0.13095933, -0.058834057, -0.18294802, 0.043152038, -0.058910713, 0.028670516, -0.0010361333, -0.025163988, 0.15223087, -0.016097538, -0.09638604, -0.01772858)); + target2 += mul(nf1, float4x4(0.062441614, -0.016123693, 0.07818185, 0.022483543, -0.029692583, 0.035550565, -0.12624146, -0.04230702, -0.061506867, -0.014386596, 0.0115612615, 0.068888955, 0.067702614, 0.07322066, 0.024701316, -0.04806952)); + target2 += mul(ng1, float4x4(0.026700316, -0.16510022, 0.050885063, -0.1332475, 0.019049475, -0.008760977, 0.04359399, 0.042262577, -0.05225198, -0.603255, -0.11838725, -0.017602438, -0.23949145, 0.07854026, -0.21954034, -0.07048147)); + target2 += mul(nh1, float4x4(0.18560836, 0.18485062, -0.008109583, -0.0061953044, 0.067029685, 0.1231515, 0.00463641, -0.031592768, -0.24861142, -0.012609046, 0.14307153, -0.072264954, -0.0067704953, -0.18041459, 0.17362577, -0.06497389)); + target2 += mul(ni1, float4x4(0.10974998, 0.06757753, 0.0377915, 0.057072945, 0.11128115, 0.0013228649, -0.044957817, -0.020252109, 0.06231163, -0.14761455, -0.027373059, -0.10220075, -0.22065234, -0.09441151, 0.052624665, 0.11956694)); + target2 += mul(na2, float4x4(0.11292619, -0.10152602, 0.10526179, 0.06337831, 0.116172016, 0.16123155, 
-0.055104487, 0.13740757, -0.08778325, -0.028898785, -0.019357817, -0.08015077, -0.0066665406, -0.009120153, 0.051283117, 0.04456564)); + target2 += mul(nb2, float4x4(0.19621657, 0.26922694, 0.03988996, 0.032870032, 0.057292562, 0.024405524, -0.11551687, -0.047686152, 0.13039996, 0.056989953, -0.065783806, 0.00033558672, -0.065978706, -0.00902148, 0.1314761, 0.064695716)); + target2 += mul(nc2, float4x4(0.20266968, 0.11562562, 0.0044746934, 0.052361086, 0.0009612361, 0.01889979, -0.045194417, 0.085848965, -0.05785333, 0.07915189, 0.09685515, 0.016877603, 0.00037991733, 0.0003345007, -0.03782238, -0.0066707213)); + target2 += mul(nd2, float4x4(-0.12730233, 0.037978236, 0.13999923, 0.033807464, -0.038275905, 0.012305192, 0.06438087, 0.08611617, 0.07200057, 0.13013837, 0.07331905, -0.0010762423, -0.038951423, -0.027457712, 0.014879732, 0.07803083)); + target2 += mul(ne2, float4x4(0.12269098, -0.01707025, 0.099231675, 0.16366597, -0.0075668246, -0.12552746, 0.27712014, 0.22933815, 0.14837137, -0.07610271, 0.11374453, 0.026816925, 0.1011783, -0.043783583, -0.18852726, -0.2007988)); + target2 += mul(nf2, float4x4(0.118183166, -0.45110446, -0.04326608, 0.10598517, 0.09142483, 0.004518412, 0.10789324, 0.18913233, -0.029293153, -0.10852763, 0.15762898, -0.021000696, 0.042484812, 0.030249448, -0.09806746, -0.15705605)); + target2 += mul(ng2, float4x4(0.026257282, -0.017269222, -0.111170195, 0.12946244, 0.015408065, -0.14137042, -0.035408627, 0.073995374, 0.006271072, 0.14994001, -0.01258022, 0.019418288, 0.118502036, 0.035291567, 0.039203968, 0.018011976)); + target2 += mul(nh2, float4x4(-0.11994321, 0.037343338, 0.034031454, -0.0947803, 0.2207995, 0.043690477, 0.06692838, 0.18297808, 0.03876948, -0.20762676, -0.13309777, 0.036189202, 0.0058699325, -0.1331377, -0.035574175, -0.091714606)); + target2 += mul(ni2, float4x4(0.16173537, 0.030811697, -0.07565782, 0.17767896, 0.1574808, -0.0071866834, -0.031369448, 0.11762595, -0.304427, 0.04666128, 0.19467019, 0.13271074, 
-0.066108644, 0.17788546, 0.09988941, 0.0071199923)); + target2 += mul(na3, float4x4(-0.07895499, 0.024530848, 0.07610484, 0.14991722, -0.071451046, 0.07360262, -0.10922367, 0.16261177, 0.14607567, -0.29037732, 0.19056098, 0.0017480691, 0.09447392, -0.097536966, -0.15283571, -0.2116911)); + target2 += mul(nb3, float4x4(-0.090664506, 0.0026753773, -0.19803517, -0.0035921792, 0.08019641, -0.34822193, 0.03115303, -0.11561995, 0.047316786, 0.08521655, 0.30527622, -0.03627345, -0.10390178, 0.13096002, -0.11939941, 0.076553464)); + target2 += mul(nc3, float4x4(-0.018057704, -0.012385826, -0.048699293, 0.057409126, 0.018623013, -0.13720913, -0.08693412, -0.035308264, 0.0048156027, 0.04298599, 0.20682096, 0.07020018, -0.19156799, -0.099447116, 0.11187527, -0.034651503)); + target2 += mul(nd3, float4x4(-0.19674721, -6.47493e-05, -0.14616148, -0.16328155, -0.15329379, -0.13080211, -0.095063426, 0.10239187, 0.29591182, 0.061356615, 0.19931474, -0.062333517, 0.111954294, -0.024125673, 0.1727124, -0.100813806)); + target2 += mul(ne3, float4x4(0.005782909, -0.048647407, 0.20534706, 0.04177472, -0.266937, 0.43962362, 0.03461612, 0.13415751, -0.21391335, -0.023739172, -0.382901, 0.1677018, 0.28375793, -0.10282615, -0.034843605, 0.00698951)); + target2 += mul(nf3, float4x4(-0.0019446284, 0.07665739, 0.13404883, 0.1467204, -0.0588129, 0.19369206, -0.050641898, 0.018204086, 0.21603708, -0.22462276, -0.07930267, -0.2749562, 0.016131664, 0.2697215, -0.14661922, -0.026748048)); + target2 += mul(ng3, float4x4(-0.15208562, -0.025413433, -0.031909585, 0.010184482, 0.09441715, 0.045736533, 0.0015301697, 0.055179585, 0.03623536, 0.08788274, 0.090822086, -0.041574936, -0.05593542, 0.013202262, -0.08831654, -0.117966585)); + target2 += mul(nh3, float4x4(-0.06767938, 0.036391854, 0.024670534, 0.065553516, 0.124412306, -0.18261679, -0.11035609, -0.021725666, 0.06963895, -0.18845208, 0.05664083, -0.28461877, 0.12621799, -0.024473144, 0.060711104, 0.06137061)); + target2 += mul(ni3, 
float4x4(0.030836413, -0.28885397, -0.0082618, -0.040858608, 0.121351525, -0.1581085, 0.04491976, 0.15929738, 0.011640548, 0.17567058, 0.18560362, -0.18308444, -0.091114745, -0.03191929, -0.0424641, 0.10603501)); + target2 += float4(-0.0980025, 0.0163943, 0.07015813, -0.04460826); + + float4 target3 = mul(a1, float4x4(-0.044858746, 0.112747766, 0.11743049, -0.04397981, -0.15657529, -0.08594472, -0.077046685, 0.040047225, -0.16525316, 0.118806966, -0.06923664, -0.068862945, 0.13853838, 0.21202816, 0.03315427, 0.02810617)); + target3 += mul(b1, float4x4(-0.08981965, 0.009795084, -0.17461349, 0.1293042, 0.13288464, -0.011990358, 0.045853514, 0.005478685, -0.039259993, 0.014204771, 0.049636167, -0.031643927, -0.081734784, 0.06592399, -0.075981714, -0.02715899)); + target3 += mul(c1, float4x4(0.110248916, 0.0064891353, 0.022578653, -0.029814541, 0.12611644, -0.1477485, 0.013158434, -0.029419534, -0.049103256, 0.11351519, -0.07094292, 0.15175463, 0.023724427, -0.04979516, 0.01999463, -0.04911801)); + target3 += mul(d1, float4x4(-0.072089985, 0.04007664, -0.024550471, -0.0041285334, -0.018247912, 0.046173554, -0.07198727, 0.017499885, -0.001033623, -0.19433345, 0.07760378, 0.049773693, -0.17062156, -0.02818212, 0.34907836, 0.0050598015)); + target3 += mul(e1, float4x4(-0.06617895, -0.029447488, -0.08041051, 0.10391866, -0.3511068, 0.24072146, -0.07714093, -0.19329752, -0.090364814, -0.114312, -0.14665945, 0.14689237, 0.20671985, 0.015588815, -0.119754635, -0.056320462)); + target3 += mul(f1, float4x4(-0.027388249, 0.104699664, -0.27179572, -0.02907286, 0.07357054, -0.0068755792, -0.13605821, 0.06462062, -0.093615666, -0.032704853, 0.038318764, -0.076435864, -0.0055633793, 0.046742633, 0.093529075, 0.18353659)); + target3 += mul(g1, float4x4(0.06082767, 0.085872404, -0.093700096, 0.061194196, 0.06258653, 0.058643147, -0.07235859, -0.092823185, -0.010440827, 0.11255041, 0.0090868175, -0.007858298, 0.148384, -0.05526942, 0.19361623, 0.004099247)); + target3 += mul(h1, 
float4x4(-0.06093948, -0.038310055, -0.082474135, -0.010680022, 0.0012025833, -0.092099264, 0.013127829, 0.027141726, 0.09983758, 0.03275215, 0.07185623, -0.19180898, -0.044681955, -0.024202297, -0.3165539, 0.0010588729)); + target3 += mul(i1, float4x4(-0.060221963, -0.026948337, -0.06574486, 0.011485259, -0.06550075, 0.040276073, 0.025496457, -0.19623038, -0.065990366, -0.025713596, -0.040418267, -0.08788943, 0.076047935, -0.056114316, 0.15456654, -0.07788768)); + target3 += mul(a2, float4x4(-0.067551315, 0.14745092, -0.054396585, 0.040545028, -0.17049932, 0.07036919, -0.13004121, -0.012877571, -0.09034833, 0.013381427, -0.07020307, 0.13269025, 0.04836113, 0.008816658, 0.06908017, 0.13488075)); + target3 += mul(b2, float4x4(0.15872127, 0.046130676, 0.059947554, -0.01181087, 0.00031724942, -0.048350845, -0.009036753, -0.11157358, -0.07300833, 0.09947689, 0.20575939, -0.3546566, -0.059859008, 0.029647622, 0.13094904, -0.03154742)); + target3 += mul(c2, float4x4(0.08560438, 0.1965193, -0.044979937, -0.13631731, 0.16646172, 0.09958199, 0.0074020037, 0.10672716, 0.15015182, 0.041704617, 0.063770875, 0.19410326, 0.008813034, 0.16075528, 0.08517037, 0.28283635)); + target3 += mul(d2, float4x4(0.12114333, -0.08197629, 0.026583742, -0.060136575, 0.07713845, -0.004285971, 0.16490252, 0.26541123, 0.13636889, 0.14296104, -0.045894254, -0.007115691, 0.037731793, -0.014873664, -0.00571577, -0.009701031)); + target3 += mul(e2, float4x4(0.2608233, -0.014971803, 0.15469527, 0.18899868, 0.06325761, 0.05273965, -0.021072507, 0.039343588, 0.049740855, 0.30912283, 0.1328661, 0.21406676, 0.013830919, -0.2128574, -0.020829424, 0.22456568)); + target3 += mul(f2, float4x4(0.0642146, -0.14275537, 0.032388665, -0.12502304, 0.31260416, -0.026139492, 0.11060444, 0.014260357, -0.06373526, 0.15441616, -0.14077063, -0.03819972, 0.023418859, -0.065061435, 0.068000436, -0.10781963)); + target3 += mul(g2, float4x4(0.039874375, -0.03544748, -0.09499391, -0.021817759, 0.2049574, 0.08219808, 
0.044527993, -0.12810238, -0.07313955, -0.3041692, 0.074703164, 0.034242906, -0.08850236, 0.06280731, 0.07377995, 0.10382322)); + target3 += mul(h2, float4x4(0.04350059, 0.21734618, 0.08675183, -0.055069674, 0.16317086, -0.000833345, -0.061599948, 0.025430895, -0.05566867, -0.07084767, -0.20808282, -0.08088132, -0.08246971, 0.019896548, 0.0011203124, -0.016212555)); + target3 += mul(i2, float4x4(0.009271706, 0.10609657, 0.046975497, 0.016255897, -0.03132032, -0.026223281, -0.04218519, -0.089583725, 0.0011256885, -0.096725605, 0.13508168, 0.0070396424, 0.071279675, -0.009885292, 0.023429802, 0.04919291)); + target3 += mul(a3, float4x4(-0.043223884, 0.18723601, 0.059270866, -0.038768277, -0.03307238, 0.045570783, -0.01494598, 0.12532744, -0.0633282, -0.009204529, -0.032864776, -0.012969925, -0.03190685, 0.048798896, 0.033872727, 0.059553478)); + target3 += mul(b3, float4x4(0.087938786, -0.24108681, 0.14970978, -0.13961543, 0.0891246, 0.015723674, 0.05370719, -0.11110716, -0.00214365, 0.12866165, 0.108206935, 0.027394261, -0.15103427, -0.14690042, 0.035489313, -0.15238154)); + target3 += mul(c3, float4x4(-0.0800077, -0.23219119, -0.08327999, -0.022596871, -0.021897404, 0.15777653, 0.017139765, 0.28121725, 0.024720678, 0.0976178, 0.078697845, 0.050298456, 0.0918896, -0.1709005, 0.001258526, -0.16952778)); + target3 += mul(d3, float4x4(0.1855042, -0.1221885, 0.02704022, 0.00095695246, -0.014720871, -0.011397964, 0.009077131, -0.0658526, 0.0753248, -0.018622542, 0.21117687, 0.009595839, -0.014185466, -0.12340562, 0.20756626, 0.1002926)); + target3 += mul(e3, float4x4(0.03998379, -0.14931168, 0.43595135, -0.18249772, -0.014348168, 0.17039725, -0.54961896, 0.23570935, -0.0961725, -0.08736501, -0.48726758, -0.11515001, -0.03716486, -0.17436725, 0.3894316, -0.012835015)); + target3 += mul(f3, float4x4(-0.15397331, 0.021657735, -0.054806687, 0.1541452, -0.12548985, 0.0934218, 0.20914574, 0.14777465, -0.0670766, 0.11853072, -0.012987691, -0.020369543, 0.09420477, -0.17689225, 
0.109701715, -0.046027176)); + target3 += mul(g3, float4x4(-0.02231296, 0.14284018, -0.14968887, 0.13387628, 0.06886712, -0.11273641, 0.03278117, -0.13931367, -0.07073904, -0.05791193, 0.0074532703, 0.057605404, -0.007830725, 0.16091831, -0.16650262, 0.1647855)); + target3 += mul(h3, float4x4(-0.057878133, -0.12752692, -0.12909345, 0.07441648, 0.027899493, -0.018735388, -0.07586787, -0.048344534, -0.11736236, 0.015326167, -0.103591904, -0.17694342, -0.049772773, 0.015765708, -0.1248672, 0.26354307)); + target3 += mul(i3, float4x4(-0.18220314, 0.0046032136, -0.2081131, 0.03723796, 0.08844814, -0.01369978, 0.053207412, -0.08312182, -0.062071536, -0.067955784, 0.004774782, -0.06925075, -0.059406135, 0.06784051, -0.09814774, -0.11124358)); + target3 += mul(na1, float4x4(-0.27883962, 0.12152088, -0.24405631, 0.0027260163, 0.19775666, 0.058938242, -0.05956473, -0.10816854, -0.0071739377, -0.4144036, 0.068261996, -0.2445757, -0.23093198, -0.17691095, 0.038170703, -0.013878705)); + target3 += mul(nb1, float4x4(0.063041806, 0.2538589, -0.11473429, 0.01619935, -0.08354722, -0.04798535, 0.02354034, 0.033864528, -0.055874173, -0.16368376, -0.02903178, -0.12477576, 0.02629324, 0.034359895, 0.08272036, 0.06732605)); + target3 += mul(nc1, float4x4(-0.15553482, 0.0060790586, -0.05535005, 0.0132087935, -0.03520144, 0.023434987, 0.031604007, -0.09385124, -0.15015934, -0.13401696, -0.005520488, -0.08600875, -0.04346026, -0.07434181, -0.05771243, 0.03339138)); + target3 += mul(nd1, float4x4(-0.13035898, -0.06444063, -0.12604833, -0.1291162, 0.0002854935, 0.0011192479, 0.03285, -0.0718767, -0.0048345756, 0.23910385, -0.13370244, -0.27723455, 0.2173459, -0.09477723, -0.2785804, -0.089392334)); + target3 += mul(ne1, float4x4(0.19831544, -0.04623001, 0.11013904, 0.07203301, 0.006143421, -0.059177686, -0.5040003, 0.12711781, 0.18126795, 0.13216637, 0.15124142, 0.0053686183, 0.090513304, 0.10542994, 0.34392425, 0.016424375)); + target3 += mul(nf1, float4x4(-0.16124019, 0.09191821, 
-0.04369587, -0.21306747, -0.16233422, 0.031122763, -0.012612568, -0.016409902, -0.09023912, 0.013649212, -0.16627215, -0.05366447, 0.10274318, 0.086314775, 0.08027116, 0.08462481)); + target3 += mul(ng1, float4x4(-0.14453822, 0.024520764, -0.0071830307, -0.13206398, -0.072472885, -0.10329967, 0.1636545, 0.016468262, -0.013051184, -0.12824146, 0.03824098, -0.22003986, -0.10416448, 0.0039071296, -0.34092218, 0.10734566)); + target3 += mul(nh1, float4x4(-0.072179504, 0.006203091, -0.018925803, -0.1199396, -0.084528126, 0.094925165, -0.11961369, -0.054626215, -0.117074564, -0.04484073, 0.040342934, 0.13213676, -0.0064397864, 0.10155662, 0.20097142, -0.2804305)); + target3 += mul(ni1, float4x4(0.087270446, 0.078806184, -0.05655386, 0.06486903, 0.034370087, 0.0036874234, 0.003311713, -0.10504396, 0.028166316, -0.22845218, 0.017909897, -0.2130404, -0.050013334, -0.117276974, -0.06318294, -0.0037857178)); + target3 += mul(na2, float4x4(0.08871242, 0.075167455, -0.039373945, 0.00051754323, 0.07687967, -0.06586344, -0.15153599, 0.0018507856, -0.017242108, -0.054329462, 0.051372115, 0.0033961546, 0.06248249, -0.06631481, 0.05806025, -0.021996895)); + target3 += mul(nb2, float4x4(0.09424522, 0.073743, -0.0017127816, 0.0033512171, -0.11385974, 0.014514997, -0.0068160114, 0.12540759, 0.106560245, -0.049447417, 0.111991346, -0.06375654, -0.011610938, -0.024543937, -0.12136444, 0.1091816)); + target3 += mul(nc2, float4x4(0.2360247, 0.051082112, 0.063963845, -0.19552353, -0.12502095, -0.043954436, -0.029264912, -0.107425205, -0.104991466, 0.1546093, -0.019506395, 0.102938086, -0.054183662, 0.010583785, -0.080395944, -0.08370572)); + target3 += mul(nd2, float4x4(0.15028444, 0.031050628, 0.04759701, -0.076938786, -0.09843708, 0.013380048, -0.07036618, -0.18517768, -0.24299946, 0.0074256407, 0.12335329, 0.008296356, -0.14130129, 0.089567006, -0.066212654, -0.019249886)); + target3 += mul(ne2, float4x4(0.21793036, 0.046704203, -0.26442486, 0.036775246, 0.011823214, 0.035270307, 
0.27286708, -0.041062694, 0.1929, -0.18686813, 0.033577543, -0.23847485, -0.04342215, 0.20992972, -0.31331903, -0.3476763)); + target3 += mul(nf2, float4x4(0.2605603, 0.045636464, 0.078897774, -0.02860065, -0.17690817, -0.022998778, -0.078985356, -0.08182311, -0.02665034, 0.051768366, 0.14886487, 0.08579571, 0.13346, -0.10001264, 0.04904008, 0.14541489)); + target3 += mul(ng2, float4x4(0.106186725, -0.0063438504, 0.07265258, -0.036121733, -0.13984898, 0.003038981, -0.016125364, 0.13680565, -0.057302903, -0.12963718, -0.0030335293, -0.021742221, -0.006363557, -0.101099625, 0.095220365, 0.033486642)); + target3 += mul(nh2, float4x4(0.094589375, -0.044164203, -0.15519938, -0.02010285, -0.094102144, -0.06617603, -0.06663444, -0.036653996, -0.018485812, 0.04307366, 0.23020254, 0.17289902, 0.11927716, 0.059777882, 0.16321822, -0.17249192)); + target3 += mul(ni2, float4x4(0.17176881, -0.05145481, 0.058537252, 0.07365525, 0.17615119, -0.0008998237, 0.20070761, 0.08091997, -0.22727549, 0.040356588, -0.19447488, 0.019409144, -0.094837844, 0.029385263, 0.06778661, 0.15896504)); + target3 += mul(na3, float4x4(-0.11452088, -0.024284642, -0.04490299, -0.020004421, 0.050837193, -0.19884948, 0.0027391468, -0.04909611, 0.10565033, 0.046887845, 0.15566911, -0.04677708, -0.1617592, -0.1090753, 0.021104805, 0.12100669)); + target3 += mul(nb3, float4x4(0.0796837, 0.2143031, 0.15130435, 0.11013741, 0.02859385, -0.23182273, -0.01307099, 0.17366518, 0.067062154, -0.13214251, -0.0359161, -0.22044878, -0.065245375, -0.12085723, -0.0058068414, -0.05868892)); + target3 += mul(nc3, float4x4(0.05886354, 0.04594631, 0.0035692437, 0.0043173125, -0.0058938325, -0.12315084, 0.009706764, 0.029205475, 0.02275545, -0.030235367, -0.010946894, -0.1160915, -0.24663799, 0.021396592, -0.08312792, 0.035279196)); + target3 += mul(nd3, float4x4(-0.18054669, -0.03518381, 0.048470423, -0.0056507597, 0.03240578, 0.12688184, -0.09667544, 0.04029143, 0.03038166, 0.10955508, -0.2918326, -0.08950494, -0.06969353, 
0.20913015, 0.13051425, -0.12262561)); + target3 += mul(ne3, float4x4(0.11774238, 0.107279345, 0.09160909, -0.12901367, -0.063854314, 0.012220096, 0.1428603, -0.03274951, -0.16071229, 0.16923961, 0.09850307, 0.3375513, 0.17089152, 0.1066977, -0.11292511, 0.07839456)); + target3 += mul(nf3, float4x4(0.06309776, -0.062669575, 0.12810674, -0.22764897, 0.05594526, -0.3354947, -0.271324, -0.1370599, 0.0019311982, -0.20568445, 0.14663076, -0.10399025, -0.11092913, 0.13635515, -0.046688963, 0.18119682)); + target3 += mul(ng3, float4x4(0.06247405, -0.070577376, -0.049723163, 0.20372438, 0.059769955, -0.15753393, 0.08755224, -0.16705483, 0.043191068, 0.13503598, -0.06549854, -0.08262152, -0.036690235, -0.017480936, 0.0087178415, 0.124511525)); + target3 += mul(nh3, float4x4(-0.103790514, -0.062080752, -0.04171218, -0.22629078, 0.058754075, 0.010274649, 0.012631916, 0.0884306, 0.10843063, 0.11566254, 0.16639906, -0.05603101, 0.03344291, -0.009285547, 0.22062606, -0.18537858)); + target3 += mul(ni3, float4x4(-0.010970425, 0.06433602, -0.010908282, 0.21255766, -0.124487005, -0.18626499, 0.017554395, 0.022440141, -0.043080032, 0.13329363, -0.019777333, -0.13920292, -0.057512637, -0.07950961, 0.0008059128, 0.08286962)); + target3 += float4(0.038618144, 0.034658056, 0.04403221, 0.22010419); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex3[gxy] = target3; +} + +//!PASS 4 +//!DESC Conv-4x3x3x24 +//!IN tex1, tex2, tex3 +//!OUT tex4, tex5, tex6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass4(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = 
tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + 
float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(-0.19656715, 0.073294915, -0.019779518, 0.021025823, 0.15261759, 0.04309221, -0.1493544, 0.049283743, -0.0905334, 0.1813188, -0.0016973419, 0.15697837, 0.13670535, -0.11242918, -0.013915669, -0.13730156)); + target1 += mul(b1, float4x4(0.04107699, 0.17384163, -0.096351616, 0.04331655, -0.23204431, 0.25804806, -0.04034741, 0.17473252, 0.2747926, -0.04826532, 0.06581498, -0.01747519, 0.16690566, 0.18259898, 0.051713206, -0.11371784)); + target1 += mul(c1, float4x4(0.09295699, -0.02639465, -0.07067535, -0.055101186, 0.049066454, -0.1348934, -0.010201892, 0.076446265, -0.17203535, 0.094379045, -0.05279342, -0.06568022, 0.019863818, 0.048707128, -0.001194968, 0.08657796)); + target1 += mul(d1, float4x4(0.075812176, -0.14860412, -0.07091005, 0.027131502, -0.037916575, -0.08786051, 0.12747246, 0.07358627, -0.17530513, 0.01687204, -0.02315926, -0.0475825, 0.10233608, 0.11752665, -0.066707715, -0.02696408)); + target1 += mul(e1, 
float4x4(-0.16265862, 0.07163909, 0.029001605, 0.023125717, -0.45108593, 0.31734392, 0.18262424, -0.16254611, -0.13591787, -0.34079695, 0.15933561, -0.11768856, -0.20831986, -0.2617357, -0.06293675, -0.21008867)); + target1 += mul(f1, float4x4(0.03834222, 0.11669165, -0.14289354, 0.19205377, 0.034326866, 0.11611292, -0.35397327, -0.22060747, -0.004148329, 0.16584732, 0.021622034, -0.026690945, -0.002915367, -0.0025648596, 0.098647386, 0.010004625)); + target1 += mul(g1, float4x4(0.12951577, -0.12372639, 0.050420888, -0.059468318, 0.06579213, -0.20325322, -0.1699444, 0.019064313, -0.035931777, -0.020957012, 0.0027909358, 0.007493282, 0.0004133846, 0.034073114, -0.038953777, 0.065847114)); + target1 += mul(h1, float4x4(0.044652946, 0.04014948, -0.11211438, -0.009610841, 0.04416661, 0.007001935, 0.23747365, 0.051566597, 0.08833828, 0.08240841, 0.11842664, -0.053376306, -0.24712811, 0.086317725, 0.0038018306, 0.058020968)); + target1 += mul(i1, float4x4(-0.14782053, -0.02475428, -0.17784445, -0.024647312, 0.1743018, 0.06606081, 0.056824066, 0.14064185, 0.06063915, -0.04583706, 0.101063475, -0.043567337, -0.07165717, 0.03192861, 0.056516238, -0.011080173)); + target1 += mul(a2, float4x4(0.15754054, -0.022155577, -0.08209624, -0.0014304873, -0.29201108, 0.08677429, 0.2264655, 0.047244307, -0.048876513, -0.0927597, -0.045443505, 0.20207925, -0.12566972, 0.1404151, -0.024384655, -0.032324787)); + target1 += mul(b2, float4x4(-0.2863662, 0.08409054, 0.060920034, -0.05120718, 0.20190823, -0.05651237, 0.16887607, 0.0733604, -0.14424925, -0.06001526, 0.030432044, 0.14361487, 0.02771769, 0.030591695, -0.029078443, -0.048318565)); + target1 += mul(c2, float4x4(0.00791873, -0.09765571, 0.00042280424, 0.09341729, -0.22752157, 0.23591608, 0.0051302435, -0.077698976, 0.10737567, -0.23836324, -0.067966945, 0.2257328, -0.004849654, -0.036767352, 0.13280262, -0.07600507)); + target1 += mul(d2, float4x4(0.05473653, -0.1915318, -0.00626306, -0.13441068, 0.088244185, -0.31077763, 
-0.010066507, -0.091302134, 0.11164262, 0.12096589, -0.0605778, 0.1308392, -0.010200418, -0.024670156, 0.09293591, 0.028182708)); + target1 += mul(e2, float4x4(0.053266037, 0.29016244, 0.19542074, -0.20729323, 0.1344162, -0.21329224, 0.20277289, -0.08846336, -0.114185594, 0.206921, -0.0006008467, 0.0205045, -0.282864, -0.22293371, -0.17658198, 0.20596933)); + target1 += mul(f2, float4x4(-0.03932726, 0.080170035, -0.07711416, -0.29913354, -0.06824731, -0.061843343, -0.13042153, -0.19094346, -0.11703233, 0.06704312, -0.049862698, 0.1466207, -0.0011674183, 0.080236584, -0.06675328, 0.13961533)); + target1 += mul(g2, float4x4(-0.022837833, 0.0019198474, 0.0236172, 0.07043625, 0.12661463, 0.001672872, 0.06307569, -0.13343741, 0.17860405, -0.19384168, 0.079231955, 0.032781634, -0.15199795, 0.13594992, 0.070164844, -0.060025793)); + target1 += mul(h2, float4x4(-0.12844789, -0.025733668, 0.020556241, -0.12541436, 0.14400893, -0.03860052, 0.059304774, -0.1786852, -0.18367065, -0.2374019, 0.1148333, -0.1065858, 0.119285814, 0.016301462, 0.028108267, 0.04623708)); + target1 += mul(i2, float4x4(-0.12543629, 0.011575056, -0.037424013, 0.09024941, -0.029061148, -0.08703052, -0.0024641235, -0.105915934, -0.044543535, 0.06887956, -0.04743747, -0.05494799, 0.031339128, -0.034284074, -0.121141724, 0.053843208)); + target1 += mul(a3, float4x4(-0.2666571, 0.09093467, -0.012503428, -0.035125565, 0.032127958, -0.042863887, -0.08649192, -0.015927156, 0.08400246, 0.01662268, 0.034715075, -0.00908548, 0.045443024, -0.06893885, -0.074183375, -0.00844849)); + target1 += mul(b3, float4x4(0.18397407, -0.10994183, 0.086410955, -0.008634828, -0.02632997, 0.019804388, 0.048724968, 0.18321893, -0.017287457, 0.18407986, -0.08789994, 0.23315573, 0.17696501, 0.021334525, -0.01854696, -0.19259432)); + target1 += mul(c3, float4x4(-0.17279425, 0.040277172, 0.045758635, 0.112192474, -0.1307268, -0.08761701, -0.17986964, 0.08374649, -0.14858364, 0.09763281, -0.08468596, 0.07319079, 0.035000093, 
0.15952845, 0.07624351, -0.06327825)); + target1 += mul(d3, float4x4(0.08170692, -0.08774166, -0.06705707, 0.0831711, -0.08875457, 0.2449888, -0.14047605, 0.121503554, 0.12618999, 0.02661774, 0.074408755, 0.008565884, 0.0066377656, 0.07707615, 0.0047538104, -0.09728628)); + target1 += mul(e3, float4x4(-0.07823108, 0.26807916, -0.1287439, 0.122834176, -0.19714594, -0.4942738, 0.10632799, 0.15897664, -0.49854252, -0.53525513, -0.022803375, 0.009074036, 0.11774684, 0.031480465, -0.09894373, -0.021954348)); + target1 += mul(f3, float4x4(0.12955268, -0.19441509, 0.09120095, 0.011785061, 0.051540542, 0.04758044, 0.04755938, 0.038364667, 0.13298763, 0.10289183, 0.020991595, -0.081097506, 0.034405325, -0.11437959, -0.12480481, -0.065397635)); + target1 += mul(g3, float4x4(0.014650886, -0.013576367, 0.07512687, -0.12763637, -0.08968091, 0.10528254, -0.0038084937, 0.15035029, -0.012962306, 0.05691601, 0.0812368, -0.03788991, -0.023433529, -0.06713006, -0.11560342, -0.016785484)); + target1 += mul(h3, float4x4(-0.08841537, -0.06776095, -0.047271274, 0.019514252, -0.015769644, 0.13348323, -0.10441001, 0.11785519, 0.12106464, 0.12203113, 0.12842509, -0.060258504, -0.05513583, -0.11723075, 0.16473113, 0.011207402)); + target1 += mul(i3, float4x4(0.09194844, 0.090906195, 0.0073818006, -0.2054332, 0.10706456, -0.077387035, -0.10359331, -0.027400512, 0.03793532, -0.14690651, -0.093249865, 0.050878726, -0.050666083, -0.048249774, -0.029831426, -0.005122034)); + target1 += mul(na1, float4x4(-0.020260984, 0.03831684, 0.03624668, 0.05309828, 0.21425313, 0.04278761, -0.05036095, -0.11595718, -0.040695038, -0.053862657, 0.04097228, -0.14234553, 0.14645652, 0.0005556175, 0.037454158, -0.0834163)); + target1 += mul(nb1, float4x4(-0.06560231, 0.035083, 0.00515858, -0.078715794, 0.18701494, -0.13166098, 0.039057065, -0.0039464743, -0.060099427, -0.0197617, -0.03425236, -0.034474097, -0.13463692, -0.14687043, -0.07386183, 0.15877718)); + target1 += mul(nc1, float4x4(-0.17528099, 0.26496586, 
-0.026784442, 0.02301352, -0.19334768, 0.058255307, 0.084549166, 0.05813938, 0.14634804, 0.021594442, 0.09401384, 0.048137084, -0.027516525, 0.03725506, -0.03902931, -0.05822093)); + target1 += mul(nd1, float4x4(-0.08095242, -0.0700387, 0.1565957, -0.12318027, 0.0009593411, 0.083975986, -0.1672044, 0.07669263, 0.15444267, -0.2405765, -0.0999547, -0.0113806585, -0.086394556, -0.068187304, 0.022804303, 0.033642158)); + target1 += mul(ne1, float4x4(0.1742647, -0.1139802, 0.03773566, 0.24217185, 0.14553599, -0.13462967, -0.15343545, -0.07004845, 0.09719264, 0.40018603, 0.012584803, 0.20194948, 0.23837751, 0.14372137, 0.018450642, 0.19864424)); + target1 += mul(nf1, float4x4(-0.01977227, -0.01629232, -0.11582299, 0.07435107, 0.04089713, -0.013857991, 0.22446491, 0.016065463, -0.042079967, 0.0170577, 0.040578466, -0.038618524, -0.05470756, -0.115140095, -0.065542445, 0.14684047)); + target1 += mul(ng1, float4x4(-0.037176184, 0.090253, 0.14125483, -0.08512404, -0.051651485, 0.099631414, 0.10343597, -0.0061565666, 0.041628633, 0.09307784, -0.090136886, -0.009559773, -0.03024448, -0.031733215, -0.07797126, 0.055322547)); + target1 += mul(nh1, float4x4(-0.1050144, -0.03270817, 0.26327172, -0.28404585, -0.03861458, 0.048381314, -0.15304287, -0.0042754454, -0.10290137, -0.24198222, -0.12365528, 0.095550224, 0.15594007, -0.02852183, 0.021433152, 0.007750503)); + target1 += mul(ni1, float4x4(0.14963464, 0.036146436, 0.06857592, -0.03860567, 0.014884097, 0.07543522, 0.024485901, 0.035711233, -0.003450604, -0.0597103, 0.024015842, -0.001213529, -0.058722682, 0.01725032, -0.12181248, -0.008058613)); + target1 += mul(na2, float4x4(0.071817815, 0.041531287, 0.014643306, 0.16291411, 0.26480407, -0.17182025, -0.010588466, 0.105062984, 0.030210229, -0.04829373, -0.036531925, 0.047632486, -0.2479769, 0.045298517, 0.13192376, 0.033759)); + target1 += mul(nb2, float4x4(0.08769319, -0.0032485086, 0.022313096, 0.17629139, -0.29785547, 0.17973061, 0.033340637, 0.15138951, 0.016324213, 
-0.052774195, 0.03140277, 0.049657557, -0.1042336, -0.048986793, 0.11604845, 0.027282678)); + target1 += mul(nc2, float4x4(0.06349052, 0.020287551, 0.041792236, -0.064779855, -0.19809574, 0.1296112, -0.31961703, -0.10329236, -0.06304219, 0.1266601, 0.047558576, -0.01599766, -0.0840006, 0.110553645, -0.17114124, -0.092702754)); + target1 += mul(nd2, float4x4(-0.07258822, 0.33649316, 0.03007162, 0.122297324, -0.0035460228, -0.031201044, 0.12168557, -0.029285375, -0.0809431, -0.12167306, 0.07364419, 0.028837958, -0.0045297747, -0.0601046, 0.01811243, -0.09054316)); + target1 += mul(ne2, float4x4(-0.25255457, -0.22574474, -0.34320608, 0.11290973, -0.26090237, 0.040196825, 0.13508338, 0.029482007, -0.10183512, -0.08112251, -0.11552506, 0.19293801, 0.3495816, 0.18543391, 0.06588066, -0.04709665)); + target1 += mul(nf2, float4x4(0.084513545, -0.15674965, -0.0008406949, 0.090281285, 0.2712172, -0.116089135, -0.015765252, -0.117886744, 0.09920849, 0.029833876, -0.095729664, -0.027886158, 0.075155556, -0.046650156, -0.0065415367, -0.010066504)); + target1 += mul(ng2, float4x4(0.045017265, -0.032798514, 0.015228568, -0.10716174, 0.12533835, -0.18279125, 0.04248092, -0.19403161, -0.037150033, 0.051052798, 0.064987876, -0.0012054371, 0.17646495, -0.08559015, -0.030268986, -0.0110990135)); + target1 += mul(nh2, float4x4(0.2270626, 0.07908034, 0.10131884, -0.04477681, 0.012331784, -0.0066530495, 0.11152835, -0.011538218, 0.18025607, 0.28347188, -0.061317418, 0.140885, -0.12165267, -0.015358079, -0.1509724, 0.10532319)); + target1 += mul(ni2, float4x4(0.090888165, 0.051266413, 0.00032922614, 0.10080883, 0.25410557, 0.01401413, -0.08648295, -0.18689357, 0.06347326, -0.12212378, -0.047608927, 0.19060975, 0.030654645, -0.0008587347, 0.056551795, -0.075159475)); + target1 += mul(na3, float4x4(0.18279178, -0.043672565, -0.045675628, 0.08589114, -0.09798812, 0.13952787, 0.020468095, 0.019159447, -0.037739623, -0.031772856, 0.05684188, -0.0027250764, 0.15689476, -0.048730828, 0.07862422, 
-0.050529804)); + target1 += mul(nb3, float4x4(-0.16031522, 0.032964543, -0.04876928, 0.02347113, -0.15290083, -0.2451878, 0.052881993, -0.08540753, -0.07177217, -0.15992549, 0.0651776, -0.08932986, 0.1767438, 0.056059174, -0.0576956, -0.09165988)); + target1 += mul(nc3, float4x4(0.13147484, -0.0570967, -0.11560189, 0.05509382, -0.0150885545, 0.09640748, 0.014165965, 0.20704748, 0.030164357, -0.1082378, 0.024942307, -0.08219035, 0.124670975, -0.052641235, -0.0226715, -0.027526885)); + target1 += mul(nd3, float4x4(-0.23702182, 0.23504741, -0.0055127465, -0.05391814, -0.033541497, -0.004652474, 0.018886803, -0.03751877, -0.17091194, 0.023785884, 0.04066812, -0.060029395, -0.041414294, 0.01717907, -0.03250043, 0.06307296)); + target1 += mul(ne3, float4x4(0.13159078, -0.21431974, 0.18132871, -0.057554632, 0.21089126, 0.2748356, 0.09672305, -0.06376276, 0.533338, 0.44528118, 0.005919547, -0.04472009, 0.057801563, 0.089425854, 0.00619256, -0.1724294)); + target1 += mul(nf3, float4x4(0.092838384, 0.10023682, 0.024242302, -0.17375132, 0.078034006, -0.19451837, -0.15501493, -0.13293402, -0.18569629, -0.19058825, -0.00096012745, 0.046021726, 0.10088386, 0.08519642, 0.13217524, 0.13243376)); + target1 += mul(ng3, float4x4(-0.027395649, 0.023635123, -0.010267926, 0.17099325, 0.12437585, -0.1249303, -0.06419651, -0.021422816, 0.17196923, -0.22701795, 0.015808482, -0.14445387, -0.05460773, -0.003828314, -0.18222629, -0.048698075)); + target1 += mul(nh3, float4x4(-0.013598317, 0.122526735, -0.02577042, 0.08744144, -0.08979224, 0.1286411, -0.05883814, -0.12919591, -0.050816238, -0.15597601, -0.115631886, -0.12070838, 0.11360469, -0.02584839, -0.1560333, 0.011274217)); + target1 += mul(ni3, float4x4(0.080101736, -0.1330538, 0.021211144, 0.1438211, 0.01772601, 0.11148414, 0.13287805, 0.15081206, -0.012008585, 0.06793454, 0.03773184, -0.032694455, 0.10249589, 0.026878785, -0.12141799, -0.09203301)); + target1 += float4(-0.034046266, 0.030718531, 0.029500093, -0.007484251); + + float4 
target2 = mul(a1, float4x4(0.07053637, -0.13757081, 0.083475806, 0.047742493, 0.17072907, 0.03391062, -0.06359729, -0.087144814, 0.13410439, 0.0766006, -0.07466357, 0.020072024, 0.070759356, -0.07371517, -0.002268697, -0.01926168)); + target2 += mul(b1, float4x4(0.044688217, 0.11774826, -0.042388003, -0.07989108, -0.1972503, 0.050274227, 0.016185008, -0.058738116, -0.19790363, -0.05735989, -0.047189124, -0.14251053, 0.020062871, 0.03521658, 0.08752947, -0.23372267)); + target2 += mul(c1, float4x4(0.25029907, -0.045921884, 0.09080718, 0.016080718, -0.084881, -0.11112135, 0.06381888, -0.11366214, -0.09134574, 0.078322254, 0.08380223, -0.04663652, 0.079209715, 0.019024234, -0.13660747, 0.014792784)); + target2 += mul(d1, float4x4(-0.32267445, -0.14074866, -0.07469012, -0.16251983, 0.03330349, 0.11198752, 0.031585973, 0.022867791, 0.018227417, -0.20173706, 0.06353359, 0.20548727, -0.028305814, 0.100267045, 0.10572279, 0.026997928)); + target2 += mul(e1, float4x4(-0.098945044, 0.05311362, -0.19431336, -0.04678114, -0.4157369, -0.042450808, -0.044422694, 0.12376175, -0.23616472, 0.28038284, 0.14551993, 0.09350881, -0.16741902, -0.038471445, 0.01986414, -0.17471582)); + target2 += mul(f1, float4x4(0.14448655, -0.019295415, -0.10982297, -0.06357956, 0.2369261, 0.032228198, -0.11679823, -0.05587635, 0.04871467, -0.0137740765, -0.07519058, -0.18964961, -0.026393183, -0.01974549, 0.06976226, -0.016081909)); + target2 += mul(g1, float4x4(-0.0137074115, -0.079286255, -0.12312942, -0.018789625, -0.013921108, -0.0020071631, -0.054753337, 0.017963797, 0.2555907, 0.032142516, -0.24176246, 0.16440704, -0.017065119, 0.051031727, -0.1010096, -0.034884788)); + target2 += mul(h1, float4x4(-0.060335524, 0.074403755, -0.03940425, -0.06947243, 0.063064545, 0.0003786891, 0.042550083, 0.014553617, -0.18506478, 0.14322414, 0.09673403, 0.19324619, 0.13711761, 0.14422627, -0.26975414, 0.1770873)); + target2 += mul(i1, float4x4(0.0749963, -0.0043254187, 0.007601493, 0.044143427, 0.07984297, 
-0.13162711, -0.0021585606, -0.009246663, 0.20984007, -0.046367027, -0.09268187, 0.14495128, -0.12562644, 0.1836961, -0.009291084, 0.09893831)); + target2 += mul(a2, float4x4(-0.04764635, 0.0771012, -0.06254785, 0.049306013, -0.1309074, -0.16724089, 0.064103976, 0.029477306, -0.15803875, -0.018528668, 0.008828626, -0.06195826, -0.06321009, -0.08585472, 0.062971294, -0.09935699)); + target2 += mul(b2, float4x4(0.09090366, 0.035622045, -0.07656447, 0.047136262, -0.12315156, -0.06781894, -0.16850029, -0.08679722, 0.031337507, -0.034778163, -0.0034067088, 0.060668938, 0.025843577, -0.13783671, 0.07269251, -0.016900677)); + target2 += mul(c2, float4x4(0.0440054, 0.0036595238, 0.008771493, 0.030218529, -0.027555777, -0.06657435, -0.055236634, -0.033271316, -0.13821629, 0.03597882, -0.05062761, -0.053709738, -0.0017135427, -0.030783407, 0.08677211, -0.07123904)); + target2 += mul(d2, float4x4(0.04535665, 0.10784165, 0.06629595, -0.08122243, -0.039657716, -0.18704408, 0.1423137, -0.042863034, -0.0742858, 0.18767954, 0.0049358946, -0.03671723, -0.26857832, -0.27597806, -0.15875868, -0.06401874)); + target2 += mul(e2, float4x4(-0.11346384, -0.02552838, 0.15989058, 0.135453, 0.11406132, -0.14827615, -0.034574773, 0.098348685, -0.11900814, -0.017299704, -0.0073399683, 0.28204438, 0.101091795, -0.05411511, -0.25915098, -0.18966594)); + target2 += mul(f2, float4x4(0.00533059, 0.02730481, -0.037623204, 0.0683161, -0.001293424, 0.048477355, -0.031527296, -0.06233651, -0.08181692, 0.15782121, -0.022148566, 0.023771856, 0.08038705, 0.20726775, -0.049652047, 0.013319854)); + target2 += mul(g2, float4x4(0.10297071, -0.004163597, -0.054161496, -0.047678668, 0.17382064, -0.07822223, -0.015010763, -0.04397959, 0.016814355, 0.029943537, -0.054495964, 0.025957715, -0.0020466947, -0.05330402, 0.1510799, 0.020855682)); + target2 += mul(h2, float4x4(-0.068892024, 0.06622253, -0.03464551, 0.058187455, 0.0040488504, 0.04686992, -0.031023791, -0.0217168, 0.27100065, 0.05667408, -0.1947591, 
0.016222548, 0.15030278, 0.037999425, 0.18916538, 0.031360295)); + target2 += mul(i2, float4x4(0.0033874374, 0.16642143, -0.05115266, 0.033188976, 0.15715985, -0.016430153, 0.061355177, -0.03946156, 0.097530834, 0.09636378, -0.0040456597, 0.03355033, 0.08691396, -0.059714377, -2.6330366e-05, 0.07581963)); + target2 += mul(a3, float4x4(0.009388512, -0.11915495, -0.013311355, 0.037321404, -0.055611674, -0.03687238, 0.04141501, -0.05869542, 0.07016199, 0.09323767, 0.04217543, 0.06623439, 0.03210602, -0.05782674, -0.11002717, -0.0072197197)); + target2 += mul(b3, float4x4(0.10509906, 0.09556673, 0.12621118, 0.05654386, -0.059508096, 0.05438697, 0.07796531, 0.013211419, 0.117890954, 0.06040751, -0.0016234997, 0.081311926, -0.13193677, 0.06996361, -0.13694339, -0.17900036)); + target2 += mul(c3, float4x4(-0.053371694, -0.06806307, -0.1518878, 0.033526573, 0.046281144, -0.046205673, -0.034051836, 0.030009115, 0.126423, -0.016593177, -0.039357234, 0.10646578, -0.049523186, 0.031685475, 0.020842545, -0.12438032)); + target2 += mul(d3, float4x4(0.13840649, 0.09017664, -0.049517624, 0.16685495, -0.08715648, 0.10113574, 0.018099392, 0.060909674, -0.0058692712, 0.045747917, -0.027853733, 0.05472168, 0.11535146, 0.18636864, 0.0723617, -0.014345535)); + target2 += mul(e3, float4x4(0.2979883, -0.30398092, -0.00448704, 0.034454245, 0.06174734, -0.012045507, 0.23352385, 0.09975292, 0.17575547, -0.013523325, -0.23254701, 0.13217497, -0.20258605, 0.07145199, 0.08916167, -0.16669402)); + target2 += mul(f3, float4x4(-0.1606433, 0.014213267, 0.046263646, 0.014771279, 0.041478187, -0.023410609, 0.14462134, 0.0359066, -0.14913698, -0.28093338, -0.098434776, 0.14168552, -0.017470809, -0.10474789, -0.019634444, -0.09452941)); + target2 += mul(g3, float4x4(-0.024310801, 0.039855056, -0.0277722, -0.0049349763, -0.075620376, 0.14836049, 0.16639285, -0.11209939, -0.06530567, -0.059602138, -0.090105936, -0.024020225, 0.03793827, -0.08396542, 0.03918101, 0.031654693)); + target2 += mul(h3, 
float4x4(-0.0019075832, -0.06279881, 0.019373834, -0.022848947, -0.19700366, -0.07276809, 0.10826095, 0.030095315, -0.18057819, -0.06393351, -0.023836957, 0.0065463074, 0.13035376, -0.06434109, 0.09293361, 0.03301868)); + target2 += mul(i3, float4x4(0.013273036, -0.016674511, 0.13465153, -0.10852922, 0.026329456, 0.13648263, 0.09414527, 0.0012146169, -0.04237767, -0.085370585, 0.05478497, -0.009154848, 0.06731204, -0.034912866, 0.022870043, 0.038516387)); + target2 += mul(na1, float4x4(-0.12806587, 0.108848855, 0.054954138, -0.04903305, 0.06644907, 0.18971404, -0.07503066, -0.04593073, -0.061711293, 0.014664125, 0.11856841, 0.045254968, -0.15008299, -0.074398935, 0.04106602, -0.24396381)); + target2 += mul(nb1, float4x4(0.038917247, -0.04640616, -0.08809665, 0.11144138, 0.22032888, -0.050760582, -0.014760546, 0.11236783, -0.059079073, 0.06937314, -0.031159066, -0.07186243, -0.20771858, 0.006415583, -0.07267114, 0.056808673)); + target2 += mul(nc1, float4x4(-0.00423516, 0.099120915, -0.059658743, 0.07284239, -0.062910534, 0.0818146, -0.04679385, 0.18365376, -0.09501989, -0.01619849, -0.060537007, -0.053941406, 0.01753884, 0.030266201, 0.03675036, 0.0021925808)); + target2 += mul(nd1, float4x4(0.046608146, -0.02341816, 0.07648551, 0.052848052, 0.0039356127, 0.036476433, 0.10236195, -0.032545663, 0.24973582, 0.037997168, 0.058406867, 0.07901572, -0.18169855, -0.005032321, -0.035923317, -0.14367534)); + target2 += mul(ne1, float4x4(0.16122861, -0.040731397, 0.106095225, 0.0036572781, 0.028426658, 0.20103998, -0.06638636, -0.022662058, 0.050278876, -0.163471, -0.21357286, -0.1825406, -0.050869223, -0.040973462, -0.057061654, 0.2810217)); + target2 += mul(nf1, float4x4(0.18768045, -0.040844023, -0.057449356, 0.12673363, -0.059018314, -0.030971631, 0.08158004, 0.06946996, -0.23750862, -0.00860647, -0.1003966, 0.034669064, 0.24486068, 0.07178945, -0.018041046, 0.15238568)); + target2 += mul(ng1, float4x4(-0.2525261, -0.054517742, -0.23935242, -0.101572365, -0.030042917, 
0.036003232, -0.122569695, -0.03556264, 0.06506395, -0.062265877, -0.1060633, 0.014838352, 0.108411096, -0.066448584, 0.0512793, 0.09429625)); + target2 += mul(nh1, float4x4(-0.13627818, -0.10708255, -0.19644266, -0.037392955, -0.045197092, 0.19481973, 0.13070573, -0.060230445, 0.33588138, -0.034298502, -0.11659601, 0.12985578, 0.13320427, -0.11824594, 0.026382592, -0.053538818)); + target2 += mul(ni1, float4x4(0.048666447, -0.017627062, 0.15464988, -0.045204308, -0.011256204, 0.13558346, -0.11524339, -0.09390041, -0.13328557, 0.0208915, 0.14780544, -0.076561615, 0.16454418, -0.06894578, 0.05620349, 0.023376953)); + target2 += mul(na2, float4x4(-0.15081851, 0.114050105, -0.20110038, -0.20664425, 0.024320586, 0.07185712, -0.07764587, -0.071572, 0.05626064, -0.033045493, 0.001562899, 0.025873706, -0.124655195, 0.008215636, 0.15243639, -0.004112266)); + target2 += mul(nb2, float4x4(-0.3066088, 0.03357835, 0.10992364, -0.06900282, -0.018690787, 0.06104214, -0.0041348864, 0.06407149, -0.03204788, 0.014121594, 0.08106938, -0.054475598, -0.265469, -0.14616208, -0.00401253, -0.03055698)); + target2 += mul(nc2, float4x4(-0.04365304, -0.03965277, -0.03783723, -0.064608894, -0.020902721, -0.14974843, -0.009566472, -0.05167228, 0.05844501, -0.06657627, 0.07489639, -0.033589855, -0.12991743, 0.06972872, -0.06635165, -0.012207249)); + target2 += mul(nd2, float4x4(-0.18625839, 0.054298103, -0.01828512, -0.11457061, -0.010832592, -0.058104735, -0.19629207, 0.04157041, -0.009692541, -0.14516611, -0.05172281, 0.049974106, 0.056659166, 0.04280708, 0.04385243, 0.010063984)); + target2 += mul(ne2, float4x4(0.30084208, -0.06846901, -0.026888005, -0.13171686, 0.14192836, 0.04704836, 0.121805556, 0.044525314, 0.083052106, -0.08510613, -0.12172583, -0.30419388, -0.0878148, -0.16297778, 0.2601781, 0.07285239)); + target2 += mul(nf2, float4x4(0.17655143, 0.030515455, 0.19467549, 0.06786994, 0.12391486, -0.07981, 0.032166053, 0.025423106, 0.03408094, -0.13683012, -0.1334616, -0.06392358, 
0.19483057, -0.26500967, -0.09117511, 0.065103196)); + target2 += mul(ng2, float4x4(-0.09752205, 0.052617133, 0.093155235, 0.036831614, 0.0069967005, -0.010922277, 0.03564152, -0.032540992, 0.08338763, -0.09178083, 0.044856872, -0.03585517, -0.03322978, 0.046807107, -0.08056567, -0.011965988)); + target2 += mul(nh2, float4x4(0.18745014, -0.036378495, -0.05438124, -0.07421527, 0.19114059, 0.06546488, -0.011771215, 0.023253547, -0.21524692, -0.18299192, 0.16316402, -0.07972637, 0.05902257, -0.073275164, 0.034378335, -0.03623065)); + target2 += mul(ni2, float4x4(0.11460803, -0.08311417, -0.013590335, 0.03570915, 0.089230955, 0.089890435, 0.020723056, -0.042275555, -0.0369812, -0.26617813, 0.046739977, -0.12200707, 0.14000617, -0.12750666, 0.118415974, -0.07106497)); + target2 += mul(na3, float4x4(-0.0142673105, 0.2708808, 0.035017774, 0.06703521, 0.029961549, 0.14673132, 0.063695125, -0.02033868, -0.06468242, -0.026121365, -0.0026516293, -0.027388861, -0.08078243, 0.087715834, 0.022955768, -0.02319636)); + target2 += mul(nb3, float4x4(0.1718002, -0.11518721, -0.0025345646, 0.1238703, -0.18476513, -0.21286273, 0.20789471, 0.01770004, -0.061133858, -0.0626756, 0.055727385, -0.008457355, 0.118455775, 0.10262385, -0.006697552, 0.014359785)); + target2 += mul(nc3, float4x4(0.09120904, 0.07372497, 0.11044388, -0.03926499, -0.03780375, 0.089837864, -0.09430582, -0.08950114, -0.14755902, -0.03831562, 0.023441203, -0.01944005, 0.04238117, 0.03447587, -0.06003528, 0.044492997)); + target2 += mul(nd3, float4x4(0.11524887, -0.039266784, 0.066004194, 0.0054254015, 0.05427426, -0.09622213, -0.10315417, -0.050567277, -0.07576602, -0.12929544, -0.047569484, -0.0073220725, -0.19992967, -0.09847938, -0.010317084, -0.06364932)); + target2 += mul(ne3, float4x4(-0.16610023, 0.2372482, -0.08908623, -0.0074210903, -0.30944943, -0.13369739, -0.14367966, -0.088355504, -0.17537226, -0.027992815, 0.2002623, -0.077625655, 0.43252298, 0.070107736, 0.12366656, 0.04695458)); + target2 += mul(nf3, 
float4x4(-0.16442165, -0.115128726, -0.07232464, -0.06728961, -0.28723717, 0.17190668, -0.08351212, -0.0021862125, 0.121666215, 0.21980163, 0.041315466, -0.062825136, 0.19541566, 0.11147719, 0.024110844, 0.03695252)); + target2 += mul(ng3, float4x4(0.1264344, -0.043413147, -0.0045117373, 0.11031058, 0.14368063, 0.0709618, -0.060608998, 0.12688136, -0.0067742122, -0.036186397, 0.0893928, 0.0054127555, -0.09240343, 0.00012509787, 0.22235379, -0.010589687)); + target2 += mul(nh3, float4x4(-0.27677822, -0.026975723, 0.1144896, -0.02925077, 0.31745398, 0.030609636, -0.058644157, -0.080246314, 0.16578154, 0.040172495, 0.1331117, -0.02078141, -0.04805901, 0.04640852, 0.10614158, 0.012697342)); + target2 += mul(ni3, float4x4(-0.11755322, 0.09768716, 0.0012669004, -0.110562816, 0.06637665, 0.02764571, -0.12623487, -0.023209875, 0.12958577, -0.007275613, 0.010377832, -0.0105528, -0.14058565, -0.030191245, 0.025604937, -0.013748175)); + target2 += float4(-0.03760731, -0.0242485, -0.021933366, 0.027489811); + + float4 target3 = mul(a1, float4x4(0.027916895, -0.12058145, -0.083479345, -0.026083047, 0.014759637, -0.07421897, -0.017896682, 0.086924285, -0.05399337, 0.07368837, -0.26842278, 0.09718693, -0.014846767, -0.002766841, 0.06403607, 0.0669811)); + target3 += mul(b1, float4x4(-0.021020316, -0.21434662, 0.06523966, 0.11869478, -0.21215962, -0.042433035, -0.10847877, -0.10532955, -0.18938127, 0.05224748, 0.02170683, -0.09415175, -0.08846007, -0.18066676, -0.108328596, 0.09838168)); + target3 += mul(c1, float4x4(-0.0014482798, -0.08546761, 0.053286448, -0.022678297, -0.009113084, -0.09750778, 0.10262376, -0.003133292, 0.026831077, 0.03348051, -0.031860266, 0.053811714, -0.05097466, 0.040436964, -0.063929394, -0.12586603)); + target3 += mul(d1, float4x4(0.015445512, 0.029696425, -0.15537772, -0.057669863, -0.058116574, 0.188642, -0.289381, -0.03078714, -0.010385, 0.014184904, 0.16890535, -0.02512565, 0.116905816, 0.043889366, -0.027181825, 0.015912583)); + target3 += mul(e1, 
float4x4(0.0659837, 0.09219005, -0.085082754, 0.051408056, 0.11564603, 0.038757283, -0.0004551866, 0.06365286, 0.09702758, 0.10664935, 0.34716737, 0.14471847, 0.28146356, -0.00765224, 0.1772403, 0.22182499)); + target3 += mul(f1, float4x4(-0.08183167, 0.05762959, -0.043376535, -0.16858399, 0.02308398, -0.055296358, 0.07663533, 0.106998175, -0.10704135, -0.02596522, 0.013459359, -0.044283163, 0.07874813, 0.022484714, 0.30442455, 0.071259074)); + target3 += mul(g1, float4x4(-0.0022170176, 0.022313403, 0.122714184, -0.026881142, 0.08282965, -0.06490924, 0.18938759, 0.0677223, 0.1294088, 0.059372786, -0.110964485, 0.010619735, -0.09610158, -0.022745904, -0.019792024, -0.0034128428)); + target3 += mul(h1, float4x4(-0.041866794, -0.13008577, 0.110913865, -0.016430711, 0.13772298, 0.06592027, -0.0034176896, -0.032333232, -0.0071776314, 0.031079333, -0.10468204, 0.026052983, -0.025571326, 0.048804965, 0.045641564, -0.035233505)); + target3 += mul(i1, float4x4(0.035716273, -0.100303516, -0.031205915, -0.031608257, 0.09185616, 0.020499555, -0.16786079, -0.12639952, -0.023970092, 0.08524624, -0.06759015, -0.12429372, -0.0127442675, 0.112773865, -0.26975212, -0.007704992)); + target3 += mul(a2, float4x4(-0.022916801, -0.09817618, 0.20416892, 0.12702931, -0.012764869, -0.038772803, -0.27769282, -0.034622744, -0.13437684, 0.02120745, -0.1548114, 0.006228885, 0.22984603, -0.2432492, 0.10387287, -0.059412073)); + target3 += mul(b2, float4x4(0.2363932, -0.11194499, -0.064652696, 0.07116429, -0.14603911, 0.18921274, -0.013249935, 0.13604592, 0.07306857, 0.08511469, -0.061648175, 0.052819334, 0.2016978, -0.13642034, -0.014157929, 0.072210535)); + target3 += mul(c2, float4x4(-0.18656836, 0.17888299, 0.003892404, -0.048626274, -0.11190575, 0.09419825, 0.1276045, -0.012969808, -0.14728911, 0.17536093, 0.050294712, 0.19381198, -0.06558784, -0.09776518, -0.11414383, -0.1866525)); + target3 += mul(d2, float4x4(0.15634118, 0.19272068, -0.05068886, -0.030995008, -0.035177864, 0.07032768, 
0.116517685, -0.09072261, -0.10217943, -0.09160873, -0.062906645, 0.12504682, -0.09060218, 0.014820002, 0.03459818, -0.108051665)); + target3 += mul(e2, float4x4(-0.018596219, -0.017673533, -0.006066184, -0.064746305, -0.11017345, -0.09556466, 0.21868086, -0.027287072, -0.11609723, -0.11342597, 0.1788956, 0.035506073, -0.096203305, -0.037712112, 0.14055523, 0.16230235)); + target3 += mul(f2, float4x4(0.0056694653, -0.045156818, 0.10069355, -0.07291589, -0.048202783, -0.12425812, 0.15425333, -0.052708663, -0.17621729, 0.06338879, 0.1205465, 0.1754123, -0.028028619, 0.11745275, -0.120209925, 0.031073835)); + target3 += mul(g2, float4x4(-0.058996305, 0.042816967, -0.002514556, -0.0055761277, -0.05223201, -0.17414387, 0.13102455, 0.02741174, -0.0785701, 0.080076955, -0.058584027, 0.034780204, -0.0276381, 0.055405296, -0.09418891, 0.0013168643)); + target3 += mul(h2, float4x4(-0.03179422, -0.080134, 0.055742502, -0.14085148, -0.0010870491, 0.006537003, -0.0013490077, -0.086607024, 0.057871595, -0.07872675, 0.22251247, -0.052358106, -0.1307614, -0.22731845, -0.06979484, 0.00849211)); + target3 += mul(i2, float4x4(0.14046812, 0.04020691, -0.032090276, -0.09295882, -0.11624229, -0.10551015, 0.08762072, -0.07726716, -0.1498316, -0.17169969, -0.04618553, 0.14997219, -0.012493408, -0.06838468, 0.10913737, 0.018050432)); + target3 += mul(a3, float4x4(0.033775266, -0.07618233, 0.11493082, -0.026212664, -0.04540022, 0.026137805, -0.13465577, 0.15029386, 0.07141062, 0.09716221, -0.025410062, 0.032656677, 0.037815653, -0.040112175, 0.11637884, 0.11825522)); + target3 += mul(b3, float4x4(0.14827983, -0.081768006, -0.05624141, -0.0044099363, 0.032463025, 0.10766071, -0.061944928, -0.15250987, -0.14692347, 0.1010062, 0.1384647, 0.079083905, -0.11822245, -0.054733507, 0.06650751, 0.04349182)); + target3 += mul(c3, float4x4(-0.0189154, -0.033869926, -0.008463885, -0.1524785, -0.017154029, -0.021163551, 0.045354687, 0.056154214, 0.0996, 0.18220654, -0.02275312, -0.051522568, 
-0.015781462, -0.14436358, -0.13036303, -0.08281256)); + target3 += mul(d3, float4x4(0.060433608, 0.025449814, 0.03162007, -0.13456888, -0.02134081, -0.15663207, 0.17289546, 0.0011385656, 0.059484057, 0.052233964, -0.03541694, 0.113927364, -0.0010070644, -0.005570618, 0.12790091, 0.16491406)); + target3 += mul(e3, float4x4(-0.033132974, -0.098377176, -0.12513644, 0.08796082, -0.0028096333, 0.09346144, -0.18109433, 0.17953019, 0.10331962, -0.111182235, -0.11015705, -0.15538633, -0.015985087, 0.06262592, 0.11767445, 0.04020116)); + target3 += mul(f3, float4x4(-0.10818913, -0.10614671, -0.0061792796, 0.07901479, -0.0018810411, 0.0026243017, 0.15281977, 0.1715038, -0.017927805, 0.07613347, 0.033450123, -0.23979095, 0.028044673, 0.060702097, 0.019822879, -0.14792976)); + target3 += mul(g3, float4x4(-0.008551052, -0.03843347, 0.0472157, -0.010416428, -0.01928176, -0.0063839755, 0.03508731, 0.24043478, 0.033645283, -0.11676803, -0.06743783, -0.034164682, -0.014925681, -0.030447507, -0.111160316, -0.034867767)); + target3 += mul(h3, float4x4(-0.102300785, 0.114739686, -0.007856566, -0.12389364, -0.18574199, 0.06441196, -0.1979763, -0.016671708, -0.09252569, 0.0037067563, -0.0609829, 0.028997343, 0.047285903, -0.018309064, -0.027229104, 0.06743576)); + target3 += mul(i3, float4x4(-0.055446856, -0.06821513, -0.0059853215, -0.13260886, 0.083104685, -0.11773866, 0.007317027, -0.039318476, -0.0042170533, 0.0121953655, -0.010792958, -0.010249791, 0.007397987, 0.0047044945, 0.049882278, 0.0047567203)); + target3 += mul(na1, float4x4(0.03465105, 0.062134508, -0.043116115, -0.017247844, -0.04502861, -0.10212199, 0.16550505, 0.016599817, 0.08857375, -0.03961283, 0.13870746, -0.082080655, -0.08469554, -0.18640712, -0.014425766, 0.034508247)); + target3 += mul(nb1, float4x4(-0.12399076, 0.22634715, -0.13730592, 0.04840304, 0.09450334, -0.065218486, 0.0068855314, -0.049165834, -0.011287574, -0.10739019, -0.00023772087, 0.09688784, 0.10983027, -0.011201701, 0.14466487, -0.21600902)); + 
target3 += mul(nc1, float4x4(-0.05468909, 0.050734483, 0.046412308, -0.09749245, 0.05704707, 0.22612362, -0.15571213, 0.06998293, 0.017409045, -0.13634662, -0.020574553, -0.073725305, 0.04699205, -0.08355112, 0.08512415, 0.15568486)); + target3 += mul(nd1, float4x4(0.10635322, 0.07337078, -0.07432055, 0.004248984, 0.027724393, -0.040500402, 0.196942, 0.041983824, 0.083976634, 0.10290795, -0.3009756, 0.082270764, -0.15817869, -0.027697606, -0.029153766, 0.08529106)); + target3 += mul(ne1, float4x4(0.14958759, 0.13267447, 0.22206177, -0.17663805, -0.10765967, -0.03566466, 0.04633988, -0.03062237, -0.20792471, -0.002921972, -0.15749575, -0.22428021, -0.23200673, 0.14563684, -0.160325, -0.266424)); + target3 += mul(nf1, float4x4(-0.13190623, 0.041341502, -0.12777801, -0.055840913, -0.112986885, -0.0021044768, -0.12469129, -0.11046474, -0.03600098, -0.011692557, -0.02686337, -0.17009224, -0.0820219, -0.029119916, -0.111095175, 0.15297051)); + target3 += mul(ng1, float4x4(0.14414293, 0.02744959, -0.102789834, 0.006705362, -0.030359348, 0.083485864, -0.12009053, -0.02636556, 0.08503298, -0.10867725, 0.09814758, -0.14605886, 0.16700824, -0.0866019, 0.008852153, -0.21706365)); + target3 += mul(nh1, float4x4(-0.011756063, 0.008039321, 0.03698028, -0.10509595, -0.099564835, 0.009903015, -0.08965568, 0.06633642, 0.10181769, 0.08294756, 0.025898153, -0.098384134, 0.066339396, -0.02191258, 0.03265874, 0.1477094)); + target3 += mul(ni1, float4x4(-0.22950448, -0.07607528, 0.016735366, 0.083834045, -0.005080134, 0.09744342, -0.105208844, 0.043603517, 0.005231004, -0.023469515, 0.08517984, 0.14299476, 0.0062482157, -0.09623864, 0.097964756, 0.11196982)); + target3 += mul(na2, float4x4(-0.31986436, 0.02033341, -0.067986034, 0.12729374, -0.0048481217, 0.05469139, 0.16248406, 0.09194327, 0.024411261, 0.051379394, 0.00034975683, -0.021101091, 0.00954231, 0.02220226, -0.32092375, -0.039599743)); + target3 += mul(nb2, float4x4(-0.23243247, 0.102910504, 0.08704054, -0.16638695, 
-0.04414702, 0.10768372, 0.06244856, -0.053088184, -0.07190515, 0.025491035, -0.0073894467, -0.06960583, 0.04625048, 0.09757096, -0.014015539, -0.2686573)); + target3 += mul(nc2, float4x4(0.09618109, -0.04002844, 0.10706359, 0.0021603133, -0.10353008, -0.051047757, 0.22455198, -0.034693047, 0.0572685, -0.055035133, 0.004646706, 0.097952425, 0.14423034, 0.03551641, 0.17294352, 0.18931827)); + target3 += mul(nd2, float4x4(-0.103451714, -0.14722984, 0.075681895, 0.05237415, -0.11553789, -0.04747042, 0.06682777, -0.094138026, 0.17443697, 0.022166768, -0.033095736, -0.060237505, 0.12380581, -0.0075241313, -0.07084953, -0.036764625)); + target3 += mul(ne2, float4x4(-0.035672948, -0.06520371, -0.09139108, 0.04217135, -0.117305085, 0.07602235, -0.15833357, 0.056191333, -0.11441557, 0.037268326, -0.028076539, -0.12540102, 0.016748995, 0.034004167, -0.1824477, -0.16126373)); + target3 += mul(nf2, float4x4(0.06134336, -0.11747715, -0.08211664, 0.08370146, -0.12180083, 0.19250062, -0.054975577, 0.020182844, 0.08444608, -0.06466239, 0.015815528, -0.031805765, -0.0028007699, 0.08060802, 0.15744543, -0.12746236)); + target3 += mul(ng2, float4x4(0.043641105, -0.07119625, -0.042450625, 0.05739444, 0.018069813, 0.029118251, 0.0061236136, 0.07221804, -0.011486244, -0.041661404, 0.2197789, -0.020237818, 0.15324089, -0.02419463, 0.095150515, -0.048418492)); + target3 += mul(nh2, float4x4(0.010760071, 0.079417765, -0.038494457, 0.0804348, -0.03777174, -0.2785645, -0.0018691403, -0.009908184, -0.2519993, -0.021114716, -0.075966366, -0.11307284, 0.042725798, 0.02793535, 0.08475073, 0.00719373)); + target3 += mul(ni2, float4x4(-0.06026802, -0.1285141, -0.015326734, -0.092160225, -0.03740965, -0.10725952, 0.11102985, -0.05550745, 0.07659162, -0.115331456, 0.003444734, -0.054064468, -0.08475641, -0.08501742, -0.24890389, 0.07931074)); + target3 += mul(na3, float4x4(0.16370693, 0.14513049, -0.13996753, -0.061734002, -0.030769601, 0.057222515, 0.050910987, -0.04650852, -0.054636024, 
-0.021683916, -0.17012738, 0.020975761, -0.1575395, 0.23097757, -0.20053351, -0.03677814)); + target3 += mul(nb3, float4x4(0.08665788, 0.11735751, -0.017768439, 0.0068110893, 0.2169534, 0.04611748, -0.05265798, -0.14298616, 0.030219741, 0.0361948, -0.17905854, -0.072263926, -0.12066245, -0.043840945, -0.075282134, 0.062113304)); + target3 += mul(nc3, float4x4(-0.07236986, 0.12181904, -0.010601836, 0.14551845, -0.073809735, 0.15977979, -0.018897848, 0.036385477, 0.0025911513, 0.026647402, 0.07882444, 0.028249063, 0.009689747, -0.03413688, -0.032440297, 0.060033906)); + target3 += mul(nd3, float4x4(0.0063548526, -0.05827531, -0.0863922, 0.09530562, -0.007424638, 0.2742968, -0.44429728, 0.1693316, 0.00851462, 0.018132828, -0.014929005, -0.08181229, -0.12771043, -0.15851092, -0.08833768, -0.05561009)); + target3 += mul(ne3, float4x4(-0.17187662, -0.020507278, -0.00087365095, -0.17611316, -0.13882494, 0.07799683, 0.06299509, -0.33718416, -0.19870155, 0.055342596, 0.1495889, 0.13743624, -0.16251567, -0.02317984, -0.3027063, 0.07310683)); + target3 += mul(nf3, float4x4(0.07612367, -0.06315094, 0.086967595, -0.17633231, 0.010166444, 0.109485, -0.06876594, 0.19186738, 0.12188993, -0.010759893, 0.0059104343, 0.21518311, -0.14552301, -0.04969499, 0.013590615, -0.0688024)); + target3 += mul(ng3, float4x4(-0.0505275, 0.05859463, -0.08945146, -0.0057924157, 0.058152966, -0.024229135, 0.031221801, -0.15067945, -0.018535225, 0.13843696, 0.041234065, 0.051733483, -0.10050763, 0.10705917, -0.022969715, 0.03912073)); + target3 += mul(nh3, float4x4(-0.009000505, -0.11456071, 0.0340094, 0.12444861, 0.07345543, -0.1419509, 0.092182405, 0.056249533, 0.063071616, -0.010534381, 0.056680985, 0.025993576, -0.13020347, 0.066157125, 0.0073951716, -0.027919816)); + target3 += mul(ni3, float4x4(0.11827389, 0.111768976, 0.024734994, -0.008209825, -0.11939657, 0.049890216, -0.14757815, -0.0018939807, -0.108214505, -0.13791578, 0.06980697, -0.035102874, 0.0068360427, 0.15766092, -0.0094464505, 
0.02528075)); + target3 += float4(0.15827118, -0.013269078, -0.026832024, -0.007341773); + + tex4[gxy] = target1; + tex5[gxy] = target2; + tex6[gxy] = target3; +} + +//!PASS 5 +//!DESC Conv-4x3x3x24 +//!IN tex4, tex5, tex6 +//!OUT tex1, tex2, tex3, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass5(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex4.SampleLevel(sam, pos, 0); + float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = 
tex5.SampleLevel(sam, pos, 0); + float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex6.SampleLevel(sam, pos, 0); + float4 f3 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.0858087, -0.091669075, -0.029618878, -0.006829049, 0.05929614, -0.10007056, -0.07165286, -0.044839766, 0.16440393, -0.013165904, 0.16345644, -0.040166497, 
-0.08533438, 0.033274904, 0.023744298, 0.00462745)); + target1 += mul(b1, float4x4(-0.012197617, -0.17494427, -0.044840526, 0.09467358, 0.0670941, 0.04051791, 0.031950857, 0.043632418, 0.27560753, -0.18038619, -0.016762096, -0.18554263, -0.07514284, -0.12060545, -0.06567658, 0.095817134)); + target1 += mul(c1, float4x4(-0.006523474, -0.08222627, -0.0071327686, 0.0019292525, 0.10427757, -0.12537085, 0.04317682, -0.073334634, 0.05154215, -0.091944076, -0.02118822, -0.056948982, -0.13910337, 0.03694039, -0.057772428, -0.04310826)); + target1 += mul(d1, float4x4(0.06324951, -0.22648853, -0.07255338, 0.017026639, -0.008103722, -0.21415114, -0.26123968, -0.12380067, -0.122654535, -0.072686374, -0.106361255, -0.085339114, -0.032308288, 0.15890516, 0.06622756, -0.023396786)); + target1 += mul(e1, float4x4(-0.042859413, -0.09672486, -0.25197455, -0.09964709, 0.29545367, -0.031414457, -0.09069656, -0.1515025, 0.0027435324, -0.22244011, -0.028398262, 0.023005402, -0.01898338, -0.044614386, -0.15384883, -0.03421089)); + target1 += mul(f1, float4x4(0.02024483, -0.0024315433, -0.12607838, -0.06749624, 0.18010019, -0.13433436, 0.023777714, -0.000595795, 0.06660958, 0.0047493204, -0.008804406, -0.01993787, -0.16380167, -0.116379924, -0.08225346, -0.15736714)); + target1 += mul(g1, float4x4(0.04966525, -0.18904372, 0.0033124757, 0.007581284, 0.04973636, -0.054151382, -0.0019207672, 0.035237975, 0.050646428, 0.054101888, 0.10167154, -0.06646531, -0.1516706, 0.0069144946, -0.11062034, 0.00401821)); + target1 += mul(h1, float4x4(0.042625226, -0.13069777, -0.16180645, -0.023299642, 0.026805617, -0.05658099, 0.043719705, 0.1092636, 0.12341378, 0.042006955, 0.18066125, 0.16378504, 0.11135436, 0.08376113, 0.031495962, 0.04347884)); + target1 += mul(i1, float4x4(-0.006632579, -0.08609527, -0.08015572, -0.09973007, 0.0060661826, -0.0025273473, 0.027333017, 0.19469416, 0.115743786, -0.048629977, 0.07540239, 0.060625635, 0.11000561, -0.0005722454, -0.17226484, -0.27114823)); + target1 += 
mul(a2, float4x4(0.055690464, -0.02658434, 0.07268652, 0.10016923, -0.16319957, 0.17007092, 0.21633402, 0.20618637, -0.113490485, 0.02199878, 0.0055137747, 0.12215447, -0.09464513, 0.045647286, -0.14000376, 0.073245205)); + target1 += mul(b2, float4x4(-0.23644187, 0.079229124, 0.15172592, -0.13774072, -0.026849356, -0.021209542, -0.027881218, 0.033904858, 0.038394954, -0.087262556, -0.08262074, -0.031362742, -0.043572012, -0.025324758, -0.030077327, 0.005004752)); + target1 += mul(c2, float4x4(0.09973582, 0.010234953, -0.08513304, -0.03290182, -0.06016728, -0.14835075, -0.049367975, -0.109585285, 0.1308012, -0.05561698, 0.02354898, 0.013404379, 0.076999895, 0.0965049, -0.08259544, 0.08366891)); + target1 += mul(d2, float4x4(-0.14182909, 0.08395952, 0.08790292, -0.10685734, 0.20583102, 0.04878629, 0.33621716, 0.022593737, 0.11261521, 0.21141209, 0.20565145, 0.10242992, 0.024171967, 0.0531655, 0.049630456, -0.026225826)); + target1 += mul(e2, float4x4(-0.24823152, -0.004493935, -0.098093554, 0.024939846, -0.18618853, 0.23347515, 0.3677701, 0.116543904, -0.0123427, -0.023756992, -0.047602683, -0.17058952, 0.255566, -0.16041403, -0.12076221, -0.16564348)); + target1 += mul(f2, float4x4(-0.013236432, 0.076761626, 0.081881255, 0.09071596, -0.11729648, -0.006444196, 0.10923074, 0.13791469, -0.043651752, -0.18294181, -0.040916644, 0.097785, -0.0028955766, -0.0027983326, 0.073921174, -0.10227346)); + target1 += mul(g2, float4x4(-0.0018851581, 0.042510424, -0.049210977, 0.054145046, 0.062141296, -0.05751743, -0.1545065, -0.07428518, -0.0012378759, 0.0046147997, 0.05624973, 0.026027879, 0.014657843, 0.03690237, 0.020237725, 0.028257368)); + target1 += mul(h2, float4x4(-0.0131197, -0.068101935, 0.1810527, -0.16003254, -0.031678617, 0.02635583, 0.028078066, -0.061150465, 0.011311746, -0.13556738, 0.010152058, -0.27962342, -0.06454591, 0.20071113, -0.17109145, 0.24637976)); + target1 += mul(i2, float4x4(0.010061335, -0.001633492, 0.015894579, 0.11813777, -0.0085875485, 
0.16046463, -0.03870673, -0.1573532, 0.007046577, -0.032677893, -0.03055368, 0.16372435, -0.023844553, 0.0414419, 0.024983952, 0.118105516)); + target1 += mul(a3, float4x4(-0.015675241, 0.06467378, -0.06385561, 0.23119383, 0.19577031, 0.09614502, 0.14291568, 0.0646477, -0.04580087, -0.054257967, 0.0035926772, -0.084984995, 0.07729014, -0.12703735, 0.021946913, -0.13898507)); + target1 += mul(b3, float4x4(0.04983293, -0.1356944, 0.06499119, 0.18219076, -0.0012843006, 0.017029244, 0.072508365, -0.109151654, 0.20020588, -0.01626167, -0.046852995, -0.15508164, 0.17773204, -0.11779289, 0.16009867, -0.20663622)); + target1 += mul(c3, float4x4(-0.29108632, -0.10655777, 0.044198614, -0.04410373, -0.04469665, 0.026585957, -0.03942788, 0.15000731, 0.09523267, -0.13746834, 0.04037757, -0.100768045, 0.13844049, 0.015964666, 0.050790466, -0.0692099)); + target1 += mul(d3, float4x4(-0.06911559, 0.012809353, -0.12680744, -0.048866037, 0.055650793, -0.0578033, -0.098106444, 0.074231684, 0.07584408, -0.06642763, -0.12730137, -0.19599968, 0.057783168, 0.2112361, 0.2594585, -0.005996189)); + target1 += mul(e3, float4x4(0.14301813, -0.11221026, -0.13259968, -0.094263665, -0.07149053, -0.048891526, -0.032946635, -0.10009779, 0.27363536, 0.14980642, -0.02908678, 0.0047521754, 0.3446896, 0.026126916, 0.36222085, 0.18175994)); + target1 += mul(f3, float4x4(-0.14227025, -0.043589745, -0.026543979, -0.16103217, -0.15027285, 0.039540008, -0.006172108, -0.10215637, 0.054667268, -0.056833044, -0.041795656, -0.13694401, 0.10057133, -0.13033262, 0.10366755, -0.022725947)); + target1 += mul(g3, float4x4(0.09428237, 0.013572218, 0.08937405, 0.07288141, -0.11737223, 0.23257263, -0.04531822, 0.13323838, -0.06946843, 0.09392816, 1.8482398e-05, 0.099077396, 0.035169534, -0.25623903, -0.018828487, -0.06300839)); + target1 += mul(h3, float4x4(-0.026215252, 0.03051006, 0.07113607, -0.12154545, 0.0040449486, -0.123852775, -0.08525913, -0.1901913, 0.017407645, 0.16107552, -0.12645124, -0.017211819, 
-0.07441704, -0.005858128, -0.011531684, 0.32415336)); + target1 += mul(i3, float4x4(-0.17503254, -0.0015612559, 0.10169017, 0.019084195, 0.021156454, -0.310495, 0.04608056, 0.10953508, 0.19300106, -0.22451214, 0.03351187, -0.23461004, 0.072505705, 0.015136174, 0.04027726, 0.07976788)); + target1 += mul(na1, float4x4(0.063301176, -0.05981131, 0.08261184, -0.14083353, -0.03195537, 0.0578306, 0.02804363, -0.20032683, -0.05517824, -0.0757025, -0.19914438, -0.042908937, -0.030248174, -0.25152618, 0.050725143, 0.1369699)); + target1 += mul(nb1, float4x4(0.1003561, 0.0734191, 0.09896423, -0.08019514, 0.014390224, -0.00054874463, 0.037597846, -0.09500772, 0.09027194, 0.101415, -0.040434603, 0.08962564, -0.22431703, -0.07749036, 0.0013332567, 0.09083244)); + target1 += mul(nc1, float4x4(0.008767293, 0.024598125, -0.006601763, -0.028163001, -0.024962863, 0.08638626, -0.028004736, -0.08388235, 0.022387464, 0.070822835, -0.022390194, 0.035980027, -0.043976255, -0.13879846, -0.0064368118, 0.1285523)); + target1 += mul(nd1, float4x4(0.04603082, 0.12994967, 0.081791796, 0.09700837, 0.023545785, 0.06551075, 0.12287056, -0.22250815, -0.16573791, -0.047054622, 0.07907622, -0.031006414, 0.06890266, -0.123863325, 0.067868486, 0.18396272)); + target1 += mul(ne1, float4x4(0.16980824, 0.015854107, 0.17575637, 0.15477645, -0.09076066, -0.05120928, 0.03070066, 0.11817304, 0.06490148, -0.012875289, 0.006170174, 0.11815885, 0.041828565, -0.17298366, -0.014634091, 0.124265894)); + target1 += mul(nf1, float4x4(0.06368938, -0.032462507, 0.04960355, -0.04891845, -0.0038693375, 0.12922424, 0.0047689294, -0.08684701, 0.095224895, 0.0020490738, -0.046015333, -0.18551406, 0.07551751, -0.071112834, 0.02344341, 0.04623062)); + target1 += mul(ng1, float4x4(-0.056204665, 0.013370383, -0.0043938635, 0.000645203, -0.09654348, 0.02182419, 0.0059207743, -0.13559262, 0.06930852, 0.008161434, 0.016864823, 0.11678153, 0.0039051562, -0.07914336, 0.04955842, 0.017483408)); + target1 += mul(nh1, 
float4x4(0.010976288, 0.10715081, 0.09279967, 0.020898886, 0.013906253, 0.09519829, -0.033453338, 0.06451083, -0.043613426, -0.009969589, -0.11931571, 0.03572111, 0.0051956573, -0.0647261, -0.07640106, 0.14315592)); + target1 += mul(ni1, float4x4(0.051365715, 0.05332849, 0.028890109, -0.047791258, 0.088561386, 0.07254739, -0.04627223, -0.066297226, -0.106467545, -0.061747868, -0.04026904, 0.0245163, 0.104035124, 0.0005123147, 0.09572231, 0.23461665)); + target1 += mul(na2, float4x4(0.025117617, -0.051919132, -0.10238518, 0.0087314, -0.24502674, 0.24725929, 0.020510906, -0.11374982, -0.08788345, 0.14287415, -0.05371828, -0.09765232, 0.089326784, 0.059355933, 0.003667818, 0.007658546)); + target1 += mul(nb2, float4x4(0.032038644, -0.064910114, -0.041199267, 0.07735183, 0.023076316, 0.2316058, 0.14530785, 0.01587883, 0.17309736, 0.034302652, 0.078950614, 0.07103009, 0.042525988, 0.026763001, 0.03962311, -0.017174084)); + target1 += mul(nc2, float4x4(-0.045589544, 0.09957158, 0.0106373755, 0.029290373, -0.012066452, 0.107432865, -0.10563313, 0.08964528, 0.081136055, 0.16387793, -0.037766423, 0.083933, -0.0100631835, -0.041015863, 0.020540452, -0.01647507)); + target1 += mul(nd2, float4x4(0.12199317, -0.078802, -0.14587677, -0.05266387, -0.2739342, -0.07832318, -0.02654562, -0.19134043, -0.15542388, 0.10411293, 0.0068920734, -0.18878813, 0.032211393, 0.15987061, -0.12149388, 0.09021272)); + target1 += mul(ne2, float4x4(0.017827235, 0.021951806, 0.12995765, -0.1925006, 0.43689772, -0.067384414, -0.12091599, -0.20399235, 0.11603679, 0.26069987, 0.09115632, 0.29782417, -0.37195182, 0.17822702, 0.26410392, 0.22584216)); + target1 += mul(nf2, float4x4(0.00091769337, -0.10308406, -0.03788696, 0.009829855, 0.07434385, -0.055530947, -0.119265415, 0.03404489, 0.13412294, 0.19281757, -0.21274371, -0.09527358, 0.022169666, 0.06841656, -0.09664997, 0.02376001)); + target1 += mul(ng2, float4x4(0.02522274, -0.07492225, 0.108729765, -0.084396936, -0.1450858, -0.084260635, 0.036976974, 
0.07395254, 0.102778085, -0.04220765, 0.015409228, -0.09215815, 0.033722915, 0.09410213, 0.06759963, 0.03639959)); + target1 += mul(nh2, float4x4(-0.0129304, 0.049982294, -0.16211604, 0.15442637, -0.00069647084, 0.026756465, -0.06687889, 0.25361556, -0.022950223, 0.123774804, -0.018783078, 0.23009071, 0.08808236, -0.02500049, 0.058831975, -0.059854023)); + target1 += mul(ni2, float4x4(0.0282501, -0.06622134, -0.03940754, -0.057764836, 0.023595478, 0.12694593, -0.0038103263, 0.1507626, 0.03539641, 0.04670363, -0.07688535, -0.04822336, 0.054404292, -0.07222161, -0.09880846, -0.09190709)); + target1 += mul(na3, float4x4(0.028638069, 0.032332644, 0.10077746, -0.13334957, -0.068841085, -0.11133997, 0.03466411, -0.10885937, 0.050528128, -0.09258964, -0.07510585, -0.031298082, 0.074979246, 0.02487803, 0.05295848, 0.032633457)); + target1 += mul(nb3, float4x4(-0.07998351, 0.05691501, 0.036540154, 0.00094257767, -0.15473618, -0.06821505, -0.021972192, 0.22512725, -0.028976573, -0.016970608, -0.07117851, 0.0005293328, 0.18183869, 0.06063047, 0.016248764, 0.10007656)); + target1 += mul(nc3, float4x4(0.07268518, -0.09155798, -0.002566672, -0.018126441, -0.16046503, 0.048856504, -0.011080104, 0.036383335, 0.08598702, 0.19642924, -0.049759824, 0.07246208, 0.13074261, -0.058662284, 0.03459059, -0.04198155)); + target1 += mul(nd3, float4x4(0.12882999, -0.021765677, 0.21212584, -0.099195495, -0.105583504, 0.17039305, 0.06333295, 0.090153314, -0.038552936, 0.033885363, 0.04958857, -0.024823477, -0.06625663, -0.013032451, -0.22981462, 0.12724645)); + target1 += mul(ne3, float4x4(-0.2467723, 0.09133717, 0.08810061, 0.07691946, -0.20903678, 0.08403025, 0.010679062, -0.03956549, -0.011411588, -0.022170769, -0.06298385, -0.10067764, 0.03551502, -0.0736268, -0.109931275, -0.15795651)); + target1 += mul(nf3, float4x4(0.050697595, -0.012465872, -0.068512246, 0.1556296, -0.17526908, -0.06572342, -0.018147333, 0.026501164, -0.049490303, 0.061885685, 0.02423367, 0.027439872, 0.37354204, 
-0.09908201, -0.10852779, -0.047961403)); + target1 += mul(ng3, float4x4(-0.1904858, -0.08803705, -0.15687172, -0.14420407, 0.058089714, 0.103457965, 0.048995994, 0.057829436, -0.05192929, 0.019802462, -0.04511639, -0.0038435445, 0.2194401, -0.043761376, 0.1312272, 0.054326482)); + target1 += mul(nh3, float4x4(-0.07349653, -0.121248744, 0.057792176, 0.099129215, -0.20087934, -0.031952746, -0.09673813, -0.031076657, -0.03237994, -0.020143004, -0.12516364, 0.007846103, 0.17188387, -0.2825958, 0.07352815, -0.021273587)); + target1 += mul(ni3, float4x4(0.14606737, 0.04232884, -0.04508154, 0.11619671, -0.14093883, 0.022675866, 0.004869404, -0.1476083, -0.15496063, -0.11994992, -0.07718659, 0.0023026431, 0.012474549, 0.107636444, -0.08887454, 0.23727296)); + target1 += float4(-0.112874866, 0.058437236, -0.011864247, -0.050339766); + + float4 target2 = mul(a1, float4x4(0.016881438, -0.044336118, 0.11502204, -0.10677853, 0.08977789, -0.059579305, 0.109261245, 0.10357805, 0.3364402, 0.14823961, 0.06096494, 0.15078168, -0.11029799, 0.074738294, 0.012435908, -0.0106727)); + target2 += mul(b1, float4x4(-0.109714404, 0.008750308, 0.1948044, -0.1396421, 0.04144051, -0.12435535, 0.07815825, 0.019051697, -0.07954506, -0.0965191, -0.2027906, 0.17172056, -0.26384082, 0.13519175, 0.04667002, 0.021707565)); + target2 += mul(c1, float4x4(0.07089612, 0.007484666, 0.104900375, 0.04954983, -0.06030455, -0.12300262, 0.05197505, -0.041572303, 0.009151977, 0.0799586, -0.04780254, -0.13600186, 0.18708369, 0.047879692, 0.040363688, 0.042251408)); + target2 += mul(d1, float4x4(0.063153245, 0.0050982186, 0.17556845, -0.06571311, -0.03749878, 0.014326615, -0.11032866, -0.27065897, -0.10525074, 0.13344456, -0.16412602, 0.049715474, 0.061313376, -0.24646369, 0.14604661, -0.09024816)); + target2 += mul(e1, float4x4(-0.026787708, 0.09889526, -0.050714437, -0.18990894, -0.087063104, -0.12555519, -0.0064119035, -0.0066881417, 0.09163732, -0.13423459, -0.05758272, -0.11314744, -0.025565878, 0.078317784, 
-0.14494383, -0.14658383)); + target2 += mul(f1, float4x4(0.0110544115, 0.06727031, -0.099965416, 0.059974622, -0.15135513, -0.038787033, 0.07673858, 0.017850239, 0.02383772, -0.1985737, 0.094468035, -0.097836666, 0.19387084, 0.06476367, -0.15316796, 0.023932178)); + target2 += mul(g1, float4x4(0.009757702, 0.14015986, -0.10496543, -0.044595003, -0.024084711, 0.012144018, 0.0981338, -0.012515983, 0.0060429554, -0.07067267, 0.12682167, -0.11384025, -0.045077007, 0.035177663, -0.06615891, 0.045473646)); + target2 += mul(h1, float4x4(-0.111679845, 0.026028167, 0.04201027, -0.16000839, 0.037475422, 0.038049795, -0.16249935, 0.02239824, -0.018402468, 0.012291931, -0.07117686, -0.09280776, 0.20392933, -0.02732328, 0.045456327, -0.078981794)); + target2 += mul(i1, float4x4(0.03066143, -0.0739708, -0.13922189, -0.12682499, -0.1299339, -0.09163088, -0.07340559, -0.050614927, -0.03902327, 0.09319906, 0.23746358, -0.12093768, -0.0071099317, -0.060775675, 0.15018946, -0.013342442)); + target2 += mul(a2, float4x4(0.07596372, 0.20584284, -0.02217273, 0.2564209, 0.11423146, 0.0098925475, -0.19662452, -0.1313871, 0.08574315, 0.0067099673, -0.08959511, -0.13870555, -0.08032657, 0.013044774, 0.059738085, -0.06106972)); + target2 += mul(b2, float4x4(-0.017293025, 0.15636088, -0.10619794, 0.108079255, -0.1168585, 0.09589374, -0.08682874, -0.08116015, 0.005083832, -0.15325624, -0.022297598, 0.23929761, -0.024037527, -0.006906145, -0.17204066, 0.003067951)); + target2 += mul(c2, float4x4(0.036887415, 0.0059498777, -0.04959398, -0.12104261, 0.018237587, 0.07747393, -0.038457148, 0.01900929, 0.06142847, -0.03194083, -0.008666936, -0.13599987, 0.031248135, -0.04247948, 0.10167268, 0.13608082)); + target2 += mul(d2, float4x4(-0.13198346, 0.1674333, -0.06218637, -0.06692618, 0.12682024, -0.12734371, -0.08660555, 0.22860871, 0.32062, -0.001097262, 0.13959797, -0.0044754874, 0.081532046, -0.026494987, -0.0123374695, 0.026149286)); + target2 += mul(e2, float4x4(-0.104755685, -0.010431212, 
-0.031997994, 0.027816107, 0.028212348, 0.06372256, -0.14536087, 0.09018213, 0.01694147, -0.046755623, 0.04764776, 0.057815794, 0.062553786, -0.09214233, 0.040526126, 0.007556779)); + target2 += mul(f2, float4x4(-0.08911158, 0.07110132, 0.056163214, 0.038952623, 0.10865385, 0.006793654, -0.06653261, 0.18222512, 0.029801054, -0.0811567, 0.12070828, 0.054755576, 0.028518738, -0.016029958, 0.03218924, -0.009337529)); + target2 += mul(g2, float4x4(0.086197704, -0.015495907, 0.048072338, -0.07199852, 0.082511336, 0.20892009, 0.059752844, 0.105095305, 0.09896605, 0.11799976, 0.061295632, 0.06601205, 0.013666139, -0.015312437, -0.18342482, 0.023882518)); + target2 += mul(h2, float4x4(-0.013824049, 0.05503347, -0.16911599, 0.008002769, -0.13950597, 0.027708618, 0.25653768, -0.08508614, 0.18204638, -0.06386117, -0.20115016, -0.14755486, 0.05260717, 0.15443258, 0.25847095, -0.10009257)); + target2 += mul(i2, float4x4(-0.059717122, -0.03435186, -0.10407675, -0.064839795, -0.044192888, -0.036913253, 0.03681877, 0.03697244, -0.09967689, 0.09231582, -0.33624214, -0.023151914, -0.1287868, 0.025817866, 0.053143233, -0.05608657)); + target2 += mul(a3, float4x4(-0.014672332, 0.06483223, 0.04254691, -0.112299606, 0.23128588, -0.1651168, 0.050003413, 0.04894729, 0.2544582, -0.13577309, -0.0006000951, 0.06801677, 0.09296969, 0.061753552, 0.20265704, 0.257177)); + target2 += mul(b3, float4x4(-0.14275837, 0.17531338, 0.04749905, -0.0758535, -0.015062751, 0.046983913, -0.1333634, 0.068564706, -0.09043316, -0.31197232, 0.025262894, 0.042436298, -0.0040407367, -0.22480483, 0.041938446, 0.024641208)); + target2 += mul(c3, float4x4(0.14420901, 0.114887774, -0.11488812, -0.0597554, -0.054847293, 0.05547183, 0.03265681, -0.26890585, 0.03439455, -0.255012, 0.17280143, -0.15793064, -0.078898564, -0.12406215, -0.062780574, 0.10172549)); + target2 += mul(d3, float4x4(-0.094646096, -0.17374977, -0.0074399756, -0.34635502, 0.07774559, 0.15205993, -0.03449227, -0.06186042, 0.22554572, 0.05784444, 
0.12953205, 0.30743182, 0.0064037675, -0.04768125, -0.024676334, -0.038768534)); + target2 += mul(e3, float4x4(-0.016888129, -0.01739885, -0.100029364, -0.18185234, -0.25301012, -0.057780884, 0.1565648, 0.03068169, -0.11813698, -0.03959661, 0.0833061, 0.15282218, 0.0071316576, -0.07718743, -0.10895751, -0.06739941)); + target2 += mul(f3, float4x4(0.06317689, 0.18067697, 0.15456977, 0.15414707, -0.17297679, 0.024572203, 0.1171789, 0.07393219, -0.18743253, -0.08222157, 0.11114712, -0.17883265, -0.04287185, -0.09453442, 0.12694365, -0.027343085)); + target2 += mul(g3, float4x4(-0.06716255, -0.21199013, -0.12669027, 0.21503277, 0.0019133248, 0.0634854, 0.1330296, -0.061205685, 0.29359, 0.17710103, -0.024374757, -0.09792164, -0.18657605, 0.081899285, 0.12585995, 0.025425494)); + target2 += mul(h3, float4x4(-0.05250748, 0.030909589, 0.1902117, 0.25473362, -0.09742609, 0.053520087, -0.03211081, -0.057595085, 0.12961125, 0.0023426781, -0.013836333, -0.09925751, -0.06785708, 0.08647871, -0.27276617, -0.10692432)); + target2 += mul(i3, float4x4(0.10256342, -0.0012406971, -0.043700144, 0.18606174, 0.019327404, 0.11292357, -0.15260164, 0.17126434, -0.18879685, -0.16486076, -0.011073295, -0.12524827, -0.008579242, -0.059638925, 0.084539056, -0.117018394)); + target2 += mul(na1, float4x4(-0.005224277, -0.004213279, -0.0032532548, 0.07730248, 0.09291174, -0.073571384, -0.026436254, 0.03482603, -0.19962324, -0.2610976, 0.020305693, -0.10154568, -0.36644712, 0.051274676, 0.014252039, 0.13887805)); + target2 += mul(nb1, float4x4(-0.027320823, -0.046729945, -0.026784837, 0.16669074, -0.060943272, 0.033377934, -0.14505643, -0.020337462, 0.16624433, -0.04587067, 0.097641915, 0.081230365, -0.007829853, 0.12665996, -0.14892018, -0.24456674)); + target2 += mul(nc1, float4x4(-0.08478914, -0.03012501, -0.10310597, -0.11331812, -0.0031645342, 0.14941333, 0.062353395, -0.024058735, -0.0594801, -0.23192395, -0.16746534, 0.06564879, -0.12253713, 0.17433378, -0.0781637, -0.02427467)); + target2 
+= mul(nd1, float4x4(-0.02302574, 0.04423824, -0.08400403, 0.09036313, -0.014492948, 0.11002858, 0.18625931, 0.32704633, 0.05124957, -0.068088494, 0.12289486, -0.014215405, -0.13288727, 0.040543802, -0.20918661, 0.073871054)); + target2 += mul(ne1, float4x4(0.017605199, 0.019553222, -0.08225654, 0.20643222, -0.00022603406, 0.08176717, 0.12868896, 0.124581024, -0.030760515, 0.095257014, -0.22808774, -0.034270126, 0.15100974, -0.3296213, 0.18732856, 0.1324247)); + target2 += mul(nf1, float4x4(-0.0529032, -0.026973793, -0.05097176, -0.11297454, 0.020022966, -0.018701904, -0.04847294, -0.15029453, -0.06363558, 0.09747056, 0.07460071, -0.03857069, -0.21553952, -0.11073493, -0.213246, 0.0711861)); + target2 += mul(ng1, float4x4(-0.03162221, 0.001236578, 0.123811916, 0.033390332, -0.037370905, 0.19355269, -0.17827089, 0.014296732, -0.16348897, -0.1319003, -0.16828157, 0.025803383, 0.059980027, 0.110682875, 0.0740905, 0.062085215)); + target2 += mul(nh1, float4x4(0.045581415, 0.045279585, 0.057199746, -0.02156781, 0.006849691, 0.088090494, 0.0050983853, -0.13634379, -0.027394824, -0.095449656, 0.24879529, -0.096120596, 0.1353526, 0.120924726, -0.18323645, -0.021366404)); + target2 += mul(ni1, float4x4(-0.09347594, -0.033151172, 0.08154851, -0.043524023, -0.11946553, 0.034201168, 0.29714793, -0.11766968, 0.07862629, -0.022385478, 0.007981411, 0.072274946, -0.07020759, -0.08967969, -0.01748178, -0.050568584)); + target2 += mul(na2, float4x4(-0.06815112, -0.08195707, -0.018364457, -0.1291466, -0.07578508, -0.026269661, -0.024130398, 0.345771, 0.061061617, 0.00024922745, 0.121253625, 0.1679367, -0.075497314, 0.018176561, -0.016344557, 0.0036648472)); + target2 += mul(nb2, float4x4(0.04763461, -0.0020346593, -0.02060361, 0.027356124, 0.12256149, -0.10517474, 0.05206596, 0.48938727, 0.24554996, 0.035649568, -0.0020840873, -0.13338771, 0.055847157, 0.08685442, -0.0049057365, 0.119682014)); + target2 += mul(nc2, float4x4(-0.0009415129, 0.023808502, 0.011839588, 0.1557796, 
-0.046566915, -0.16340078, 0.057526406, 0.051289674, 0.009036062, -0.033471756, 0.04899847, 0.08597252, 0.009778078, 0.15277503, -0.02124232, -0.13266967)); + target2 += mul(nd2, float4x4(0.044579845, -0.057439465, -0.0058995294, -0.016733315, -0.026765248, 0.22384825, 0.10160812, -0.24921204, 0.1505643, 0.10299508, -0.022949796, 0.023317415, -0.11803905, -0.010997904, -0.0165606, 0.10949375)); + target2 += mul(ne2, float4x4(-0.024293665, 0.080889955, 0.065993495, -0.08183072, 0.15339898, -0.10386374, -0.0075264974, -0.45434427, 0.16638929, 0.18546598, -0.15133487, -0.04405705, -0.11456945, 0.023287205, 0.01540089, 0.07659333)); + target2 += mul(nf2, float4x4(0.05337434, -0.07030085, -0.016662175, -0.025540289, -0.20320894, -0.22829315, 0.0023119978, -0.13647676, 0.14624248, 0.057653934, -0.23686588, -0.22728209, -0.04211661, -0.009623881, -0.013481165, -0.050968897)); + target2 += mul(ng2, float4x4(-0.10351371, -0.030414786, -0.021908734, 0.10631468, 0.1558724, -0.030323582, -0.12572181, 0.14325237, 0.16137493, 0.053329308, -0.07966373, -0.14599875, 0.06742195, -0.03502627, 0.093494855, -0.016484367)); + target2 += mul(nh2, float4x4(-0.010210213, -0.07126592, 0.08901449, -0.001978569, -0.0020932974, 0.05631864, -0.07674242, 0.07440119, -0.10262469, 0.06294099, 0.2640596, -0.068250656, 0.039236706, -0.05372906, -0.24834888, -0.027236471)); + target2 += mul(ni2, float4x4(0.053275097, 0.12541588, 0.057327554, 0.1483746, -0.019743394, -0.004537443, -0.002553759, 0.051015604, 0.15611948, 0.0062136482, 0.15442398, -0.17060183, 0.072183914, 0.030539684, 0.122384906, 0.13613632)); + target2 += mul(na3, float4x4(-0.081967466, -0.045659952, 0.053518385, 0.038269065, -0.20554744, 0.1022743, -0.16302924, 0.11628324, 0.08677866, -0.016249105, 0.0030073056, 0.029243115, 0.09487045, -0.08571386, -0.108643636, 0.023155121)); + target2 += mul(nb3, float4x4(0.13383357, -0.14805956, -0.0026678462, 0.096683614, -0.19977921, 0.06789931, 0.11313261, -0.08059509, 0.11312805, 0.02279778, 
-0.028791273, 0.00220455, 0.1280279, -0.0031435476, 0.027489156, 0.22506006)); + target2 += mul(nc3, float4x4(-0.17668596, 0.056276754, 0.06092557, 0.06512077, 0.28657347, -0.16558819, -0.032547206, 0.060506567, 0.042512514, 0.012298008, -0.0840555, -0.003036976, -0.065048106, 0.01438789, -0.022174913, -0.04558888)); + target2 += mul(nd3, float4x4(0.09863285, 0.20795937, 0.022519527, 0.18537116, 0.0392277, -0.0321246, -0.026941739, -0.113379315, -0.066700965, -0.03651247, 0.0571846, -0.030824896, 0.096933104, 0.15837808, -0.0047979183, 0.27915525)); + target2 += mul(ne3, float4x4(0.102253675, 0.009542945, -0.005872879, 0.16511136, 0.011185962, 0.06349425, 0.015944714, -0.070249364, 0.17597549, -0.0073095546, 0.06678522, -0.048394345, 0.07822778, 0.1582912, 0.029773576, 0.1454936)); + target2 += mul(nf3, float4x4(-0.064127095, -0.13314691, -0.15525493, -0.12851773, 0.32410213, 0.11451161, -0.16337484, 0.22651163, 0.0670393, -0.010159622, 0.061997004, -0.0028491814, -0.12702557, -0.02556835, -0.030351989, -0.101927444)); + target2 += mul(ng3, float4x4(-0.10687288, 0.013433122, 0.035762146, -0.07343635, -0.057016056, -0.041276235, -0.08300978, 0.0058231223, 0.014210706, 0.24323368, 0.010536771, 0.037272993, 0.14479576, 0.0013622575, 0.0004501183, 0.17661947)); + target2 += mul(nh3, float4x4(0.056699157, -0.009144585, -0.20287608, -0.17288777, -0.031525977, -0.014541391, 0.09615033, -0.020868845, -0.06501473, -0.015121819, 0.20430197, -0.04346306, -0.12766391, 0.093933746, -0.027732635, 0.11136926)); + target2 += mul(ni3, float4x4(-0.08438437, -0.1183074, -0.12171084, -0.016565872, 0.011952218, -0.058289453, 0.13479574, -0.0013566733, 0.20290127, 0.03338366, -0.1634658, 0.11389365, -0.060460836, 0.05049821, -0.14498705, 0.016767675)); + target2 += float4(-0.051753327, -0.07172183, 0.021211471, -0.050325148); + + float4 target3 = mul(a1, float4x4(-0.20980129, 0.023968311, 0.12840137, 0.10842146, 0.011306613, 0.05415782, 0.039082862, 0.16055544, -0.019953849, 
-0.038693313, 0.043451615, 0.29995796, -0.04229376, -0.052874412, -0.043818697, 0.12305407)); + target3 += mul(b1, float4x4(-0.05386288, 0.060217176, 0.115249164, -0.06499263, -0.24467815, -0.038295876, 0.1099765, 0.011418658, -0.037247434, 0.022481795, -0.022084411, 0.08719741, 0.112991996, 0.0038797192, 0.0007742727, -0.12125326)); + target3 += mul(c1, float4x4(0.14591883, 0.0059707207, 0.10084995, 0.11218308, -0.06853006, 0.056708243, 0.03836111, 0.097718365, -0.03493398, -0.025623012, -0.05587737, -0.08457079, 0.028527644, -0.12509371, 0.10159183, -0.1373413)); + target3 += mul(d1, float4x4(-0.070879295, 0.12866656, 0.0061003384, 0.2756249, -0.110929534, 0.08008204, 0.103428364, 0.0680596, 0.25671995, -0.20484821, 0.07494457, -0.30119568, -0.0037036634, 0.089072324, 0.08692298, 0.070883)); + target3 += mul(e1, float4x4(-0.010380239, -0.09510446, -0.082714744, -0.17768058, 0.07806542, 0.031258516, 0.22199117, -0.2503904, -0.35995534, 0.22030926, 0.31575105, 0.00959926, -0.01777953, 0.0140022775, 0.03752933, 0.09948206)); + target3 += mul(f1, float4x4(-0.041341938, -0.07633474, -0.0198307, -0.0010591226, -0.00981418, 0.054734066, 0.07465484, 0.18272892, -0.081466086, -0.016789936, 0.01381776, 0.19449398, -0.16821295, -0.17434129, -0.03367363, -0.29107878)); + target3 += mul(g1, float4x4(0.03371505, 0.0015008952, 0.038456205, 0.08681006, 0.06587572, 0.080483936, -0.046148404, 0.00034004613, -0.049788523, -0.014699005, 0.12648652, -0.08964094, -0.04100757, -0.0022513492, -0.045995962, -0.033430565)); + target3 += mul(h1, float4x4(-0.077466115, 0.055482663, 0.06427869, -0.014551742, -0.03726866, -0.050230574, 0.13511398, -0.14205348, -0.03461195, 0.031386618, 0.0590859, 0.032402404, -0.015786028, 0.086712435, 0.0989059, 0.031410974)); + target3 += mul(i1, float4x4(-0.034690183, -0.038256433, -0.011660002, 0.0063596303, 0.038892817, 0.11872877, 0.0003016667, 0.08520122, 0.0941757, 0.073596634, 0.08374554, 0.046010435, 0.0181265, -0.15729031, 0.11088375, 
-0.032952093)); + target3 += mul(a2, float4x4(-0.026296677, 0.15765159, -0.012793888, 0.0082718665, 0.12780726, -0.0118969055, -0.089828335, -0.24008913, 0.19047114, 0.03790669, 0.10990294, -0.14094876, 0.031807188, 0.044609103, -0.1013979, 0.008491038)); + target3 += mul(b2, float4x4(0.053833764, 0.26152018, -0.04398908, -0.060880598, -0.028556267, -0.04798034, 0.006057095, -0.19898368, -0.24473669, 0.0472649, 0.15300584, 0.028983278, 0.028763462, -0.017422339, 0.03820097, 0.083550654)); + target3 += mul(c2, float4x4(0.0082439445, -0.0012358675, -0.013711661, 0.07154783, 0.07983732, -0.015840268, 0.034440894, -0.04973906, -0.18109304, 0.05403726, -0.03891083, -0.016710335, 0.10012702, 0.02470262, 0.0085716015, -0.0851344)); + target3 += mul(d2, float4x4(0.04101796, 0.060623523, 0.011851002, -0.028376777, -0.025862841, -0.042955548, -0.1211269, -0.13360673, 0.0071736956, -0.18880656, 0.276794, -0.011204949, 0.020503378, 0.0110537205, 0.10886233, -0.003678301)); + target3 += mul(e2, float4x4(-0.14220405, -0.15436499, -0.13759659, -0.03390728, 0.21921699, -0.003154918, -0.101451315, 0.043704413, -0.12738566, 0.19500181, 0.45843616, 0.065685, -0.20168468, 0.12985173, -0.02569477, 0.17067434)); + target3 += mul(f2, float4x4(0.032667797, 0.16730979, 0.004677948, -9.4643896e-05, -0.014588183, -0.057854652, 0.013125396, 0.096397184, -0.054806076, 0.010901007, 0.0968573, -0.23783323, 0.08697233, 0.008680743, -0.035573848, -0.004963115)); + target3 += mul(g2, float4x4(-0.00958006, 0.03317287, -0.01340794, -0.018926572, -0.05369498, -0.03341796, 0.030888261, -0.0010606453, 0.039325304, -0.16673934, 0.06557901, -0.08155623, -0.02527372, -0.17023365, 0.015217776, -0.040017188)); + target3 += mul(h2, float4x4(-0.015815312, -0.042971406, 0.067791514, -0.08905113, -0.09565908, 0.04346861, -0.06728161, 0.15545414, 0.18861936, -0.031062441, 0.23719235, 0.037903327, 0.07448, 0.035912767, -0.011007527, -0.01686951)); + target3 += mul(i2, float4x4(0.0177658, 0.058648083, -0.028266283, 
0.074122384, -0.114152886, -0.1088884, -0.00045867384, 0.12350585, -0.028705545, 0.07543727, 0.019930601, 0.05765993, 0.030875817, -0.01684014, 0.03873862, -0.29210237)); + target3 += mul(a3, float4x4(0.13872401, 0.0026290037, 0.120320186, -0.096298255, -0.22042315, -0.024083365, 0.021574842, -0.120338276, -0.030302105, 0.0030427484, -0.048579045, 0.11119769, 0.17029862, -0.03042154, -0.008851885, -0.04858139)); + target3 += mul(b3, float4x4(0.08693055, 0.0035178792, 0.0072182836, -0.21177882, -0.12236571, -0.041778523, -0.07611475, 0.1860772, -0.07140713, 0.079063386, 0.16111141, 0.10981697, -0.11631706, 0.00499998, 0.03531511, 0.112886176)); + target3 += mul(c3, float4x4(0.31241155, -0.155902, 0.026360337, 0.11567123, -0.01410306, 0.043105874, 0.06448718, -0.15669721, -0.10699524, 0.14620166, -0.022471936, 0.16952698, -0.0043298705, 0.012148871, -0.06097046, 0.13138528)); + target3 += mul(d3, float4x4(0.04631855, 0.16682167, -0.08682791, 0.031910088, -0.10863085, -0.05405996, 0.20847258, -0.25902548, -0.21886107, -0.016768524, 0.018900516, 0.016220776, 0.086765796, -0.086313486, 0.061806828, -0.042748976)); + target3 += mul(e3, float4x4(-0.22026716, -0.060322747, 0.055743527, -0.20811775, 0.15368998, -0.05755373, -0.1723089, 0.053601813, 0.3936026, -0.13520636, 0.13089643, -0.09859593, -0.08306327, 0.12936836, 0.1387318, 0.0221951)); + target3 += mul(f3, float4x4(0.14327681, -0.19199587, 0.02808416, -0.13307315, 0.12417994, -0.06954055, -0.11516412, -0.16203047, 0.085192114, 0.020538192, -0.10626918, 0.13578235, -0.042099748, 0.17358838, 0.040398534, 0.14976105)); + target3 += mul(g3, float4x4(-0.053998414, 0.12475386, -0.17873338, -0.06543859, 0.007933435, -0.07924536, -0.00051635655, -0.0015982009, 0.0397255, -0.16369022, 0.03679988, 0.100230515, -0.03289991, 0.043998964, 0.058887206, -0.09575534)); + target3 += mul(h3, float4x4(-0.0027977703, 0.17769088, -0.104156405, -0.1011918, -0.042667318, 0.19569083, 0.0944246, 0.05381444, 0.27140749, -0.12598918, 
0.40728518, -0.16019246, 0.07478889, 0.07995141, -0.055247143, -0.015301875)); + target3 += mul(i3, float4x4(-0.10702615, 0.08362206, -0.12840238, 0.23424083, -0.11492997, 0.14988491, -0.058391277, -0.012141015, 0.15102027, 0.14370169, 0.04101889, 0.18302867, 0.11423182, 0.026963422, 0.02742905, 0.05555466)); + target3 += mul(na1, float4x4(0.0170646, -0.040626798, -0.086295746, -0.08303102, 0.07351082, -0.10439346, -0.09158801, 0.143845, 0.016958551, 0.21520329, 0.041720334, -0.11638024, 0.087674506, 0.12561873, -0.21283507, -0.23356001)); + target3 += mul(nb1, float4x4(-0.011983947, 0.026985325, -0.10494964, 0.045505363, 0.06308739, -0.0132794585, -0.19216236, 0.0044559645, -0.21042153, 0.026115706, -0.08442747, 0.08834091, 0.13262731, -0.06231853, -0.20550017, 0.03952042)); + target3 += mul(nc1, float4x4(-0.13586617, -0.0021369287, -0.121751934, -0.019784765, 0.03198282, -0.17328545, -0.10135551, -0.0024194748, -0.04619262, 0.21542613, -0.09846654, 0.081278816, 0.16300274, 0.01612674, -0.0033168197, -0.0257739)); + target3 += mul(nd1, float4x4(0.1674388, 0.01902311, 0.007676536, -0.12779048, 0.18292421, -0.22342151, -0.05965652, 0.14477763, -0.09779103, 0.14098361, -0.16848993, 0.19790487, 0.006252736, 0.22206211, -0.15818825, 0.08966031)); + target3 += mul(ne1, float4x4(0.17080314, 0.069508895, -0.038767304, 0.18950053, -0.08592572, -0.20979418, -0.21214612, 0.3330128, 0.30952567, -0.107134975, -0.16258, 0.022875668, -0.02457244, -0.12532432, -0.24953507, 0.059734188)); + target3 += mul(nf1, float4x4(-0.0018491185, 0.033706773, -0.1065624, 0.025152596, -0.016163057, -0.041699793, -0.12381229, -0.025942512, 0.13162622, 0.03565028, 0.029629026, -0.018657705, -0.1921952, 0.101777196, -0.06653633, 0.079698876)); + target3 += mul(ng1, float4x4(-0.040848907, 0.013372185, -0.061049856, -0.05829793, 0.03286879, -0.23536444, -0.056496553, 0.10049081, 0.0040958193, 0.1146177, 0.05323595, -0.040001456, -0.07206396, 0.052719124, -0.11720367, 0.12925144)); + target3 += 
mul(nh1, float4x4(0.08204122, -0.04806825, 0.03865589, -0.016993582, 0.004172861, -0.025698105, -0.01519582, 0.1425758, 0.02170024, 0.105864905, -0.03567325, -0.016229391, 0.22955607, -0.043812234, 0.045955688, 0.07391785)); + target3 += mul(ni1, float4x4(0.025563411, 0.016936684, 0.054015722, -0.03440089, 0.0448358, 0.012403107, 0.011840847, -0.10125541, 0.03623299, -0.005010518, -0.043322872, -0.17361045, 0.015130423, 0.1813893, 0.0017346571, 0.07948043)); + target3 += mul(na2, float4x4(-0.027647035, -0.0092600705, -0.05360344, -0.03877652, 0.028799497, 0.002088597, -0.13616459, 0.14142619, -0.26286268, -0.10349014, -0.066500075, 0.009223449, 0.08260629, -0.037491266, 0.019173276, -0.022004724)); + target3 += mul(nb2, float4x4(0.004824502, -0.114328325, -0.0023743433, 0.027862813, -0.019098494, 0.050463524, -0.11528185, 0.22641957, -0.025532806, 0.007936803, -0.064679936, -0.055090822, 0.07407797, 0.052605998, -0.043648902, -0.16713037)); + target3 += mul(nc2, float4x4(0.078680634, 0.020991815, 0.008421187, 0.010790185, 0.032945324, 0.13025786, -0.14650385, 0.053448163, -0.0028072142, 0.039515216, 0.1282605, -0.029288173, -0.029804084, -0.13323198, -0.054916043, 0.056681957)); + target3 += mul(nd2, float4x4(-0.09560204, -0.0669099, 0.005074813, 0.09496971, 0.027659275, -0.2191003, 0.29730386, -0.022740293, -0.025892505, -0.1871456, 0.028785622, -0.12673095, 0.0664705, -0.08389141, -0.089651205, -0.15402664)); + target3 += mul(ne2, float4x4(0.0063571655, 0.15680969, 0.061591282, 0.03752913, -0.041436892, 0.075064555, 0.20300192, 0.031942736, 0.0804296, -0.22194067, -0.20516422, 0.07361, -0.15353987, 0.25465, 0.008901653, 0.10683235)); + target3 += mul(nf2, float4x4(-0.006734436, -0.14774522, -0.031374577, -0.1032655, 0.11299578, 0.1205544, 0.11802791, -0.0612094, 0.03863345, 0.09838008, 0.037064772, 0.029507324, -0.051219307, -0.055263996, 0.02356915, -0.16056564)); + target3 += mul(ng2, float4x4(-0.06996934, -0.015304054, -0.009411581, 0.030309107, 0.10674073, 
-0.020733232, -0.115811616, 0.031903993, -0.049218595, -0.067377076, 0.26841155, -0.06866156, -0.09156055, -0.10751758, -0.022639344, -0.18830526)); + target3 += mul(nh2, float4x4(0.020456642, -0.035503354, -0.09457199, 0.05264921, 0.24155058, 0.12630259, -0.045381807, -0.12230558, -0.03225022, 0.04103188, -0.13622516, -0.0040657576, 0.023767322, -0.051124092, 0.09194598, -0.03766687)); + target3 += mul(ni2, float4x4(-0.016005656, -0.05218363, 0.029727828, -0.1604237, -0.009916855, -0.024033275, -0.14342757, 0.083073266, 0.057055146, -0.013757824, 0.15497124, -0.17284107, 0.05109579, 0.013304962, -0.06706223, 0.06251818)); + target3 += mul(na3, float4x4(0.06668304, 0.009187671, -0.047118776, 0.07131393, -0.17141497, -0.015085916, 0.004049452, -0.035744824, 0.032192133, 0.15326595, 0.044383276, 0.14035697, -0.090966456, 0.14161377, -0.015315352, 0.11275578)); + target3 += mul(nb3, float4x4(-0.13508414, 0.0785333, 0.009038879, 0.1607147, 0.22703816, 0.033339903, -0.03727777, -0.31905726, -0.069729164, 0.036481526, -0.025714623, 0.0851529, -0.12554394, 0.105045296, 0.059951913, -0.0604455)); + target3 += mul(nc3, float4x4(-0.20849659, 0.088841915, -0.1109168, -0.08992707, 0.31967592, 0.005481088, 0.22387522, 0.02098377, -0.0497405, -0.025430094, -0.0043220813, 0.060257867, -0.21568587, 0.067227446, -0.057946377, 0.06617755)); + target3 += mul(nd3, float4x4(0.076282814, -0.20857447, 0.056654572, -0.014142213, 0.029527945, -0.07234652, -0.094661996, 0.22620171, 0.042960577, 0.013866398, 0.036293183, 0.14942285, 0.076137245, -0.002794117, -0.1168563, -0.0146170305)); + target3 += mul(ne3, float4x4(0.10552861, -0.15840133, -0.03899879, 0.23962662, 0.04375998, 0.1696087, 0.037471466, -0.2348845, -0.04425561, -0.09243792, -0.12540625, 0.013209438, 0.20652635, 0.28815508, -0.14443508, -0.045806926)); + target3 += mul(nf3, float4x4(-0.18040875, 0.101635806, 0.022794934, 0.01974664, 0.24168968, -0.09383824, -0.05368557, 0.095760964, -0.03084522, 0.03096591, -0.025146073, 
-0.15247615, -0.07991138, 0.041957334, 0.13305306, 0.10435218)); + target3 += mul(ng3, float4x4(-0.12386387, -0.07711658, -0.010701461, -0.15226945, 0.13125682, -0.05067199, 0.05759467, 0.06512925, -0.087202296, -0.09307128, 0.074678324, -0.118310176, 0.013819953, 0.078637935, 0.060606144, 0.024220081)); + target3 += mul(nh3, float4x4(0.034386832, -0.18846357, -0.039673664, 0.113117084, -0.045039542, -0.10561991, -0.073102295, -0.3002364, 0.03678976, 0.12222279, -0.115726635, 0.07686326, 0.040241316, 0.1602316, 0.09017754, -0.115864284)); + target3 += mul(ni3, float4x4(0.052414972, 0.033908065, 0.08952466, -0.17085709, 0.006635481, -0.040943716, -0.21519491, 0.04866619, -0.04725049, -0.05258961, -0.014845829, -0.26571122, 0.07195377, 0.20871797, -0.068733044, 0.15962349)); + target3 += float4(-0.07961375, -0.07668534, 0.030482467, 0.035888318); + + float3 target4 = mul(e1, float4x3(0.053345524, 0.066197485, 0.07259881, 0.05303127, 0.06742834, 0.07375377, 0.094053976, -7.700613e-05, -0.02473139, 0.005308593, 0.03030767, 0.039729137)); + target4 += mul(e2, float4x3(-0.108758785, 0.037586506, 0.065435104, 0.027483977, -0.05654698, -0.076396726, 0.105040714, 0.05024414, 0.021126145, -0.0674868, -0.0055504893, 0.02190656)); + target4 += mul(e3, float4x3(-0.053890713, 0.0071396744, 0.016984116, -0.045092918, 0.025137635, 0.041979324, -0.03408237, 0.0019260172, 0.005701325, -0.02040999, -0.01315308, -0.00639404)); + target4 += mul(ne1, float4x3(-0.073155664, -0.06887698, -0.072435565, -0.08694837, -0.05531286, -0.055365037, -0.06690585, -0.00129934, 0.013128711, -0.045931015, 0.017999481, 0.021670034)); + target4 += mul(ne2, float4x3(0.14758188, -0.052864034, -0.06617946, -0.025215192, 0.005785653, 0.02022865, -0.07359226, -0.034944568, -0.01911832, -0.059109453, 0.0018033485, -0.022261323)); + target4 += mul(ne3, float4x3(0.079963796, 0.018210623, -0.0025736517, 0.06693135, -0.038985185, -0.04726813, -0.03559407, -0.0083629545, -0.005753532, 0.043954816, -0.022223696, 
-0.039470144)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex3[gxy] = target3; + tex7[gxy] = float4(target4, 1); +} + +//!PASS 6 +//!DESC Conv-4x3x3x24 +//!IN tex1, tex2, tex3, tex7 +//!OUT tex4, tex5, tex6, tex8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass6(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = 
tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.09584929, -0.095243275, 0.08022671, 0.075294726, 0.18445255, -0.082423694, -0.097833045, -0.021506732, -0.21379599, -0.023127496, -0.18897046, 0.023956126, -0.060177475, 0.027762169, 0.19984011, 
-0.20838684)); + target1 += mul(b1, float4x4(0.012249506, -0.12688737, -0.12119437, 0.10179773, -0.09664198, -0.0030920326, -0.030286502, -0.20217018, 0.34590152, -0.05034654, 0.049923953, -0.043337423, 0.25000378, -0.028680135, 0.16001691, -0.066234544)); + target1 += mul(c1, float4x4(-0.08372182, -0.089819506, -0.013704554, 0.04556739, -0.114813834, -0.06466441, 0.03785733, -0.0062836753, 0.047535792, 0.06347279, -0.007735239, 0.049881376, 0.20055495, 0.047256388, -0.09947006, 0.0025243685)); + target1 += mul(d1, float4x4(0.3671971, -0.05361603, -0.12586144, 0.12522058, 0.13843551, -0.06033578, 0.22667646, -0.08870703, 0.01452431, 0.17809536, 0.13784996, 0.15395631, 0.0001755052, 0.30571333, -0.14230241, -0.22773817)); + target1 += mul(e1, float4x4(-0.27697307, 0.19896318, -0.055979818, -0.27574858, 0.06590851, -0.083754696, 0.26534772, 0.04968563, 0.028200507, 0.11523887, 0.07717626, -0.037011877, 0.013540311, -0.015524421, 0.20788544, 0.16297664)); + target1 += mul(f1, float4x4(-0.13144116, -0.14546596, 0.10977632, 0.010728187, 0.025761489, -0.018065382, 0.06367839, 0.14230403, 0.12607081, -0.0124253975, 0.31784698, -0.017743418, -0.022748945, 0.05433257, 0.0031092372, -0.031199085)); + target1 += mul(g1, float4x4(0.21655789, 0.011040414, 0.06492884, 0.0706221, 0.09610853, 0.057776507, 0.009683445, -0.060912937, 0.021881321, -0.19671698, -0.0130090965, -0.013112566, -0.085476145, 0.038455218, -0.0014731084, -0.0831875)); + target1 += mul(h1, float4x4(0.37602007, 0.0823336, 0.24707538, -0.09009795, -0.017044, -0.12772176, -0.17441119, -0.042144842, 0.09458421, 0.28926283, 0.06927162, -0.06356304, 0.2206176, 0.1834394, -0.055222265, -0.13328971)); + target1 += mul(i1, float4x4(-0.12759925, -0.1872996, 0.12348925, 0.09169479, 0.2032652, 0.021332331, -0.02606638, -0.30383334, 0.11312311, -0.12563488, 0.07815656, 0.033551723, 0.073155805, 0.022491606, -0.004879681, -0.020566663)); + target1 += mul(a2, float4x4(-0.0074200626, -0.045258366, -0.11789159, 0.15158547, 
0.021973789, 0.013558428, -0.06303165, -0.014261419, -0.005217678, -0.08988565, -0.032385588, -0.16513458, -0.00094591687, 0.105432004, 0.008511094, 0.064075306)); + target1 += mul(b2, float4x4(-0.11356488, 0.033568926, -0.0035593451, -0.1380603, -0.09776493, 0.16050343, 0.14889094, -0.20236592, -0.13227837, -0.3369538, -0.08290829, -0.102781296, -0.0008081758, 0.25186548, 0.045406237, -0.08264705)); + target1 += mul(c2, float4x4(0.012680731, 0.045382235, -0.099822015, 0.052455686, -0.017731141, 0.2148587, -0.025351917, 0.031683072, -0.25334007, 0.0181896, -0.0813112, -0.12272559, 0.04371032, 0.065688565, -0.020920211, 0.23614638)); + target1 += mul(d2, float4x4(0.07416445, -0.1632982, 0.019079927, 0.033240702, 0.13220134, -0.09758509, -0.09742767, 0.0003053599, 0.110648625, -0.06813206, 0.10455032, -0.037899535, -0.03261096, 0.06280864, -0.17577846, -0.279448)); + target1 += mul(e2, float4x4(0.032076143, 0.00038162203, 0.01970988, -0.040755652, 0.14594907, -0.29632306, 0.18186367, 0.06210379, 0.089618064, -0.20777738, -0.11941431, -0.047921117, 0.069467194, -0.061959818, -0.097263746, 0.20329393)); + target1 += mul(f2, float4x4(0.13389389, 0.053396456, 0.15672714, 0.1585184, 0.019925753, 0.08114361, 0.1381434, 0.06507304, -0.021846443, -0.040439755, 0.028436588, -0.1502027, -0.01547767, -0.09032624, 0.1101168, -0.044395007)); + target1 += mul(g2, float4x4(-0.08236856, 0.25564417, 0.15329555, 0.054097474, 0.12049528, -0.076263994, -0.19988477, 0.01916389, 0.097000316, -0.15214846, 0.1360054, -0.0007913522, -0.22950296, 0.0919526, -0.0045635877, 0.16661373)); + target1 += mul(h2, float4x4(-0.19546251, 0.008113141, -0.08576472, 0.23981415, 0.037918933, -0.106971025, -0.19296011, 0.064365655, -0.1451187, 0.03483461, 0.03271891, -0.001744038, -0.24933495, 0.0021132312, -0.15542698, 0.041852806)); + target1 += mul(i2, float4x4(0.07619386, 0.17014128, 0.05875971, 0.056373183, 0.077981666, -0.034455027, -0.09977959, 0.019308453, -0.097891875, -0.011260777, 0.009704571, 
-0.091228284, 0.072402045, 0.1679339, -0.021336546, -0.078355595)); + target1 += mul(a3, float4x4(-0.10250763, 0.07651088, -0.0131817255, 0.035391405, 0.1545587, -0.005520408, 0.18242277, 0.034118786, -0.0512669, 0.09563292, -0.0063671293, 0.017505696, 0.038793128, -0.23837951, 0.047975145, 0.17773068)); + target1 += mul(b3, float4x4(-0.0031853304, 0.1552162, 0.16779172, -0.06020084, -0.19057243, -0.13034964, -0.028302211, -0.1005563, -0.025626518, 0.087223954, 0.19338006, -0.06066401, -0.2122666, 0.001640063, 0.033021607, 0.06684525)); + target1 += mul(c3, float4x4(0.10043514, 0.033739183, 0.01542628, -0.07931681, 0.032161597, 0.16379037, 0.050983094, -0.030686958, 0.19189216, -0.15878248, 0.01945422, -0.02624594, -0.10871623, -0.06925224, 0.020812772, 0.07386481)); + target1 += mul(d3, float4x4(0.012129095, -0.029043682, -0.054882783, -0.04798959, 0.12916534, -0.012814343, 0.06516883, -0.054208606, 0.2625884, 0.008694777, -0.16992761, -0.041635927, 0.10295491, -0.04496253, 0.14063339, 0.15155916)); + target1 += mul(e3, float4x4(-0.09972329, -0.2031706, 0.15199123, 0.136278, -0.030424237, 0.01253304, -0.22483149, -0.04429611, -0.0058194255, 0.32650772, -0.13599585, -0.15167284, 0.13211648, 0.06883629, 0.13449487, 0.1419326)); + target1 += mul(f3, float4x4(0.16303232, -0.12681945, -0.24028221, -0.018534243, 0.048438597, 0.02196457, -0.26033646, 0.11363536, -0.23852448, -0.2250161, 0.054867614, -0.042418674, 0.036863618, 0.16061254, -0.103400566, -0.054900676)); + target1 += mul(g3, float4x4(0.0018444043, 0.047589947, 0.15244149, 0.026401952, -0.16383879, 0.2288589, -0.067270175, 0.035644963, -0.046972964, -0.079998486, -0.07510886, 0.086569756, 0.088215984, -0.08220123, 0.006012456, -0.081925176)); + target1 += mul(h3, float4x4(-0.20731804, -0.105194375, 0.1735274, -0.13702598, -0.08078456, -0.08891678, -0.20113394, 0.20032553, 0.23738097, -0.06555696, 0.0073099127, -0.24053259, -0.19441254, 0.044497594, -0.085050255, -0.45097253)); + target1 += mul(i3, 
float4x4(-0.015630659, 0.096795596, -0.05207522, -0.021776563, -0.052400976, 0.0060831443, 0.19417833, 0.14141484, -0.031068498, -0.031282816, -0.0053475797, 0.16884208, -0.049706176, -0.117957756, -0.122313395, -0.22831066)); + target1 += mul(na1, float4x4(-0.13027157, 0.11083156, -0.05295985, -0.13405156, -0.25512117, 0.007962338, -0.19477697, -0.043301556, 0.10253565, -0.12592895, 0.05690188, -0.03008582, -0.08713882, -0.05253795, -0.05898243, 0.07648529)); + target1 += mul(nb1, float4x4(-0.028103404, 0.107655846, 0.06792543, 0.038461875, -0.17316198, 0.045686997, -0.1318844, 0.1923057, -0.10082274, 0.023855874, 0.014650556, 0.07000885, 0.03179704, -0.17100379, 0.060464893, -0.05120159)); + target1 += mul(nc1, float4x4(-0.008488711, 0.10152624, -0.08714461, -0.054719266, -0.0132024065, 0.06630249, -0.0070151696, -0.076831385, 0.15455176, 0.065892935, 0.06491651, 0.07013989, -0.016401365, 0.1033902, -0.026735194, 0.09976299)); + target1 += mul(nd1, float4x4(-0.062281746, 0.11808364, 0.064350896, -0.077770054, -0.10968356, 0.08668185, -0.14066383, 0.020038921, 0.21482739, 0.01405822, -0.05047993, -0.098990895, 0.113971226, -0.07471277, 0.14986148, 0.087345585)); + target1 += mul(ne1, float4x4(-0.06364801, -0.17296022, -0.17889057, 0.20986524, -0.022308208, -0.13067317, -0.1608613, 0.005560176, 0.18469712, 0.08284309, -0.16637094, -0.1101153, 0.0047913613, 0.085900925, -0.19173592, -0.336121)); + target1 += mul(nf1, float4x4(0.32434624, -0.11097179, -0.2576656, -0.035399284, 0.12601346, 0.12047275, -0.08445279, -0.22353333, 0.275204, -0.028347714, -0.1910839, -0.105464876, -0.17244552, 0.10430915, 0.07988085, -0.024917416)); + target1 += mul(ng1, float4x4(-0.38803256, 0.12614547, 0.113965005, -0.05710032, -0.2639457, 0.015134661, 0.018303871, 0.060708337, 0.18753609, 0.025863146, 0.09349249, -0.034619175, 0.078573935, 0.034479834, 0.03612244, -0.08949277)); + target1 += mul(nh1, float4x4(-0.16215962, -0.030498799, -0.10899874, -0.03440776, -0.015821088, 
0.029496742, 0.13228656, -0.16718344, -0.14563835, -0.17501803, -0.004510379, 0.020998359, 0.06548722, -0.13759966, 0.07444127, 0.10629099)); + target1 += mul(ni1, float4x4(0.0698536, 0.23689122, -0.0060213935, -0.0015028039, 0.039947093, 0.11350835, 0.19953221, 0.08415087, 0.22800536, 0.06906256, -0.06636992, -0.24212533, -0.0023316562, 0.011869679, 0.25965255, -0.012204548)); + target1 += mul(na2, float4x4(-0.000661378, 0.10967955, 0.058565635, -0.15265211, 0.14624023, -0.03375811, 0.05981829, -0.022552123, 0.070834555, -0.022453807, 0.019191928, 0.08326683, 0.0777132, 0.08895826, 0.023328163, 0.053312927)); + target1 += mul(nb2, float4x4(0.043799512, 0.12866509, -0.046365067, 0.24239258, -0.11673964, -0.025937054, -0.12636824, 0.100062154, -0.10018257, 0.19266897, 0.06142848, -0.14361443, -0.021221312, -0.30052304, -0.20469959, 0.14677355)); + target1 += mul(nc2, float4x4(-0.015183433, -0.19820379, -0.15852103, 0.054332163, 0.0071695223, 0.084583715, 0.24957466, -0.051836044, -0.1983422, -0.08417326, 0.08057586, -0.0437153, -0.01875922, -0.09707154, -0.15741958, -0.017708866)); + target1 += mul(nd2, float4x4(-0.29747635, 0.07556405, -0.024965616, -0.035462193, 0.00015182442, 0.039648414, -0.021202678, 0.048798855, -0.057369143, -0.1613142, 0.023689339, -0.04995168, 0.02980912, -0.052541643, -0.037693493, 0.089918755)); + target1 += mul(ne2, float4x4(0.13534155, -0.09769345, -0.072239734, 0.06396828, -0.067685336, 0.09630334, -0.060928572, 0.04446791, -0.08296695, 0.09350221, 0.34450835, -0.13325562, 0.017068733, 0.19159698, -0.0142695615, -0.0692556)); + target1 += mul(nf2, float4x4(0.006079359, 0.12826636, -0.12040495, 0.08986504, 0.07011883, -0.1098471, 0.14756078, -0.29749495, -0.13352399, -0.19821455, 0.088539004, 0.03831198, -0.2940772, 0.19943683, -0.083427206, 0.22637546)); + target1 += mul(ng2, float4x4(-0.35546607, 0.064483844, -0.19232833, -0.06884708, -0.2744395, 0.015903095, -0.18404284, 0.18437761, -0.072399296, -0.11778013, -0.109648645, 
0.038300544, -0.016273083, -0.022765087, -0.18801431, 0.023174742)); + target1 += mul(nh2, float4x4(-0.047155075, -0.013470263, -0.2142679, -0.07784448, -0.17944333, -0.04802458, -0.059323605, 0.06443357, -0.023670893, -0.32168958, -0.047240417, -0.04732927, 0.22192943, -0.12674028, 0.038099587, 0.047584143)); + target1 += mul(ni2, float4x4(-0.036675204, -0.2955229, -0.2730817, -0.021219578, -0.22891581, 0.1896148, 0.1885584, 0.020979041, -0.115823194, -0.07042675, -0.042149916, 0.04921666, -0.0054005245, -0.12240402, -0.0031619132, 0.09292424)); + target1 += mul(na3, float4x4(-0.121177875, 0.022185382, -0.13757537, 0.110018514, 0.04366351, 0.07803729, -0.028073097, -0.070835054, -0.117744304, 0.010936038, 0.0039909417, 0.15176865, 0.14082533, -0.028780727, -0.09623105, -0.17158796)); + target1 += mul(nb3, float4x4(0.04708067, 0.09987003, -0.0011556224, -0.14066035, 0.18528107, 0.2334141, 0.031397898, 0.05785171, 0.056908704, 0.07767457, 0.10462482, 0.04132479, 0.0121364035, 0.009938317, -0.08584528, -0.067361355)); + target1 += mul(nc3, float4x4(-0.05410052, -0.0714775, -0.16306542, 0.090159744, -0.161323, -0.047408808, -0.06715019, 0.09986001, 0.2831126, 0.00576967, 0.040771786, -0.08548527, -0.09100255, 0.13035326, 0.012434338, -0.014341014)); + target1 += mul(nd3, float4x4(-0.1663156, 0.10254592, -0.050546184, 0.11586232, -0.16458654, -0.03840253, 0.20078611, -0.07851566, 0.15138014, -0.112647966, -0.01826464, 0.12073245, -0.08315027, -0.050763886, -0.15038362, -0.1131053)); + target1 += mul(ne3, float4x4(0.01002309, 0.08847059, -0.20151149, -0.0035132666, -0.23968504, -0.03516418, 0.29592118, 0.064261466, 0.45611492, -0.10594028, 0.110738106, -0.096258715, -0.05207964, -0.05561078, -0.11650712, -0.3685437)); + target1 += mul(nf3, float4x4(0.20816466, -0.05811231, -0.061693646, 0.07572569, 0.14781217, -0.0070261173, -0.025654003, 0.054483656, 0.057109646, 0.19076158, 0.04684541, 0.1116435, -0.09888648, -0.031974472, 0.19365066, 0.021925794)); + target1 += 
mul(ng3, float4x4(0.03929964, 0.07849196, -0.09844016, 0.07695297, 0.14535576, -0.2121029, -0.08024618, -0.012246682, 0.34951916, -0.09691296, 0.03363421, 0.058434267, 0.003874065, 0.14535636, -0.028760154, 0.124139)); + target1 += mul(nh3, float4x4(-0.0932687, 0.092196085, -0.31407887, 0.1343263, -0.27295715, 0.14278416, 0.08114481, -0.12019184, 0.11957917, -0.113183275, 0.039373737, 0.46590427, 0.13638581, -0.043146584, 0.072187565, 0.25355667)); + target1 += mul(ni3, float4x4(0.123297654, 0.13584657, 0.07648451, -0.13606457, -0.16890481, 0.01590599, -0.21695235, -0.0694265, -0.2649162, 0.02908455, 0.21927917, 0.010575717, 0.0485126, 0.039509103, 0.28077808, 0.081715904)); + target1 += float4(0.04207974, -0.22892998, 0.061954536, 0.076551735); + + float4 target2 = mul(a1, float4x4(-0.017371856, 0.031500984, -0.07871794, 0.07516421, -0.047120046, -0.1499491, 0.03412159, -0.11797919, 0.24790019, -0.19525756, -0.05562878, 0.0328997, 0.21224782, -0.15311961, -0.18679233, -0.021687083)); + target2 += mul(b1, float4x4(-0.025990961, 0.12443172, 0.0647746, -0.05208365, 0.05024424, -0.15237884, -0.12913004, -0.03974524, 0.1453159, 0.105298564, -0.17882426, 0.15200019, -0.024576407, 0.024749285, -0.114573665, 0.12468399)); + target2 += mul(c1, float4x4(0.07534002, -0.018443566, -0.07744656, -0.049855288, 0.030816372, -0.011974315, 0.05701086, 0.083947234, -0.16585147, -0.09379088, -0.090112925, -0.110042654, -0.105956376, 0.014653304, 0.041867986, 0.24255139)); + target2 += mul(d1, float4x4(0.0044792104, -0.029270872, 0.07648775, 0.049905814, 0.014173815, -0.16794622, -0.09707847, 0.12383384, 0.06794641, -0.07997065, -0.51078653, 0.034911633, 0.13010858, -0.23383191, 0.07255915, -0.06692129)); + target2 += mul(e1, float4x4(0.21879609, -0.017210754, -0.015485283, 0.083878465, -0.26080847, 0.36907044, 0.23289536, -0.038870774, 0.06501928, 0.14246589, -0.08897723, 0.10715434, 0.3482729, 0.16240129, -0.013726439, -0.005958744)); + target2 += mul(f1, float4x4(-0.11399226, 
0.18352379, 0.14817153, -0.20127603, 0.014963564, 0.1103272, -0.07205868, 0.08848388, 0.14840026, 0.018574262, -0.07972405, 0.02918892, 0.18851598, 0.074035265, -0.010895981, -0.034228772)); + target2 += mul(g1, float4x4(-0.12840563, 0.13339421, -0.042844173, 0.17029236, 0.27274412, -0.05954642, -0.07974038, -0.14359044, -0.12972996, -0.14160097, -0.22879072, 0.17341535, -0.047784876, -0.0024098, -0.066806085, 0.1451525)); + target2 += mul(h1, float4x4(0.019089594, 0.14139606, -0.16583538, 0.038803227, -0.014393993, -0.06451304, -0.0133141065, -0.22717497, -0.07594741, 0.16408369, -0.0074125547, 0.06459095, -0.13577539, -0.123973124, -0.21311697, 0.06648542)); + target2 += mul(i1, float4x4(0.2023118, 0.014515263, -0.032675546, 0.01735652, 0.16447331, -0.016542327, -0.17865558, 0.07834224, 0.016872171, -0.12725283, -0.021913532, -0.03262319, -0.11567316, -0.009686028, 0.01897474, -0.00264971)); + target2 += mul(a2, float4x4(0.02156143, 0.06127393, 0.08751492, -0.0027723024, 0.061267495, 0.22953646, 0.26134068, 0.23994948, -0.05292228, 0.11692952, 0.1014853, -0.013061857, -0.13198215, -0.08740625, 0.08896114, 0.11902029)); + target2 += mul(b2, float4x4(0.017173437, -0.00088511547, 0.07882701, 0.059980858, -0.06255887, -0.07106743, -0.070686355, -0.111458905, -0.102210574, 0.082739465, 0.25598842, 0.010992033, -0.06413811, -0.03738569, 0.009392029, -0.047789197)); + target2 += mul(c2, float4x4(-0.23666115, 0.07702853, 0.15348057, 0.081954665, -0.028320765, -0.15108013, -0.06386237, -0.03937426, -0.070428774, 0.046394363, 0.097939745, -0.08086774, 0.06996333, -0.048788365, 0.07915947, 0.05624496)); + target2 += mul(d2, float4x4(-0.14345141, 0.048822183, -0.2908337, 0.013937969, -0.019703582, -0.41485405, 0.431834, 0.05884408, -0.3067431, 0.10988645, -0.014010137, 0.06143512, 0.24215294, -0.17129561, 0.11282655, 0.19824891)); + target2 += mul(e2, float4x4(-0.07530577, -0.015041713, -0.11711949, 0.060197067, 0.15375182, 0.5235449, -0.15465264, 0.055295702, -0.12753716, 
0.04075088, 0.06649801, -0.08592669, -0.034694944, 0.18401965, -0.031681508, 0.086950384)); + target2 += mul(f2, float4x4(0.23155743, -0.012697523, -0.19502366, -0.09216853, -0.050312944, -0.003234684, -0.07824935, 0.09000848, -0.1604727, 0.16866255, -0.07226818, -0.04688219, 0.18855634, 0.07053166, 0.06875359, -0.082133405)); + target2 += mul(g2, float4x4(0.097153, 0.17410621, -0.07209523, 0.031690594, -0.18697138, -0.31457213, 0.12693302, 0.09791562, -0.056750435, 0.17457159, -0.014368028, 0.11140081, 0.14797364, -0.11987443, 0.010138102, -0.24108526)); + target2 += mul(h2, float4x4(0.08502398, 0.25199497, 0.033161916, 0.11686169, -0.000555042, -0.13222077, 0.019214375, -0.0740864, 0.05422655, -0.0689195, 0.07171115, -0.0063085253, -0.11293817, 0.28714395, 0.08302453, -0.297302)); + target2 += mul(i2, float4x4(0.0018131305, -0.23274079, 0.28795394, 0.10479223, 0.017336998, 0.10140653, -0.01703538, 0.0018864989, -0.19448972, 0.06781925, 0.0072297496, 0.054331925, -0.056745283, 0.0031926096, 0.08508613, -0.076465875)); + target2 += mul(a3, float4x4(-0.06579661, -0.074197, -0.07872732, -0.04833768, 0.07948355, 0.10680971, -0.038892176, 0.0026479303, -0.05120215, -0.005223787, 0.013828104, 0.033628467, -0.251052, -0.053964466, -0.04151976, -0.12170088)); + target2 += mul(b3, float4x4(0.02224381, -0.11401214, 0.049397755, 0.1178245, 0.124475546, -0.014129338, -0.08712223, -0.110995345, 0.027189068, 0.14115846, 0.008039289, -0.077303566, 0.13120183, 0.088576116, 0.19419082, -0.19265574)); + target2 += mul(c3, float4x4(-0.302041, -0.09488605, 0.10128198, 0.25093108, -0.05749319, -0.1325287, -0.07048078, 0.25168943, 0.24393974, 0.26709494, -0.005166187, -0.0858236, 0.098031975, -0.046012603, -0.025616428, -0.038455524)); + target2 += mul(d3, float4x4(0.15295, -0.058367014, -0.09462144, -0.004685292, 0.061874785, 0.17379992, 0.10421289, -0.102156416, 0.07116128, 0.09785571, -0.08606482, 0.1615783, -0.10226774, -0.15573122, -0.17567602, 0.12711914)); + target2 += mul(e3, 
float4x4(-0.08792466, 0.32314366, -0.040461652, -0.1960407, -0.11285709, -0.14666572, -0.070970505, 0.04230559, -0.05408487, -0.2794681, -0.4155402, 0.26639655, 0.13980015, 0.12434661, -0.02678858, 0.056679014)); + target2 += mul(f3, float4x4(-0.124382794, 0.018727468, 0.20523487, -0.070906, -0.030757494, -0.10337054, 0.067943715, -0.039035156, 0.035588995, 0.14607283, -0.085760534, 0.19209209, 0.13216998, 0.16539834, 0.010052314, -0.022481022)); + target2 += mul(g3, float4x4(0.021054843, -0.15636541, 0.011583453, -0.10839945, -0.05794076, -0.053845506, 0.0063711316, 0.09400282, 0.11037196, -0.11023954, 0.07765479, 0.0063296715, -0.100950494, 0.20135373, 0.048100784, 0.1047337)); + target2 += mul(h3, float4x4(0.019294975, 0.10017591, -0.022420274, -0.024994979, 0.033118278, -0.0335541, -0.099411234, -0.051065058, 0.04019899, -0.09789642, -0.21099539, -0.051657237, 0.0537393, 0.22397718, -0.09253929, 0.0056816903)); + target2 += mul(i3, float4x4(0.13451837, -0.31405422, -0.02294345, -0.09470789, 0.011980906, -0.29736918, 0.04785323, 0.008854729, 0.0064198305, 0.1608248, -0.0063040988, 0.015922181, 0.058713753, 0.19405961, -0.0074991966, -0.056430623)); + target2 += mul(na1, float4x4(-0.030276824, 0.051418643, -0.033852484, -0.04178643, -0.09626818, 0.06430078, 0.18420494, 0.21067473, -0.20206925, 0.039089408, -0.20179388, 0.04502135, -0.079114124, -0.18990965, 0.03482791, -0.20353125)); + target2 += mul(nb1, float4x4(0.12883389, 0.01503085, 0.07740192, -0.021361377, -0.021194257, -0.2965198, 0.038358267, 0.08110664, -0.122530565, 0.002974726, -0.11742695, -0.05976367, 0.011006546, -0.0676137, 0.109357566, -0.09688377)); + target2 += mul(nc1, float4x4(-0.22074296, 0.019343395, 0.17098527, 0.21736804, -0.15512446, 0.1447234, -0.1344856, 0.051509894, -0.021283794, -0.017791564, -0.023386735, 0.15375026, 0.05583616, -0.22131743, 0.010143341, -0.113710396)); + target2 += mul(nd1, float4x4(0.12376125, 0.086540736, -0.07823014, -0.11477249, 0.071970075, 0.04002691, 
0.09260781, -0.16808367, -0.07891094, -0.28984514, -0.0030400122, 0.20933042, -0.09442383, 0.27100945, 0.03393376, -0.025617108)); + target2 += mul(ne1, float4x4(-0.041222293, 0.012311568, 0.13222927, 0.15650855, 0.024765523, -0.055989124, -0.02946687, -0.0066036643, -0.12604281, -0.16414027, -0.22830643, 0.0840456, -0.19442934, -0.00939128, -0.005971656, 0.027085181)); + target2 += mul(nf1, float4x4(-0.23906162, -0.04003579, 0.16445775, 0.2578306, -0.08858488, -0.0009076812, 0.05893361, -0.07622802, 0.07551978, 0.16221073, -0.08075802, -0.066482686, -0.082238205, -0.07318114, -0.02384466, -0.008769857)); + target2 += mul(ng1, float4x4(0.034418013, -0.04310424, 0.06940784, -0.040061995, -0.196672, 0.059436113, 0.18781166, -0.087357335, 0.17683987, -0.11832282, 0.0704508, -0.080166645, -0.10043135, 0.029797623, 0.045275707, -0.00091474655)); + target2 += mul(nh1, float4x4(-0.13774432, 0.039946273, 0.010250749, -0.064292066, -0.033921324, 0.086792484, -0.06556751, 0.16063036, 0.040354285, -0.005781792, -0.06043568, 0.0456958, 0.057671502, -0.09200769, 0.05852994, -0.038263924)); + target2 += mul(ni1, float4x4(0.0722641, -0.15417133, 0.0428391, -0.11669595, -0.15181269, -0.14444157, -0.05888602, -0.04931457, -0.024105387, 0.04452374, -0.19607021, 0.040299945, 0.023721624, 0.009294535, -0.12308105, -0.032013766)); + target2 += mul(na2, float4x4(0.13982506, 0.008242153, 0.007985137, -0.028785944, -0.045674372, 0.03811196, -0.006431167, 0.042959616, -0.14530565, -0.13717386, 0.15736887, -0.070945315, 0.16792078, -0.057526443, 0.11027599, -0.062423922)); + target2 += mul(nb2, float4x4(0.33995095, -0.06725867, 0.25568435, -0.1156066, 0.0073083406, 0.09118932, -0.036027674, 0.14834408, 0.0076618423, 0.048706416, -0.11109869, 0.014119505, -0.16117008, 0.055889986, 0.021106627, 0.0494479)); + target2 += mul(nc2, float4x4(0.058088336, -0.05898053, 0.28952774, 0.06457457, 0.06820624, 0.031307437, 0.040132232, -0.12814572, 0.034467205, 0.16643257, 0.13826352, -0.050465748, 
-0.082429856, 0.028516805, 0.10005895, -0.17591912)); + target2 += mul(nd2, float4x4(0.17962062, 0.050080433, 0.115288205, 0.07467281, 0.07438551, 0.111036986, -0.09742873, -0.23408481, -0.09974166, -0.12665741, -0.04540029, -0.03346997, 0.089152135, 0.082195945, 0.28275734, -0.24630727)); + target2 += mul(ne2, float4x4(0.11799736, -0.06625111, 0.091244, -0.13702978, 0.055218194, -0.031087862, 0.06133677, -0.27246916, -0.15978532, 0.19715077, 0.051257942, 0.036602553, 0.054990616, -0.25717, 0.12677813, -0.0406006)); + target2 += mul(nf2, float4x4(-0.043816347, -0.3335301, 0.19126506, -0.01086813, 0.075816035, 0.15178275, -0.07246076, -0.19391762, 0.07836278, 0.12452172, 0.09029487, -0.034167152, -0.061805293, -0.08850912, 0.08531079, 0.14093879)); + target2 += mul(ng2, float4x4(0.120683454, 0.02466898, 0.19501889, -0.047962803, 0.2524244, -0.04647245, 0.23329985, -0.437865, -0.11040008, 0.05536788, 0.094667554, -0.029751923, -0.04589413, -0.24310234, 0.27122453, 0.010039841)); + target2 += mul(nh2, float4x4(-0.17811799, -0.05787477, 0.10678799, -0.28424516, -0.11051176, -0.0372708, 0.20203365, 0.10050222, -0.1243157, 0.20707713, 0.14385784, 0.025799723, 0.028424745, -0.06201256, -0.1112155, 0.17677756)); + target2 += mul(ni2, float4x4(-0.06334935, 0.14396226, -0.121362604, -0.30631876, -0.17723008, -0.041447658, 0.03672539, 0.1550316, 0.113435954, 0.13270019, 0.04389676, 0.016865736, 0.0027031328, 0.107943274, -0.08071779, -0.007290789)); + target2 += mul(na3, float4x4(-0.0327075, -0.02185086, -0.00093145896, -0.009849336, -0.06994606, -0.009004001, -0.2962301, -0.093587525, 0.055827085, 0.15590863, -0.1348263, -0.030768193, 0.1539244, 0.056906786, -0.046778735, 0.1293399)); + target2 += mul(nb3, float4x4(0.060477, 0.10025322, 0.034794286, -0.15556674, -0.046868246, -0.06774045, -0.0046042744, -0.028093262, -0.14673153, 0.0014603435, -0.17085737, 0.09433877, 0.06585415, -0.17430365, -0.09225927, 0.18637276)); + target2 += mul(nc3, float4x4(-0.0829445, -0.046446815, 
0.01044717, -0.08179017, -0.106227055, -0.07285646, -0.118698135, -0.08691134, -0.19350386, 0.18079466, -0.0896787, -0.0054066014, 0.044900116, -0.07164249, 0.03728663, -0.071337156)); + target2 += mul(nd3, float4x4(-0.091456026, 0.0829187, 0.2184223, 0.12404674, 0.0535281, -0.0046089985, -0.1367499, 0.14318149, -0.13627648, 0.008214974, -0.035714064, -0.11221228, -0.0848333, 0.054274652, 0.12799235, -0.12235648)); + target2 += mul(ne3, float4x4(0.015441998, -0.16407311, 0.29637286, 0.15780787, 0.100573234, -0.023377284, 0.19050701, 0.14114772, 0.1021301, 0.30314055, 0.08799963, 0.11630563, -0.28035656, 0.10020031, -0.009994972, -0.16998753)); + target2 += mul(nf3, float4x4(-0.053246386, 0.15038243, -0.020114498, 0.019207323, -0.4546607, 0.048940018, 0.122429796, 0.14951369, 0.09936216, -0.13126904, -0.15678225, 0.101906285, 0.017061174, -0.17944153, -0.12741113, -0.13633935)); + target2 += mul(ng3, float4x4(0.11258541, -0.056183632, -0.10542277, 0.048327565, -0.10695888, 0.021128727, -0.0025440033, -0.14460813, -0.2421658, 0.04799532, -0.025316745, 0.111919515, 0.133215, -0.23335934, -0.037506737, -0.12447751)); + target2 += mul(nh3, float4x4(0.035608087, -0.17302564, 0.07696709, -0.18077038, -0.02534479, 0.035865046, 0.15503906, -0.07042084, 0.37430316, 0.2688597, 0.23763078, 0.26458314, 0.22778325, 0.13661247, 0.032626268, 0.10627844)); + target2 += mul(ni3, float4x4(-0.14816584, 0.08924656, -0.02333901, 0.0735485, -0.17011848, -0.059921533, 0.045324218, 0.026974149, 0.15702479, 0.0067652813, 0.08584165, 0.09428486, 0.035495974, -0.07220769, -0.0524813, -0.008241412)); + target2 += float4(0.0076388572, -0.16117841, -0.21034169, -0.019341651); + + float4 target3 = mul(a1, float4x4(0.051828694, -0.14444938, -0.06172656, -0.092529796, 0.0032331774, 0.0505327, -0.092972204, 0.054304235, 0.04113735, 0.05488947, 0.27173808, 0.008734756, -0.037090253, 0.11106639, 0.1864697, -0.1308939)); + target3 += mul(b1, float4x4(-0.0292121, 0.09739149, -0.057740077, -0.043211482, 
0.00057832256, 0.122456014, 0.14004166, -0.22281875, -0.00958859, 0.012818551, 0.21724443, 0.038053658, 0.11917748, -0.0147661995, 0.15326285, -0.007842389)); + target3 += mul(c1, float4x4(0.028475946, -0.044710767, 0.120977476, 0.024894554, 0.034071486, 0.002889187, 0.0886379, -0.13210039, 0.0254021, -0.10800576, -0.0154256895, 0.07889771, -0.026208088, -0.1735971, 0.12414827, 0.06541947)); + target3 += mul(d1, float4x4(0.15367964, -0.016319191, -0.087988645, 0.21592557, -0.13575394, 0.07606312, 0.17890929, 0.06405638, -0.15215087, -0.31830072, -0.070441514, -9.058544e-06, 0.15286519, -0.07961882, 0.0051650982, 0.05743661)); + target3 += mul(e1, float4x4(0.14879431, 0.09249706, -0.08179524, 0.08862426, -0.04546735, 0.052125804, 0.10511877, -0.036810514, 0.19695859, 0.06919595, -0.041425765, 0.05109113, 0.16108315, -0.0006357406, -0.036482725, -0.000831584)); + target3 += mul(f1, float4x4(-0.14299406, 0.24442554, 0.08385988, -0.0018431129, 0.025425488, 0.043124236, -0.19599897, 0.2500142, 0.084921256, -0.064991206, -0.04332563, -0.20997004, -0.06825186, 0.11137002, -0.08090301, -0.06958994)); + target3 += mul(g1, float4x4(-0.17347668, -0.09592853, -0.051422764, -0.15347266, 0.19709691, 0.012748645, 0.11250177, 0.020625748, -0.12617995, -0.09576706, 0.121928014, -0.052528545, 0.06992809, -0.060379576, -0.13869223, -0.05584254)); + target3 += mul(h1, float4x4(0.040104184, -0.12147194, -0.04430197, 0.13594869, 0.09909328, 0.12928483, -0.2334865, 0.11032421, 0.064912125, -0.010493585, 0.06800239, 0.18326257, 0.019329162, -0.09916547, -0.11674449, 0.03267864)); + target3 += mul(i1, float4x4(-0.07757802, -0.018029094, 0.029337326, 0.29172876, -0.03394624, 0.02624461, -0.2849472, -0.27765557, -0.04780892, -0.019495687, -0.11718942, -0.03025127, -0.008503852, -0.076533996, -0.02296907, 0.068641014)); + target3 += mul(a2, float4x4(0.13043757, -0.06434652, -0.0690028, -0.033568893, 0.17211302, -0.029193658, 0.12456035, -0.11193319, -0.0035818655, -0.2563802, -0.12287091, 
0.10766433, -0.04711406, -0.08852275, 0.0153720435, -0.14872602)); + target3 += mul(b2, float4x4(-0.080712505, 0.11759175, -0.11220247, 0.10730683, 0.06418219, 0.00800814, -0.028890526, 0.1441286, 0.03056378, -0.0035148377, -0.120093554, 0.043768104, 0.07286328, -0.021130785, 0.09223498, 0.20331676)); + target3 += mul(c2, float4x4(-0.09102653, -0.10116414, 0.15046883, 0.28877532, -0.011975523, -0.0068613496, -0.09103339, 0.11455707, 0.007323278, 0.08825653, -0.054251585, -0.14907618, -0.00018906803, -0.08488728, 0.036797076, -0.12455349)); + target3 += mul(d2, float4x4(0.04010406, 0.024046177, -0.20183066, -0.06970149, -0.10715107, -0.077962436, 0.32845956, -0.2622872, -0.15997723, -0.07157501, -0.09492247, -0.00996072, -0.067652985, -0.16896474, 0.06192714, 0.019690538)); + target3 += mul(e2, float4x4(-0.10179747, -0.10023532, -0.10475995, -0.15501128, 0.017811656, 0.027858434, -0.11646674, 0.08104398, -0.12454491, 0.032985296, -0.09229711, 0.0909355, 0.0021391874, -0.051617827, -0.11611242, 0.036069512)); + target3 += mul(f2, float4x4(-0.14753185, -0.020901026, -0.0029391565, -0.14624536, -0.09374949, -0.049715783, 0.1951781, 0.22286539, -0.013287656, 0.0830378, -0.2975549, -0.13074464, -0.010272348, 0.032849077, -0.097859964, -0.1562913)); + target3 += mul(g2, float4x4(0.14641422, 0.13483211, -0.0438145, 0.08620407, 0.11926978, -0.15772878, 0.17547028, 0.15418763, 0.0097786365, 0.016791794, 0.057482373, -0.0716323, -0.061063405, 0.13135311, 0.1040161, 0.1688627)); + target3 += mul(h2, float4x4(0.11255645, 0.08840791, 0.07584055, -0.09523696, -0.1154477, -0.085963145, -0.075319275, -0.05898237, -0.14236066, 0.058508113, 0.078278095, 0.07180024, 0.19020182, 0.027219167, -0.11044013, -0.1411698)); + target3 += mul(i2, float4x4(0.1250712, -0.09155498, 0.11040472, -0.28928515, 0.06875818, -0.07716765, 0.07982134, 0.22709553, 0.08608979, 0.02659528, -0.050615177, -0.054662008, -0.016789312, 0.095084675, -0.20973809, -0.14231291)); + target3 += mul(a3, 
float4x4(0.009871057, 0.07234809, -0.061542578, -0.2561031, 0.17938578, 0.059759673, -0.0533506, -0.15160522, -0.06667153, 0.022478178, -0.078531526, 0.01727445, 0.032124806, -0.09959757, -0.08871009, -0.0010295251)); + target3 += mul(b3, float4x4(-0.07400921, 0.009798935, 0.06958411, -0.14588043, 0.045884695, 0.029824348, -0.08622057, -0.03112675, -0.050385453, 0.12655865, -0.06863022, -0.21982339, -0.06292096, -0.014440884, 0.06755428, -0.114989646)); + target3 += mul(c3, float4x4(0.054011043, -0.26510096, 0.21961565, 0.05448362, 0.06296498, -0.07182228, -0.09567859, -0.024238275, 0.005022228, 0.1626434, 0.00019249211, 0.073934935, 0.02381926, 0.025067188, -0.10400833, -0.10235642)); + target3 += mul(d3, float4x4(0.019573225, 0.016258147, 0.014888165, -0.09950712, 0.052801423, 0.18720426, 0.13194256, -0.030186977, -0.052970573, -0.20545387, 0.0477203, 0.12807603, 0.106122404, 0.013091209, 0.037285265, -0.17009702)); + target3 += mul(e3, float4x4(-0.052872628, 0.0067698397, -0.04057391, -0.10654882, -0.08066677, -0.11518657, 0.063243456, 0.108404346, 0.006817193, -0.08499581, -0.16265164, -0.019080937, 0.27572608, -0.02719708, -0.10466762, 0.006535063)); + target3 += mul(f3, float4x4(-0.004304222, -0.23885699, 0.0007060991, -0.011653924, -0.058662247, -0.10310051, 0.19861554, -0.124969624, 0.08919569, 0.062485468, -0.07952577, 0.06357056, 0.13038754, -0.10383543, -0.12508194, 0.07526947)); + target3 += mul(g3, float4x4(0.034628194, -0.1459473, -0.12843482, -0.16211623, 0.18986839, -0.021202087, 0.030887406, 0.16012087, -0.07651755, 0.25390217, 0.100328274, -0.18489215, -0.11211924, -0.18655026, -0.12336867, 0.03715863)); + target3 += mul(h3, float4x4(0.24926607, -0.12733914, -0.16163528, -0.18980862, 0.026140725, 0.030769283, -0.08602958, -0.011363779, -0.18870075, -0.08782851, -0.019595576, 0.15859611, 0.14101227, -0.23768859, -0.11449071, -0.21400326)); + target3 += mul(i3, float4x4(-0.014345643, 0.03152331, 0.14303848, 0.068378784, -0.023709042, 0.009476213, 
0.03332845, -0.043729182, -0.16312705, 0.18575506, 0.045167383, 0.089232035, 0.12431053, -0.019391764, -0.09807002, -0.19098805)); + target3 += mul(na1, float4x4(-0.0027074527, 0.08881943, 0.021618785, 0.17202215, -0.023361688, -0.12384613, 0.1257001, 0.034937408, 0.050526705, -0.21945108, -0.23475797, 0.1385765, 0.03910722, 0.08761758, -0.06185295, 0.16879226)); + target3 += mul(nb1, float4x4(0.01759655, 0.07489585, 0.06413278, -0.16355684, 0.021823732, -0.19263723, -0.021956496, 0.07322703, 0.106124505, 0.17441194, 0.016513938, -0.09815339, -0.12467256, -0.036076445, -0.09139147, -0.09947436)); + target3 += mul(nc1, float4x4(-0.027052518, -0.059014272, 0.14797378, 0.21370119, 0.033306625, 0.070152596, 0.0052737673, 0.28024423, 0.040666968, -0.069734804, 0.07771406, 0.1577554, 0.03728327, -0.01140819, 0.056443825, -0.08787925)); + target3 += mul(nd1, float4x4(-0.24540152, 0.0015005039, 0.020643666, -0.3483438, -0.11493903, -0.13617486, -0.0063642715, -0.10733139, 0.12702248, 0.20147271, 0.031689152, 0.07603208, 0.15610643, 0.16600998, -0.041932072, -0.087021336)); + target3 += mul(ne1, float4x4(0.15945607, -0.019792518, 0.16893104, 0.047684517, -0.08704263, 0.019054385, -0.13532451, 0.07722914, 0.06000842, -0.053279165, -0.041631456, 0.021691417, -0.05814861, 0.0014272713, -0.2269319, 0.0764104)); + target3 += mul(nf1, float4x4(-0.084321365, -0.2361291, -0.1518955, -0.15901338, -0.06990816, -0.024734944, 0.06835628, -0.21718912, -0.12289749, -0.025446652, -0.15737066, -0.010520588, 0.12629907, -0.06181239, -0.0011575993, -0.004076976)); + target3 += mul(ng1, float4x4(0.012631871, 0.023027385, 0.0036474608, 0.02950606, -0.13008296, 0.098362945, 0.04146146, 0.17968152, -0.15123938, 0.09731617, -0.014078934, 0.05166318, -0.009141391, 0.08204638, 0.07045137, -0.030674614)); + target3 += mul(nh1, float4x4(0.109709226, -0.02842136, -0.07762395, -0.010807984, -0.17060421, 0.0826962, 0.03507386, -0.12764347, 0.12828389, -0.051255893, -0.124972954, -0.16426642, 
-0.15884088, 0.07268723, -0.0030184009, -0.009351197)); + target3 += mul(ni1, float4x4(-0.05924065, 0.109954804, -0.015081119, -0.30813795, 0.049611736, -0.09356052, 0.14393319, 0.2197319, 0.04127852, -0.083522744, -0.20068535, -0.1432542, 0.061216276, 0.040896352, -0.0010942877, 0.1074572)); + target3 += mul(na2, float4x4(-0.043747675, -0.09601221, -0.029208777, -0.3020336, -0.18261817, -0.076463126, 0.02404145, 0.021356242, -0.115703, 0.18811412, 0.01355199, -0.18233287, -0.164117, 0.10521931, 0.033724364, 0.045072973)); + target3 += mul(nb2, float4x4(-0.14719059, -0.12931113, 0.15695307, -0.16798888, 0.062653124, -0.12612487, -0.12454781, -0.084084496, 0.023468291, 0.027891247, 0.0042489907, -0.1077923, -0.005104954, -0.121897295, 0.08160336, 0.23735033)); + target3 += mul(nc2, float4x4(-0.06651707, -0.15773214, -0.016145034, -0.1297115, -0.05631942, 0.19243148, -0.08536315, -0.2202384, 0.024619251, 0.09842469, -0.060476214, 0.1606162, -0.06982684, 0.27481422, -0.0032873556, -0.055477414)); + target3 += mul(nd2, float4x4(0.013625612, -0.11602345, 0.13228852, -0.01016997, -0.113034405, 0.12990026, 0.008144483, 0.28583318, 0.0018612862, 0.19464394, 0.06077795, -0.05083094, -0.1419072, 0.30847812, 0.16012973, -0.043837596)); + target3 += mul(ne2, float4x4(0.25535858, 0.047635876, 0.20499952, 0.14458135, -0.2067339, 0.18970652, 0.18168713, 0.089201, -0.1371205, 0.09543299, -0.048719935, -0.21094483, 0.06297616, -0.14864779, 0.24678773, 0.023468606)); + target3 += mul(nf2, float4x4(-0.024188349, 0.049452, 0.119040206, 0.19403425, 0.15611161, 0.20774378, -0.10905696, -0.16743217, -0.067075364, 0.02012775, 0.031936057, 0.16447093, -0.14523768, 0.12793602, 0.21358742, 0.1580285)); + target3 += mul(ng2, float4x4(0.12834404, -0.23567453, 0.0594437, 0.1590165, 0.04364869, 0.092662945, 0.19947445, 0.13371125, -0.030953676, 0.072429836, 0.00064696936, 0.05223404, -0.18505633, -0.038344953, 0.1609896, -0.027951878)); + target3 += mul(nh2, float4x4(0.1615281, 0.02925065, 
-0.110526, 0.002472878, 0.15692636, 0.17720695, 0.08651831, -0.2926173, 0.039506726, 0.08039181, -0.125379, -0.112809196, -0.018160323, -0.15315212, 0.05300267, -0.12539586)); + target3 += mul(ni2, float4x4(0.045024972, -0.026277857, -0.13403505, -0.082753636, -0.014246987, 0.08158673, -0.17446561, -0.12912557, -0.03281638, 0.12861331, -0.048045747, 0.008813668, 0.13716908, -0.1772549, 0.12983966, 0.28312683)); + target3 += mul(na3, float4x4(0.06964638, 0.0047901543, 0.09235384, 0.24047932, -0.0034995198, 0.1894994, 0.044509877, 0.08263613, 0.22042292, 0.0068810997, -0.08542091, 0.13489819, -0.017957956, -0.049517035, 0.11637685, -0.070710674)); + target3 += mul(nb3, float4x4(0.005409427, 0.2764383, 0.100069076, 0.0025022945, 0.042582463, -0.07622942, 0.1427979, 0.12527353, 0.07857632, 0.110723145, -0.091726854, 0.18400952, 0.08911038, -0.11033729, 0.025358237, -0.011007877)); + target3 += mul(nc3, float4x4(0.041533705, -0.038725346, 0.09127384, 0.10426011, -0.02070303, 0.0878809, 0.15809457, -0.009334662, -0.049823076, 0.11527338, -0.06646191, 0.03342348, 0.07330054, 0.011010275, 0.16572441, 0.059434716)); + target3 += mul(nd3, float4x4(0.01884174, 0.024791235, 0.063296616, -0.042403292, -0.12980534, -0.019906277, -0.18554951, -0.09545456, 0.17291631, 0.22148399, -0.093014, -0.07421902, -0.15626103, -0.13463756, -0.08697246, 0.18189901)); + target3 += mul(ne3, float4x4(-0.027780509, 0.061554506, 0.18972316, 0.017942533, -0.012191195, 0.047828108, 0.102957085, -0.15932114, -0.13597767, 0.2235027, 0.13829249, 0.11061467, -0.20257929, -0.062691554, 0.06993067, 0.018168231)); + target3 += mul(nf3, float4x4(0.0038817637, 0.053267647, -0.1002687, -0.1239985, 0.04858564, 0.059892915, -0.10344583, 0.24931516, -0.02322075, -0.07354648, 0.20486975, 0.0147269, 0.09117062, 0.0001810227, 0.0011455072, -0.1166342)); + target3 += mul(ng3, float4x4(0.026433034, -0.010127757, 0.1411767, 0.12108788, -0.16191758, -0.06574798, -0.027283505, 0.052705772, -0.09186127, -0.05113535, 
-0.008512441, 0.06438505, 0.07150241, 0.096780665, 0.14615399, 0.043888208)); + target3 += mul(nh3, float4x4(-0.07171402, 0.053826947, 0.1817855, 0.15776771, 0.020122573, 0.014001945, 0.107657574, 0.06755519, -0.16229364, 0.025698826, 0.19443901, -0.18386869, -0.112747826, 0.19832937, 0.032073986, 0.07755969)); + target3 += mul(ni3, float4x4(-0.0017903978, 0.017006857, -0.154056, -0.12544118, -0.17143774, 0.11694203, 0.046639796, -0.13699242, 0.1032892, -0.16337542, 0.20032221, 0.30423567, -0.09217524, 0.03736137, 0.06391171, 0.18111771)); + target3 += float4(0.11033049, -0.073737, -0.013228117, 0.01553484); + + float3 target4 = tex7.SampleLevel(sam, pos, 0).rgb; + target4 += mul(e1, float4x3(0.060458526, -0.0033674864, -0.006985535, -0.013925546, 0.051077038, 0.053856038, -0.033647064, 0.043235198, 0.05311577, 0.0391791, -0.044376004, -0.054064214)); + target4 += mul(e2, float4x3(0.0069859014, -0.0050665336, -0.010343517, -0.027551029, 0.049856182, 0.058316905, 0.0121670095, -0.013107907, -0.0151846, 0.007648614, -0.0051277154, -0.0053846613)); + target4 += mul(e3, float4x3(0.06848036, 0.026777437, 0.024801696, -0.08711668, 0.049429595, 0.067019165, -0.09006778, -0.042166695, -0.02230536, -0.048024856, -0.020088708, -0.009932858)); + target4 += mul(ne1, float4x3(-0.05171447, 0.0029948682, 0.014913949, 0.02287364, -0.042476606, -0.052956346, 0.02762833, -0.044026252, -0.056759696, -0.0519502, 0.047626793, 0.06422155)); + target4 += mul(ne2, float4x3(-0.0031128856, 0.013134638, 0.021534251, 0.049189907, -0.039677586, -0.057255603, -0.009908353, -0.0013683038, 0.0028079485, 0.0002268831, 0.012356764, 0.009817244)); + target4 += mul(ne3, float4x3(-0.04058634, -0.01822148, -0.014306331, 0.107378654, -0.04138371, -0.058573496, 0.03701269, -0.009420217, -0.02310707, 0.039931968, 0.001769326, -0.007929419)); + + tex4[gxy] = target1; + tex5[gxy] = target2; + tex6[gxy] = target3; + tex8[gxy] = float4(target4, 1); +} + +//!PASS 7 +//!DESC Conv-4x3x3x24 +//!IN tex4, tex5, 
tex6, tex8 +//!OUT tex1, tex2, tex3, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass7(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex4.SampleLevel(sam, pos, 0); + float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex5.SampleLevel(sam, pos, 0); + float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex5.SampleLevel(sam, pos + 
float2(inputPt.x, 0), 0); + float4 i2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex6.SampleLevel(sam, pos, 0); + float4 f3 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(-0.0041438183, 0.087629646, 0.02373779, -0.008705929, -0.06460613, -0.079614826, 0.20589171, -0.21300887, 0.06673036, -0.14301205, 0.0005478004, 0.10480311, 0.16944528, -0.023095177, -0.04593122, 0.031710908)); + target1 += mul(b1, float4x4(0.24273445, 0.1350743, -0.050578117, -0.006424492, 0.024859063, 0.017022807, -0.054993033, -0.13135757, -0.11061301, 0.0006009131, 
-0.012896671, -0.029120278, -0.09564777, -0.15695906, -0.008574818, 0.0022726357)); + target1 += mul(c1, float4x4(-0.11845177, 0.044411838, -0.02478517, -0.016679568, 0.2842885, 0.05566886, -0.020992488, 0.33000243, -0.045738284, -0.08624307, -0.0029711786, 0.06983461, 0.16860297, -0.08496602, 0.0026587378, 0.1191108)); + target1 += mul(d1, float4x4(0.08942806, -0.13266312, 0.050555114, 0.044336855, 0.04668655, -0.17912517, 0.09872363, -0.05689603, -0.04764076, 0.09976931, 0.026714336, -0.12177113, 0.10121553, 0.19926491, -0.013922513, -0.062807985)); + target1 += mul(e1, float4x4(-0.11948707, -0.19019963, -0.09910906, 0.015228854, 0.19573943, 0.18543078, 0.37633705, 0.0899833, -0.058247276, -0.06500262, -0.0968551, 0.3980007, -0.13930885, 0.031145731, 0.18868047, 0.20646492)); + target1 += mul(f1, float4x4(-0.27454132, 0.037422657, 0.060829625, -0.15062498, 0.22120185, -0.020640798, -0.15796806, 0.30988604, 0.117011115, -0.11581356, -0.105670854, 0.34526885, 0.09709533, -0.1335589, -0.061150175, -0.023490202)); + target1 += mul(g1, float4x4(-0.0064297495, 0.053259544, 0.061699186, -0.1023013, 0.13206881, 0.08598005, 0.042804673, -0.036392808, -0.022715596, 0.3187674, -0.043576453, 0.089301124, 0.010875903, -0.045669887, 0.13546628, -0.041321605)); + target1 += mul(h1, float4x4(0.033168443, 0.07130571, -0.06795218, -0.094012216, 0.09050034, -0.16879193, 0.18427128, 0.19835915, 0.014528693, 0.22958101, -0.012955512, 0.14033306, 0.10309811, 0.03351618, -0.100021325, -0.026367364)); + target1 += mul(i1, float4x4(-0.40170196, -0.10989097, 0.06447425, -0.19903958, 0.030508196, -0.09201532, -0.1493947, -0.0039443234, 0.16646437, -0.004893318, 0.030999044, 0.22652404, -0.1360666, -0.14109057, -0.124136284, -0.07020125)); + target1 += mul(a2, float4x4(-0.35415915, 0.078341804, -0.20908163, -0.032414813, -0.17489177, -0.10121671, -0.0123754265, -0.0074867755, 0.20203647, 0.2981116, 0.4581744, -0.10773967, -0.14040758, -0.1311706, 0.2421585, -0.05221277)); + target1 += 
mul(b2, float4x4(0.32388586, 0.121117495, 0.17030708, -0.09672408, 0.10174964, -0.089880064, -0.053550195, 0.07492085, 0.36688468, 0.39096692, 0.27509093, -0.09113504, 0.18473786, -0.030729344, -0.022813018, -0.07951988)); + target1 += mul(c2, float4x4(-0.10802985, -0.09921729, -0.083578154, 0.09941307, 0.15204535, 0.0048476397, 0.037141923, 0.072919704, -0.039613035, 0.0011554313, 0.029029889, -0.115339264, -0.2606713, 0.017305905, -0.032651994, -0.1710926)); + target1 += mul(d2, float4x4(-0.09530024, 0.08035671, -0.094462946, 0.04531403, 0.116854094, -0.039871104, 0.101754196, 0.07071469, -0.09344735, 0.2224399, 0.31438616, -0.1031509, -0.087050706, 0.023629284, 0.30222768, 0.087091036)); + target1 += mul(e2, float4x4(0.32540318, -0.123871066, 0.09114808, 0.20059493, 0.13602751, -0.294147, 0.028020037, 0.10215196, 0.14379483, -0.08321783, -0.06476323, 0.039079703, 0.11145182, 0.047562934, -0.0320396, 0.17505427)); + target1 += mul(f2, float4x4(0.117524795, 0.063353635, -0.08187684, -0.02796676, 0.11098208, -0.02517451, 0.052513797, -0.18859608, -0.25639486, 0.17382553, 0.053182043, -0.09802817, -0.08900308, 0.021651518, -0.07654097, -0.111615546)); + target1 += mul(g2, float4x4(-0.12933804, 0.0012732261, -0.045028616, 0.06224205, -0.00047467486, -0.26893324, 0.14208493, 0.027069936, 0.16365767, 0.30192706, 0.23923144, -0.105405785, -0.0021433597, 0.14549361, 0.05767389, -0.10113342)); + target1 += mul(h2, float4x4(-0.07045147, 0.13409013, 0.023928098, 0.045560613, 0.103115976, -0.066133045, 0.12823656, -0.01629772, 0.13711633, 0.27451962, 0.12717873, -0.084038205, 0.12807854, 0.110353716, -0.06848678, 0.056276537)); + target1 += mul(i2, float4x4(0.16927746, 0.111806795, 0.023252549, -0.12235242, 0.15292254, 0.061406262, 0.06284062, -0.11671832, -0.02885994, 0.12882869, -0.048748255, -0.14202079, -0.08404155, 0.03453428, -0.060811, 0.18254602)); + target1 += mul(a3, float4x4(-0.011917425, 0.023498498, 0.0072831116, -0.05328629, 0.3426947, 0.08741361, 0.35501662, 
0.045255594, 0.08008512, -0.002467051, -0.053357143, -0.05487847, 0.15113881, -0.050046794, -0.036305785, 0.06071048)); + target1 += mul(b3, float4x4(-0.012859317, 0.06900528, -0.08498363, -0.08625659, -0.094864994, -0.04425656, -0.0071134693, 0.07542594, -0.08952303, -0.14963494, 0.115062006, 0.073727705, -0.06841927, 0.030572297, -0.060809616, -0.14095046)); + target1 += mul(c3, float4x4(0.29680476, -0.070317306, -0.056082696, 0.27471995, 0.109471574, -0.012238972, 0.16928561, -0.12685184, -0.100722544, 0.116650775, 0.054211635, -0.06463175, -0.13047734, -0.070404656, -0.08516014, -0.11477897)); + target1 += mul(d3, float4x4(0.058439, -0.1555504, -0.096580744, -0.024473842, 0.090628244, 0.04928509, 0.02740108, 0.0077335024, 0.026813101, 0.065165296, -0.059121966, 0.08125537, 0.16700324, -0.16615666, -0.14588222, 0.00048067764)); + target1 += mul(e3, float4x4(-0.053213652, -0.16659884, -0.09036764, 0.010975479, -0.11077762, 0.11982606, 0.02579046, -0.13114569, 0.17622563, 0.023344778, 0.080385335, -0.08998645, -0.18493009, -0.048734408, 0.010119995, 0.12936613)); + target1 += mul(f3, float4x4(-0.08402194, -0.16797844, -0.01022614, 0.09084325, 0.24871092, 0.13302508, -0.1210408, -0.04133277, -0.08691682, 0.02221635, 0.12621205, -0.15186077, 0.19762659, -0.10951936, -0.19129583, 0.21391307)); + target1 += mul(g3, float4x4(-0.1687245, 0.16445398, -0.06853974, -0.086989194, -0.14615493, -0.009716202, -0.088772245, 0.13583103, -0.08530893, -0.09424376, -0.12971476, -0.02487141, -0.1094553, -0.04473294, -0.27410263, 0.043002244)); + target1 += mul(h3, float4x4(0.03290918, -0.006952538, -0.12306263, 0.027640607, -0.025346387, -0.09620494, 0.116112545, 0.10227404, 0.03813908, 0.16176395, 0.47203362, 0.047157902, -0.10830938, -0.0019050312, 0.3620803, -0.069925636)); + target1 += mul(i3, float4x4(0.0020446004, 0.16054538, 0.12809694, 0.0069585256, 0.11748204, -0.011759154, -0.12903488, 0.29380128, 0.21712495, 0.068177566, 0.059223883, 0.10227324, 0.3817376, -0.11270308, 
0.0073445877, 0.21012813)); + target1 += mul(na1, float4x4(-0.199299, -0.040114038, -0.15849929, 0.0057354206, 0.19681698, -0.107773945, -0.04031948, 0.12012136, -0.22728048, 0.045971204, -0.12776788, 0.025411135, -0.2745491, -0.113476306, -0.015801609, 0.008725868)); + target1 += mul(nb1, float4x4(-0.28201059, -0.069104806, 0.015983578, -0.103806704, 0.121411614, -0.09251776, -0.08143648, 0.21460037, -0.07785157, 0.101122744, 0.013448072, -0.023710037, -0.0358346, 0.1328456, -0.02043331, -0.06159447)); + target1 += mul(nc1, float4x4(0.06781508, -0.072408475, 0.083291575, 0.040496554, 0.04679973, 0.12705597, 0.06562132, -0.04938638, 0.21427007, -0.004967686, -0.08138591, 0.033386033, -0.048481766, 0.076613255, 0.21033032, -0.05062305)); + target1 += mul(nd1, float4x4(-0.21217471, 0.13806537, 0.04606568, -0.13743265, 0.1806969, -0.085699804, -0.06342818, 0.1660658, -0.0026293355, -0.02128403, -0.0046605268, 0.008235694, -0.1171583, -0.24562967, -0.28818226, 0.12968758)); + target1 += mul(ne1, float4x4(0.17914222, 0.12522437, -0.14189677, -5.616129e-05, 0.21868588, -0.24404518, -0.12704019, 0.25512457, 0.11127853, 0.043490496, -0.0034969563, -0.1935092, -0.12618113, 0.15022264, 0.10067992, -0.15296605)); + target1 += mul(nf1, float4x4(0.059839483, -0.07332882, -0.0026434374, 0.22739156, 0.04557501, -0.03867732, 0.21676865, -0.058800567, 0.006406612, -0.011612252, 0.009007284, 0.059830897, 0.1614946, -0.07674529, -0.0385602, 0.39797354)); + target1 += mul(ng1, float4x4(-0.1981268, -0.1361051, -0.06161995, -0.002189435, -0.0014002474, 0.126129, 0.023376467, 0.09703216, 0.10666224, -0.23168142, -0.018159337, 0.042339746, 0.12584367, -0.011922057, 0.10902402, 0.15436263)); + target1 += mul(nh1, float4x4(0.0027595635, -0.10197207, -0.034429558, 0.06667168, 0.33573776, -0.099396594, -0.07997797, 0.08387646, 0.0951511, -0.16234699, -0.14867416, 0.00735437, -0.09362014, 0.0664804, 0.27731436, 0.37119982)); + target1 += mul(ni1, float4x4(0.2548695, 0.028097544, -0.0022558135, 
0.026973823, 0.1884029, -0.07246545, 0.21642277, 0.026800772, -0.19520886, -0.0009553605, 0.0062482627, -0.16592918, 0.48447585, 0.086303264, -0.05490935, 0.378503)); + target1 += mul(na2, float4x4(0.1574428, 0.035142746, 0.079227954, 0.100714244, 0.11136245, 0.11895534, 0.009833678, -0.001039115, -0.069387674, -0.010426503, -0.10678969, 0.101909705, -0.031729374, 0.15894724, -0.23622003, -0.011815657)); + target1 += mul(nb2, float4x4(-0.17458418, -0.120001495, 0.09203402, -0.002166517, 0.0031753816, 0.12831944, 0.16465144, -0.06330301, -0.24267045, -0.12281286, 0.052246343, 0.02494283, -0.18964235, 0.058346782, 0.0025673895, -0.01121613)); + target1 += mul(nc2, float4x4(0.115957834, -0.060228895, 0.009079297, -0.040949136, 0.014297083, 0.036444042, 0.12076215, -0.1402084, 0.09574682, -0.06670408, 0.029599207, 0.04741757, 0.01102373, -0.05027519, 0.13449037, -0.099299684)); + target1 += mul(nd2, float4x4(-0.029986456, -0.045808725, -0.05172542, -0.10101369, 0.03663162, 0.039696075, -0.08842631, -0.117827855, 0.1347963, -0.007392197, -0.05730133, -0.04402969, 0.13403495, 0.28114837, 0.17730127, -0.07764935)); + target1 += mul(ne2, float4x4(-0.34972468, 0.006863505, -0.068723604, -0.30767044, 0.12904535, 0.0763381, -0.037620995, 0.028365362, -0.08700267, 0.2257665, 0.14819853, -0.16082688, 0.0929386, -0.0062676766, 0.17218679, -0.16327891)); + target1 += mul(nf2, float4x4(-0.17909175, -0.09134105, -0.0057606776, -0.083825834, 0.1443505, 0.1877781, 0.02841784, 0.1146964, 0.3169764, 0.018749984, 0.19640554, -0.0014817682, -0.27608246, -0.080467306, -0.13688186, -0.06578604)); + target1 += mul(ng2, float4x4(0.02515703, -0.03203328, 0.06439871, -0.06689986, -0.004256959, 0.17631707, 0.042148568, -0.088977, 0.07314368, -0.18564323, -0.11051338, -0.032011528, 0.3711881, 0.495717, 0.21411352, -0.0066381986)); + target1 += mul(nh2, float4x4(-0.05550901, 0.06970293, -0.06802052, -0.022730853, 0.0143414615, 0.096654266, -0.045230158, 0.03669965, -0.08298829, -0.1573773, 
0.12953721, -0.042050414, 0.04308049, 0.11458007, 0.0072063627, -0.18453878)); + target1 += mul(ni2, float4x4(-0.16849747, 0.051144414, 0.020992253, -0.09341655, 0.05105659, 0.042700652, -0.06062117, 0.13699457, 0.2397991, -0.009917461, -0.059426248, 0.09855892, -0.28842947, 0.1404379, -0.022812406, -0.23883702)); + target1 += mul(na3, float4x4(0.10231295, -0.05687462, 0.05454633, 0.1353426, 0.1760176, -0.11181645, -0.31677356, 0.06983046, 0.13605112, 0.17754814, 0.3348445, -0.1652707, -0.061019715, 0.1773025, -0.30495015, 0.11278704)); + target1 += mul(nb3, float4x4(0.13603285, 0.10336861, -0.023782251, 0.13608527, -0.4052799, 0.14841305, -0.25663885, -0.012108956, 0.28822663, 0.04447834, -0.05276655, -0.18212605, -0.20188917, 0.10997185, 0.06183931, -0.055857945)); + target1 += mul(nc3, float4x4(-0.2237108, 0.24488361, 0.18851626, -0.07019121, -0.021184865, -0.0499757, 0.026765132, -0.09804875, -0.011333142, -0.108678274, 0.040759776, -0.037615996, 0.14195605, -0.17333975, 0.09601836, 0.14565407)); + target1 += mul(nd3, float4x4(0.12259593, 0.27562442, 0.24215461, 0.14960998, 0.08186383, -0.010550085, -0.019250091, -0.014648717, 0.14972208, 0.14603175, 0.10073407, -0.1225431, 0.1675907, 0.038280413, -0.06087625, 0.0130648045)); + target1 += mul(ne3, float4x4(0.30968392, 0.11772451, -0.08816913, 0.12534001, -0.050786596, -0.21509898, -0.04253493, -0.04734682, 0.13719988, -0.09571686, -0.3094301, -0.08568065, -0.10093176, 0.024763435, 0.18954168, -0.227629)); + target1 += mul(nf3, float4x4(-0.22520582, 0.18443918, 0.14025666, -0.18477283, -0.12125983, 0.010999684, -0.0024025543, 0.24120031, -0.13416757, 0.01567192, -0.013440386, 0.17282273, 0.16098748, -0.02793626, 0.15618294, -0.0131627675)); + target1 += mul(ng3, float4x4(0.23410907, 0.019564115, -0.0076426617, -0.09377979, -0.47939178, -0.06636784, -0.0011904492, -0.09345677, -0.14794281, 0.25343522, -0.21156238, -0.01817268, 0.12250443, -0.0032213917, -0.19294205, 0.026571818)); + target1 += mul(nh3, 
float4x4(-0.066518046, -0.011708588, -0.007350381, -0.16976248, 0.09265956, 0.08236158, 0.12594578, 0.021188073, -0.2299054, -0.12767331, -0.098674, 0.035027504, -0.1722649, -0.15037538, 0.037455063, -0.027518287)); + target1 += mul(ni3, float4x4(-0.040520877, -0.17789118, 0.0535865, -0.15534161, 0.09352957, 0.11459578, -0.15315403, 0.04562035, -0.0015360791, 0.047871828, -0.021276174, 0.35346803, -0.10936083, 0.057735037, -0.089098595, 0.0057320776)); + target1 += float4(-0.12162919, -0.00032382424, 0.025486631, -0.09447538); + + float4 target2 = mul(a1, float4x4(-0.024318032, 0.062261496, 0.028226431, 0.063416876, -0.122350864, -0.0113668, 0.061698295, -0.22892742, -0.21282825, -0.30799037, -0.020646222, -0.21302511, 0.050188534, -0.03943688, -0.078553416, 0.010918215)); + target2 += mul(b1, float4x4(-0.0064165345, 0.082449056, -0.03667216, 0.026472934, -0.021514278, 0.17880541, 0.39611253, -0.17107382, 0.06770686, -0.053641487, 0.002025645, 0.09812659, -0.07990987, -0.08550891, 0.00025631645, -0.10817648)); + target2 += mul(c1, float4x4(-0.11507329, -0.06074527, 0.007052484, 0.015466066, 0.0675046, 0.28604895, -0.020563968, 0.04284168, -0.10729741, -0.103069924, 0.028218608, 0.2833194, 0.11628834, -0.06599205, -0.10394839, 0.13991328)); + target2 += mul(d1, float4x4(0.14225487, 0.08203055, 0.027650036, 0.1459416, -0.013772616, 0.23131026, 0.044769842, 0.27454084, -0.047555555, 0.05384277, -0.09042822, -0.16309428, 0.040359538, 0.19854581, -0.026278, 0.1577506)); + target2 += mul(e1, float4x4(-0.0091988975, -0.05603158, 0.08112747, 0.014755933, -0.50124913, 0.26424783, 0.1621611, -0.3766593, 0.15138763, 0.08449643, -0.16496105, 0.42882624, -0.010958174, 0.09773749, 0.22436622, -0.09687365)); + target2 += mul(f1, float4x4(-0.019358287, 0.025669195, 0.290994, 0.02750369, 0.28040195, 0.24038815, 0.08250993, 0.021609074, -0.040725835, -0.19103482, -0.10284562, 0.022636155, 0.050841074, 0.0030245516, -0.23331137, 0.15245193)); + target2 += mul(g1, float4x4(0.0992156, 
-0.09854949, 0.075423576, 0.008634914, 0.062402267, -0.22020867, -0.07628636, -0.055416584, -0.10278129, 0.117922865, 0.13292609, -0.011894427, 0.16825698, -0.036205966, 0.1424532, 0.10553304)); + target2 += mul(h1, float4x4(0.19908716, -0.12244845, 0.01669312, -0.01248478, -0.009518143, -0.08615178, 0.39116043, -0.52616054, 0.11156954, -0.115720086, -0.07697886, 0.23553406, 0.017087052, 0.016129963, 0.24723524, -0.11207272)); + target2 += mul(i1, float4x4(0.033391032, -0.1495619, -0.09304159, 0.30421168, 0.13344899, -0.31858364, -0.081601165, 0.13551356, 0.032184854, 0.016566517, -0.16247925, 0.034869343, 0.04001544, -0.08231552, -0.18482871, 0.19266751)); + target2 += mul(a2, float4x4(0.21768865, 0.012509539, -0.16523208, 0.22101055, -0.017112812, 0.12730962, -0.066268146, -0.05613703, 0.021577986, 0.24617495, 0.15244165, -0.08514145, -0.10427943, 0.17322995, 0.25568137, -0.015480765)); + target2 += mul(b2, float4x4(0.07753385, 0.021704786, 0.23479357, -0.21051238, -0.009220801, 0.20936434, -0.077434614, -0.09195854, -0.34075132, 0.17316882, 0.11968564, -0.021970788, 0.15152359, 0.28213486, 0.07805407, 0.099207774)); + target2 += mul(c2, float4x4(0.054490507, 0.07500978, -0.08916167, 0.22030471, 0.07036594, 0.1673276, 0.01864345, 0.0027516915, -0.39270175, -0.03433242, -0.17433889, -0.18174602, 0.044357035, -0.04678205, 0.11330789, 0.047382314)); + target2 += mul(d2, float4x4(0.07965972, -0.2201543, 0.18386759, -0.080045894, 0.04141404, -0.027790288, 0.032212794, -0.021278335, -0.070643224, 0.05221597, -0.06377366, 0.065172255, -0.18978727, 0.092385, -0.17461243, 0.2500567)); + target2 += mul(e2, float4x4(-0.048105214, 0.43421936, -0.11871231, -0.12232125, 0.06071036, -0.07797472, -0.13819577, -0.14363539, -0.003262046, 0.05031809, -0.103945084, -0.22375908, -0.36861306, 0.25518808, 0.04773121, -0.22608627)); + target2 += mul(f2, float4x4(-0.094031096, -0.011887294, -0.08532428, 0.112617865, 0.06823757, 0.21326852, 0.109153405, -0.3117106, -0.22819358, 
0.123445965, -0.066512406, -0.21115267, -0.080148704, 0.12793726, -0.20465335, -0.104592934)); + target2 += mul(g2, float4x4(0.045067977, -0.2181705, -0.0677207, 0.13714351, -0.098488234, 0.19015153, -0.09273758, -0.0746141, 0.032907944, -0.006554721, 0.045943078, -0.2017389, -0.07914341, -0.085856505, -0.22186919, -0.049897686)); + target2 += mul(h2, float4x4(-0.10116989, -0.10004126, 0.09973816, -0.056045264, -0.18085082, 0.105252974, 0.11094914, -0.27471054, 0.20055285, -0.15355913, -0.080244385, -0.07118461, 0.02517136, -0.09862167, 0.22725868, -0.06279268)); + target2 += mul(i2, float4x4(0.10015747, -0.22263162, -0.014078088, -0.08387323, 0.005140913, 0.03506062, 0.18977262, -0.1479168, -0.03378466, -0.15656684, -0.061233502, -0.21884726, -0.24339373, -0.06372294, 0.12688471, -0.10735916)); + target2 += mul(a3, float4x4(0.033982676, 0.05078853, -0.1282201, -0.0035882539, 0.08219379, -0.0116551975, 0.22077334, 0.04950106, -0.08306263, -0.03258243, -0.09699666, 0.09209884, 0.24061108, -0.040557686, 0.070444405, 0.28183722)); + target2 += mul(b3, float4x4(-0.17872535, -0.13406444, -0.034040287, 0.03047437, -0.06435232, -0.24566554, 0.0670411, -0.024581233, -0.107877605, 0.08638364, -0.25626892, 0.044232026, 0.060273834, -0.16846469, 0.43043453, -0.1603817)); + target2 += mul(c3, float4x4(-0.22682182, 0.15527044, -0.08887372, -0.043433297, 0.028202614, -0.1919475, 0.2581379, -0.28678998, 0.040917493, -0.023046691, 0.20005395, -0.103288084, 0.009493088, -0.018459544, 0.081757404, 0.054610446)); + target2 += mul(d3, float4x4(-0.022377692, 0.008678131, -0.1065251, 0.2628791, -0.009904344, 0.10677991, -0.040256146, -0.116764925, 0.03182517, 0.11810951, -0.052380614, 0.30170968, 0.2569954, -0.17379415, -0.007437352, -0.13248402)); + target2 += mul(e3, float4x4(0.1602437, -0.097451374, -0.010258972, 0.12651087, -0.0061891475, 0.078265965, 0.08754248, -0.14903383, -0.07830899, -0.08898991, -0.058010247, 0.23148704, -0.3695693, 0.18824111, -0.07988307, -0.05880814)); + 
target2 += mul(f3, float4x4(-0.22253856, 0.26592886, -0.03350701, -0.14712897, -0.12118757, 0.19663027, 0.031479847, -0.1554313, -0.028078854, 0.47659087, 0.12390117, -0.11238944, 0.037422795, -0.049916733, -0.2926893, 0.16435196)); + target2 += mul(g3, float4x4(0.075061694, -0.24045657, -0.047069702, -0.09982952, 0.2340634, -0.33556157, -0.037818547, 0.15286541, 0.14214562, 0.02267143, 0.09929496, -0.055981826, 0.21834296, -0.19831084, -0.16977312, 0.08182871)); + target2 += mul(h3, float4x4(0.01741376, 0.08985922, 0.16625583, -0.097267725, 0.17712043, -0.068722576, 0.07060928, 0.09168345, -0.16337997, -0.038742293, -0.04963981, 0.15612502, 0.11807448, -0.08807022, 0.101155974, -0.5563793)); + target2 += mul(i3, float4x4(-0.27598697, -0.062920116, -0.08726363, -0.12058882, -0.07664108, -0.032059796, -0.25070706, 0.030094638, -0.1160773, 0.19200212, 0.18899699, -0.18259315, 0.24458873, 0.12005026, -0.4616454, 0.27545306)); + target2 += mul(na1, float4x4(0.15272795, -0.23518732, 0.030445633, 0.088528365, 0.055305615, -0.12609963, 0.15926869, -0.22551426, 0.040562432, 0.124508515, 0.124815956, -0.0953939, 0.14920413, 0.14798881, -0.14428794, 0.37141335)); + target2 += mul(nb1, float4x4(0.12783955, -0.0540082, 0.014302729, 0.1365942, 0.10768764, -0.16831467, -0.079203665, 0.1425581, 0.019629346, -0.1027023, 0.15957874, -0.29757223, 0.26533285, -0.15765496, 0.35999995, 0.025803005)); + target2 += mul(nc1, float4x4(0.29036346, 0.26730424, 0.12511441, -0.061552685, -0.16372615, -0.026372833, 0.14069465, -0.24948902, 0.028215056, 0.254545, -0.19650677, 0.09530049, 0.055034224, -0.009660105, 0.39131105, -0.11131454)); + target2 += mul(nd1, float4x4(-0.0675603, -0.24606612, 0.0658764, -0.04487242, -0.0043948023, 0.04578745, 0.065714814, -0.12173881, 0.06062957, -0.04769831, 0.017330103, -0.074727364, -0.25047338, -0.30126756, -0.0830633, 0.019802446)); + target2 += mul(ne1, float4x4(0.19933821, 0.08052119, -0.058912043, 0.31624097, 0.18705179, 0.023470681, -0.03783429, 
-0.04163007, -0.09845593, -0.12975362, 0.2510535, -0.32808807, -0.23654252, 0.3028382, -0.19675751, -0.030597644)); + target2 += mul(nf1, float4x4(0.09338011, -0.0415115, -0.22497573, -0.0028536345, -0.19024974, -0.1604205, 0.115466096, -0.2525424, -0.063761264, -0.20588842, 0.08622651, -0.00097166066, 0.10169425, 0.252253, -0.06758796, 0.23335451)); + target2 += mul(ng1, float4x4(-0.04426442, 0.1095582, -0.085856594, 0.13048999, -0.12778096, 0.2613617, -0.045577575, -0.1526907, 0.1257047, -0.111831486, -0.059892397, 0.15280181, -0.12673315, -0.05033893, -0.2930266, -0.46015793)); + target2 += mul(nh1, float4x4(-0.11951625, 0.03414521, -0.11969193, 0.1869847, 0.111495204, 0.080608666, -0.20057446, 0.10785576, -0.049578592, 0.016259808, 0.0058614444, -0.045524042, 0.0319529, 0.05456559, 0.007678947, 0.33595043)); + target2 += mul(ni1, float4x4(0.10240467, 0.18299319, 0.05753473, -0.02340504, -0.16686855, 0.21292439, 0.11702374, -0.30564633, -0.024081768, -0.088019624, 0.22313595, -0.06672843, 0.055274762, 0.13347326, -0.030782074, -0.35677573)); + target2 += mul(na2, float4x4(-0.075412944, -0.11053347, 0.07465402, -0.014327975, -0.13390768, 0.009061153, 0.027920425, -0.005080267, -0.04721174, -0.06812053, -0.08845801, 0.109399185, -0.04021429, 0.03812722, -0.25037023, -0.019478017)); + target2 += mul(nb2, float4x4(-0.07806179, 0.00493842, -0.02926109, -0.017333046, -0.125423, -0.1364203, 0.09466317, -0.26578787, 0.14311473, -0.0638623, 0.11139706, -0.08727186, -0.06821389, -0.19687861, 0.14772336, -0.10641787)); + target2 += mul(nc2, float4x4(0.027460072, 0.15687883, -0.17656918, 0.037287217, -0.06293563, -0.03923116, 0.037919715, -0.16810033, 0.26675344, -0.06076212, 0.104115106, 0.0798128, -0.023851654, 0.033833887, -0.030991107, 0.20160522)); + target2 += mul(nd2, float4x4(-0.058332916, -0.09243659, -0.24664097, -0.13549158, -0.1218952, 0.15865086, -0.1388978, -0.25030297, 0.045538265, 0.04120175, -0.031994786, -0.13400851, 0.007142682, 0.16071808, 0.04225278, 
0.20399003)); + target2 += mul(ne2, float4x4(-0.09599313, -0.15977086, -0.02840129, 0.1264139, -0.0144603, -0.00054464, 0.025552921, -0.09051482, -0.06592454, -0.026247922, -0.06352208, -0.021571407, -0.04439837, -0.07514258, 0.0026004864, 0.23430851)); + target2 += mul(nf2, float4x4(0.09127431, -0.21962664, 0.029265152, -0.3099013, -0.09579088, 0.023516538, -0.08382231, 0.05348487, 0.17067212, -0.16390987, 0.03691037, 0.01566425, 0.18072702, 0.10966007, 0.22929187, 0.23833585)); + target2 += mul(ng2, float4x4(0.083102494, 0.18586425, 0.09552713, -0.22502401, 0.10707524, -0.041579556, -0.040507507, -0.07875607, 0.13548316, 0.065970294, -0.09524086, 0.12988009, -0.19841906, -0.016670253, 0.2779514, 0.0039394014)); + target2 += mul(nh2, float4x4(-0.056897737, -0.022942321, -0.089304574, 0.01799863, -0.031229522, 0.08292495, -0.040067356, -0.09749493, -0.2211719, 0.110088974, 0.05465516, -0.12767765, -0.06458067, -0.17160612, -0.09046756, -0.09943958)); + target2 += mul(ni2, float4x4(-0.20148912, 0.017609052, 0.2321357, -0.07018911, -0.1311024, 0.007025396, -0.3018123, 0.059590653, 0.02093451, 0.2801181, 0.047305427, -0.04511682, 0.02409926, -0.1167535, -0.051785782, -0.022035388)); + target2 += mul(na3, float4x4(-0.050354917, -0.070848934, 0.05680098, -0.15274279, 0.017402016, 0.36217922, -0.5604259, 0.07027285, 0.013515239, -0.024368018, 0.15436645, -0.20279783, -0.009300287, 0.07763277, -0.12982416, 0.018808186)); + target2 += mul(nb3, float4x4(0.06595005, 0.34867665, -0.1158312, -0.11764399, -0.36079824, -0.03821222, -0.019823037, -0.44939035, -0.16058454, 0.0022173142, -0.067403175, 0.094619855, -0.054194376, -0.15860401, 0.031142738, -0.020085743)); + target2 += mul(nc3, float4x4(0.15504256, -0.22207503, -0.037738267, -0.024344966, 0.22112809, -0.084620684, 0.31442386, -0.17054078, -0.14580488, -0.1475954, 0.014907614, -0.009613608, -0.120833494, 0.024163049, 0.055504505, 0.12984537)); + target2 += mul(nd3, float4x4(0.03553467, -0.047465023, 0.127075, 
-0.17350323, 0.17346224, -0.15783796, 0.15583144, 0.01985312, 0.019021586, -0.03840401, 0.19470496, -0.007293492, -0.17917366, -0.15722491, -0.26070598, -0.2573391)); + target2 += mul(ne3, float4x4(-0.0953191, 0.09084944, 0.25338924, 0.23829061, 0.08905475, -0.02061248, -0.012651722, 0.11955581, 0.239715, -0.2795726, 0.06275163, -0.15498403, -0.042101745, -0.16694753, -0.049197655, 0.06470607)); + target2 += mul(nf3, float4x4(0.07657325, -0.35392562, -0.055532675, -0.18168893, 0.08006482, 0.12548354, -0.17169037, 0.41884392, 0.047854125, -0.13949591, -0.34051692, 0.18265511, 0.082268566, 0.24420416, -0.049996477, -0.018989688)); + target2 += mul(ng3, float4x4(-0.16161917, 0.16816078, 0.018195407, 0.16679527, -0.3412548, 0.14028408, 0.17574453, -0.06049301, -0.01611411, -0.046527516, -0.044087164, 0.25788495, 0.13769192, -0.016161619, 0.041910134, 0.042887107)); + target2 += mul(nh3, float4x4(0.07837116, -0.22945437, -0.05715237, 0.062118188, -0.07539828, 0.22634326, -0.19471732, 0.31986186, 0.15694539, 0.1633341, -0.03029404, 0.056681212, -0.029835409, -0.13129339, 0.19710875, 0.13151285)); + target2 += mul(ni3, float4x4(0.017191496, 0.33163047, -0.026875576, 0.19212759, 0.27074674, 0.17707312, -0.13339694, 0.10855495, -0.18034323, 0.43113244, -0.33985507, 0.316351, 0.0358167, 0.023788683, 0.13152061, -0.019543748)); + target2 += float4(0.091157734, 0.06337161, 0.09025765, 0.07787731); + + float4 target3 = mul(a1, float4x4(-0.10152706, 0.13643685, 0.050397865, 0.10665431, 0.026328163, 0.1460299, 0.2569912, -0.19533697, 0.03801618, 0.0003496284, 0.18598852, -0.22565664, 0.05281963, -0.034972392, -0.14308542, 0.030370854)); + target3 += mul(b1, float4x4(-0.004119863, 0.057859607, -0.2119656, 0.14261195, -0.16826284, -0.25717396, -0.041528255, -0.119776234, -0.1013885, 0.16835499, 0.27712375, 0.11540263, 0.13435264, -0.15992326, -0.011525119, -0.052719552)); + target3 += mul(c1, float4x4(0.015662286, 0.039283197, 0.1298957, 0.14770529, 0.16800109, -0.26307538, 
-0.043486428, -0.088268735, -0.091123246, -0.02737689, 0.1340816, 0.20996217, 0.108091205, 0.030314112, 0.054512065, 0.012642684)); + target3 += mul(d1, float4x4(0.06709217, -0.05501374, 0.081222005, 0.089457735, 0.18656515, -0.3077529, 0.047672454, 0.024508892, -0.1351014, -0.39228433, -0.10557932, -0.04361972, -0.11915583, -0.009581473, 0.0063169855, -0.03613457)); + target3 += mul(e1, float4x4(-0.1854358, -0.17342652, -0.194473, 0.3151401, -0.051769286, -0.3236325, 0.16018392, -0.057727765, 0.16584621, -0.017418258, -0.3128051, 0.07975532, 0.18611333, 0.026310056, 0.02726216, 0.0067486716)); + target3 += mul(f1, float4x4(-0.110896066, -0.00702464, -0.20931682, 0.24850254, 0.03269825, -0.18380491, 0.032377258, 0.19312768, -0.22545849, 0.20047729, -0.21857505, 0.04958539, -0.012481836, 0.09664499, -0.14021717, -0.011379809)); + target3 += mul(g1, float4x4(0.029377487, -0.03222012, -0.047782637, 0.15043634, -0.028922928, 0.14329837, 0.070593685, 0.17937078, -0.098229684, -0.017268147, 0.023314565, -0.0373697, 0.086789444, -0.041083477, -0.14991397, 0.1569613)); + target3 += mul(h1, float4x4(-0.15204531, 0.038198274, -0.04654972, -0.023292607, 0.043118156, -0.1646481, -0.19841586, 0.0921996, -0.020243818, -0.006126642, 0.0073893177, -0.2155937, -0.051742166, -0.12905034, 0.026826771, -0.14480315)); + target3 += mul(i1, float4x4(0.10036964, 0.1710007, -0.07876652, 0.22185723, -0.07879332, -0.009758965, -0.07071612, 0.091213554, -0.112285696, 0.03389832, -0.028804176, -0.030022187, -0.1688445, 0.11049307, -0.054812532, 0.093897834)); + target3 += mul(a2, float4x4(-0.12732436, 0.085322656, -0.100760445, 0.18453589, -0.06775451, 0.10935976, 0.17619863, -0.1605919, 0.09963296, -0.15262389, 0.09841437, -0.19519499, -0.07014624, 0.25242952, -0.05024359, 0.087294735)); + target3 += mul(b2, float4x4(0.015800908, -0.14473227, -0.2478373, 0.053460408, -0.14864206, -0.043255955, 0.11067259, 0.0014784707, 0.12921435, -0.03185401, 0.116656736, -0.03951376, 0.06561661, 
-0.04718704, -0.10218965, 0.11587745)); + target3 += mul(c2, float4x4(0.07117372, 0.0109037515, -0.23872098, 0.07710495, 0.0921179, -0.1644194, -0.13181047, -0.057200883, 0.14430603, 0.10133447, 0.28212273, 0.09411812, -0.048196144, 0.0436184, -0.13561143, 0.3184622)); + target3 += mul(d2, float4x4(-0.18523192, 0.21471006, -0.0448867, 0.014551903, 0.009904246, -0.15023962, 0.004197992, -0.17210527, 0.194157, -0.08507272, 0.20821328, 0.053412434, 0.3099377, 0.119032666, -0.18388903, -0.19600375)); + target3 += mul(e2, float4x4(0.2807314, 0.2189851, 0.25916493, 0.060228985, -0.0049263136, -0.074992225, -0.15787919, -0.054917946, 0.12066998, -0.21063392, 0.14343189, -0.033192027, -0.010535234, 0.14374483, 0.1522993, -0.07717713)); + target3 += mul(f2, float4x4(-0.043371633, 0.13011403, -0.0015406794, -0.0128029715, 0.17256962, -0.04676938, 0.15432738, -0.07865593, 0.13326003, -0.20808597, -8.7830034e-05, 0.19136547, -0.1985925, -0.013042362, -0.22718841, -0.06583816)); + target3 += mul(g2, float4x4(-0.11845248, 0.027589038, 0.10232536, 0.089354545, 0.18008573, 0.061147142, 0.04159389, -0.12027304, 0.1662144, -0.19675921, 0.12992287, -0.10149212, 0.10550842, -0.006124143, 0.19946195, -0.1462058)); + target3 += mul(h2, float4x4(0.01296488, -0.09644271, -0.05817923, -0.0954995, 0.025634903, -0.10628822, -0.05637768, -0.284114, 0.17925075, 0.01273799, 0.309424, -0.036070596, -0.17971297, -0.35284916, 0.028788334, -0.040968318)); + target3 += mul(i2, float4x4(-0.14511016, -0.036098864, -0.029634831, -0.081007525, 0.17456302, -0.3121309, -0.005653063, -0.13220096, 0.07959643, 0.13494255, 0.16009367, 0.022134677, -0.06916521, -0.068016514, -0.07418041, -0.106386214)); + target3 += mul(a3, float4x4(0.0038909556, 0.10399398, -0.047585238, 0.020263152, 0.22357577, 0.20275299, -0.20587234, -0.14618087, -0.06699123, 0.05799765, -0.057206634, 0.070337296, 0.26828194, -0.110529095, -0.039317895, 0.1000372)); + target3 += mul(b3, float4x4(-0.12016816, -0.1746712, -0.15243006, 
0.09121186, 0.17119732, 0.09372113, -0.011121283, 0.01683138, 0.04647735, 0.26708847, -0.045210358, -0.05229348, 0.13961853, -0.23234563, 0.11518522, 0.025384203)); + target3 += mul(c3, float4x4(0.24803765, -0.12064236, 0.16222163, 0.10242684, 0.35362238, -0.0025835831, 0.10871223, -0.14052986, 0.086918466, 0.003965692, -0.052900802, -0.09219091, -0.097256884, 0.027730078, -0.018556952, 0.029902605)); + target3 += mul(d3, float4x4(-0.07853819, -0.33072472, -0.01923759, -0.022614414, 0.037449032, 0.0057582236, 0.035095196, -0.10516724, 0.021059662, -0.1803607, -0.072927505, -0.032927528, 0.10600866, 0.2115304, -0.038914077, 0.026641702)); + target3 += mul(e3, float4x4(-0.046708018, -0.30087915, 0.23972215, 0.051118676, -0.09175249, 0.061564893, -0.0606459, 0.10725062, 0.16634792, 0.15181623, -0.14776988, -0.089753665, 0.09396779, 0.3047946, 0.20602426, -0.10614584)); + target3 += mul(f3, float4x4(0.16031305, -0.010385087, 0.12137829, 0.013936002, -0.09272479, -0.0462326, 0.14647374, -0.1364509, 0.1020013, 0.07280318, -0.035455197, -0.0074932426, 0.06966262, 0.43025437, 0.14413132, -0.020879302)); + target3 += mul(g3, float4x4(-0.048381433, 0.055672538, 0.17734092, 0.057804573, -0.064207256, -0.081648245, -0.19108449, -0.027356787, 0.22855555, 0.026296774, 0.051670585, 0.1469678, 0.14372535, -0.019550381, 0.0711832, -0.23015371)); + target3 += mul(h3, float4x4(0.10270051, 0.03306284, 0.18660016, -0.08794835, 0.022104584, 0.14556691, -0.18290472, -0.004233608, 0.31982687, -0.019705234, -0.18947408, -0.014298402, 0.13134713, -0.22212905, -0.22175267, -0.083559796)); + target3 += mul(i3, float4x4(0.09405076, -0.094762795, 0.00039714025, -0.033925287, -0.040082168, 0.18154381, -0.091368884, -0.002279935, 0.18112488, -0.16065024, -0.07302534, -0.054364413, -0.027507186, 0.056911435, -0.25985143, 0.19071229)); + target3 += mul(na1, float4x4(-0.08038882, -0.23933147, 0.091805875, 0.06882283, 0.030006107, -0.19613835, -0.19390447, -0.06947256, -0.15933713, -0.12136816, 
0.10496873, 0.20988281, -0.06429982, 0.13831986, -0.12110751, -0.013753183)); + target3 += mul(nb1, float4x4(-0.03526272, 0.09196733, 0.22100714, -0.034608632, 0.11271489, 0.19354948, -0.08702665, 0.0818318, -0.23144986, -0.39077505, 0.068490066, -0.07049248, -0.15327029, -0.13464752, -0.23453039, -0.007664983)); + target3 += mul(nc1, float4x4(-0.071974635, -0.09427919, -0.14303383, -0.15694854, -0.0536355, 0.072341934, 0.0919402, -0.032855745, 0.061292388, 0.09840731, 0.035950005, -0.064508714, -0.121800035, -0.18790516, -0.098817684, -0.032492902)); + target3 += mul(nd1, float4x4(-0.006014576, 0.056944408, -0.04101546, -0.07834956, 0.048266124, 0.013926315, 0.041723326, -0.323333, -0.41566008, 0.3228979, 0.004536671, 0.31018063, -0.32762045, 0.23986395, 0.0941997, 0.32134023)); + target3 += mul(ne1, float4x4(-0.07315801, -0.04973393, -0.022297578, -0.0803329, -0.006434735, 0.010591334, 0.036642008, 0.099703625, -0.30428717, -0.13702157, 0.05784328, -0.08263622, -0.16771519, 0.012717832, 0.16369238, 0.082922)); + target3 += mul(nf1, float4x4(0.0001620341, 0.13469625, 0.022239598, -0.045452654, -0.012625867, -0.016001742, -0.13125779, 0.035808936, -0.06057855, -0.23169748, -0.031564385, 0.0035062286, 0.08688842, 0.043959387, 0.045130596, -0.082511395)); + target3 += mul(ng1, float4x4(-0.083551735, -0.0062169307, -0.071006864, 0.08302828, 0.041814975, -0.17135905, -0.051279463, -0.23531726, -0.07600026, -0.016305951, -0.12496258, 1.6274626e-05, -0.056098733, 0.05471391, -0.16807914, 0.043552015)); + target3 += mul(nh1, float4x4(0.10614594, 0.055918783, -0.04306798, 0.12271233, -0.053095255, -0.041611873, 0.0658641, -0.17270197, -0.17228878, 0.04906801, 0.025378078, 0.03993686, 0.26168197, -0.0664166, -0.24114749, 0.122338526)); + target3 += mul(ni1, float4x4(-0.028463805, -0.06832796, -0.042678714, -0.09115425, 0.112060644, -0.11552275, -0.13850841, -0.21241449, -0.025949117, -0.25152782, 0.118504696, -0.0032011967, -0.004659375, 0.14416796, 0.10196362, 
-0.25900578)); + target3 += mul(na2, float4x4(-0.014083873, -0.14722492, -0.04869616, -0.0060440497, -0.06496493, -0.080328904, 0.0021304504, -0.071984075, 0.037136473, -0.06741335, 0.047950987, 0.13102819, -0.084352426, 0.021756288, 0.14978755, -0.07930937)); + target3 += mul(nb2, float4x4(-0.043805413, 0.11554947, 0.08058495, -0.029509902, 0.07255308, -0.11107158, 0.19269472, -0.06936789, -0.056554012, -0.13389792, 0.05822567, -0.080038816, -0.11012767, -0.2594496, 0.013091632, -0.016040247)); + target3 += mul(nc2, float4x4(0.11076819, 0.29110146, 0.010078737, -0.07397723, 0.017001567, -0.0600932, 0.120115615, -0.1516764, -0.046932317, -0.1531205, -0.041367747, 0.03022747, 0.028425755, -0.09993652, 0.105394356, -0.097724885)); + target3 += mul(nd2, float4x4(-0.16120721, 0.12060183, -0.051696084, 0.13536309, -0.0629108, 0.20782739, 0.08011087, 0.16132146, -0.17330962, 0.075349055, -0.13367563, 0.0834821, 0.13859299, -0.24726664, 0.1219966, 0.008662899)); + target3 += mul(ne2, float4x4(-0.06673648, -0.059848122, -0.079399005, 0.07430188, 0.039565083, -0.02646128, 0.06627121, -0.15686277, -0.08100342, 0.211192, 0.11364034, -0.056452975, 0.003068278, -0.09815622, -0.2720423, -0.060945407)); + target3 += mul(nf2, float4x4(0.10425103, -0.11963076, -0.15664895, -0.008325704, 0.030473476, -0.059397645, 0.08696136, -0.105832994, 0.15845199, -0.0155479815, 0.21866821, -0.24220671, -0.07413551, 0.18748072, 0.15781933, 0.09678578)); + target3 += mul(ng2, float4x4(-0.105755776, 0.05295692, -0.065712206, -0.055599883, -0.024171222, -0.10882413, -0.019153712, -0.0797682, -0.05841592, 0.027539523, 0.018220939, 0.025832783, 0.10254366, 0.027248384, 0.17515337, 0.13366127)); + target3 += mul(nh2, float4x4(0.14450707, 0.16593692, 0.10250131, 0.022199351, 0.00025016058, 0.02208959, -0.015518909, 0.03897976, 0.066313244, -0.08834062, -0.06497536, 0.21156809, -0.028999787, 0.1924942, 0.27274308, -0.19622537)); + target3 += mul(ni2, float4x4(0.06670714, 0.032708794, 0.08869534, 
-0.1733506, 0.0076623727, 0.12130858, 0.010788659, -0.046009142, 0.09683414, -0.2074643, 0.08545223, 0.0447272, 0.12027344, 0.20864709, 0.17474543, 0.13670264)); + target3 += mul(na3, float4x4(0.021432638, -0.24760821, 0.12410156, -0.11143068, 0.1870739, 0.0740915, 0.11552895, -0.061147105, -0.037998777, -0.1789209, 0.02577988, -0.1907707, 0.16632228, 0.029018525, 0.016788188, 0.16683672)); + target3 += mul(nb3, float4x4(0.1736272, 0.052254014, -0.010544911, -0.25163132, -0.021734655, -0.23477079, 0.30037084, -0.024889933, 0.16576701, 0.11785999, -0.19426535, 0.012973521, -0.31330642, -0.12940904, 0.1407924, -0.104257464)); + target3 += mul(nc3, float4x4(-0.10720734, -0.22007015, 0.06929706, 0.1128352, 0.08878798, -0.74968565, 0.05292707, -0.2015415, -0.22024418, 0.12937216, 0.0077955252, 0.10120546, -0.051692892, -0.4005671, 0.019636473, -0.020149691)); + target3 += mul(nd3, float4x4(0.43247837, -0.18930417, -0.013568917, -0.079419196, 0.18672904, -0.35622415, 0.25079453, -0.1175358, 0.26581118, 0.008579299, 0.18397655, -0.29648697, -0.15222591, 0.32292458, -0.011576255, -0.030688757)); + target3 += mul(ne3, float4x4(0.10460517, 0.22705384, -0.11461504, -0.34884137, 0.06710358, 0.07710169, -0.22747318, -0.03428357, -0.12087394, -0.18585229, 0.053487252, -0.15423988, -0.24636437, 0.42574164, -0.20994124, -0.13236474)); + target3 += mul(nf3, float4x4(-0.15691194, -0.08720117, -0.052925915, -0.16245657, -0.16402285, 0.3253476, 0.1616336, 0.072358444, -0.19095042, 0.21235181, 0.033952657, -0.021103038, 0.1247694, 0.228517, 0.032327496, -0.21903606)); + target3 += mul(ng3, float4x4(-0.14174855, -0.06494216, 0.13284135, -0.08129453, 0.16482054, 0.110014215, 0.15709473, -0.010275839, -0.22032334, -0.10103909, -0.11650554, -0.17561941, 0.085149735, -0.40727508, 0.12032625, -0.02078777)); + target3 += mul(nh3, float4x4(-0.27078578, -0.08153653, 0.1757881, 0.11317136, 0.27882257, -0.24042514, -0.08648888, -0.045675088, -0.10128582, -0.04766186, 0.06836051, 0.15924035, 
0.04440567, -0.099891834, -0.08893405, 0.05721548)); + target3 += mul(ni3, float4x4(0.15327021, 0.13603994, 0.17330587, 0.05625383, -0.11157126, -0.08179826, 0.05035325, -0.012668053, 0.04673393, 0.29881957, 0.019924281, -0.06682304, -0.034375366, -0.11446407, 0.055847015, 0.104117975)); + target3 += float4(0.013481283, -0.0006846239, 0.017479934, 0.13998064); + + float3 target4 = tex8.SampleLevel(sam, pos, 0).rgb; + target4 += mul(e1, float4x3(0.027129134, 0.01044246, 0.008198051, -0.019978391, 0.014817045, 0.014294805, -0.009071333, -0.018233696, -0.020756468, -0.016967475, -0.010472854, -0.0066578956)); + target4 += mul(e2, float4x3(0.012473992, -0.019771596, -0.02515739, -0.008238026, 0.026189122, 0.034326296, 0.01735337, -0.021417223, -0.027291182, 0.01815212, -0.012736875, -0.021111157)); + target4 += mul(e3, float4x3(0.022218483, -0.023485998, -0.03540812, 0.016531168, -0.0033816632, -0.010179393, -0.03181473, -0.0072774286, 0.0014077872, -0.0025735856, -0.015998563, -0.016743565)); + target4 += mul(ne1, float4x3(-0.01740865, 2.3718083e-05, 0.0032518203, 0.009272118, -0.01676428, -0.019791994, 0.013665012, 0.02245221, 0.022923533, 0.020898446, 0.012111701, 0.009756352)); + target4 += mul(ne2, float4x3(-0.0043926076, 0.019400991, 0.022581568, 0.003538965, -0.031301565, -0.0345112, -0.02405352, 0.006159623, 0.016130725, -0.0097925, 0.01677507, 0.027652735)); + target4 += mul(ne3, float4x3(-0.03267886, 0.014923966, 0.027258545, -0.033668566, -0.010421195, -0.0026646685, 0.015094835, -0.0023233194, -0.015871005, -0.01258443, 0.00507582, 0.0053544766)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex3[gxy] = target3; + tex7[gxy] = float4(target4, 1); +} + +//!PASS 8 +//!DESC Conv-4x3x3x24, Conv-3x1x1x120 +//!IN INPUT, tex1, tex2, tex3, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass8(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) 
{ + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + 
float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(-0.013182829, 0.053091962, 0.06549412, 0.09314398, 0.12759157, 0.19831958, -0.0066986284, 0.008724786, -0.008788724, -0.18448268, -0.08061004, -0.122672, 0.039246775, 0.114899494, 0.0053096768, -0.45705518)); + target1 += mul(b1, float4x4(-0.15435986, 0.12775438, 0.033445876, 0.13065258, -0.034713954, 0.011218427, -0.056961175, -0.028291933, 0.014069658, -0.12902507, 0.09579773, -0.24455607, 0.14417914, 0.05937612, 0.2243551, -0.3940324)); + target1 += mul(c1, float4x4(0.019673724, 0.20209175, 0.0864056, 0.062125377, -0.032693543, -0.07866025, 0.049098648, 0.09967038, 0.071991436, -0.035584584, 0.08620264, -0.3146151, 0.0016364265, 
-0.1282453, 0.113696136, -0.09162608)); + target1 += mul(d1, float4x4(-0.086494565, -0.031322442, -0.0010425163, 0.0043439222, -0.2207718, -0.114754595, -0.04754309, 0.038829442, 0.28012696, 0.01416326, -0.006575263, 0.09800945, 0.20944737, 0.12320554, -0.27976176, 0.042036757)); + target1 += mul(e1, float4x4(-0.0043551656, -0.034230676, 0.047720857, -0.0913431, 0.25977305, 0.21515612, 0.18708718, -0.004843006, 0.29522216, 0.03641434, -0.096512936, -0.07962972, -0.07454651, -0.2631387, -0.3370317, 0.40316954)); + target1 += mul(f1, float4x4(0.07995683, -0.17652078, 0.0023912708, -0.02042794, 0.17486735, -0.22842996, -0.06893651, -0.19074234, 0.10076771, 0.11205654, 0.001572062, 0.024552155, -0.00011488878, -0.12493254, -0.29600865, 0.07090882)); + target1 += mul(g1, float4x4(0.017154403, 0.13125315, 0.11503914, 0.105513334, -0.11673632, -0.034176424, -0.030536361, -0.002248858, -0.11892652, -0.08516513, 0.06950209, 0.17622153, 0.06246307, -0.07698598, 0.2093879, -0.058301486)); + target1 += mul(h1, float4x4(0.10316161, 0.0072260876, 0.07716615, -0.13834251, -0.010482067, 0.15220478, 0.09262941, 0.08135313, 0.14277095, -0.15209594, -0.15694623, 0.0658977, 0.16643007, -0.04802777, 0.039331965, 0.10639549)); + target1 += mul(i1, float4x4(0.003941457, 0.096958525, 0.08078122, -0.123746514, 0.05798335, -0.044676617, -0.3084394, 0.1140151, 0.010672668, -0.025900228, -0.06911797, 0.05360162, 0.15696998, 0.07253946, 0.06035546, 0.1159507)); + target1 += mul(a2, float4x4(0.229298, 0.064096935, 0.16048844, 0.012671015, 0.024478769, 0.063737154, -0.004687863, 0.19364266, -0.19646022, 0.2893255, 0.026786007, 0.069286734, 0.07800188, -0.053994164, 0.052960467, 0.1745522)); + target1 += mul(b2, float4x4(0.015469714, -0.12314818, 0.5338478, -0.11723986, -0.008365538, -0.10995339, -0.15134127, -0.07830025, 0.07128518, 0.009086638, -0.116382964, -0.16219214, 0.026113646, 0.29649207, -0.33176404, 0.009099685)); + target1 += mul(c2, float4x4(-0.09291687, 0.09429629, 0.47023576, 
0.30382136, 0.022938905, -0.053612467, 0.03914971, 0.07205734, 0.11292842, -0.022005484, 0.17894705, -0.008363797, -0.1682453, 0.0409644, -0.017597, -0.14894786)); + target1 += mul(d2, float4x4(-0.22093011, 0.050382294, -0.07031792, 0.1064123, 0.09168859, -0.054715537, 0.07824245, 0.0675236, -0.11675646, 0.12587738, 0.33370635, -0.14830373, -0.4392533, -0.23865284, 0.071248956, -0.026170105)); + target1 += mul(e2, float4x4(-0.048300147, 0.08008235, 0.11813505, -0.09183442, -0.06377392, 0.14087953, 0.07831149, 0.044931732, -0.21497081, 0.026584432, 0.013495652, -0.14503439, 0.007470514, 0.14160597, -0.016141815, -0.31155616)); + target1 += mul(f2, float4x4(-0.41951287, 0.24024096, 0.19465575, -0.041104067, 0.09810697, 0.14586213, 0.13903797, -0.053057924, -0.14113568, -0.09644958, -0.09866805, -0.07899498, -0.172797, -0.08095462, -0.13160881, -0.0089402525)); + target1 += mul(g2, float4x4(-0.0807202, -0.37737992, -0.12689668, 0.079181, 0.01577318, -0.11053014, -0.12669973, 0.0071108835, -0.20729698, 0.046471246, -0.12194573, 0.2870874, -0.22770974, 0.09006065, 0.23021267, 0.3112802)); + target1 += mul(h2, float4x4(-0.17971495, 0.036312122, -0.03444156, 0.041823655, -0.09082372, 0.08112007, 0.049061, -0.0055645844, 0.013843324, -0.01969895, -0.13960351, -0.047275152, -0.043000307, 0.27807096, 0.22880352, 0.106376074)); + target1 += mul(i2, float4x4(-0.17075665, 0.006126272, -0.2546894, 0.11954845, -0.19607663, 0.048538323, -0.065129414, 0.014257998, 0.038192738, -0.037847996, -0.020429965, 0.025860488, 0.028515143, 0.06738391, 0.072110705, -0.033284377)); + target1 += mul(a3, float4x4(0.044568866, 0.062475223, -0.0983384, 0.009866407, -0.35514477, -0.030211627, 0.22333166, 0.32969475, -0.109626986, -0.034606833, -0.0576798, -0.17654708, -0.08829175, -0.16896097, -0.1001105, -0.12807782)); + target1 += mul(b3, float4x4(0.3260804, -0.13558061, 0.04645619, -0.07019992, -0.29856443, 0.053042114, 0.061772786, -0.13687392, -0.16278408, -0.034854803, -0.012278255, 
-0.098236114, -0.19714803, -0.1252398, 0.52471006, -0.00438923)); + target1 += mul(c3, float4x4(-0.23780306, 0.0058732736, -0.14263488, 0.014209727, 0.3014817, -0.19334342, 0.14975117, -0.4833427, 0.06679691, -0.068613395, -0.11530229, -0.27387938, 0.060538717, 0.2566434, 0.089476675, 0.20292005)); + target1 += mul(d3, float4x4(-0.08988664, 0.09974201, -0.06231258, -0.14937639, 0.3109973, -0.062920496, 0.38651597, 0.32825765, -0.019837346, 0.1774624, 0.0721711, 0.01380091, 0.33275485, -0.024928985, -0.07132799, -0.10537747)); + target1 += mul(e3, float4x4(-0.09355771, -0.018917486, 0.062002532, -0.06821075, 0.0852235, -0.043880213, 0.023216404, -0.034589138, 0.009775594, 0.020386059, -0.19563444, 0.34160665, -0.19108588, -0.36282206, 0.12477205, -0.1635429)); + target1 += mul(f3, float4x4(0.0660281, -0.004066676, 0.051605884, -0.20489341, 0.45396295, 0.048399396, 0.32993752, 0.5071012, 0.1316449, -0.028571565, -0.1418205, -0.06860564, 0.00832686, 0.10344168, -0.0033388403, -0.21189667)); + target1 += mul(g3, float4x4(0.03121781, 0.06918902, 0.0073941397, -0.039547186, -0.13978334, 0.36066145, 0.105322905, 0.048246022, -0.064950965, 0.04503515, 0.13340174, -0.30344757, 0.08683505, -0.046188712, -0.17417784, 0.09081479)); + target1 += mul(h3, float4x4(-0.16948071, 0.0040395157, 0.035707664, -0.079912804, 0.038716394, -0.17475441, -0.36299637, 0.03968082, 0.049196135, -0.1715365, -0.071639955, -0.016410451, 0.09188755, -0.2558949, -0.16094652, 0.07996079)); + target1 += mul(i3, float4x4(-0.21382907, 0.05636966, -0.06199946, -0.21989174, -0.18922164, 0.37826148, -0.0141571835, 0.024448203, 0.0884536, 0.12374937, 0.18420288, 0.0967765, -0.06720011, 0.069881, -0.0042892518, -0.19172947)); + target1 += mul(na1, float4x4(-0.08187655, -0.489484, -0.27973574, -0.009521738, 0.18314825, -0.35783502, 0.056075446, 0.1687472, 0.3308614, -0.23036483, 0.0055736783, 0.18089417, -0.01648603, -0.21509576, -0.05315817, 0.2311331)); + target1 += mul(nb1, float4x4(0.10545252, 
-0.16682841, 0.46201918, 0.41049242, 0.2867931, -0.10737721, 0.37278366, -0.15247364, 0.32457805, -0.13211884, 0.0282094, 0.32339963, -0.20642634, -0.07769656, -0.11572602, -0.0078001227)); + target1 += mul(nc1, float4x4(0.049419798, -0.16918461, -0.07071865, -0.23344457, -0.06583399, 0.21428098, 0.13742666, -0.24406539, -0.3166922, 0.04145341, 0.12750438, 0.7016666, -0.072237894, 0.060902767, 0.024233112, 0.1978945)); + target1 += mul(nd1, float4x4(0.21362431, -0.22586554, -0.13855393, 0.03641023, -0.18417473, 0.13428141, 0.019632103, 0.18459935, -0.25052726, -0.06585735, 0.06470142, -0.1343166, -0.1102426, 0.12908545, -0.03501417, -0.2672359)); + target1 += mul(ne1, float4x4(0.098094426, -0.40027153, 0.05030102, 0.29116127, -0.07573088, -0.0358284, -0.2436342, 0.00352126, -0.114547156, -0.013960078, -0.20433213, -0.021052646, -0.22285037, 0.028262915, 0.08860262, -0.30081618)); + target1 += mul(nf1, float4x4(-0.01646094, 0.24261765, 0.33677813, -0.060467284, -0.19734232, 0.1702455, -0.1304959, -0.20504838, -0.3379331, 0.26765183, -0.26516193, 0.27015924, -0.08003835, 0.3141519, 0.29280853, -0.052082997)); + target1 += mul(ng1, float4x4(0.12277012, -0.46426025, 0.015877785, 0.028895028, -0.12974375, -0.075910136, 0.300476, 0.16338159, -0.012315035, 0.05539739, 0.019287715, -0.2627638, -0.10653122, 0.15327309, 0.116874225, 0.17951632)); + target1 += mul(nh1, float4x4(0.29410085, -0.17102611, 0.035222203, 0.538198, -0.082762614, -0.13113698, 0.23784018, 0.10809719, 0.10368062, -0.26618072, 0.017677844, -0.5524849, 0.20205925, -0.25295278, -0.08522028, -0.35101673)); + target1 += mul(ni1, float4x4(-0.24038099, 0.047679562, 0.16125403, 0.12115515, 0.25935376, -0.12338007, 0.28222737, -0.1517331, 0.102381065, 0.02018978, 0.103473715, 0.008457937, 0.075750284, 0.030453209, 0.103425525, -0.11869024)); + target1 += mul(na2, float4x4(0.027691573, 0.033041246, -0.053919036, 0.021841308, 0.22477351, -0.012002719, -0.088659704, -0.19888699, -0.14622316, 0.07693778, 
0.06058014, 0.072771885, -0.09090909, -0.009634639, -0.04093643, -0.0016425458)); + target1 += mul(nb2, float4x4(0.121816635, 0.06796998, -0.00956044, 0.060272712, 0.0929867, -0.104182824, 0.068678245, -0.0025653015, 0.29900813, -0.121311836, -0.18685773, 0.047214147, -0.002424332, -0.071621366, 0.09782575, 0.069204815)); + target1 += mul(nc2, float4x4(-0.115039505, -0.013585092, -0.062492177, 0.019942736, -0.22608118, 0.10974841, 0.121345155, 0.048270512, 0.036711007, -0.1555631, 0.3113601, 0.20424883, 0.036948208, 0.023162413, 0.093668364, 0.091156565)); + target1 += mul(nd2, float4x4(-0.0239057, -0.0074546733, -0.072916195, -0.15032186, 0.11206848, 0.076949894, -0.0719725, 0.057246305, -0.12505415, 0.17029393, 0.059913885, 0.10695817, 0.11587671, 0.009000426, -0.0065819114, 0.112660386)); + target1 += mul(ne2, float4x4(-0.22613746, 0.010249483, 0.31479695, -0.15589239, 0.21750645, -0.16260515, 0.03900687, 0.31478724, 0.24153055, -0.13562167, -0.13101026, -0.30842167, -0.09156883, -0.08611807, 0.0021150038, 0.19845119)); + target1 += mul(nf2, float4x4(-0.09328654, 0.065565474, 0.053929932, -0.0614148, 0.10553007, -0.16130202, -0.14184211, -0.0015263067, -0.015361093, -0.20926285, -0.23366193, -0.06125057, -0.071300104, 0.01055638, -0.05240934, 0.06743602)); + target1 += mul(ng2, float4x4(-0.05055375, 0.085141584, -0.025911124, -0.035443313, -0.1763071, 0.085818924, 0.19284901, -0.006149421, -0.0160643, 0.11941451, 0.20142859, -0.047862962, 0.049561072, 0.06118226, -0.117986836, -0.10885573)); + target1 += mul(nh2, float4x4(0.0026763107, -0.13232177, 0.040220898, 0.056682535, -0.03708343, 0.22508788, 0.14923818, -0.106249794, 0.035745993, -0.18804651, -0.3110593, -0.20087922, -0.14625967, -0.0653864, -0.061015815, -0.04066649)); + target1 += mul(ni2, float4x4(0.111738384, -0.104334466, 0.029024106, -0.09726162, 0.2414019, -0.029426873, 0.09094325, 0.027416501, 0.30706093, -0.09682458, -0.19449362, -0.014534671, 0.15952238, -0.033171862, -0.10819316, -0.10238822)); 
+ target1 += mul(na3, float4x4(0.56843907, -0.18652008, -0.07477079, -0.09572682, 0.004717268, -0.19569749, 0.012557746, -0.16934179, 0.20934415, -0.13695319, -0.085793145, 0.16430594, 0.1280811, -0.035566512, 0.17796053, 0.034620196)); + target1 += mul(nb3, float4x4(0.10944063, 0.056659624, -0.10928797, -0.48222318, -0.03679725, 0.12002146, 0.06371042, -0.024989901, -0.19508527, 0.35469803, -0.034514666, 0.05471589, -0.008078808, 0.086663045, -0.06641959, 0.14787014)); + target1 += mul(nc3, float4x4(-0.08401734, 0.065710895, -0.03586741, -0.09523177, -0.11976769, -0.00039887297, -0.11169928, 0.11623861, 0.06338808, 0.1087186, 0.26752025, 0.27731213, 0.042043414, -0.040737793, -0.13757998, 0.03160253)); + target1 += mul(nd3, float4x4(0.03308292, 0.11817877, 0.04941428, 0.053257942, 0.20836346, -0.3157687, -0.15115938, 0.017689008, -0.08777182, 0.075874984, -0.11381275, 0.15768103, -0.25251803, 0.024785532, -0.1119765, -0.08488973)); + target1 += mul(ne3, float4x4(0.14967972, 0.08358996, -0.12477746, 0.18376626, -0.11429529, 0.18852599, 0.12402519, 0.13575697, -0.17223327, -0.18583423, 0.08749376, 0.14127673, 0.04728666, 0.13141015, -0.1578823, 0.064156786)); + target1 += mul(nf3, float4x4(-0.18258065, 0.05539021, -0.08642571, 0.22043483, -0.03830304, 0.10055482, -0.050123304, 0.12830205, -0.4921733, 0.2718683, 0.11772524, -0.07781355, -0.0075595984, 0.060227167, 0.1285977, 0.2978205)); + target1 += mul(ng3, float4x4(0.19988084, -0.36680242, -0.0095746415, 0.091812566, 0.3152317, -0.075949475, -0.04308324, 0.049759876, 0.02971871, 0.18617181, 0.19829167, 0.17954859, 0.015149219, -0.15809381, 0.10850363, 0.017803097)); + target1 += mul(nh3, float4x4(0.056506306, 0.15181234, -0.1497428, 0.01186181, 0.02351036, 0.01086669, -0.031891935, 0.01414558, 0.27038968, -0.2806401, -0.14722337, 0.080689445, 0.07039954, -0.054969363, -0.016640754, 0.020795437)); + target1 += mul(ni3, float4x4(-0.237999, 0.13528651, 0.005025065, -0.01291728, -0.22655746, 0.022678101, 0.07165532, 
0.0073296893, 0.084639646, -0.06724732, -0.13105223, 0.10164715, -0.15071161, 0.08882156, -0.016988168, -0.013606533)); + target1 += float4(0.20634188, -0.10455712, -0.031700566, -0.13400781); + + float4 target2 = mul(a1, float4x4(0.1606533, 0.1120602, 0.427334, -0.056228757, -0.026887462, 0.0858575, 0.0052684247, 0.1645524, 0.021588106, -0.08577256, -0.03301297, -0.087385215, 0.17341405, 0.26737398, 0.04566977, 0.047820427)); + target2 += mul(b1, float4x4(0.21000437, 0.05300574, 0.060565695, -0.086724475, 0.09684198, 0.12685667, -0.10724282, -0.11021523, 0.048485592, -0.0054517, 0.081800036, -0.099787444, -0.12168391, 0.07623567, 0.09177046, 0.15815327)); + target2 += mul(c1, float4x4(0.14400747, 0.13797458, 0.11044521, 0.077145234, 0.14364728, 0.10041894, 0.0948857, 0.08613703, 0.030833652, 0.102926254, 0.029892365, -0.09385337, 0.07609406, -0.038274735, 0.22529188, -0.0905732)); + target2 += mul(d1, float4x4(-0.13334684, -0.083001845, -0.06109816, 0.067442395, 9.6367134e-05, -0.10395697, 0.047389086, 0.07404194, -0.11931296, -0.029852618, -0.08998846, 0.16543494, 0.19911745, -0.014106389, -0.020616226, 0.011981892)); + target2 += mul(e1, float4x4(-0.20999044, 0.20443644, -0.08602043, -0.06026268, 0.0016786976, -0.15406793, 0.25403517, 0.0038395252, -0.16244787, -0.19482566, -0.113314606, 0.007111468, -0.026472634, 0.08177431, 0.13382603, -0.01771927)); + target2 += mul(f1, float4x4(-0.14934808, -0.022533301, -0.14221415, -0.096389346, 0.11613694, 0.21117163, 0.22294325, -0.029256172, -0.072161585, -0.09670809, -0.24253419, 0.10479088, 0.20190297, -0.08443066, 0.08334989, 0.2928627)); + target2 += mul(g1, float4x4(0.030550636, 0.095876954, -0.040062953, 0.024307664, 0.17360783, -0.035755854, -0.20959523, 0.069054864, -0.1061238, -0.26194566, 0.2781827, -0.118610375, -0.09682604, -0.076366246, -0.05720086, -0.08075027)); + target2 += mul(h1, float4x4(0.15727855, 0.21407695, 0.009924877, -0.0027381582, 0.16699612, 0.017624786, -0.13224785, 0.008606034, -0.05968717, 
-0.009152095, -0.084314294, -0.14502133, -0.13212982, 0.2531764, -0.09840475, 0.020581203)); + target2 += mul(i1, float4x4(0.06285324, 0.019129366, 0.15062185, -0.0018203754, 0.025869751, 0.09390758, 0.027623225, 0.09279268, 0.12548098, -0.05622771, 0.024048142, -0.011120709, 0.039858714, 0.022324169, -0.061184626, 0.15133153)); + target2 += mul(a2, float4x4(0.25750592, -0.0633985, 0.05274334, 0.05652166, -0.13369319, -0.19132645, -0.11266925, 0.05310033, -0.10603932, -0.18876615, -0.23720984, 0.23625968, -0.1460642, -0.16662763, -0.31894067, 0.00010953036)); + target2 += mul(b2, float4x4(0.021643812, 0.27677965, -0.18880053, -0.085671276, -0.12699273, 0.07259516, -0.09705578, 0.0103639, -0.10424065, 0.2421007, -0.15788709, -0.03597044, -0.03210864, -0.009501378, -0.29830885, 0.18951061)); + target2 += mul(c2, float4x4(-0.023235895, -0.15663858, -0.097848825, -0.030312262, -0.36207277, -0.044624195, -0.24912846, 0.001322196, -0.012719531, -0.061012562, 0.02297421, -0.083919466, 0.023231668, 0.17829593, -0.20094186, 0.062941045)); + target2 += mul(d2, float4x4(0.031992577, -0.33281925, 0.24781865, 0.10445937, -0.043928526, -0.0048965504, 0.025098981, -0.02432072, -0.06936203, 0.06697805, -0.03503784, 0.04098378, 0.11242077, -0.47939962, -0.36156863, 0.10633177)); + target2 += mul(e2, float4x4(-0.360187, 0.15471298, 0.19546136, -0.19344117, 0.19245885, 0.10948706, -0.25480017, -0.117233664, 0.07698171, 0.00455522, 0.016817722, -0.21183428, -0.3989548, -0.0053129625, 0.32735184, -0.25722015)); + target2 += mul(f2, float4x4(-0.19386199, -0.104854785, 0.2354883, 0.07680881, -0.08103157, 0.19879752, -0.20958872, 0.03404414, 0.2462412, -0.025986584, 0.15228593, 0.082260065, 0.05948899, 0.018289726, 0.26004076, 0.29258958)); + target2 += mul(g2, float4x4(0.43535206, -0.1665342, -0.078847095, -0.09834152, 0.3344753, 0.14931677, 0.26555872, -0.050443217, 0.1165338, -0.018918963, -0.18268648, -0.08987844, -0.15032545, -0.41353035, 0.04693913, 0.12682211)); + target2 += 
mul(h2, float4x4(-0.101665966, -0.2346043, -0.13883743, 0.09837179, 0.11640853, -0.11128404, 0.1264696, 0.13364471, -0.0010915099, 0.32518107, 0.015125061, 0.0014352624, -0.32198808, 0.14844672, 0.045113582, 0.22434932)); + target2 += mul(i2, float4x4(0.04287463, -0.031287823, 0.07444511, 0.215022, -0.051081534, 0.09054911, 0.094913155, -0.16440862, 0.025819149, -0.035652477, 0.11303366, 0.072897494, 0.15771803, -0.026064822, -0.27329972, 0.3400305)); + target2 += mul(a3, float4x4(0.15108323, 0.099104606, -0.18490814, 0.282546, -0.39287362, 0.020549994, 0.03981046, -0.38331905, -0.028022572, 0.07132567, 0.054721024, -0.0544467, 0.30145043, 0.07482834, -0.030623315, -0.14339122)); + target2 += mul(b3, float4x4(-0.19015107, 0.084599644, -0.08060123, -0.13798787, 0.11629986, 0.1486292, 0.22505176, 0.120357476, 0.09555313, -0.012545042, 0.14008446, 0.09553097, -0.13701685, 0.13754234, 0.08133829, 0.0583218)); + target2 += mul(c3, float4x4(-0.011506865, -0.12886223, 0.32145864, -0.0046038935, -0.15737815, 0.31331423, 0.014334723, 0.13329573, 0.059868217, 0.044668417, 0.41486976, -0.115652, 0.07570654, 0.119870186, -0.04211968, 0.13550858)); + target2 += mul(d3, float4x4(-0.06290216, -0.09683136, 0.17791402, -0.06173693, -0.5663153, 0.6814847, -0.30665252, -0.015765276, -0.35518414, 0.014619069, 0.0059011583, -0.011811011, -0.12891632, -0.09697547, 0.0122915255, -0.035630453)); + target2 += mul(e3, float4x4(0.36552843, -0.02239533, 0.31511658, 0.07742532, 0.38120705, -0.34059232, -0.1941228, -0.009505623, -0.057844408, -0.08539643, -0.15442915, 0.047755927, -0.10766272, 0.19164445, -0.04577609, -0.02152571)); + target2 += mul(f3, float4x4(0.25128686, 0.008428173, -0.2337189, -0.07352831, 0.4192482, 0.03234093, -0.107415706, -0.59545743, 0.08484682, 0.10139428, 0.032704517, -0.03676146, 0.0709341, -0.08012427, -0.17445756, -0.051028527)); + target2 += mul(g3, float4x4(-0.13946229, 0.14634825, -0.29945642, 0.054991852, -0.19391525, 0.18525685, -0.01332443, 0.19226684, 
-0.25809357, -0.16726302, -0.08535996, 0.04962988, -0.21382174, -0.27475968, 0.14896728, -0.07398321)); + target2 += mul(h3, float4x4(0.012350131, -0.15466039, 0.2637539, -0.004446026, -0.23348337, 0.31829268, 0.30077904, -0.26715708, -0.2248632, 0.026697354, -0.13744812, -0.11420962, 0.12333178, 0.20206316, 0.14819679, 0.11332464)); + target2 += mul(i3, float4x4(-0.1398207, -0.08409686, 0.28911248, -0.17092308, -0.3288522, -0.33649427, 0.08738124, 0.07093669, 0.042545132, 0.056477334, -0.043472584, 0.11007758, -0.001716612, -0.10464193, 0.03468551, 0.18904419)); + target2 += mul(na1, float4x4(-0.22511648, -0.21305616, -0.20453261, -0.039165854, 0.12624744, -0.24751776, -0.071244866, -0.013594594, 0.050315578, -0.07449747, -0.2612381, 0.16027644, -0.16809192, -0.124898806, -0.038302764, -0.17824896)); + target2 += mul(nb1, float4x4(-0.011263025, -0.13628832, -0.018516185, -0.02475848, 0.09929525, 0.039110962, -0.12850322, -0.1350285, -0.35264796, -0.021238754, -0.11728184, -0.1417785, 0.2576594, 0.08627893, 0.11525315, -0.12663105)); + target2 += mul(nc1, float4x4(0.098808385, 0.11969382, -0.09717777, 0.15366855, 0.05112679, -0.0064458046, 0.17321971, -0.11823168, -0.010137996, -0.35834765, 0.22273073, 0.20014246, -0.077788174, -0.10495039, -0.17693104, -0.069975644)); + target2 += mul(nd1, float4x4(0.058652826, -0.12872136, 0.3963312, -0.15295003, -0.05818842, -0.4533677, 0.20178635, 0.24527496, 0.25999078, -0.37886587, 0.19364087, 0.18344274, 0.22268198, -0.14165895, 0.48145312, -0.23087662)); + target2 += mul(ne1, float4x4(0.11743857, -0.081544854, -0.19755375, -0.12016749, -0.28253412, -0.04377212, 0.15896732, -0.14978753, -0.25513977, 0.0025451197, 0.41349715, -0.28266567, -0.25201386, 0.54169023, 0.21759638, 0.09750061)); + target2 += mul(nf1, float4x4(0.2587435, 0.18573955, -0.23956883, 0.021557074, 0.020746572, 0.020505207, 0.08808335, 0.19680786, -0.061584104, -0.24666953, 0.054818455, 0.10405006, -0.24831708, -0.13658181, -0.0833388, 0.24120714)); + 
target2 += mul(ng1, float4x4(-0.29441085, 0.03656414, -0.2596772, -0.057041578, 0.1569663, -0.09881767, 0.08505022, -0.01768552, -0.051293768, -0.099874936, -0.05937486, -0.06985686, 0.055836957, 0.12361765, 0.00034758533, -0.16590339)); + target2 += mul(nh1, float4x4(0.08935089, -0.34096143, -0.06724781, 0.057755265, -0.07378229, 0.19852296, -0.101910785, -0.35927492, -0.27025247, 0.077470385, -0.26122475, 0.1062427, 0.34269986, 0.022384735, -0.19875982, -0.046883546)); + target2 += mul(ni1, float4x4(-0.023113059, 0.023807336, 0.04232145, 0.12731242, 0.079464786, 0.10940335, -0.1920892, 0.069054574, -0.046453483, -0.1463778, 0.050447907, 0.37861434, 0.19577271, -0.08457038, 0.055992957, 0.0051751668)); + target2 += mul(na2, float4x4(-0.0143063385, -0.031832237, -0.081230044, 0.15558316, 0.24739249, 0.06730676, 0.022930989, -0.060339663, 0.16502666, 0.0032860334, 0.13078189, -0.13917513, 0.083792515, 0.038169254, 0.26199514, -0.20886219)); + target2 += mul(nb2, float4x4(-0.07898116, -0.001202372, -0.1436358, -0.013730994, -0.011920493, 0.067917384, -0.032699063, -0.04563186, 0.16329978, 0.13089089, -0.29516026, -0.17357638, 0.02513132, -0.14417541, -0.0026529275, 0.028195161)); + target2 += mul(nc2, float4x4(0.1278416, -0.0652329, -0.080299005, -0.054219954, 0.15680717, -0.13077177, 0.25564823, 0.10533668, -0.10988264, 0.3860151, -0.18009946, -0.47674116, -0.10072908, 0.041740764, 0.1123633, -0.04076864)); + target2 += mul(nd2, float4x4(0.20640877, -0.24586456, -0.10259232, 0.111054115, -0.18076079, -0.22535121, -0.29812837, 0.12035098, 0.027053986, -0.10918923, -0.3172506, 0.11992493, -0.006251823, 0.2550944, -0.15903941, -0.12368186)); + target2 += mul(ne2, float4x4(-0.36056486, 0.25076455, 0.13362978, -0.033405777, -0.32817405, 0.0695707, 0.01829935, -0.08318219, -0.28797764, 0.16128948, 0.14374499, -0.025840933, 0.078341916, 0.13052103, -0.100241534, 0.10946945)); + target2 += mul(nf2, float4x4(-0.07073338, -0.112097755, 0.22497103, 0.13549447, -0.13218129, 
-0.22181363, -0.13737568, 0.06865537, -0.45603344, -0.35373682, 0.37757057, 0.1678293, -0.029289875, -0.13187636, 0.12758663, 0.04016698)); + target2 += mul(ng2, float4x4(0.040351316, -0.05093577, -0.08653635, -0.007213745, -0.3845516, -0.029778607, -0.47889423, 0.12643112, -0.11173547, 0.043787614, 0.2647412, -0.0109354155, -0.0064909635, 0.106970474, 0.11885388, 0.07061224)); + target2 += mul(nh2, float4x4(0.066118404, 0.1848857, -0.07987121, -0.044921577, -0.0753153, -0.060784195, -0.33154643, 0.116313264, 0.11995535, -0.071781196, -0.24509782, -0.0037734907, 0.037864428, -0.07115049, 0.10189698, 0.13620937)); + target2 += mul(ni2, float4x4(0.15173467, 0.117787406, -0.07836817, -0.06667758, -0.05646272, 0.33135724, -0.37163886, -0.315246, 0.22709703, 0.10267156, -0.07729526, -0.015280573, -0.015122008, 0.03244177, 0.17766209, -0.04914632)); + target2 += mul(na3, float4x4(-0.07482478, -0.13012838, -0.15759332, 0.015014935, 0.35088012, 0.007073843, 0.24014178, -0.14308095, 0.12774545, 0.18122073, -0.2547015, 0.13359042, -0.0800425, 0.14722595, -0.24495813, -0.013143742)); + target2 += mul(nb3, float4x4(-0.15018739, 0.075401075, -0.056287423, -0.21596402, -0.106415406, -0.09301949, 0.124646366, -0.07675658, 0.39950943, -0.36134976, -0.20888598, 0.11607109, 0.107587025, -0.1626161, -0.29592493, 0.01726878)); + target2 += mul(nc3, float4x4(0.042367462, 0.05787438, -0.091712154, 0.26943466, 0.018607875, -0.06306163, -0.044719845, 0.12169627, -0.0043602907, -0.11714036, -0.41751066, -0.11060246, 0.14270274, 0.105742574, 0.162418, -0.016003838)); + target2 += mul(nd3, float4x4(-0.059574872, -0.16347532, 0.24761298, 0.0098720435, 0.22380106, -0.06946215, -0.011973621, -0.09775447, -0.108043805, -0.18910232, 0.36019576, -0.21156572, 0.008830671, -0.05479997, 0.31133842, 0.037727185)); + target2 += mul(ne3, float4x4(-0.1920284, 0.17142658, 0.05467186, -0.0761654, -0.25932217, -0.16042638, 0.2197324, 0.12581502, -0.48400918, 0.5538232, 0.41044307, -0.17291741, -0.18633147, 
-0.0820574, 0.038112838, -0.18555504)); + target2 += mul(nf3, float4x4(0.13163397, 0.09845959, -0.21711256, 0.26999414, -0.053519428, -0.029779429, -0.15492497, 0.0993045, -0.16189748, -0.4925058, 0.034618095, 0.085681, 0.16100337, 0.078292616, -0.40945208, -0.15787469)); + target2 += mul(ng3, float4x4(0.20844188, 0.07127721, 0.032387372, 0.033018872, 0.15301323, -0.055671003, 0.25810188, -0.07723897, 0.17080788, 0.12247039, -0.16662452, -0.06526193, 0.1584067, 0.14825343, 0.16793022, -0.055639073)); + target2 += mul(nh3, float4x4(0.31691772, 0.03877285, -0.31999993, 0.15607259, -0.014967208, 0.17467377, -0.021213053, 0.05274054, 0.09042282, 0.3026185, -0.19465268, -0.15643322, -0.28652924, -0.12624627, -0.123150274, -0.06579748)); + target2 += mul(ni3, float4x4(-0.12737115, 0.21119869, 0.021830728, 0.25310937, 0.056086678, -0.10591854, -0.09623413, 0.09552772, 0.0077543957, -0.38552082, 0.105930105, 0.21966095, 0.03846968, -0.18900576, 0.13454477, 0.01323755)); + target2 += float4(0.15939143, -0.031111173, 0.011407361, 0.04436536); + + float4 target3 = mul(a1, float4x4(0.009852172, 0.067582026, -0.004946671, 0.10223505, -0.10428496, -0.025925757, -0.1812229, -0.086897664, -0.007505929, 0.11395492, -0.046959464, -0.040778246, -0.05989385, 0.2917696, 0.21723987, 0.104860075)); + target3 += mul(b1, float4x4(0.014223285, 0.23677638, 0.18793987, 0.1604355, -0.13773453, -0.035079855, 0.1325962, -0.01843488, 0.013658427, -0.039640892, -0.049931083, -0.17938142, -0.20694439, 0.13461684, 0.15713528, 0.061465815)); + target3 += mul(c1, float4x4(-0.045403525, 0.24799547, 0.073604435, 0.21103963, 0.18720928, -0.06703258, -0.20043622, -0.067137614, -0.19021615, -0.020830747, -0.120527774, 0.20456503, 0.07813807, -0.03798654, 0.04036844, -0.0033802337)); + target3 += mul(d1, float4x4(0.10385739, -0.06095687, 0.03991638, -0.119761765, 0.14859357, -0.2436967, -0.033547442, 0.06643773, 0.08074919, 0.00819047, 0.046388235, 0.113080844, 0.093403086, -0.25158677, -0.25340363, 
0.043133873)); + target3 += mul(e1, float4x4(0.13219471, -0.01887069, -0.26465404, 0.012883369, -0.15132889, -0.08190286, 0.1886762, -0.121109776, -0.086888224, -0.13945712, 0.10484367, -0.014462356, -0.04274967, -0.27132213, 0.035799675, 0.18913607)); + target3 += mul(f1, float4x4(-0.09799585, -0.092021905, 0.042374767, -0.040835217, 0.2207681, 0.19816661, 0.2233777, -0.09462879, 0.174727, 0.067035824, 0.25794062, 0.03356712, 0.09257895, 0.03864686, 0.061125953, -0.10119902)); + target3 += mul(g1, float4x4(-0.06601051, 0.011512823, -0.13370325, 0.0033915695, 0.0060606156, -0.049539644, -0.1075017, -0.17912976, -0.042368136, -0.011520146, 0.06601078, 0.09978972, -0.0129192965, -0.21929637, -0.018458387, -0.010336992)); + target3 += mul(h1, float4x4(0.09475364, -0.005993277, 0.09861265, -0.03103845, -0.09583529, 0.058791347, 0.3326168, -0.11607132, -0.028693356, 0.11996273, -0.085005276, -0.029005809, -0.29191923, 0.06689837, 0.10442381, -0.024532465)); + target3 += mul(i1, float4x4(0.0053415983, 0.06602273, 0.2246564, 0.031501666, -0.14868876, -0.0093808025, -0.34579018, -0.030635692, 0.092459954, -0.039631926, -0.0085174255, 0.022645477, 0.07514861, 0.060552113, 0.06597399, 0.0397574)); + target3 += mul(a2, float4x4(0.0511157, 0.035152495, -0.19926861, -0.063245155, 0.13407591, -0.1288526, 0.23185496, -0.13392815, -0.09297701, -0.2809779, 0.07520502, 0.022971572, -0.08222417, 0.27030075, -0.10483476, 0.16590133)); + target3 += mul(b2, float4x4(-0.30276665, 0.17359875, -0.06072175, -0.036741048, 0.12927541, -0.11135696, -0.1655047, -0.05786224, -0.10111195, 0.03949635, -0.11618777, -0.0020339678, -0.16504388, 0.21188384, -0.24663985, -0.16005285)); + target3 += mul(c2, float4x4(-0.18182081, -0.05039593, -0.004631986, -0.03583777, 0.025373073, -0.33103603, -0.12213051, -0.10148533, -0.091509394, -0.13529696, 0.08307632, -0.0025659746, -0.5024331, 0.14926323, 0.05118105, -0.26585025)); + target3 += mul(d2, float4x4(0.17135173, -0.23068328, 0.16332187, 0.06196188, 
0.0034444374, 0.044823382, 0.010302396, 0.06775431, 0.024591392, -0.054694094, -0.048208185, -0.055681854, 0.08873465, -0.14074552, -0.027211398, 0.23973261)); + target3 += mul(e2, float4x4(-0.12319059, -0.11429906, -0.36435902, 0.19346023, -0.030743172, 0.383669, 0.009289978, 0.0010576686, 0.17045438, -0.007082544, 0.16374406, 0.32419837, 0.15915118, -0.1356684, -0.13062716, -0.15574396)); + target3 += mul(f2, float4x4(-0.19135374, -0.2005694, -0.0020886995, 0.01221146, -0.40867472, -0.081229836, -0.16124476, 0.08071337, 0.09998693, -0.061099563, 0.13301264, -0.043883327, 0.11045742, -0.2383618, 0.18627192, -0.35225677)); + target3 += mul(g2, float4x4(0.37673324, 0.34416032, -0.103333436, 0.13149537, 0.006451586, 0.094312094, 0.08832807, 0.09592083, -0.116452016, 0.1066464, -0.115603626, -0.13193515, -0.13174447, -0.18561727, 0.13020653, -0.13364927)); + target3 += mul(h2, float4x4(-0.1834991, 0.11584597, -0.04156077, -0.12755936, 0.09659385, 0.08903952, 0.31892854, -0.01448324, -0.008864266, -0.039691996, -0.08322731, 0.095220886, -0.090739064, 0.092071235, -0.2817547, -0.29630283)); + target3 += mul(i2, float4x4(-0.24098976, 0.101338394, -0.28956947, 0.07237588, -0.10666849, 0.13332452, -0.20815872, -0.00023775037, -0.04327956, 0.0029107686, -0.008416182, 0.097931474, -0.37501606, -0.018609088, -0.10432809, -0.034832)); + target3 += mul(a3, float4x4(-0.08794669, 0.028736163, -0.17888173, -0.06455644, 0.23870508, -0.23358688, 0.072483465, -0.0085282335, -0.12771352, 0.0380899, -0.25210154, -0.010397481, -0.034966666, 0.08883341, -0.22751594, -0.18751557)); + target3 += mul(b3, float4x4(-0.04474889, 0.098189436, -0.10426362, -0.35184658, 0.043526888, -0.36088315, 0.13278794, 0.39718434, 0.0091220355, 0.0041375947, 0.17093311, 0.21236257, -0.10007804, -0.020010212, 0.111889765, 0.17784196)); + target3 += mul(c3, float4x4(0.1536085, -0.026586162, 0.12273445, -0.0801658, 0.20678692, -0.11288633, -0.21298888, 0.4272659, -0.027916932, 0.13641946, 0.08454202, 
0.15072668, -0.36861306, 0.09071778, -0.23418477, 0.3515129)); + target3 += mul(d3, float4x4(-0.035221044, -0.05102627, 0.09558761, -0.008040629, -0.028807933, 0.303477, -0.20610638, -0.044902515, -0.19755092, -0.030813612, -0.17718953, -0.17694841, 0.03633824, 0.13118435, 0.029816214, -0.25406656)); + target3 += mul(e3, float4x4(-0.14495026, 0.12816216, 0.16447757, 0.031679, -0.044026595, 0.19292583, 0.33260253, 0.10592396, 0.035314452, 0.027002327, 0.06259657, 0.20517996, 0.47153056, 0.33924934, 0.20535454, -0.25056842)); + target3 += mul(f3, float4x4(-0.18414135, 0.19427143, -0.081241705, -0.01675198, -0.006523895, -0.07718575, -0.09325943, -0.08737854, -0.028951872, -0.17328268, -0.08241532, -0.08835567, -0.21019256, 0.16430716, 0.06124939, 0.035649084)); + target3 += mul(g3, float4x4(0.016590597, -0.0022054093, -0.1470956, -0.08206723, -0.10903706, 0.0064417794, -0.18528567, -0.34366733, -0.06885517, 0.16456494, -0.018355783, 0.17814603, -0.07158972, -0.28178605, -0.20745122, -0.099933885)); + target3 += mul(h3, float4x4(0.0065370193, -0.3044895, 0.055263646, 0.075996794, 0.024025463, -0.20411102, -0.01592019, -0.18464315, -0.08999649, -0.048265222, -0.08978591, -0.09877855, 0.4285961, -0.32419163, -0.10149259, -0.112745434)); + target3 += mul(i3, float4x4(0.06848249, -0.11997486, 0.05998702, -0.024274347, 0.31168294, -0.017959671, 0.34084293, -0.34249943, 0.11690563, -0.11239628, 0.2128415, 0.017396145, -0.14399542, -0.13887884, 0.041329246, -0.099762276)); + target3 += mul(na1, float4x4(0.11233947, 0.22560616, -0.14853792, 0.0025890833, -0.09261338, 0.026878078, -0.13337374, 0.14549397, 0.105454646, 0.20769532, 0.086635984, 0.05085061, 0.16791524, -0.031017043, 0.043081395, 0.18300867)); + target3 += mul(nb1, float4x4(0.21579266, -0.29286692, -0.05484676, 0.050306555, -0.1338697, 0.173389, -0.31768104, 0.051908243, 0.03826524, 0.48232502, 0.247302, 0.3487058, 0.28926015, 0.09935225, -0.18840632, 0.08882374)); + target3 += mul(nc1, float4x4(-0.13517806, 
-0.40837446, -0.14227843, -0.27239424, -0.3059563, 0.063552625, 0.2631879, 0.56351787, 0.22826865, -0.065214194, 0.22837348, -0.4106273, -0.05822978, 0.13954113, 0.13192196, 0.031006072)); + target3 += mul(nd1, float4x4(0.14163558, -0.13529845, 0.063847534, 0.066068165, 0.041967303, 0.21868911, 0.22319448, 0.00028246938, 0.07615932, -0.3879002, 0.039115347, -0.08572038, -0.24845092, 0.13100919, 0.253391, 0.22104543)); + target3 += mul(ne1, float4x4(0.15111032, 0.18182786, 0.22756334, -0.008264583, -0.14041592, -0.5454497, -0.5890025, -0.010277932, 0.17194566, -0.14571565, -0.15525545, 0.17403725, 0.09960832, -0.016455285, -0.3584658, 0.3162123)); + target3 += mul(nf1, float4x4(-0.21418694, 0.20062631, -0.0948598, -0.09684068, 0.13923916, -0.09331503, -0.0857385, 0.21380557, 0.020379147, 0.21428087, 0.07388917, 0.089147344, -0.20121545, -0.023731146, 0.3213483, 0.17458193)); + target3 += mul(ng1, float4x4(0.10643249, -0.00972507, 0.022902627, 0.29897237, 0.3299111, 0.085025065, 0.21370107, 0.20150943, -0.30700487, -0.1884155, 0.2567519, 0.00021575678, -0.23836862, 0.35121775, 0.13285181, -0.08186422)); + target3 += mul(nh1, float4x4(0.2257978, -0.044991307, -0.19195051, -0.067948796, -0.119921125, 0.0917341, -0.12133957, -0.0779332, -0.30272362, 0.29493997, -0.1241099, 0.57692826, -0.07721382, 0.23687507, -0.34731215, 0.25507957)); + target3 += mul(ni1, float4x4(0.45565096, -0.15130155, -0.10940018, -0.44904032, -0.15766421, 0.20778829, 0.21851856, 0.1678922, -0.08152111, 0.1772852, -0.2632565, 0.20217079, -0.014408843, 0.021441357, 0.001290681, 0.036566503)); + target3 += mul(na2, float4x4(-0.1125441, 0.019155068, 0.17547919, 0.010120996, -0.13596916, 0.06777857, -0.18246579, -0.07407904, 0.052627467, -0.12339828, 0.09181331, -0.039406203, 0.06598462, -0.20432016, 0.081749015, 0.016382169)); + target3 += mul(nb2, float4x4(-0.036727224, 0.05656203, 0.03722875, -0.19623469, -0.12215404, 0.15154731, 0.14583679, 0.13441144, -0.053393483, 0.3686272, -0.55303735, 
-0.18231596, -0.077573985, -0.17017044, 0.007368884, 0.06776619)); + target3 += mul(nc2, float4x4(-0.14654772, 0.0642838, 0.21146652, 0.1479552, 0.056684785, 0.08816999, 0.122986056, 0.025765898, 0.03539519, 0.2605786, 0.13069612, 0.2733717, 0.17476462, 0.04350784, -0.11019324, -0.085290305)); + target3 += mul(nd2, float4x4(0.070716664, -0.034127012, -0.17524591, -0.01733187, -0.0786243, -0.27215844, 0.14767595, -0.028927185, -0.268044, 0.17461409, 0.18505631, -0.07551577, -0.17443672, 0.33642206, -0.10371784, -0.09501668)); + target3 += mul(ne2, float4x4(0.41137508, 0.0039632237, -0.46695748, -0.14406478, -0.03359041, -0.26271477, -0.16481699, -0.23792827, -0.004218498, -0.082102485, -0.5725909, -0.19159988, -0.02892404, 0.018914402, 0.12181954, -0.045662787)); + target3 += mul(nf2, float4x4(0.059361387, -0.06857852, 0.29324803, 0.018895477, 0.5373943, -0.27193475, 0.19632731, 0.07846825, 0.46329513, -0.2541983, 0.3154752, 0.034318704, 0.07056785, 0.09071147, 0.018257828, 0.051888023)); + target3 += mul(ng2, float4x4(-0.01574486, 0.025618032, 0.013175459, -0.05611416, 0.1575709, -0.26184332, 0.14741376, -0.09205734, 0.15314968, -0.033463627, 0.27498275, -0.036637545, 0.07326095, -0.02758785, -0.008260478, -0.033497095)); + target3 += mul(nh2, float4x4(-0.043722082, -0.0053890822, -0.006806792, 0.078156926, 0.28195116, -0.19398853, -0.08171706, -0.13839847, -0.2289849, 0.13469638, -0.3921867, -0.12275613, -0.07401059, -0.08434314, 0.080387615, -0.023079267)); + target3 += mul(ni2, float4x4(-0.18200518, 0.15944996, -0.023199113, 0.03136635, 0.0124006495, -0.30606514, -0.008949306, -0.051349495, -0.039797418, 0.06404064, 0.089617595, 0.08849374, 0.07658024, 0.089631304, -0.021066017, 0.033190813)); + target3 += mul(na3, float4x4(-0.00024305849, -0.005267714, -0.110663235, -0.1235196, 0.012671297, 0.09886661, 0.2275749, 0.060427323, -0.08923832, 0.06393884, 0.08300952, 0.1602908, 0.08163518, 0.2060279, 0.079629295, 0.09073638)); + target3 += mul(nb3, 
float4x4(0.09152832, -0.13788883, -0.0003542848, -0.41267133, 0.06780486, 0.14365548, 0.093432106, -0.14731646, 0.06173505, -0.07698671, 0.13692452, 0.07682172, 0.019543113, -0.07077461, 0.018755287, -0.01956884)); + target3 += mul(nc3, float4x4(-0.025053086, -0.25834808, 0.27484947, 0.0542093, 0.04252427, 0.11526509, -0.12203397, -0.11198108, -0.122830786, 0.01729993, -0.26451054, 0.08480724, 0.27700573, -0.108406745, 0.21402664, -0.19032313)); + target3 += mul(nd3, float4x4(-0.01717388, -0.21526851, -0.100241624, -0.33087742, 0.059813473, 0.11764993, 0.046627797, 0.10824857, -0.033078045, -0.07022686, 0.05082029, -0.18696983, 0.06715326, -0.24360675, -0.028528497, 0.03166471)); + target3 += mul(ne3, float4x4(0.18515922, -0.19291987, -0.18511395, -0.16356386, 0.10254811, -0.14086638, -0.05160376, -0.07413514, -0.17066383, -0.1579425, -0.102077566, -0.0865959, -0.06315448, -0.12120578, -0.24788655, 0.045280393)); + target3 += mul(nf3, float4x4(-0.23578815, 0.17898968, -0.076395154, -0.068680935, -0.19281612, 0.0008040003, -0.13505532, -0.010227741, 0.35332572, 0.10865026, 0.14042157, -0.07081392, -0.16093336, 0.02813831, -0.1142879, 0.07591385)); + target3 += mul(ng3, float4x4(-0.10260084, 0.25031334, 0.030233473, 0.18198064, 0.06648322, -0.06662831, 0.29892904, 0.10685262, -0.099214576, 0.20269664, 0.14236231, -0.26043925, 0.17721854, 0.105084695, 0.11604949, 0.07190215)); + target3 += mul(nh3, float4x4(-0.006627316, -0.075635955, 0.37108654, -0.0919607, -0.12985592, 0.112184614, -0.16726716, -0.03870482, 0.01480095, 0.2044233, -0.051372997, 0.21520257, -0.08743421, 0.13153918, -0.15077852, 0.12159706)); + target3 += mul(ni3, float4x4(0.009482551, 0.12501961, 0.38921976, -0.1280031, -0.103060484, -0.027821409, 0.0720024, -0.027280543, 0.056729473, 0.048927493, -0.035154913, -0.08341783, 0.23103711, 0.046025522, 0.17039533, -0.014161812)); + target3 += float4(-0.09157235, -0.025660357, 0.076311104, -0.13737188); + + float3 result = tex7.SampleLevel(sam, pos, 
0).rgb; + result += mul(e1, float4x3(0.012708346, 0.014336439, 0.012533707, -0.0019346073, -0.0070978077, -0.009478742, -0.011659758, -0.009855903, -0.008657096, 0.0098037105, 0.010785594, 0.008409619)); + result += mul(e2, float4x3(0.0056228717, 0.013483413, 0.008108323, -0.0013697809, 0.0026797573, 0.0037666177, 0.0130932415, 0.019868238, 0.01968549, 0.011160769, 0.012374028, 0.012855804)); + result += mul(e3, float4x3(0.0011662204, 0.00025071716, 0.0022244148, -0.017808594, -0.013589306, -0.01396329, -0.008117086, -0.0068251803, -0.004963602, -0.0069141523, -0.009125296, -0.008327947)); + result += mul(ne1, float4x3(-0.027597412, -0.02631107, -0.022816146, 0.009350171, 0.013661565, 0.015324706, 0.032538984, 0.02918167, 0.026186563, 0.018760988, 0.024502547, 0.023201061)); + result += mul(ne2, float4x3(0.013216693, 0.00991115, 0.01178417, 0.0076343333, 0.004714098, 0.0074490295, -0.0064893183, -0.014818341, -0.01199717, -0.008334491, -0.009955103, -0.011240684)); + result += mul(ne3, float4x3(-0.013846397, -0.012687341, -0.015767701, -0.0019117722, -0.0072347773, -0.0074835457, 0.013531867, 0.014263165, 0.012797156, 0.008260445, 0.0070536416, 0.0065693366)); + result += mul(max(target1, 0), float4x3(0.0017003485, 0.0021871394, 0.0003407296, 0.0054420815, 0.00801073, 0.008788295, -0.012685104, -0.0150940735, -0.017530257, -0.030698642, -0.030817484, -0.028548386)); + result += mul(max(target2, 0), float4x3(-0.008882145, -0.008943836, -0.007986094, -0.010494911, -0.011511255, -0.00892924, 0.014072905, 0.014985031, 0.011853883, -0.015823284, -0.017817877, -0.01684662)); + result += mul(max(target3, 0), float4x3(0.012270136, 0.011127063, 0.010729208, 0.00027298275, 0.001011805, 0.001318525, 0.0029811305, 0.0029161042, 0.0060088155, 0.00021241597, -0.0013439909, 0.0013205905)); + result += mul(max(-target1, 0), float4x3(-0.03467924, -0.035764243, -0.03348244, 0.023858175, 0.02580526, 0.026217844, -0.016814101, -0.016412167, -0.012021982, -0.0007905926, -0.0019904284, 
-0.0015143935)); + result += mul(max(-target2, 0), float4x3(0.046779703, 0.04961137, 0.046104047, -0.023665644, -0.022809561, -0.02236428, -0.054706786, -0.056090504, -0.052543454, -0.015520943, -0.01587306, -0.0142722875)); + result += mul(max(-target3, 0), float4x3(0.020273875, 0.020399818, 0.021745082, 0.037485637, 0.039574977, 0.03556703, 0.036673885, 0.04102765, 0.033708427, 0.024422405, 0.027724478, 0.0252598)); + result += float3(-0.0036656514, 0.006677459, 0.007698717); + + result += INPUT.SampleLevel(sam, pos, 0).rgb; + WriteToOutput(gxy, result.rgb); +} diff --git a/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl new file mode 100644 index 000000000..113453ced --- /dev/null +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl @@ -0,0 +1,1293 @@ +// Anime4K_Restore_CNN_Soft_VL +// Ported from https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_Soft_VL.glsl + +//!MAGPIE EFFECT +//!VERSION 3 +//!OUTPUT_WIDTH INPUT_WIDTH +//!OUTPUT_HEIGHT INPUT_HEIGHT +//!SORT_NAME Anime4K_Restore_Soft_2 + + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex6; + +//!PASS 1 +//!DESC Conv-4x3x3x3 +//!IN INPUT +//!OUT tex1, tex2 +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 + +void Pass1(uint2 blockStart, uint3 
threadId) { + uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + + uint i, j; + + float3 src[4][4]; + [unroll] + for (i = 0; i <= 2; i += 2) { + [unroll] + for (j = 0; j <= 2; j += 2) { + float2 tpos = (gxy + uint2(i, j)) * inputPt; + const float4 sr = INPUT.GatherRed(sam, tpos); + const float4 sg = INPUT.GatherGreen(sam, tpos); + const float4 sb = INPUT.GatherBlue(sam, tpos); + + // w z + // x y + src[i][j] = float3(sr.w, sg.w, sb.w); + src[i][j + 1] = float3(sr.x, sg.x, sb.x); + src[i + 1][j] = float3(sr.z, sg.z, sb.z); + src[i + 1][j + 1] = float3(sr.y, sg.y, sb.y); + } + } + + [unroll] + for (i = 1; i <= 2; ++i) { + [unroll] + for (j = 1; j <= 2; ++j) { + uint2 destPos = gxy + uint2(i - 1, j - 1); + + if (i != 1 || j != 1) { + if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) { + continue; + } + } + + float4 target1 = mul(src[i - 1][j - 1], float3x4(0.14361712, -0.16690509, 0.37253398, -0.45202538, -0.21331833, -0.32675815, -0.33971128, 0.20261937, -0.20606318, -0.215143, -0.079716705, 0.15640882)); + target1 += mul(src[i - 1][j], float3x4(-0.17360486, -0.3435545, 0.08199117, 0.56259036, -0.120246716, 0.24312893, -0.021436244, -0.11864853, 0.19452724, 0.106943935, -0.077393375, -0.3503661)); + target1 += mul(src[i - 1][j + 1], float3x4(-0.072465785, 0.2772823, 0.25493625, 0.3098145, -0.115831695, 0.072458096, -0.014782132, -0.15310249, 0.12178311, -0.015555423, -0.2229811, 0.16469522)); + target1 += mul(src[i][j - 1], float3x4(-0.18652022, -0.30702665, -0.59921896, 0.079824045, 0.4426619, 0.049343713, 0.44902903, -0.2711445, 0.20470268, -0.029203767, 0.29092675, 0.15562426)); + target1 += mul(src[i][j], float3x4(-0.21041247, 0.48450592, -0.110547826, 0.3842122, 0.5303875, -0.26512837, 0.19846216, 0.045673862, 0.12773214, -0.05117536, -0.03510946, -0.30123934)); + target1 += mul(src[i][j + 1], float3x4(0.3186735, 
0.052702922, -0.12499774, 0.055628903, -0.16476867, 0.12642322, -0.18314636, 0.018323101, -0.3609603, 0.25649396, 0.3185421, -0.0057759956)); + target1 += mul(src[i + 1][j - 1], float3x4(0.16603558, -0.09259665, -0.28760567, -0.14319661, 0.12511417, -0.12551902, -0.00070228375, 0.20914114, -0.22466865, 0.1064727, 0.32598525, -0.08596318)); + target1 += mul(src[i + 1][j], float3x4(-0.03163653, 0.026722813, -0.4361858, -0.21164834, 0.4176763, 0.08203146, 0.35289326, -0.06128859, 0.20506798, -0.07098943, 0.1807802, 0.2658414)); + target1 += mul(src[i + 1][j + 1], float3x4(-0.09821681, 0.058886815, 0.39192092, -0.06791861, -0.15682612, 0.09503328, -0.23400265, 0.026475023, -0.08800713, -0.043749645, -0.18024494, -0.08045564)); + target1 += float4(-0.040999945, 0.075765304, -0.0911532, -0.10705836); + + float4 target2 = mul(src[i - 1][j - 1], float3x4(-0.16406488, -0.2506693, -0.15592022, -0.05529256, -0.3997277, -0.229681, -0.07762124, 0.1843808, 0.07895815, 0.14437248, 0.219114, -0.048090722)); + target2 += mul(src[i - 1][j], float3x4(-0.2150676, 0.09080163, 0.19598733, -0.40578827, -0.33846557, -0.02518622, 0.037079208, 0.20188439, -0.013777575, -0.2369007, -0.30985412, 0.0411912)); + target2 += mul(src[i - 1][j + 1], float3x4(0.119948365, 0.23014452, -0.14962277, -0.096262485, 0.09625151, 0.2025487, 0.03933539, 0.12268028, -0.24373281, 0.19730613, 0.11634144, 0.12293635)); + target2 += mul(src[i][j - 1], float3x4(0.08030697, -0.40114692, 0.21532272, 0.20222071, 0.073098, -0.004463858, 0.02820587, -0.18861918, -0.20994501, -0.12444653, -0.23178193, -0.13965288)); + target2 += mul(src[i][j], float3x4(0.14150894, 0.14563078, 0.697704, 0.20918849, 0.26776335, -0.34291518, 0.06394055, 0.17925078, 0.4165139, -0.042595536, 0.105312675, 0.231854)); + target2 += mul(src[i][j + 1], float3x4(0.024318576, 0.16668217, 0.0729521, -0.7071404, 0.3121693, 0.37295797, -0.015632952, 0.33763757, 0.00706697, 0.10836652, -0.11132417, 0.292844)); + target2 += mul(src[i + 1][j - 1], 
float3x4(-0.14489831, 0.0027769986, -0.24509215, 0.5557927, -0.1104541, 0.005070684, -0.020032275, -0.5642205, 0.16048644, 0.07248175, 0.20387374, -0.38145426)); + target2 += mul(src[i + 1][j], float3x4(0.33140838, -0.007438425, 0.26074782, 0.15947102, 0.219755, -0.14690271, -0.07412696, -0.24176367, -0.2230114, 0.027256912, -0.11255796, -0.05882673)); + target2 += mul(src[i + 1][j + 1], float3x4(-0.19712369, 0.003842208, -0.10893768, 0.09047115, -0.10260409, 0.18662766, 0.009733428, 0.0039940844, -0.006444674, -0.15196493, 0.06641555, -0.06169452)); + target2 += float4(-0.029148052, -0.03215124, -0.6175828, 0.057135154); + + tex1[destPos] = target1; + tex2[destPos] = target2; + } + } +} + +//!PASS 2 +//!DESC Conv-4x3x3x16 +//!IN tex1, tex2 +//!OUT tex3, tex4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass2(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 
= max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(0.05195995, 0.15220909, -0.24354807, -0.109075695, 0.020483498, -0.14830725, 0.0018816335, -0.0072673927, 0.0649385, 0.046050787, -0.10789607, -0.046609525, -0.11455093, -0.009358115, 0.11280759, 0.18053898)); + target1 += mul(b1, float4x4(-0.08619698, 0.091353096, -0.16379662, 0.07822936, 0.072919995, 0.1482446, 0.17846228, 0.04639898, -0.18998149, 0.1653338, -0.44187957, -0.010017503, -0.069953404, 0.08784785, -0.16391355, 0.35095468)); + target1 += mul(c1, float4x4(0.088297926, 0.27259287, 0.013088447, 0.023461785, 0.10037149, -0.017414214, 0.08559885, -0.10822335, 0.10591637, 0.17240539, 0.15749931, 0.026641782, 0.11889612, -0.018095117, -0.08736018, 0.09934933)); + target1 += mul(d1, 
float4x4(0.21426749, 0.0800268, -0.19816414, 0.07693414, 0.026270509, -0.11724047, 0.026078718, 0.13080709, 0.12207936, 0.056103867, -0.323923, -0.111454345, 0.059245165, 0.07257926, 0.032195322, 0.27225617)); + target1 += mul(e1, float4x4(-0.20130268, 0.026809234, -0.0020803472, -0.04394057, -0.1982125, -0.033678252, -0.12881789, 0.0025656687, 0.14193355, -0.2541802, -0.13239717, -0.05983356, -0.029376393, 0.33187667, 0.14438996, -0.21993925)); + target1 += mul(f1, float4x4(-0.12772562, 0.022498213, 0.24753313, 0.07440761, -0.17564529, 0.09971503, -0.013372, 0.09459552, -0.21597451, -0.40116546, 0.23446435, 0.1515452, 0.050813515, 0.19662157, -0.10604596, -0.24638489)); + target1 += mul(g1, float4x4(0.23866327, -0.2706382, -0.07480157, 0.03789501, 0.117716484, -0.095995456, -0.0435066, 0.013025061, -0.029759895, -0.036287807, 0.08570493, 0.030151363, 0.18863682, -0.27228612, 0.020479294, -0.07058746)); + target1 += mul(h1, float4x4(0.0026758043, -0.20750894, 0.2802277, -0.07761428, -0.012089615, -0.112726666, -0.03867965, -0.085082226, 0.034227375, 0.19662802, 0.26272395, 0.036822405, -0.23040786, -0.20173554, 0.07110236, -0.26939383)); + target1 += mul(i1, float4x4(-0.14012688, -0.067249745, 0.14726773, -0.0070919944, 0.19275497, 0.04460783, 0.18776374, -0.019941995, -0.076159865, 0.002261728, 0.238768, 0.039375026, 0.13200568, -0.023286859, 0.034387972, -0.01827453)); + target1 += mul(a2, float4x4(-0.0107542025, -0.13001555, 0.06596806, -0.03370635, -0.024291076, 0.10367739, -0.03396605, 0.041960735, 0.16230568, 0.024845246, -0.016806586, 0.22547007, -0.025378102, 0.064547986, -0.2113137, 0.042272836)); + target1 += mul(b2, float4x4(-0.2219356, -0.049535394, 0.10289468, 0.14175911, 0.013058568, -0.15909089, -0.02546921, 0.11721571, 0.13020545, 0.39660174, -0.07601573, -0.16366366, -0.023935124, 0.06681424, -0.26143414, -0.07485668)); + target1 += mul(c2, float4x4(0.1405031, -0.0645044, -0.15865614, 0.1829069, -0.22526503, 0.08991175, 0.041972812, -0.012462953, 
0.3022753, 0.19457603, 0.022607598, -0.25460255, 0.028327515, 0.14420614, -0.077984214, 0.09278112)); + target1 += mul(d2, float4x4(0.13224132, 0.13115089, -0.188987, -0.19428022, -0.080641985, 0.20909777, 0.067079, -0.19832124, 0.13150498, 0.04450851, -0.2770351, -0.010381239, 0.32295567, 0.04445836, 0.030657565, 0.020271502)); + target1 += mul(e2, float4x4(-0.08188993, 0.039709873, 0.16059989, -0.13279189, 0.11389818, -0.071865685, 0.09312801, -0.08816363, -0.65844774, -0.6854379, 0.21431407, 0.597198, -0.3734657, -0.116027676, 0.015932929, -0.0653176)); + target1 += mul(f2, float4x4(0.24136105, 0.21444799, -0.14235207, 0.08445492, 0.017335927, -0.49877876, -0.06224622, 0.1571534, 0.035594277, 0.059829034, 0.087631516, -0.17090686, -0.005452869, 0.13786094, 0.27586326, 0.046760406)); + target1 += mul(g2, float4x4(0.095078, 0.30310658, 0.010268592, 0.18540539, -0.20722823, -0.0005848572, -0.06464327, -0.111019135, -0.07837157, -0.12183798, -0.09187498, -0.3368629, -0.08216629, -0.20095807, 0.009563313, 0.024838416)); + target1 += mul(h2, float4x4(0.28712475, 0.0641969, -0.034764312, 0.13600683, -0.09211094, 0.009699817, -0.001104855, -0.026146285, 0.33425868, -0.16132632, 0.18051304, -0.104004376, 0.20768233, 0.0888418, 0.050057285, -0.020228952)); + target1 += mul(i2, float4x4(0.11642946, -0.021900529, -0.08910504, 0.15492517, -0.19726521, 0.1434987, -0.24708387, 0.006737377, 0.11353539, -0.15897587, -0.029491093, 0.06002862, -0.09640613, -0.11342702, 0.21375169, 0.0062719737)); + target1 += mul(na1, float4x4(-0.15513068, -0.3151456, 0.20799752, -0.07449935, -0.09226967, 0.112302735, -0.16211908, -0.37986508, -0.27418482, -0.10445544, 0.21112369, -0.06780466, -0.062341, 0.07758948, -0.012719117, -0.16481343)); + target1 += mul(nb1, float4x4(0.16382848, 0.14490448, -0.012869055, 0.1804095, -0.05304844, -0.14624795, -0.14816979, -0.17435774, 0.25356865, 0.11435022, 0.19412366, 0.19499794, -0.10189348, 0.023880519, 0.16822465, -0.17454338)); + target1 += mul(nc1, 
float4x4(0.04854064, 0.11944563, 0.022984248, -0.0852543, -0.0077684796, -0.044182744, -0.02888099, 0.27452356, -0.07887827, -0.15155658, -0.12841311, -0.21202831, -0.18533322, -0.05852455, 0.0761054, -0.22115342)); + target1 += mul(nd1, float4x4(-0.21520375, 0.11415518, 0.18909843, -0.16420493, -0.20909967, -0.3257246, 0.29332343, -0.029541709, -0.1679851, 0.14073059, 0.32720464, 0.13311239, -0.0021121972, -0.08773544, -0.045532625, 0.36960867)); + target1 += mul(ne1, float4x4(0.58407414, -0.23632582, -0.16739567, 0.264173, 0.09584864, 0.18455075, 0.20051196, -0.04616608, 0.13441175, -0.0055764276, -0.08625195, 0.097847305, 0.19565724, -0.12183587, -0.11488796, 0.2520169)); + target1 += mul(nf1, float4x4(0.01584208, -0.31471413, 0.017104283, 0.0682452, 0.18728764, 0.042960413, 0.06437809, -0.14483811, 0.13882554, 0.016576322, -0.029599546, 0.034904055, -0.20939542, -0.10213055, 0.08821727, 0.0030552552)); + target1 += mul(ng1, float4x4(-0.2973797, 0.15791039, 0.10811437, -0.07947077, -0.26328024, -0.061920475, 0.12498813, 0.100570425, -0.018922925, 0.002256239, -0.094379805, -0.032315314, 0.48677605, -0.04879864, 0.028028104, -0.14557233)); + target1 += mul(nh1, float4x4(0.016148027, 0.13884154, -0.19554809, -0.006344376, -0.013450252, 0.2581758, 0.10643088, 0.23465036, -0.078438915, -0.099644944, -0.1442203, -0.2285087, 0.33528957, -0.17052084, -0.26595074, 0.14794162)); + target1 += mul(ni1, float4x4(0.041404825, -0.0813985, -0.19863169, -0.008302881, 0.023570588, -0.043578386, -0.20971186, 0.14654282, 0.048436746, 0.11266723, -0.25812748, -0.03340969, -0.18430679, -0.046258014, -0.007674466, -0.037139155)); + target1 += mul(na2, float4x4(-0.060693484, -0.08285047, 0.06638212, 0.18479855, 0.11099276, -0.14470962, 0.16915078, 0.32247669, -0.10845523, 0.0027510398, -0.014941873, -0.15779859, 0.051481526, -0.14748912, 0.12125527, -0.059839584)); + target1 += mul(nb2, float4x4(0.27571446, 0.01663349, -0.057985745, -0.089736536, -0.09541078, 0.18101417, 0.084854685, 
0.11060913, 0.05631825, 0.066835634, -0.02837782, -0.049748126, -0.050051138, -0.05126488, 0.27121767, 0.06331115)); + target1 += mul(nc2, float4x4(-0.13630085, -0.03787764, 0.13351586, -0.024081819, 0.10403757, -0.0034796793, -0.04838045, -0.064052396, -0.34672704, -0.06271465, -0.024577484, -0.13450806, -0.013759927, 0.11706738, 0.07913658, -0.016639082)); + target1 += mul(nd2, float4x4(-0.023730129, 0.020174952, 0.048988737, -0.013395666, 0.0073305597, 0.059409764, -0.27721968, 0.13349204, -0.022947624, 0.112007216, -0.008175606, -0.14903043, -0.35755506, -0.02145208, -0.021762518, -0.17889674)); + target1 += mul(ne2, float4x4(0.19315337, 0.16287236, -0.07667863, -0.020898499, -0.021058874, -0.20849414, -0.3571716, -0.13001479, 0.44977963, 0.016706442, -0.03471178, 0.35189477, 0.3050666, -0.019236205, 0.16278796, 0.3093703)); + target1 += mul(nf2, float4x4(-0.1507458, -0.13747548, -0.05822537, 0.16035356, -0.08386089, -0.03476887, -0.0022021863, -0.032772254, 0.17572841, 0.004200287, 0.045312192, 0.27265742, -0.037853006, -0.056344658, -0.3095155, 0.15215549)); + target1 += mul(ng2, float4x4(0.11428048, -0.19523771, 0.016499955, -0.03625986, 0.15670861, -0.077859454, -0.059640404, 0.023970904, -0.009806148, 0.0904747, -0.006978744, 0.15938658, 0.030886533, 0.13507655, -0.002613293, -0.1335748)); + target1 += mul(nh2, float4x4(-0.20070468, 0.06281564, -0.026250493, -0.042895693, -0.06574456, 0.10412931, 0.12061968, -0.0750467, -0.10865931, -0.05715226, -0.022071969, 0.02608941, -0.21416737, -0.18582128, -0.091236554, -0.044943426)); + target1 += mul(ni2, float4x4(-0.057988428, 0.21430638, -0.17991407, -0.051662743, 0.060244065, -0.021494022, -0.018070806, -0.09278776, -0.011404125, 0.064091586, 0.12852973, -0.16610947, 0.08740408, 0.045517463, -0.27932477, 0.11050971)); + target1 += float4(0.012687187, -0.11876551, -0.041985378, -0.10110911); + + float4 target2 = mul(a1, float4x4(-0.07579397, 0.008718031, 0.03874428, -0.022123579, 0.064964466, -0.27502275, 
-0.0053009577, 0.11669645, 0.007708085, 0.009340055, -0.13001843, -0.03758108, -0.07045759, -0.08749642, -0.21329811, 0.13205966)); + target2 += mul(b1, float4x4(-0.14087188, -0.12068241, 0.046639618, 0.05115712, 0.108357444, -0.05040456, 0.03280633, 0.09336891, -0.055509757, -0.036777936, 0.043575723, -0.041975956, -0.17782387, -0.12977566, -0.0736514, 0.17304243)); + target2 += mul(c1, float4x4(-0.2638534, 0.0385968, 0.14743716, 0.18057759, -0.036564615, 0.107838504, 0.08324167, 0.13403444, -0.41366392, 0.072824344, -0.013165103, 0.06114856, -0.040475495, -0.14222278, 0.10455181, 0.0021660402)); + target2 += mul(d1, float4x4(0.30221993, -0.06315301, 0.057081617, -0.020285107, 0.053984016, 0.13086873, -0.30863532, 0.028010197, 0.0070908144, 0.19940577, -0.013766302, -0.039389495, 0.28064504, 0.05970737, 0.074613005, -0.10217121)); + target2 += mul(e1, float4x4(0.042094592, -0.1725651, 0.3514404, 0.008126955, 0.08739713, 0.081543595, -0.12912413, 0.0854203, 0.28885832, 0.107783586, 0.22996111, 0.13907135, 0.071920335, -0.15172984, 0.07151959, 0.1406894)); + target2 += mul(f1, float4x4(-0.1072496, 0.03934067, 0.20014063, 0.051399443, -0.29610988, 0.18659018, -0.17761967, 0.08701774, -0.17493258, -0.08035252, 0.03155133, -0.13986085, 0.023490375, 0.083998375, 0.014006612, 0.03860323)); + target2 += mul(g1, float4x4(0.09324427, 0.10990628, -0.18758917, 0.0054821614, -0.09425237, 0.1192338, -0.063183226, -0.15047066, 0.15664004, 0.037881903, -0.06762073, 0.09622682, 0.028449943, -0.25338468, -0.18897526, -0.18360007)); + target2 += mul(h1, float4x4(0.030310342, 0.2083269, -0.04938559, -0.009608655, 0.019751158, 0.12257741, 0.090964966, -0.09864261, 0.058817703, -0.053385522, 0.15931179, -0.10585003, 0.06986225, 0.3435001, -0.33307528, -0.14035752)); + target2 += mul(i1, float4x4(0.13506691, -0.00015406386, -0.15279713, -0.2290177, 0.019568326, 0.41041428, 0.10566904, -0.08350839, 0.19839814, -0.31052053, -0.04471875, 0.07629561, -0.117245845, 0.19819061, 0.1683647, 
0.11896638)); + target2 += mul(a2, float4x4(0.06920538, 0.2656798, -0.06529862, -0.1695985, -0.21614018, 0.17208195, 0.123307765, -0.061470803, 0.07827313, -0.18543327, 0.0937214, 0.098630935, -0.17667519, -0.01978596, -0.09126346, -0.034487445)); + target2 += mul(b2, float4x4(0.030779282, -0.24423946, -0.08623178, 0.1490136, 0.029337894, 0.17548573, -0.05990294, -0.29123273, -0.10020608, -0.3527181, -0.105286725, 0.27502912, -0.25686985, 0.18521136, -0.110095225, -0.07999611)); + target2 += mul(c2, float4x4(-0.03266192, 0.045139533, -0.03275437, -0.13748369, 0.15633966, 0.089048125, -0.07592367, -0.09013536, -0.18907873, 0.08265683, -0.069233745, 0.27151683, -0.0647864, -0.15308899, 0.021954, 0.05528693)); + target2 += mul(d2, float4x4(0.10284642, -0.14667438, 0.18669777, 0.053000864, -0.12383836, -0.037600834, 0.29438737, 0.04739594, 0.07846367, -0.11676573, -0.048153553, -0.34298027, 0.028358897, 0.119508564, 0.08012271, -0.019992562)); + target2 += mul(e2, float4x4(-0.22123314, -0.2223458, 0.002969434, -0.07143056, 0.027859585, 0.010600199, 0.056626067, 0.15160584, -0.16350581, -0.044484995, -0.1805076, 0.33351076, 0.073631234, 0.0167081, 0.15704727, 0.107799366)); + target2 += mul(f2, float4x4(-0.006882137, 0.19744347, 0.041128602, 0.17459555, 0.10376277, -0.12519689, 0.0993647, -0.13044195, 0.10485074, 0.1712284, 0.13369127, 0.24649777, -0.038975652, -0.24550107, 0.19567624, -0.09961197)); + target2 += mul(g2, float4x4(0.24763626, -0.0902329, 0.21201743, 0.078442305, 0.013261817, -0.019013328, -0.07576136, 0.14993069, -0.24216306, -0.05666454, -0.064632, -0.38150248, 0.14649945, -0.020437164, -0.13821694, -0.026110074)); + target2 += mul(h2, float4x4(0.21790951, -0.08288076, 0.011415891, -0.1446542, -0.15910968, -0.21221179, -0.06154624, -0.028623452, 0.10872824, 0.17089185, 0.26339474, -0.42544034, 0.095593184, 0.20962211, 0.0034138034, 0.024243662)); + target2 += mul(i2, float4x4(-0.050784085, 0.06333505, 0.041011192, 0.17474842, 0.14517011, -0.4340653, 
-0.10313813, 0.12524489, 0.18353751, 0.4589042, -0.037463415, 0.07841999, -0.114173576, -0.10669665, 0.029463472, -0.14393249)); + target2 += mul(na1, float4x4(0.12771326, -0.06622126, 0.08327681, -0.15113758, -0.114005744, 0.059280578, 0.04071302, -0.11074485, -0.23312584, -0.032968838, 0.13736604, -0.15776984, 0.067029156, 0.0580463, 0.20655325, -0.2112593)); + target2 += mul(nb1, float4x4(0.16148107, 0.02879793, -0.24918973, 0.009605728, -0.102177374, 0.050518002, -0.00015101423, -0.046602443, 0.5081422, -0.044740383, -0.06243097, 0.076031074, 0.1157983, 0.03965003, 0.109161526, -0.36589798)); + target2 += mul(nc1, float4x4(-0.018941574, 0.000912917, -0.2585099, 0.13668273, 0.062664494, -0.09246434, -0.14594543, -0.11160076, 0.015663203, -0.02447256, -0.070794076, 0.11807077, 0.12931514, 0.14109722, -0.07506544, -0.012781477)); + target2 += mul(nd1, float4x4(-0.48816162, 0.16294348, 0.011336221, 0.107038386, -0.01978858, 0.039453425, 0.112853855, 0.007536018, -0.005471479, -0.11315905, 0.032013394, 0.11523904, -0.2504089, 0.04803124, -0.09689627, 0.24372064)); + target2 += mul(ne1, float4x4(0.61343086, 0.09531598, -0.24803302, 0.23788263, 0.13495958, 0.24733612, 0.1575427, -0.06863399, 0.2341275, -0.15821049, -0.165848, 0.0290172, -0.010136783, 0.04415787, -0.2619951, 0.09987892)); + target2 += mul(nf1, float4x4(0.19411229, 0.24528526, -0.250216, -0.33602244, 0.17639299, -0.052413136, 0.122578874, 0.028618507, 0.25713214, 0.22033587, -0.19680484, 0.028938502, -0.083384775, -0.06476429, 0.036840588, -0.14297847)); + target2 += mul(ng1, float4x4(-0.2897587, -0.12176407, 0.19259763, -0.106649496, -0.026704982, -0.036201328, -0.06753124, 0.37967134, -0.20092241, 0.006229027, 0.12085137, -0.09810282, -0.1501556, -0.0099991355, 0.25044358, 0.08538966)); + target2 += mul(nh1, float4x4(-0.11304407, -0.24147832, 0.21644448, -0.035938095, -0.036439262, -0.042730987, -0.04384442, 0.10325233, -0.32405272, -0.11873838, -0.15075137, -0.036929503, -0.10808143, 0.25799102, 
0.13749036, 0.5451476)); + target2 += mul(ni1, float4x4(-0.24142508, -0.04895773, 0.09022442, 0.2821465, -0.06298706, -0.1807906, 0.02960867, 0.22310257, -0.1915311, 0.2900501, 0.1670845, -0.080343634, 0.25779882, -0.27144584, -0.23575482, -0.14724477)); + target2 += mul(na2, float4x4(0.020742219, -0.10571064, -0.0010137435, 0.14439318, 0.32805952, -0.027505733, -0.07111945, 0.07043296, -0.09525604, 0.03175366, -0.14633068, -0.15810682, 0.18050082, 0.08191363, 0.07047039, 0.0018573351)); + target2 += mul(nb2, float4x4(-0.023874652, 0.14996628, 0.11298528, -0.1508891, -0.052415725, -0.02570088, 0.0055150646, 0.16365297, -0.046594325, 0.18095094, 0.09934885, -0.066233225, 0.2404304, -0.112728044, 0.14004207, 0.11369578)); + target2 += mul(nc2, float4x4(0.14799033, 0.025304591, 0.031008242, 0.03795376, -0.15800071, -0.043169834, 0.10797239, 0.17129694, 0.09674189, -0.11010672, 0.07283912, -0.11063907, 0.108249694, 0.025199141, 0.09162024, -0.1827302)); + target2 += mul(nd2, float4x4(-0.08983324, 0.07823903, -0.137839, 0.11909572, 0.11996334, -0.05947995, -0.25459376, -0.18159851, 0.044489045, 0.052461334, 0.13674203, 0.12579007, -0.33665392, -0.07313439, -0.013640538, -0.010538632)); + target2 += mul(ne2, float4x4(0.0884388, -0.10034604, 0.047238693, 0.12025125, -0.16648497, -0.20305477, 0.08240087, -0.17453992, 0.19033237, 0.28438845, -0.32885036, 0.14011146, -0.13389368, -0.012868356, -0.15273216, -0.19119217)); + target2 += mul(nf2, float4x4(0.09196779, -0.13800567, 0.08842335, -0.18658079, 0.17512907, 0.021311145, -0.06347847, -0.13827331, -0.10689703, -0.1707886, -0.15724367, -0.167876, 0.22493233, 0.3070637, -0.035266686, -0.0068385694)); + target2 += mul(ng2, float4x4(-0.2739973, 0.07336105, -0.196827, 0.060224827, 0.05752693, -0.014346674, 0.025412507, -0.27530053, 0.27755278, -0.07631679, -0.053861864, 0.113329165, -0.31025892, -0.012681806, 0.06228483, -0.054306302)); + target2 += mul(nh2, float4x4(-0.16827694, 0.16333361, 0.068389125, 0.24560109, 
0.11659498, 0.052896734, -0.020310031, -0.17830387, -0.07551057, -0.01822214, -0.037451357, 0.24607496, -0.2033962, -0.11107965, 0.05005381, 0.13685274)); + target2 += mul(ni2, float4x4(0.13665263, -0.24541081, 0.0012457973, -0.012630116, -0.09559698, 0.17756529, -0.039300505, -0.044217475, -0.22984356, -0.2294885, 0.104534455, -0.04131095, 0.084843494, 0.038027752, -0.106351435, 0.18853655)); + target2 += float4(0.010324113, -0.01262194, 0.0762259, -0.014071781); + + tex3[gxy] = target1; + tex4[gxy] = target2; +} + +//!PASS 3 +//!DESC Conv-4x3x3x16 +//!IN tex3, tex4 +//!OUT tex1, tex2, tex5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +// Pass3: for the pixel at gxy, samples a 3x3 window (offsets of one inputPt step around pos) from both tex3 and tex4, +// splits every tap into max(x, 0) and max(-x, 0), and accumulates mul(tap, float4x4(...)) products plus a float4 +// bias into target1 and target2, which are stored in tex1 and tex2. +// A third result, target3, is built from the centre taps only (e1, e2, ne1, ne2) with float4x3 matrices and is +// stored in tex5 as float4(target3, 1). +// NOTE(review): the numeric constants are presumably trained network weights - regenerate rather than hand-edit. +void Pass3(uint2 blockStart, uint3 threadId) { + // Pixel handled by this thread. Rmp8x8 is defined elsewhere; presumably it maps the flat thread index to a position inside the 8x8 block - TODO confirm. + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + // Threads whose pixel lies outside inputSize return without writing anything. + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + // (gxy + 0.5) scaled by inputPt; presumably inputPt is the size of one input texel in sampling coordinates, making pos the pixel centre - TODO confirm. + float2 pos = (gxy + 0.5f) * inputPt; + + // Tap naming for the 3x3 window (letters run down each column); suffix 1 = sampled from tex3, suffix 2 = sampled from tex4. + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex3.SampleLevel(sam, pos, 0); + float4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + // n* variables keep the negated tap clamped at zero, i.e. max(-x, 0). + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + // The taps themselves are then clamped at zero in place, i.e. max(x, 0); this must come after the n* values are taken. + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); +
i1 = max(i1, 0); + + float4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex4.SampleLevel(sam, pos, 0); + float4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(-0.06961854, 0.06914646, 0.120440066, -0.04889646, -0.012870159, 0.01994181, 0.052958567, -0.14740478, -0.0027199117, -0.004924673, 0.10131955, -0.11496505, -0.06742836, 0.08287776, 0.11206167, -0.021625644)); + target1 += mul(b1, float4x4(-0.025003597, 0.05389498, 0.14938618, 0.12255602, 0.050963886, 0.16300994, 0.17633909, 0.03229484, 0.2092038, 0.13367431, -0.09538967, 0.1636076, -0.022082182, 0.10898033, 0.0422286, -0.062253885)); + target1 += mul(c1, float4x4(-0.0018258828, 0.08333001, 0.002765037, -0.022241322, 0.1628206, 0.14671557, 0.3001151, 0.030986495, 0.05225914, -0.04880372, 0.15963705, 0.17972782, 0.055128947, 0.114626616, 0.03460699, -0.07679627)); + target1 += mul(d1, float4x4(-0.08866054, 0.0882386, 0.13833097, -0.079257324, -0.03060485, 0.049487974, 0.092268504, -0.17009564, 0.021603461, 0.20750603, 0.18884364, -0.10977116, 0.31758478, 0.053426504, 0.093257, 
0.14912026)); + target1 += mul(e1, float4x4(0.13069148, 0.21368778, -0.4405162, -0.009193694, 0.090230525, -0.15897161, -0.005089127, -0.06011075, -0.27336648, -0.021869129, -0.2084852, -0.0850094, -0.10896211, 0.27229342, -0.044210993, -0.03346366)); + target1 += mul(f1, float4x4(0.05807779, 0.08506817, 0.23984064, 0.12547795, 0.036945213, 0.039088245, -0.10716132, -0.15966031, 0.13548918, 0.07746645, -0.248966, -0.15717135, -0.059498273, 0.0088413125, -0.02828682, -0.021795277)); + target1 += mul(g1, float4x4(0.013289853, 0.007272393, 0.06875863, -0.053158432, -0.03578172, 0.20148727, 0.1961931, -0.16910668, 0.03259818, 0.054221123, -0.0326064, 0.06493197, 0.053533003, -0.11878436, 0.14398894, -0.17543368)); + target1 += mul(h1, float4x4(-0.17906332, 0.1111989, 0.047910325, 0.11560207, 0.09790123, -0.2023765, 0.04265116, 0.0075303926, 0.012974969, -0.0853146, -0.04037416, 0.14489946, -0.0716403, -0.055603035, -0.30376709, -0.011667526)); + target1 += mul(i1, float4x4(-0.053314358, -0.012657763, 0.0077033425, 0.12168191, 0.016371705, 0.11979062, -0.08494259, -0.009617431, 0.1303907, 0.043279216, -0.17285421, 0.15823162, -0.030746695, 0.121796146, 0.13097613, 0.0024783302)); + target1 += mul(a2, float4x4(-0.11677548, -0.06592395, -0.022185773, 0.0031006308, -0.00906918, -0.0006412884, -0.00083743286, 0.083697535, -0.060518038, 0.14058606, 0.122444086, 0.17866874, 0.02376487, -0.06369968, -0.026537767, 0.21466877)); + target1 += mul(b2, float4x4(0.12340551, -0.015656117, 0.051990572, 0.04361656, -0.05291406, 0.10119005, 0.17603071, 0.10464767, 0.03288951, 0.091776796, -0.17373918, -0.12871055, 0.10205503, -0.17783496, -0.17020486, -0.09781929)); + target1 += mul(c2, float4x4(-0.01845568, -0.008877597, 0.14279746, 0.031775143, 0.041680444, 0.08784194, 0.044564564, -0.0011678484, -0.010219994, 0.10472676, 0.046920944, -0.110975444, -0.1197329, -0.11303071, -0.14893234, -0.091113724)); + target1 += mul(d2, float4x4(-0.03856561, -0.12173735, 0.040876064, 0.13847597, 
-0.14995924, -0.13332345, 0.18687452, -0.22562599, 0.08920785, -0.0017916666, 0.019448435, 0.2306492, -0.054546747, -0.1465318, -0.10628867, -0.0073827514)); + target1 += mul(e2, float4x4(0.12689775, 0.11765595, 0.13039489, 0.06940679, 0.2672624, -0.03880143, -0.11693099, -0.05516293, -0.09665274, -0.2583138, 0.22954193, -0.19324702, -0.39629623, -0.35457405, 0.10052407, -0.19756024)); + target1 += mul(f2, float4x4(-0.06307673, -0.096393906, -0.0075868783, -0.25133502, 0.03436604, -0.008201423, 0.06386583, 0.106548436, 0.014626536, 0.03485315, -0.043418273, -0.1141408, 0.005102567, -0.11701804, -0.01645601, -0.057083)); + target1 += mul(g2, float4x4(-0.019062268, 0.020416953, -0.08854219, -0.037497565, 0.09449262, -0.09127615, -0.063330196, 0.08736769, -0.12394077, -0.17950213, -0.11101161, 0.16013645, -0.09370585, 0.0047447495, -0.04288296, 0.00314098)); + target1 += mul(h2, float4x4(-0.08263743, -0.14441489, -0.14886282, -0.05694989, 0.4254853, 0.10864832, 0.26322174, -0.042006254, 0.24269578, -0.053833783, -0.11558995, -0.066605136, -0.064816564, -0.25914803, -0.017624624, 0.0402331)); + target1 += mul(i2, float4x4(-0.100058846, -0.030422715, -0.19600148, -0.13322774, 0.1796998, 0.087852575, 0.07324559, -0.0047889417, 0.007248384, 0.08930289, 0.09643387, -0.0060126656, 0.16357517, -0.06628222, 0.030618697, 0.097391844)); + target1 += mul(na1, float4x4(0.09539377, -0.10802722, -0.014952347, 0.1683223, -0.03919409, 0.041155327, -0.012186347, -0.030456683, -0.015024977, 0.061710294, 0.00049987395, 0.27338788, 0.04845922, -0.014114694, -0.06371904, 0.008664)); + target1 += mul(nb1, float4x4(0.063082814, -0.02755945, -0.15663072, -0.053271208, 0.070173115, 0.038125586, -0.11840675, -0.016337764, -0.07963128, -0.06404943, 0.23033784, -0.007848355, -0.04434174, -0.092422634, -0.013985954, -0.038096108)); + target1 += mul(nc1, float4x4(0.037121523, -0.020622304, 0.086708754, 0.045878958, -0.13188364, -0.022858748, -0.22411314, -0.08116162, 0.048863005, 0.039260563, 
-0.04934298, 0.11015131, 0.028177079, 0.025245499, 0.1067935, 0.15324049)); + target1 += mul(nd1, float4x4(0.068235874, -0.14401375, -0.032677606, 0.02996807, -0.11290208, 0.114133574, -0.09627152, 0.053930115, 0.14560424, -0.15935057, -0.13495773, 0.29710987, -0.23231608, 0.14334352, 0.070753984, -0.08189047)); + target1 += mul(ne1, float4x4(-0.22378983, -0.09858718, 0.30114698, -0.0048736916, 0.02198528, 0.21444769, -0.11228022, -0.14812283, 0.092372194, 0.1598949, 0.2534843, 0.4932573, -0.16642319, 0.12972073, -0.04147445, -0.09365905)); + target1 += mul(nf1, float4x4(-0.132199, -0.0798279, -0.18289213, -0.15133642, -0.033057958, 0.007495456, 0.070398286, 0.049111973, -0.03361502, 0.032059964, 0.003850814, 0.22922683, 0.20279214, -0.07350396, 0.27681342, 0.11891455)); + target1 += mul(ng1, float4x4(-0.095355205, -0.08533997, -0.043466177, 0.03183743, 0.0048090555, -0.07969942, -0.044769235, 0.15350139, 0.06485437, -0.027922742, 0.0850892, 0.00069019396, 0.035737295, 0.20380683, 0.03413393, 0.025630401)); + target1 += mul(nh1, float4x4(0.26616514, -0.024066277, 0.09220501, 0.09643391, -0.014585791, 0.22894275, -0.053128377, -0.08719867, -0.08819027, 0.01932318, -0.113633566, -0.15435793, 0.10542983, 0.029819246, 0.33675614, -0.059085276)); + target1 += mul(ni1, float4x4(-0.031325538, 0.040770013, -0.049561024, -0.2095101, -0.09537227, -0.075998954, -0.04323478, -0.05470401, -0.110066876, 0.059249427, -0.042351052, -0.047700178, 0.21932366, -0.12850443, 0.035361454, 0.013699006)); + target1 += mul(na2, float4x4(-0.08417607, 0.113477044, 0.03574209, 0.007835156, 0.2021717, 0.030678429, 0.19313626, -0.03506592, 0.04233059, -0.08540689, -0.07128929, -0.13245375, -0.08918939, -0.042622462, 0.19011301, -0.18228586)); + target1 += mul(nb2, float4x4(-0.19981891, -0.16255717, 0.042949058, -0.06921157, 0.279451, -0.11536949, -0.13747527, -0.10020231, -0.013784027, -0.06727259, 0.3556115, 0.08460814, -0.15348805, -0.07692103, -0.018658075, 0.0037634284)); + target1 += 
mul(nc2, float4x4(-0.09063814, -0.036312047, 0.13528036, 0.0070792423, 0.11834377, 0.02331524, 0.09386154, 0.07144935, 0.033078104, -0.1397121, 0.09283168, 0.2118868, -0.06313442, 0.032146804, 0.0060367053, 0.005822348)); + target1 += mul(nd2, float4x4(0.035949346, 0.06469895, -0.0051385965, -0.078584194, 0.43195483, 0.0045206803, -0.24012396, 0.21436183, -0.013394304, -0.04198491, 0.06645506, -0.23869638, -0.02311661, 0.06589808, 0.16800866, -0.21120183)); + target1 += mul(ne2, float4x4(-0.24937367, -0.042277586, 0.08117994, 0.3105402, -0.26087892, -0.10325264, -0.08689298, 0.0064907144, 0.031937066, 0.09783758, -0.9514562, -0.104631096, 0.27990052, 0.36389935, 0.057687905, 0.14072314)); + target1 += mul(nf2, float4x4(-0.19865227, 0.09398578, 0.06911146, 0.13077813, 0.024283953, -0.0036808057, -0.036725305, -0.024085987, 0.061556816, 0.0029027078, 0.24621862, 0.112107046, 0.068239614, 0.052718107, 0.20803368, 0.065064415)); + target1 += mul(ng2, float4x4(-0.055511028, -0.08662344, -0.074801624, -0.021917107, 0.18730342, 0.047116343, 0.14872652, 0.10580926, 0.16962165, 0.16628978, 0.17343876, -0.1697205, 0.047853447, -0.22705628, 0.031780355, -0.09273609)); + target1 += mul(nh2, float4x4(-0.17306295, -0.067308225, -0.17174196, -0.13221754, -0.24622467, 0.029901514, -0.12799668, -0.04145667, -0.14546, 0.013308366, 0.028113116, 0.1678875, 0.07922657, -0.015584258, 0.17059629, 0.07330948)); + target1 += mul(ni2, float4x4(-0.09916512, 0.0623665, -0.022458963, 0.061962493, 0.18569344, -0.06590287, 0.111395456, 0.08477448, -0.03609452, 0.024279302, -0.083497405, 0.06459743, -0.22963138, -0.12262581, 0.006980887, -0.06653474)); + target1 += float4(-0.023354841, 0.0019475977, -0.0705355, -0.08216019); + + float4 target2 = mul(a1, float4x4(-0.13703531, 0.06135254, -0.05032855, 0.0039429665, -0.05997914, 0.03737832, -0.09703001, -0.08112204, -0.096779875, 0.086732335, 0.03021232, -0.14636067, 0.079296306, 0.006656948, 0.08904937, 0.06196539)); + target2 += mul(b1, 
float4x4(-0.26374274, 0.16698441, -0.08554561, 0.03734819, -0.08525629, 0.12257442, 0.015473835, 0.13266069, 0.008439022, -0.05002345, 0.03232084, 0.17349075, 0.014541135, -0.10353582, 0.13339484, -0.13474584)); + target2 += mul(c1, float4x4(0.05637785, -0.049726896, 0.06597188, 0.0058668824, -0.10623723, 0.13441847, 0.015975956, -0.07811197, 0.05975957, -0.062021587, -0.06533749, 0.083735935, 0.02666556, 0.029904561, -0.0102926055, -0.10931666)); + target2 += mul(d1, float4x4(-0.22616413, 0.042830274, -0.116208926, -0.053796053, -0.1112898, 0.20703097, -0.34109348, -0.065111674, -0.17255561, 0.16784647, 0.00193431, -0.043237597, -0.02353095, -0.1302526, 0.05119598, 0.01403269)); + target2 += mul(e1, float4x4(0.086109385, -0.053006437, -0.24992542, 0.007938272, -0.0027849772, 0.09198081, -0.17596659, 0.030577915, -0.31807357, -0.29618275, 0.0056317504, 0.3662508, 0.16753437, -0.12481447, -0.057597708, -0.14973637)); + target2 += mul(f1, float4x4(-0.14585754, 0.027715279, -0.039035518, 0.11505972, 0.0038059987, -0.20368981, -0.014822689, 0.094012834, -0.20693347, -0.37216228, -0.12690443, 0.2727411, -0.15475404, -0.01948714, -0.12414737, 0.10378582)); + target2 += mul(g1, float4x4(-0.11750072, 0.051394574, -0.011073509, -0.1100907, -0.1389209, -0.10706716, 0.0017484069, -0.059556484, -0.20038931, 0.24976069, -0.011129469, -0.080446415, 0.19259459, -0.14515446, -0.07275811, 0.039244935)); + target2 += mul(h1, float4x4(-0.101780266, 0.003889027, 0.010705813, 0.011088775, -0.20406786, -0.009807119, 0.23070864, -0.030722639, -0.012015954, 0.025211284, -0.29246482, 0.04907962, -0.10485314, 0.21213223, 0.15788344, -0.014188987)); + target2 += mul(i1, float4x4(0.1546438, -0.15895118, 0.010730076, 0.034053337, -0.018741185, -0.008467293, 0.13143812, 0.022905342, -0.27543658, 0.3054419, 0.07025048, 0.29454592, -0.0032350307, 0.01671764, 0.081928045, -0.10051137)); + target2 += mul(a2, float4x4(-0.014834404, 0.07487839, -0.16554666, -0.04127725, 0.15239598, -0.017607473, 
0.09927426, 0.15027349, -0.2073968, 0.041613225, -0.10290223, -0.12565911, 0.022021815, -0.07609557, -0.16338238, 0.04468512)); + target2 += mul(b2, float4x4(0.01768976, 0.0637369, 0.006542782, -0.0022799321, -0.14728844, -0.058199093, -0.029928437, 0.079634584, 0.095769696, -0.13526416, 0.20718366, -0.10116214, 0.1688786, -0.08906526, 0.020397741, 0.06541649)); + target2 += mul(c2, float4x4(-0.033067044, 0.10095467, -0.13792777, 0.022673525, -0.012797848, -0.11222105, 0.11443862, 0.04893716, 0.11389547, -0.07337629, 0.21447009, -0.032212257, 0.23070163, -0.18156143, 0.14542435, -0.10207653)); + target2 += mul(d2, float4x4(-0.22985588, 0.012290226, 0.018557416, -0.064000085, 0.012936774, -0.104329854, -0.0719669, 0.24160251, 0.03716294, -0.093069404, -0.12110873, 0.013251573, -0.12731232, -0.1995954, -0.07679729, 0.06823493)); + target2 += mul(e2, float4x4(-0.23359679, -0.052702624, -0.08710696, 0.19826421, 0.12880315, 0.19875911, -0.20581602, 0.32980308, -0.14479029, 0.099422045, 0.44737315, 0.13044962, 0.12935589, -0.13621494, 0.14902137, 0.09162335)); + target2 += mul(f2, float4x4(0.10801082, -0.22644557, 0.035719793, -0.12396268, 0.2906566, 0.119107775, -0.15470679, 0.17997102, -0.12866725, -0.12695445, -0.06832712, 0.017622665, 0.08215481, 0.065239124, -0.1256659, -0.06811625)); + target2 += mul(g2, float4x4(-0.097956754, 0.09383762, -0.19813508, 0.0035260199, -0.14278924, 0.0660843, 0.19110036, 0.11025648, 0.15489757, 0.011157471, -0.16014035, -0.050144047, 0.0032884583, 0.061513808, -0.03385016, -0.08534137)); + target2 += mul(h2, float4x4(0.09499595, 0.04162155, -0.26091605, -0.18066265, -0.21523187, -0.036668014, 0.09586408, 0.059850723, -0.10890033, 0.28857672, -0.32993382, 0.05107536, 0.012024929, -0.27968574, 0.15081042, -0.07215633)); + target2 += mul(i2, float4x4(0.15673614, -0.064684846, -0.13838115, 0.1264376, -0.23772664, 0.11594999, 0.0898036, -0.092647165, 0.26081505, 0.05110054, -0.017965768, 0.06740709, -0.24977967, 0.05645255, -0.08204664, 
0.0435078)); + target2 += mul(na1, float4x4(0.02560865, -0.1613835, 0.05876215, 0.101586774, -0.00058163394, 0.0013674656, 0.039857507, -0.002919488, 0.05573127, -0.04311352, 0.05305971, 0.10097247, 0.036392104, -0.025071293, 0.029137935, -0.08593101)); + target2 += mul(nb1, float4x4(0.12406646, -0.21399136, 0.05611706, 0.021867402, -0.037916705, 0.05941278, 0.11277805, -0.12387807, 0.008577062, -0.045022104, 0.16465645, -0.07607619, 0.035939474, 0.07221297, -0.13557361, 0.07806311)); + target2 += mul(nc1, float4x4(-0.19589397, 0.011909766, -0.01258029, -0.065313555, 0.07366803, -0.0812486, 0.115863465, 0.019752543, -0.15854625, 0.11246406, 0.007201303, 0.0008530298, -0.0287012, -0.036224626, 0.059641607, 0.09416462)); + target2 += mul(nd1, float4x4(0.20361906, -0.20671111, -0.1126041, 0.049152024, 0.17586707, 0.10047246, 0.13149028, -0.16302691, -0.08559989, -0.17756243, -0.0061752857, 0.124775924, 0.020011704, 0.17147969, -0.0003063916, -0.015890911)); + target2 += mul(ne1, float4x4(0.11051906, 0.13774526, 0.29333818, -0.029932505, -0.07021508, 0.046212852, 0.11793092, -0.081830084, -0.18609521, -0.108229816, -0.044969153, -0.041069634, -0.13936938, 0.11356429, 0.19260931, 0.093210496)); + target2 += mul(nf1, float4x4(0.010555152, -0.15726428, -0.13187453, -0.12396212, 0.17309372, 0.100884624, 0.11547714, -0.030650318, -0.21877939, -0.0015167049, -0.090150684, 0.029793834, 0.1465573, -0.038805004, -0.033211514, -0.04926991)); + target2 += mul(ng1, float4x4(0.10250675, -0.030922988, -0.008545946, 0.024706079, 0.105154864, -0.06838902, -0.12627976, 0.032457255, 0.21747419, -0.12865087, -0.056018118, 0.07152061, -0.11214344, -0.029831404, 0.044855718, -0.04316971)); + target2 += mul(nh1, float4x4(0.12806997, 0.12385188, -0.06831653, -0.015933594, 0.08645126, 0.013043054, -0.19599608, -0.060719345, -0.23076192, 0.19181651, 0.1292978, 0.036317572, -0.061692618, -0.25434494, -0.10012762, 0.06366783)); + target2 += mul(ni1, float4x4(-0.11098094, 0.034632366, 
-0.053560194, 0.08499573, 0.20842391, -0.020262053, -0.023394845, 0.048971336, 0.10436084, 0.12614205, 0.035942093, -0.07592917, -0.07455495, -0.012119416, -0.011834865, 0.21032205)); + target2 += mul(na2, float4x4(-0.00055114913, -0.06662242, -0.009248925, -0.0024843027, -0.22993802, -0.04828541, -0.08667693, -0.093717255, 0.14400347, 0.030130679, -0.01590651, 0.10399553, 0.14478837, -0.11228224, -0.039653912, -0.042144097)); + target2 += mul(nb2, float4x4(-0.011044514, -0.09870122, -0.24879128, 0.111903004, 0.092567004, 0.06100228, 0.0053522107, 0.065252475, -0.18228072, 0.25602147, -0.2863954, 0.103064165, 0.052214783, -0.017557586, -0.07434391, 0.021111684)); + target2 += mul(nc2, float4x4(0.04537496, -0.024985183, -0.15247425, -0.0009907635, -0.09677889, 0.09858206, -0.030702371, 0.03539458, -0.029408665, 0.24335481, -0.1918429, 0.08056781, 0.1548214, 0.2850923, -0.15131058, -0.052048493)); + target2 += mul(nd2, float4x4(0.055409238, -0.13090813, -0.016612396, -0.019183576, -0.18499215, -0.013184845, 0.038750056, 0.10953814, -0.18437819, 0.19183092, -0.09780726, -0.046532292, -0.10841146, -0.17717329, -0.1731886, -0.06741823)); + target2 += mul(ne2, float4x4(0.27919188, -0.14904179, 0.22850563, -0.17785722, -0.32835802, -0.19134615, 0.32093298, 0.24667856, 0.51687604, -0.59745705, 0.23057328, -0.41411245, -0.4234339, -0.03083826, -0.13972719, 0.1729651)); + target2 += mul(nf2, float4x4(0.042352367, -0.109207705, -0.31047532, 0.08896513, -0.2187999, -0.117951825, 0.060705405, -0.10287316, 0.013815159, -0.023699438, -0.053614594, 0.09065406, -0.15286967, -0.101803675, 0.019537682, 0.12476822)); + target2 += mul(ng2, float4x4(0.0016159728, 0.04094818, 0.012745902, -0.051958837, 0.014557628, 0.00061195926, -0.11669799, 0.08763203, -0.27820277, 0.17871988, 0.10634548, 0.05234229, 0.03827577, -0.3117398, 0.027675012, 0.0655132)); + target2 += mul(nh2, float4x4(-0.0025006514, -0.1457415, 0.053443488, -0.0050932285, 0.01582735, 0.18783967, -0.066718, -0.15485887, 
-0.039741408, -0.21280284, 0.1502977, 0.09507925, 0.17178543, -0.014238171, -0.35757875, 0.026410697)); + target2 += mul(ni2, float4x4(-0.19434428, -0.079038315, -0.017264817, -0.04004242, 0.0063378955, 0.027904915, 0.02571677, 0.09895997, -0.036605608, -0.19889063, 0.015920812, -0.014095519, 0.4363826, -0.14143194, 0.015463533, -0.1656284)); + target2 += float4(0.08523788, 0.052322272, 0.08955637, -0.06945023); + + float3 target3 = mul(e1, float4x3(0.121882804, 0.055417646, 0.037575886, 0.040015355, 0.10440659, 0.120197006, 0.008896276, 0.07269119, 0.09253319, 0.009000448, -0.033739295, -0.059260685)); + target3 += mul(e2, float4x3(-0.048027042, 0.09210703, 0.123745404, -0.007914943, 0.05483587, 0.054822505, -0.005998682, 0.005822986, 0.009868176, -0.05866792, -0.04236153, -0.022935968)); + target3 += mul(ne1, float4x3(-0.091270015, -0.033997003, -0.012321896, -0.037983265, -0.078790314, -0.085029654, 0.10656225, 0.0008334142, -0.0041227583, 0.077364065, 0.033960085, 0.029391684)); + target3 += mul(ne2, float4x3(0.15057671, -0.037442014, -0.037083894, 0.015493511, -0.016119987, -0.027061606, -0.012329675, 0.0060544596, -0.019787522, 0.12182345, 0.11346318, 0.08640806)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex5[gxy] = float4(target3, 1); +} + +//!PASS 4 +//!DESC Conv-4x3x3x16 +//!IN tex1, tex2, tex5 +//!OUT tex3, tex4, tex6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass4(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, 
-inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(0.048841953, -0.010713874, 0.09238948, -0.0789676, -0.093295254, 0.063662216, -0.023454266, -0.06739832, 0.027439933, 
0.007399632, -0.03550259, -0.013834889, 0.17168441, 0.06177229, 0.023950668, 0.14574073)); + target1 += mul(b1, float4x4(0.117296845, -0.07858486, -0.02099164, -0.024150673, -0.11662526, -0.26440877, -0.05449493, -0.13366842, -0.06870016, 0.12457937, 0.25052628, 0.013982828, 0.15127566, -0.031653196, -0.13851896, 0.04148151)); + target1 += mul(c1, float4x4(0.024360385, -0.31051615, 0.012448293, -0.11265428, 0.06123606, -0.0701936, 0.033618104, -0.064061284, -0.06969811, -0.108838804, 0.014163671, 0.02596177, 0.20071186, -0.0028744373, 0.13663651, -0.05592813)); + target1 += mul(d1, float4x4(0.13492568, -0.0726796, 0.13431883, -0.085713945, 0.056370113, 0.115660414, -0.14475793, 0.0044200714, 0.027387753, 0.045452334, 0.28178552, 0.017371183, 0.17304336, 0.0582999, 0.14465337, 0.046005037)); + target1 += mul(e1, float4x4(0.064034574, 0.041531377, 0.08218889, -0.44529077, -0.010563538, -0.14926371, 0.051012456, 0.08209141, 0.24089444, -0.225398, -0.22259372, -0.26353076, -0.1687418, -0.11501685, -0.016655196, -0.09882357)); + target1 += mul(f1, float4x4(-0.019985389, -0.19189276, -0.104917, -0.11139956, -0.08406414, 0.031484302, -0.082132496, 0.025829919, 0.07512055, 0.31116992, 0.061163265, -0.074850895, -0.091695994, -0.26492774, -0.06617365, 0.06590624)); + target1 += mul(g1, float4x4(0.1326703, 0.13008863, -0.1659525, -0.058325157, -0.047528613, 0.06777741, 0.06953616, 0.010587038, 0.031675722, -0.08119788, -0.11269768, -0.06225964, -0.26593694, 0.03627298, 0.12866129, 0.17876588)); + target1 += mul(h1, float4x4(-0.29016155, -0.12549841, -0.050858997, -0.088932805, 0.002237332, 0.01287246, 0.30138868, -0.071756564, -0.061206467, -0.11114371, -0.25731218, -0.11551616, -0.069513, -0.004583348, -0.10647163, 0.01981785)); + target1 += mul(i1, float4x4(0.16387528, 0.03450354, 0.03422023, -0.014030813, 0.13418834, -0.010909722, -0.00447121, -0.03082622, -0.23983373, -0.020655053, -0.054034587, -0.07133469, 0.21171515, 0.06268651, -0.1738516, -0.15001713)); + target1 += 
mul(a2, float4x4(0.040721033, -0.037582736, -0.13819644, -0.123978324, 0.1650318, 0.033942625, 0.17534302, 0.06452234, 0.18384823, 0.0048657497, 0.20220642, -0.0025760103, 0.011163899, 0.027265374, -0.051284578, 0.19202651)); + target1 += mul(b2, float4x4(-0.057493486, -0.031516504, 0.10835143, -0.040618125, -0.07762303, -0.06787725, 0.025559613, -0.0055560498, -0.0017830619, 0.020185964, -0.06656476, -0.008523214, 0.32331157, -0.21633361, 0.15338033, -0.104042485)); + target1 += mul(c2, float4x4(-0.18544987, -0.090446, -0.26797467, -0.082941435, -0.15003708, -0.11446041, -0.0394892, 1.1379096e-05, 0.04978554, 0.3350256, 0.032780237, 0.034625802, 0.0596261, 0.045886245, 0.009002243, 0.04746998)); + target1 += mul(d2, float4x4(-0.17104147, 0.0054165213, 0.09161088, -0.0673989, -0.119282715, -0.09094731, 0.47243354, 0.09914267, -0.13958418, -0.0050379517, 0.14352496, 0.18380567, -0.16128838, 0.08766813, 0.013876981, -0.09808636)); + target1 += mul(e2, float4x4(0.09617889, 0.045525175, -0.2550057, -0.02874332, 0.2743444, -0.20102581, 0.008461914, 0.16626629, -0.13309516, -0.19307104, 0.15780488, 0.15518525, -0.2790243, 0.056782067, 0.16836968, 0.17771688)); + target1 += mul(f2, float4x4(-0.10694667, 0.14490083, -0.037976455, 0.013456577, -0.1166783, 0.060722847, 0.07323464, -0.013812333, 0.03234213, 0.50859296, -0.20670377, -0.019631205, -0.022543924, 0.21776745, -0.093769215, 0.12193299)); + target1 += mul(g2, float4x4(-0.15260598, -0.04798592, -0.02370747, -0.005714705, 0.030857049, -0.16643822, 0.23971851, 0.08117996, -0.069645695, -0.06674784, 0.033509918, 0.06333286, 0.14010383, 0.02218942, -0.036704093, 0.043163314)); + target1 += mul(h2, float4x4(0.14653306, 0.002759894, 0.10548246, 0.24976018, 0.3212893, -0.07108953, 0.14068738, 0.29437128, -0.020556152, -0.17813908, 0.1989112, 0.12182122, -0.19231579, 0.06547012, -0.032785345, 0.089717634)); + target1 += mul(i2, float4x4(-0.23632105, -0.027022298, 0.00586518, 0.01836479, -0.2854795, -0.035417695, -0.07586866, 
0.0715673, 0.17984483, 0.11210451, 0.032767817, 0.097993985, -0.010899036, 0.15933803, 0.05454052, 0.06768528)); + target1 += mul(na1, float4x4(-0.017289463, -0.058823984, 0.0807603, 0.32464716, 0.2756627, 0.036061637, -0.034578573, -0.08811335, 0.031841308, 0.11359879, 0.07553143, -0.028648997, 0.057192322, 0.07769366, -0.1998847, -0.06258051)); + target1 += mul(nb1, float4x4(0.0422091, 0.046305113, 0.028377453, -0.031071126, 0.06866086, 0.1538135, -0.009288249, -0.25543538, 0.07067607, -0.114061736, -0.024740022, -0.11824987, -0.17426041, 0.0028396242, 0.12849464, 0.057790644)); + target1 += mul(nc1, float4x4(0.057328146, 0.030677445, 0.07496485, 0.07847613, -0.22358766, -0.15659446, -0.18270054, -0.21316889, 0.084770195, 0.013863274, -0.001335942, -0.04027535, -0.15230416, -0.048156176, -0.04614562, 0.089494966)); + target1 += mul(nd1, float4x4(-0.117369525, 0.026577681, -0.1941765, 0.14904885, -0.16210394, -0.19549404, 0.19999947, 0.37138188, 0.14809363, -0.05078633, -0.092692114, -0.08533522, 0.12769112, 0.017061725, 0.104464866, -0.026744602)); + target1 += mul(ne1, float4x4(0.0880251, -0.005333869, -0.10327546, 0.30419552, 0.107773595, 0.02335926, -0.19014318, 0.19670166, -0.09443473, 0.10621109, 0.36843884, 0.13197622, 0.24537645, 0.4032842, 0.21791221, 0.08400414)); + target1 += mul(nf1, float4x4(0.06408587, 0.15366535, 0.042582024, 0.15629277, 0.028716238, -0.013479061, -0.23052843, -0.2992272, -0.050045617, -0.27255702, -0.038093377, 0.0031149297, -0.05625518, 0.52598304, -0.0845234, -0.09116851)); + target1 += mul(ng1, float4x4(0.02294159, -0.011902539, 0.00079296535, 0.030631313, 0.02114366, 0.082455896, 0.09450867, -0.08027284, 0.042443607, 0.15427661, 0.11882799, -0.040319934, 0.23706424, -0.107808165, -0.1730313, -0.06340064)); + target1 += mul(nh1, float4x4(0.2645207, 0.002157867, -0.095794424, 0.1141035, 0.08255855, -0.06977906, -0.04348005, 0.27864936, -0.1197219, 0.015997604, 0.09500464, -0.0010631803, 0.07198933, -0.053128377, 0.02176274, 
-0.001298847)); + target1 += mul(ni1, float4x4(-0.045475803, 0.03626341, -0.00891833, 0.17907676, -0.2810277, 0.13725498, -0.02413441, -0.08605496, 0.08306595, -0.012227401, -0.0070282067, -0.019027572, -0.13443586, -0.041331865, 0.029120144, -0.00490357)); + target1 += mul(na2, float4x4(-0.13398282, 0.06475972, 0.2528711, 0.02553969, -0.13428321, -0.03931247, 0.11360386, -0.18912545, -0.3725821, -0.018747944, -0.20893294, -0.012743096, 0.07444533, -0.15381604, 0.29776138, 0.10601149)); + target1 += mul(nb2, float4x4(-0.21793252, 0.07817356, -0.109576665, 0.19185133, -0.072846025, 0.04960289, -0.07506936, 0.12839878, -0.0061091883, 0.093669325, 0.009295678, 0.03780657, -0.10901407, 0.1375137, -0.0745914, 0.1468883)); + target1 += mul(nc2, float4x4(0.10739044, 0.30611086, 0.1585515, 0.07903283, 0.05612715, -0.0061900485, 0.13646163, 0.15230569, 0.036846787, -0.15846778, -0.18765065, 0.06611226, -0.07209187, 0.056037188, 0.04302953, -0.03887873)); + target1 += mul(nd2, float4x4(0.05618538, -0.072312586, -0.018046018, 0.049542785, -0.033638306, -0.035169322, -0.25882784, -0.036425237, 0.43763217, -0.07049093, 0.08085481, 0.013634128, -0.2701461, -0.13007875, 0.09603447, 0.2479431)); + target1 += mul(ne2, float4x4(-0.02283992, -0.24593964, 0.04616348, 0.023422526, -0.20994014, 0.064769074, -0.07680045, -0.30547765, 0.1518723, 0.31953967, -0.12841515, -0.19525428, -0.0076093865, -0.112106465, -0.04573789, -0.04834478)); + target1 += mul(nf2, float4x4(-0.008045419, -0.20285496, 0.15290824, 0.036240693, 0.11959966, -0.15712506, 0.096806675, 0.008780234, -0.19716795, -0.3824029, 0.1376541, 0.13325086, -0.103316806, -0.31788048, -0.071698256, -0.25901568)); + target1 += mul(ng2, float4x4(0.13714787, 0.020738773, 0.13716534, 0.12359137, -0.038154524, 0.053202964, -0.12023912, 0.09011213, -0.012448548, -0.026505312, -0.11293235, 0.10613704, -0.39916727, 0.041521315, 0.10659441, 0.027749784)); + target1 += mul(nh2, float4x4(-0.26475835, 0.044597875, -0.31229413, -0.17121075, 
-0.21795374, -0.009583571, -0.13428004, -0.30734754, -0.017038794, 0.113667324, -0.1516075, 0.06525228, -0.13789397, -0.05770066, -0.016166758, -0.29457557)); + target1 += mul(ni2, float4x4(0.054183286, 0.022085225, 0.086794585, 0.10968018, 0.1276148, 0.05739452, 0.08860957, -0.08131373, -0.081570424, -0.107991874, -0.03724999, 0.000843539, 0.20231429, -0.123543546, -0.19073018, -0.28328305)); + target1 += float4(0.013646388, -0.021442367, 0.0045393505, -0.037433166); + + float4 target2 = mul(a1, float4x4(-0.13948695, 0.016643738, 0.08168136, 0.02315663, 0.017184775, 0.11487715, 0.05770107, 0.010102888, 0.04955321, -0.045132335, -0.05731744, -0.05798246, 0.2245112, 0.17406365, 0.08979801, -0.10607952)); + target2 += mul(b1, float4x4(0.2812785, 0.022830509, 0.15164222, 0.13460225, 0.22263442, 0.2558749, -0.122489706, 0.10409658, 0.023308244, -0.19583783, -0.007824269, 0.06256542, 0.11161938, 0.14878923, 0.30865005, 0.08962341)); + target2 += mul(c1, float4x4(-0.20843887, 0.012371968, -0.008279775, -0.042467568, -0.13022369, 0.056743186, -0.018389069, 0.13964763, -0.03361555, -0.053087234, 0.012521351, 0.0209293, 0.015771557, 0.11718523, 0.010176676, 0.021708367)); + target2 += mul(d1, float4x4(-0.14373007, -0.114338934, -0.09077395, -0.11040866, 0.055298284, 0.022516333, 0.18901019, -0.05640152, -0.1413198, -0.08748339, -0.029985962, 0.00712751, -0.071436934, -0.18909407, 0.173448, 0.053675048)); + target2 += mul(e1, float4x4(-0.023129769, 0.42883545, -0.18110612, 0.24296297, -0.02441117, 0.18108079, -0.12298153, -0.19192219, -0.14139178, -0.069563635, 0.1524624, -0.17755614, -0.248875, 0.015161957, -0.16541803, -0.17773613)); + target2 += mul(f1, float4x4(-0.065477535, -0.113195814, -0.08284894, 0.11679537, 0.028445985, -0.026559185, -0.007267581, 0.14052133, 0.14847197, -0.040276285, -0.038166475, -0.030452784, -0.15184602, -0.22223297, 0.113732725, 0.11163395)); + target2 += mul(g1, float4x4(0.04990171, 0.08493333, 0.08668171, 0.14610586, -0.010766879, 
-0.05690133, 0.10706113, 0.13667485, 0.044783257, 0.029695645, -0.101674624, -0.02023205, 0.031889528, 0.14293797, 0.08712652, 0.08716896)); + target2 += mul(h1, float4x4(-0.21387868, -0.21650635, 0.2743992, -0.048781313, -0.027735803, -0.1543507, 0.11343657, -0.18251626, 0.15225998, 0.13158897, -0.41056108, 0.102582805, -0.09181491, -0.0042975787, 0.056065407, -0.16961528)); + target2 += mul(i1, float4x4(0.08966051, 0.09331515, -0.085415326, -0.022695992, 0.009771476, -0.07143986, 0.0590329, 0.07347928, -0.09033658, -0.06805735, -0.20129825, 0.017873045, 0.16908158, 0.014213783, 0.112663984, 0.10048714)); + target2 += mul(a2, float4x4(0.115590535, 0.08364541, 0.00864431, -0.094349444, -0.11073411, 0.05337711, 0.055587426, 0.12131219, -0.04710173, -0.046455074, 0.110379905, 0.25445566, 0.15154606, 0.04483541, 0.08708686, 0.113456205)); + target2 += mul(b2, float4x4(-0.014296297, 0.24858733, 0.05035193, -0.09225393, 0.034625243, 0.06219943, 0.19825043, 0.04673499, -0.4083363, -0.39954248, -0.08299408, 0.048756655, 0.09862206, 0.01588621, 0.0070629907, 0.04173666)); + target2 += mul(c2, float4x4(0.17356622, 0.1484559, -0.10054033, 0.013332302, 0.15200937, 0.08985606, -0.031668343, -0.026007611, -0.16339104, 0.054744486, 0.07386605, -0.033910174, -0.0018002358, -0.02968911, 0.054931052, 0.09970459)); + target2 += mul(d2, float4x4(-0.07330346, 0.05938635, 0.01911963, -0.09856661, -0.081916444, -0.046957035, -0.043849826, 0.09572135, -0.13621825, 0.034347896, -0.21189907, 0.10592239, -0.060592845, 0.09957844, 0.050621815, -0.07447668)); + target2 += mul(e2, float4x4(0.044731334, -0.13406886, -0.04138754, -0.06764551, -0.018899845, 0.35320804, -0.10959127, 0.17435175, -0.17941645, -0.30889434, 0.10573405, 0.0319751, -0.15677677, 0.08164649, 0.16559398, -0.08152387)); + target2 += mul(f2, float4x4(0.057760764, -0.12145107, 0.06889264, -0.30627275, 0.011501002, -0.080296256, -0.18067095, 0.10592384, 0.12884894, -0.18973115, 0.18740658, 0.28362688, 0.12934786, -0.010292026, 
0.0559999, 0.079962276)); + target2 += mul(g2, float4x4(0.048659086, -0.006250348, -0.041242067, -0.12078197, -0.07152629, 0.05699244, 0.0011704164, -0.023007339, 0.07814492, 0.02546712, -0.08957218, -0.036925297, -0.03383498, 0.12583385, 0.12207602, 0.03910942)); + target2 += mul(h2, float4x4(0.26151723, 0.23277281, -0.021892069, 0.052827276, 0.18268764, 0.28595275, -0.20529993, 0.19892794, 0.0038986763, 0.114547804, -0.020574905, 0.02405073, 0.11713121, 0.04491106, -0.07557327, 0.014374293)); + target2 += mul(i2, float4x4(-0.14276731, -0.06600894, -0.029757235, -0.099975966, 0.023050314, -0.07662015, -0.11542214, 0.087981045, 0.070319094, 0.12462511, 0.008152087, 0.12613884, -0.07071591, 0.0063393894, 0.08699723, -0.0242523)); + target2 += mul(na1, float4x4(0.035586607, -0.26826563, -0.10145326, -0.002177148, 0.022144236, -0.117452875, 0.021346297, 0.051908135, -0.022425706, 0.067299, 0.09406446, 0.078294896, 0.014900606, -0.05468236, 0.07241715, 0.061000507)); + target2 += mul(nb1, float4x4(-0.184133, 0.06229474, -0.13819578, -0.025011744, -0.01868356, -0.18940887, 0.092631504, -0.092806384, 0.0035951615, 0.11777577, 0.028149817, 0.0049419673, 0.22230826, 0.06337655, -0.20004818, -0.20937593)); + target2 += mul(nc1, float4x4(0.13852163, -0.094492316, -0.040309057, 0.10771662, 0.18963522, 0.08687606, -0.20030232, -0.082126215, 0.012181411, 0.044306785, -0.036970526, 0.04403363, 0.07911973, 0.0021176056, 0.26944208, -0.06657045)); + target2 += mul(nd1, float4x4(0.027229607, 0.12410596, 0.04348171, 0.0019921176, 0.088246435, -0.02828269, -0.26499373, -0.12566662, 0.025947344, -0.0078000715, 0.058063716, -0.0032702687, 0.0059978673, -0.04860002, 0.027650384, -0.23394564)); + target2 += mul(ne1, float4x4(0.07892762, -0.13300626, 0.46678603, -0.033239357, -0.12306804, -0.079602, 0.20534003, 0.23873802, -0.035643574, 0.059950788, -0.26559883, 0.12206408, 0.25408483, 0.029933078, 0.32081822, 0.033947676)); + target2 += mul(nf1, float4x4(-0.06847802, -0.017930118, 
-0.12299636, -0.12987946, 0.09267518, -0.0009083275, -0.035390552, -0.15379669, -0.1132433, -0.036670692, -0.08342377, 0.015636675, 0.022590527, 0.10533322, 0.0389949, -0.059033744)); + target2 += mul(ng1, float4x4(-0.041753534, -0.014428097, 0.06999257, -3.546234e-05, -0.033465035, -0.040709455, 0.13118082, -0.21016484, -0.07846085, -0.030885663, 0.06934681, 0.12725256, -0.023784902, -0.13373604, -0.015261479, 0.05234782)); + target2 += mul(nh1, float4x4(0.13798563, 0.12757827, -0.26978776, 0.102494285, 0.13285922, 0.35432795, -0.11997128, 0.17108068, -0.12235328, -0.24582328, 0.26962712, -0.086760186, 0.010127441, 0.08048835, 0.047505867, 0.19991067)); + target2 += mul(ni1, float4x4(0.03584222, -0.13433793, -0.044629525, -0.0010440781, -0.0033084434, -0.026725832, -0.05386642, -0.13612603, 0.10066015, 0.10499841, 0.031767137, -0.04550841, -0.09391546, 0.1454157, -0.26962402, 0.21015608)); + target2 += mul(na2, float4x4(-0.21956864, -0.13502425, -0.02126954, 0.059263993, -0.13461533, -0.04001395, -0.0924258, -0.069165014, 0.22019973, 0.003270619, 0.022072528, -0.14173602, 0.0028843523, -0.13784003, -0.061057515, -0.0049253837)); + target2 += mul(nb2, float4x4(-0.0011410525, -0.16098002, -0.12883134, 0.018262507, 0.001481578, 0.19514659, -0.13703239, 0.096059754, 0.34194204, 0.13983466, 0.14021507, 0.011405113, -0.11303146, -0.17050214, -0.06992079, -0.05566986)); + target2 += mul(nc2, float4x4(-0.12307941, -0.02192472, 0.13193923, -0.061640862, -0.16841564, -0.0822524, 0.10141759, 0.02139286, 0.1599039, -0.050632223, 0.16702358, 0.111514546, 0.02397393, 0.037606515, 0.017971672, -0.048641708)); + target2 += mul(nd2, float4x4(-0.02697617, -0.08579184, -0.28045088, 0.05262136, -0.059576314, 0.107535526, -0.06188862, 0.0010509328, -0.18178311, -0.17288832, 0.20703638, 0.083048366, 0.03859681, -0.07548898, 0.011605782, -0.021842534)); + target2 += mul(ne2, float4x4(0.13198483, 0.37200937, -0.0896539, 0.12450637, 0.037202634, 0.035985112, 0.16579124, -0.08967905, 
-0.24341385, 0.32482424, -0.3037812, -0.007154969, -0.007152382, -0.017435173, 0.12662841, -0.090513505)); + target2 += mul(nf2, float4x4(-0.014726027, 0.08394915, -0.02100581, 0.24882795, -0.023793869, -0.006450114, 0.17093314, -0.06994153, -0.08689907, 0.113542505, -0.053211495, -0.1780173, 0.030043352, 0.2500714, -0.026940798, -0.0069258413)); + target2 += mul(ng2, float4x4(0.037078895, -0.03033529, -0.066851325, 0.14718252, 0.066372745, 0.028897487, -0.036055963, 0.035399746, 0.06733992, 0.21021596, -0.18314466, -0.027192699, 0.020213274, -0.17751546, -0.050674338, -0.09382659)); + target2 += mul(nh2, float4x4(-0.14761917, -0.22166072, 0.033172436, -0.21982265, -0.09172891, -0.20794454, 0.1738752, -0.13685037, 0.10981111, -0.23169234, 0.053787973, 0.12001196, -0.038242023, -0.047124114, 0.22503005, 0.1015142)); + target2 += mul(ni2, float4x4(0.021231879, -0.015423476, 0.058986407, 0.032002006, -0.029305007, 0.008933183, 0.10777483, -0.112574644, -0.023935415, -0.06604598, 0.053859934, -0.08354717, 0.13703763, -0.078382134, 0.12914242, -0.022056468)); + target2 += float4(-0.002022359, -0.007333954, -0.038140967, -0.03819673); + + float3 target3 = tex5.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.19254518, 0.009179287, 0.023821035, 0.020269603, 0.025629226, 0.040180814, -0.025135614, -0.07785793, -0.099851295, -0.122886, 0.03322616, 0.0509256)); + target3 += mul(e2, float4x3(0.060054794, 0.053996198, 0.047226787, 0.038959846, -0.025839888, -0.030583512, -0.034999896, 0.011966571, -0.011057454, 0.05765179, -0.041760337, -0.0694113)); + target3 += mul(ne1, float4x3(-0.20393562, -0.0055942894, -0.02089636, 0.14781304, -0.01954523, -0.0746086, 0.071556985, 0.07512172, 0.067927115, 0.084076844, -0.0561336, -0.06856403)); + target3 += mul(ne2, float4x3(-0.039552618, -0.04448951, -0.04170605, -0.00886809, 0.06708884, 0.07120977, 0.04834384, -0.10599933, -0.11024835, -0.015948117, 0.084044695, 0.10778199)); + + tex3[gxy] = target1; + tex4[gxy] = target2; 
+ tex6[gxy] = float4(target3, 1); +} + +//!PASS 5 +//!DESC Conv-4x3x3x16 +//!IN tex3, tex4, tex6 +//!OUT tex1, tex2, tex5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass5(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex3.SampleLevel(sam, pos, 0); + float4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex4.SampleLevel(sam, pos, 0); + float4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex4.SampleLevel(sam, pos + 
float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(0.050738923, 0.15003614, -0.18880141, 0.16791905, 0.16549185, -0.26726744, -0.12813666, -0.021510791, 0.070805945, 0.043350577, 0.0035756908, 0.11776675, -0.01824196, 0.12618026, 0.07424072, 0.032886628)); + target1 += mul(b1, float4x4(-0.11678059, 0.0565686, 0.04392921, -0.27621672, 0.2116695, 0.038044345, -0.015018062, -0.028636303, 0.049744565, -0.12935996, 0.027176194, -0.13208814, -0.21195693, 0.08980974, 0.013893243, -0.018403184)); + target1 += mul(c1, float4x4(0.3214697, -0.03143518, 0.19927292, 0.12566878, 0.16190828, 0.11784847, 0.09943727, 0.11755882, 0.017959306, -0.064603634, -0.14054321, -0.11917774, 0.0056958874, 0.06461699, 0.104604125, 0.021947173)); + target1 += mul(d1, float4x4(-0.24738057, -0.034892898, -0.03364674, 0.017340986, 0.02933764, -0.08090866, -0.034651175, -0.17391174, 0.08536477, -0.17446008, 0.22706915, -0.10555482, 0.0877744, 0.0681237, -0.035909466, -0.10355238)); + target1 += mul(e1, float4x4(-0.090646185, -0.12971672, -0.14531808, -0.060838025, 0.24902023, 0.1310588, 0.18602785, 0.21283495, -0.32160765, -0.070119165, -0.10350057, 0.19260244, -0.2610542, -0.3030521, 0.08432348, -0.22286619)); + target1 += mul(f1, float4x4(0.28333843, -0.053968847, 0.08344997, 0.19987041, 0.22163449, 0.22161576, 0.0030572868, 0.10848695, -0.20529847, 0.08406883, -0.07130339, 0.09987656, 0.29774663, -0.08768785, 
0.15567012, -0.010313759)); + target1 += mul(g1, float4x4(-0.1260916, -0.071901485, -0.30566844, 0.19393384, -0.05133266, 0.07868844, -0.24817581, 0.055521224, 0.23277187, 0.16324161, 0.07110341, -0.042626668, 0.052509766, -0.014292625, -0.019677468, 0.041733738)); + target1 += mul(h1, float4x4(-0.04264262, -0.06528029, 0.0013520801, -0.02140956, 0.27304867, -0.029477939, -0.1859993, 0.01418354, 0.07256604, 0.14302284, 0.03309569, -0.15932149, 0.01500576, -0.053860538, 0.1131707, -0.06272606)); + target1 += mul(i1, float4x4(-0.0400483, -0.030188695, -0.108427785, 0.057873204, 0.42774406, -0.11353873, 0.110134825, 0.052191462, 0.00087113964, 0.040683694, 0.100507155, -0.16746339, -0.26971558, 0.06506685, -0.20950548, 0.040783025)); + target1 += mul(a2, float4x4(0.11394146, -0.10693933, 0.2377026, -0.03783948, -0.16496852, 0.046675198, -0.23396324, 0.05696911, -0.02770668, 0.12922443, -0.093586415, 0.102305606, 0.0040032533, -0.038440734, -0.0035825048, -0.22108772)); + target1 += mul(b2, float4x4(0.17577791, -0.024538597, -0.19877498, -0.14544973, 0.16614193, -0.3279891, 0.14678721, -0.16355143, -0.012954231, 0.20982395, 0.044255227, 0.087878115, 0.11289659, -0.26981032, -0.10789584, 0.24094439)); + target1 += mul(c2, float4x4(0.0041394173, -0.0937936, 0.15251775, 0.1026978, -0.01999847, -0.02865502, 0.16765144, -0.17490439, -0.016996933, 0.03891808, -0.01858217, -0.106255606, 0.027496144, -0.14120618, 0.023483312, -0.08291959)); + target1 += mul(d2, float4x4(0.060642462, -0.2957824, 0.33968493, -0.04501478, -0.14999421, -0.0067213452, -0.018236576, 0.01627547, -0.07771579, 0.0124932695, -0.11797959, -0.090979554, 0.0096479915, 0.021336472, -0.07794724, 0.030138575)); + target1 += mul(e2, float4x4(-0.091704845, -0.20800348, -0.22158638, 0.048748583, 0.15139692, -0.2832814, 0.09610812, 0.41077513, 0.0007106381, -0.14465855, 0.0056652213, 0.031696238, -0.03384328, 0.1940933, 0.19262145, 0.014331562)); + target1 += mul(f2, float4x4(-0.16637586, -0.22008398, 
0.102937706, 0.15260033, 0.039856806, -0.21082906, -0.19694057, 0.0712475, 0.015049883, 0.17320138, 0.06505415, -0.020279367, -0.018576574, 0.201407, -0.08108244, 0.04151909)); + target1 += mul(g2, float4x4(-0.12496581, 0.107817784, 0.10645319, 0.035113968, 0.0166165, 0.1316661, -0.045253787, -0.03863719, 0.09126881, 0.07553792, -0.029150097, -0.07629157, -0.17978054, -0.27080613, -0.028408276, -0.15366451)); + target1 += mul(h2, float4x4(0.081859134, -0.11599677, 0.027383117, 0.092724435, 0.059302155, 0.10008954, -0.12217131, 0.07471211, -0.20396213, -0.040741358, 0.118772194, -0.21725504, 0.099645875, 0.09691941, -0.07696025, -0.016445495)); + target1 += mul(i2, float4x4(-0.18712623, -0.14458412, 0.03693652, 0.014525352, -0.09607279, -0.19400409, 0.032149505, 0.07106094, 0.051436905, -0.07765334, 0.017043818, 0.17777587, 0.05274306, 0.0062209824, -0.080005355, 0.026041988)); + target1 += mul(na1, float4x4(-0.090594456, -0.041637532, 0.10346829, -0.09393943, 0.027663473, 0.20729685, -0.011156861, 0.021863503, 0.04781304, -0.039483577, -0.092933334, -0.25187445, 0.033062164, 0.010756357, -0.13035728, -0.008321023)); + target1 += mul(nb1, float4x4(0.07772912, 0.010776647, -0.018709056, 0.25634038, 0.00906326, 0.21411708, 0.122652486, 0.07725616, 0.15266491, 0.1274286, 0.10400329, 0.20354506, 0.013765407, -0.039089683, 0.25870228, -0.08919069)); + target1 += mul(nc1, float4x4(-0.14971368, 0.06935879, -0.089983195, 0.01406992, 0.16989979, -0.037809014, 0.07157283, -0.050660506, -0.032826405, 0.033794664, -0.0051332368, 0.089349195, 0.06263488, -0.07048108, 0.07263597, -0.11618368)); + target1 += mul(nd1, float4x4(0.013391823, -0.07888697, -0.13984044, -0.01241464, -0.06475807, 0.06978077, -0.20329754, 0.16602662, 0.013664227, 0.12317301, -0.10240692, -0.0657491, -0.31402445, -0.14472555, 0.1739024, 0.0005437834)); + target1 += mul(ne1, float4x4(0.16330495, 0.02644609, 0.23837087, -0.07734767, 0.12377497, -0.18478604, 0.35040903, -0.05262452, 0.049074646, 
-0.0077528385, 0.15370984, -0.22888668, 0.3603141, 0.29372314, -0.4432887, 0.20702155)); + target1 += mul(nf1, float4x4(-0.18785694, 0.21085343, -0.111042105, 0.0478716, -0.08214944, -0.0922987, 0.29570273, 0.025100114, 0.25403878, 0.01271447, 0.21851794, -0.1434596, -0.21153769, 0.023305666, -0.10386609, 0.043919638)); + target1 += mul(ng1, float4x4(-0.117247805, 0.013329102, 0.0313911, -0.08055777, -0.0053445757, -0.2886372, 0.07938673, -0.06659165, 0.20798062, 0.030106818, -0.04811631, 0.036332276, -0.057687126, 0.03813657, 0.035860628, -0.11273985)); + target1 += mul(nh1, float4x4(-0.0031557097, 0.027456097, -0.14444692, 0.08411739, 0.13466308, -0.13212901, -0.0034804344, 0.1464661, -0.21033211, 0.05913627, 0.10233881, 0.009844489, -0.15369488, -0.018978333, -0.07518442, -0.010549853)); + target1 += mul(ni1, float4x4(0.112989105, -0.011166866, -0.08277204, 0.046827227, -0.08067428, 0.13465053, -0.1656419, 0.07280515, 0.037523627, -0.050147127, -0.17731906, 0.1067486, 0.119732924, -0.102017604, 0.31421226, -0.14060387)); + target1 += mul(na2, float4x4(-0.1106223, 0.09229271, -0.09355422, -0.02413533, -0.096457504, -0.13282233, 0.022983741, -0.13534859, -0.0056585902, -0.07214356, 0.14617127, -0.13723095, 0.058078192, -0.1038417, -0.10452195, -0.18855028)); + target1 += mul(nb2, float4x4(0.16357008, 0.080841675, 0.1663936, 0.20815827, 0.03813903, 0.34158087, -0.012987109, 0.39152008, -0.027927356, -0.14332302, -0.012866622, -0.016149148, -0.08733816, 0.1960951, 0.19572765, -0.2710826)); + target1 += mul(nc2, float4x4(0.024827998, 0.24175219, 0.030659903, -0.22227505, 0.026898654, 0.009930298, 0.088392995, 0.32644793, -0.10351868, -0.08717382, 0.22931585, 0.05197704, 0.06534648, 0.13636068, 0.062107667, 0.024806283)); + target1 += mul(nd2, float4x4(-0.18550465, 0.062058095, -0.08620093, 0.20158216, -0.1460996, 0.14275469, -0.28057688, -0.11685651, -0.09627509, 0.09029933, 0.03669734, 0.1257313, -0.07974307, 0.020742215, -0.0039170664, 0.11340528)); + target1 += 
mul(ne2, float4x4(0.15225565, 0.171972, 0.13573253, 0.0056740018, -0.1667786, 0.06028638, -0.1255049, -0.23327217, -0.139949, 0.029957669, -0.16713464, 0.046236664, -0.05070503, 0.18714412, -0.20076098, 0.1672637)); + target1 += mul(nf2, float4x4(0.18468563, 0.07733334, 0.14463845, -0.10712052, 0.36213547, 0.29404843, 0.2110929, 0.14646721, -0.059985258, -0.2709805, 0.073061034, -0.039072156, 0.015898943, -0.17166951, 0.20194982, -0.04723745)); + target1 += mul(ng2, float4x4(-0.26353067, 0.050225407, -0.42643914, 0.06601958, -0.10513071, -0.1654714, 0.0593609, 0.027410276, -0.19465327, -0.13865606, 0.05579213, 0.07982532, -0.20893136, -0.008150932, 0.053529713, -0.0317475)); + target1 += mul(nh2, float4x4(-0.012075693, -0.27574313, 0.22184552, -0.117393926, -0.49310133, -0.13997443, -0.079180904, -0.053438634, -0.07552426, -0.045796394, -0.037434675, 0.24076645, -0.04395852, 0.10325762, -0.19867313, -0.070216134)); + target1 += mul(ni2, float4x4(-0.026107877, -0.030023552, -0.047810435, 0.20572239, 0.061861858, 0.1776161, -0.306099, 0.16332485, -0.1843373, 0.06758581, -0.23902373, -0.10575018, 0.03990962, -0.046113137, 0.14876197, -0.21280771)); + target1 += float4(-0.009669773, 0.036289547, -0.050454646, 0.051479716); + + float4 target2 = mul(a1, float4x4(-0.14542116, -0.15827142, -0.20811677, -0.103433, 0.19787271, 0.33990738, 0.17085013, -0.059132278, 0.013047369, -0.1687924, 0.06732661, -0.050968684, 0.09197164, -0.041265316, -0.108277336, -0.014430892)); + target2 += mul(b1, float4x4(-0.022837132, 0.20440012, -0.14266612, 0.019944299, 0.069084294, 0.3171199, -0.1521742, -0.35806596, 0.13581008, -0.13811131, 0.12219503, 0.17329764, -0.15100783, 0.0862648, 0.118227705, 0.18736814)); + target2 += mul(c1, float4x4(0.013604392, 0.11496102, -0.18734755, -0.047555517, 0.05297245, 0.006461213, 0.06247472, -0.0202791, 0.02329791, 0.11530998, -0.148774, 0.0965498, 0.1487269, 0.061629567, -0.22488646, -0.005393787)); + target2 += mul(d1, float4x4(-0.29286116, 0.11958281, 
-0.11193505, -0.17139061, -0.035151243, -0.2635945, 0.0002499315, -0.16346519, 0.23779829, 0.04454211, 0.21293561, 0.25617847, 0.12194803, -0.0017443774, -0.009216221, -0.034387548)); + target2 += mul(e1, float4x4(0.28791443, -0.25421545, -0.058626153, -0.1520494, -0.16808414, -0.39723453, -0.13199537, 0.056999452, -0.048155293, 0.38699663, -0.114719056, 0.001293743, -0.0959443, -0.08189709, 0.26921842, 0.061219636)); + target2 += mul(f1, float4x4(0.00781977, -0.07103863, -0.21942843, 0.2419546, 0.20016691, -0.28697264, -0.034715973, -0.03381459, -0.028126812, 0.046806023, -0.14423183, -0.13472253, 0.009225362, -0.086190686, 0.0041205613, 0.08953202)); + target2 += mul(g1, float4x4(-0.04926224, -0.099740155, -0.088695474, 0.09950333, -0.06495916, 0.20126842, -0.0062843356, -0.034764495, -0.10808971, -0.19946553, 0.075991094, 0.14746219, 0.08247818, 0.07382381, -0.056908615, -0.026823666)); + target2 += mul(h1, float4x4(-0.04837408, 0.12605472, -0.23957102, -0.14252385, -0.046534102, -0.07511751, -0.21040416, 0.2064639, -0.006026243, -0.25005546, -0.063780144, 0.076840036, -0.07484346, 0.017368162, 0.04657373, -0.022188455)); + target2 += mul(i1, float4x4(0.04545079, -0.002226373, -0.11695467, 0.12954631, 0.054903183, 0.15162702, -0.19222596, 0.05351421, -0.079599276, -0.036238387, 0.1362261, 0.037431743, -0.0015106505, 0.18739921, 0.122365154, -0.05871144)); + target2 += mul(a2, float4x4(-0.005558987, -0.13553315, -0.006372213, 0.06633917, -0.22141413, -0.15780807, 0.057122614, -0.057320844, -0.06306763, 0.19112623, -0.041758966, 0.03555483, -0.005718873, 0.009167371, 0.050909385, -0.14599234)); + target2 += mul(b2, float4x4(0.18175003, 0.10442485, 0.052994236, -0.4001252, -0.08328538, 0.06380226, -0.055015627, 0.010929493, -0.22888647, -0.033181675, -0.07570874, 0.07933599, -0.07894686, 0.12202901, 0.13679314, -0.054344065)); + target2 += mul(c2, float4x4(0.030145945, -0.06121175, -0.08550973, 0.10082535, 0.07198805, 0.21414264, -0.25636044, 0.028803539, 
0.043738026, -0.0367658, 0.27998537, -0.06274612, -0.22862338, 0.002624325, 0.28519824, 0.18540645)); + target2 += mul(d2, float4x4(-0.012136538, -0.07059324, 0.018098673, 0.12078888, -0.087637, 0.041642863, 0.034997553, -0.16741107, 0.04701011, -0.004160269, 0.122639626, 0.0043271836, 0.011551197, -0.16421974, -0.102481335, 0.014233497)); + target2 += mul(e2, float4x4(-0.37945676, 0.25232047, -0.03707734, -0.1985225, -0.11536396, 0.22039749, -0.21809638, -0.10596801, -0.17211124, -0.2035486, 0.011822896, 0.27510995, -0.105182275, 0.022503568, -0.0063389307, -0.071560584)); + target2 += mul(f2, float4x4(-0.16101715, -0.034247126, 0.16626042, 0.031131435, 0.03048031, -0.105447404, -0.05728527, -0.14518815, -0.019103229, -0.15152888, -0.119154684, 0.028724093, 0.05836196, -0.35943082, -0.016481897, -0.0437348)); + target2 += mul(g2, float4x4(-0.07719413, -0.33214888, -0.0541927, 0.16506542, -0.032792456, 0.016834807, 0.1724155, 0.073768586, 0.002303886, -0.001382793, -0.0562648, -0.10167158, -0.19101655, 0.052783452, -0.1422853, 0.09653729)); + target2 += mul(h2, float4x4(-0.30030164, 0.11637444, -0.23238538, -0.27238008, -0.077208534, -0.027645003, 0.10369907, 0.20162316, -0.14428844, 0.1766293, 0.024419712, 0.11301171, 0.07772854, 0.18613201, 0.20721672, -0.1751799)); + target2 += mul(i2, float4x4(-0.1026615, -0.12484944, 0.15386428, 0.038676128, -0.119472496, -0.032417197, -0.14208497, -0.05254358, -0.0035079278, -0.011276316, 0.043117497, -0.010022288, 0.031624593, 0.014969992, -0.031410277, 0.15284787)); + target2 += mul(na1, float4x4(0.018149922, -0.05906194, 0.054767277, 0.008161979, -0.076949194, 0.040888708, -0.006419542, -0.12897012, -0.0028229658, 0.20937827, 0.02741711, -0.04013348, -0.12731804, 0.008064522, 0.002870103, 0.027690327)); + target2 += mul(nb1, float4x4(0.023197446, -0.08888926, 0.15531142, 0.13745947, 0.054352283, -0.121785395, 0.16237587, 0.023567237, -0.36160588, 0.30499592, -0.033180915, -0.1515843, 0.04251452, -0.17903805, 0.03235283, 
-0.08062386)); + target2 += mul(nc1, float4x4(-0.0072868476, -0.2010616, 0.13061914, 0.12846659, 0.11725315, 0.14589547, -0.05373261, -0.081606135, -0.07010131, -0.025378224, 0.10265872, 0.18658938, -0.12165338, 0.036297683, 0.03925332, 0.16576236)); + target2 += mul(nd1, float4x4(0.10300252, -0.11548347, -0.08691649, -0.014866044, -0.3213804, 0.47206497, -0.16032113, 0.026284516, 0.046302956, -0.052474245, -0.025335522, -0.10957576, -0.16872157, 0.19049212, -0.023881195, 0.061396897)); + target2 += mul(ne1, float4x4(-0.16202278, 0.52128345, -0.2601511, 0.06116799, -0.21123995, 0.39389637, -0.350544, -0.16157438, -0.02823116, -0.39056876, -0.14267299, 0.03262984, 0.342303, -0.20556125, -0.0019219286, -0.1824844)); + target2 += mul(nf1, float4x4(0.23399737, -0.0912646, 0.11152403, -0.20945886, -0.053451832, -0.09786892, -0.059099484, 0.18103573, -0.117154315, -0.18342866, 0.12650815, 0.0067340015, -0.037984423, 0.17667364, 0.071636364, -0.011689163)); + target2 += mul(ng1, float4x4(-0.099510275, -0.0925438, -0.009136904, -0.03774997, -0.13348748, 0.3605135, -0.078298144, -0.14712195, 0.22566219, 0.18659295, 0.05614545, 0.10792911, -0.12477693, -0.03587624, 0.08050775, -0.054740936)); + target2 += mul(nh1, float4x4(0.10312337, -0.063681684, 0.16496794, 0.09038492, -0.08903926, 0.41163155, -0.013669214, -0.21472235, -0.054991595, 0.0033639956, 0.18160143, 0.17240305, -0.039428882, 0.17087695, -0.1729076, 0.09871825)); + target2 += mul(ni1, float4x4(-0.13123736, 0.0802573, 0.077981554, -0.101768315, 0.089998, -0.13781744, 0.122858986, 0.054121554, -0.02640825, 0.13577555, -0.037485655, -0.04179625, 0.000106130996, -0.100183845, 0.00046665114, 0.21791616)); + target2 += mul(na2, float4x4(0.011894387, -0.030088445, 0.025817253, 0.08193235, 0.109322436, 0.10855583, -0.19661167, -0.09405307, 0.2073779, -0.33972177, 0.048635002, -0.14883177, 0.056954246, 0.3953476, 0.18765114, -0.014010224)); + target2 += mul(nb2, float4x4(-0.22594279, -0.014942035, -0.1519647, 0.25367293, 
0.16330296, 0.03317176, -0.32148597, -0.46503916, 0.19944623, -0.26229686, 0.019909514, -0.059794176, 0.12912126, 0.044948537, -0.08649492, 0.08024645)); + target2 += mul(nc2, float4x4(-0.022943841, -0.068013534, 0.11032515, 0.011685601, 0.020096298, -0.3285243, 0.08196111, -0.089537136, -0.03976742, -0.1315977, -0.36306036, 0.24678081, 0.22115967, -0.017472323, -0.19451386, -0.035218123)); + target2 += mul(nd2, float4x4(-0.020891193, -0.12721714, -0.15030408, 0.026523203, -0.12413139, -0.11235275, -0.21476477, -0.11326953, 0.028815055, -0.18552732, -0.0076828003, -0.14679903, 0.020509586, -0.18695217, 0.06696879, 0.103938386)); + target2 += mul(ne2, float4x4(0.057521313, 0.28509304, -0.2525733, 0.16745082, -0.26614547, 0.18545172, -0.27140215, 0.018639714, 0.19730581, 0.1659491, -0.058363054, -0.4048628, 0.024913948, -0.44124457, 0.13872208, -0.0371103)); + target2 += mul(nf2, float4x4(0.100904405, 0.06700356, -0.035322092, 0.21781014, 0.018047005, -0.21737386, -0.3734802, 0.13506944, 0.012760691, 0.06620756, -0.0253398, 0.0030280363, -0.044015452, -0.055860534, -0.3547194, -0.04230283)); + target2 += mul(ng2, float4x4(-0.19012743, -0.34408915, 0.18940191, 0.13152952, 0.107553795, -0.00694412, -0.07930157, -0.30964044, 0.034710668, -0.031806916, 0.019838978, 0.017044948, 0.110688254, -0.0029772928, 0.09414367, -0.10760175)); + target2 += mul(nh2, float4x4(-0.05745392, 0.29022983, 0.014998233, 0.27365527, 0.08169933, 0.0734232, -0.09404464, -0.26870936, 0.21171738, -0.19529793, -0.064401075, -0.18972695, -0.08024953, -0.027122354, -0.11661348, 0.010131282)); + target2 += mul(ni2, float4x4(0.07599435, -0.06851123, 0.06258365, 0.10296892, 0.15556085, -0.041609086, -0.11303363, 0.07082365, 0.013949174, -0.087201476, -0.0855705, -0.12979257, 0.04048528, 0.4211556, 0.04118289, -0.22093314)); + target2 += float4(0.07789114, 0.0024746545, 0.1891165, -0.0023716448); + + float3 target3 = tex6.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.050153337, 
0.012563414, 0.014994658, 0.10498867, 0.07151875, 0.06761489, 0.061650798, -0.035183728, -0.050987806, 0.0017240314, 0.041055307, 0.020366805)); + target3 += mul(e2, float4x3(0.110105395, -0.044468552, -0.072567016, -0.049364448, -0.015713394, -0.021540897, -0.01636263, -0.084110685, -0.08281401, -0.08940374, 0.047863875, 0.051104594)); + target3 += mul(ne1, float4x3(-0.081597924, 0.002422661, 0.01143175, -0.07504751, -0.09938017, -0.1063178, -0.10390281, 0.0262197, 0.060155805, -0.24289346, -0.0054961476, 0.045964316)); + target3 += mul(ne2, float4x3(-0.1829316, 0.047622137, 0.07963877, 0.048703995, -0.0026299425, -0.003712008, 0.029338706, 0.096882835, 0.102083966, 0.078538164, -0.07247937, -0.06820231)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex5[gxy] = float4(target3, 1); +} + +//!PASS 6 +//!DESC Conv-4x3x3x16 +//!IN tex1, tex2, tex5 +//!OUT tex3, tex4, tex6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass6(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + 
float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(0.10883355, -0.14958352, 0.026701333, 0.090302855, 0.033934478, 0.120340124, 0.027125617, -0.16792692, -0.075757094, 0.28692973, 0.013230067, -0.040618937, 0.087148145, -0.05985753, -0.06352023, -0.05775848)); + target1 += mul(b1, float4x4(-0.18206549, -0.10363482, 0.097648725, -0.08801144, 0.31633568, 0.058347676, -0.009121898, 0.02594872, 0.14757825, 0.4730546, -0.008518203, -0.3090668, -0.004052835, -0.14166127, -0.010156037, 0.21191326)); + target1 += mul(c1, float4x4(0.05735183, 0.039180398, -0.12357178, 0.04830351, 0.120369986, -0.052775342, 0.005902798, 0.07695394, 0.00602021, 
0.16758691, 0.10287989, -0.1718468, -0.1319741, 0.16932078, -0.2055026, -0.31820264)); + target1 += mul(d1, float4x4(0.05427556, -0.28392607, 0.08579091, -0.0015861926, 0.062348455, -0.27778792, -0.07450379, 0.01616914, -0.012357131, -0.056992117, -0.1896176, 0.018156245, 0.06499259, -0.076558664, 0.10341699, -0.08993959)); + target1 += mul(e1, float4x4(-0.05741742, -0.05414434, 0.18006511, 0.09840777, -0.11849741, 0.40419933, 0.21349974, 0.40268886, 0.23218039, -0.0680356, -0.3130592, -0.21271054, 0.13776754, 0.19114101, 0.17373541, 0.43457666)); + target1 += mul(f1, float4x4(-0.060757063, 0.11339545, -0.042958036, -0.06483378, -0.06681766, -0.056395415, 0.037868995, 0.033861663, -0.1041215, 0.0046828864, 0.14360638, 0.087886184, -0.26808187, 0.19876598, -0.05276215, -0.07073776)); + target1 += mul(g1, float4x4(-0.24029991, -0.14217372, -0.011767948, 0.011623913, 0.33820602, -0.24501325, -0.11444902, 0.14536968, 0.16780593, 0.0065867775, -0.074971735, 0.021472024, -0.10853042, 0.09527126, 0.009436061, -0.09688826)); + target1 += mul(h1, float4x4(-0.31893802, -0.0016892607, -0.105592966, -0.116694786, -0.007851739, 0.1429722, 0.0741952, 0.050125953, 0.07185179, 0.1900389, 0.030889044, 0.15422693, 0.12550323, 0.3556344, 0.108276874, -0.099125646)); + target1 += mul(i1, float4x4(-0.33620578, -0.11113713, -0.15881014, 0.028243937, -0.12028756, -0.028566968, -0.002682634, -0.15635195, -0.06869284, -0.03309234, 0.03086361, 0.050773233, -0.08939835, 0.15237434, -0.024076303, -0.13092752)); + target1 += mul(a2, float4x4(-0.31200737, 0.32207087, -0.068700634, -0.39202076, 0.0676771, 0.083766654, -0.05696634, 0.03088338, 0.046761762, 0.09732023, 0.030844063, -0.03369749, -0.12664944, -0.029924957, 0.10551989, 0.086157694)); + target1 += mul(b2, float4x4(-0.1919761, 0.17179352, -0.025805056, -0.05570367, -0.16736336, 0.07430868, -0.13228212, 0.10702857, -0.09723214, 0.1884809, 0.09422538, -0.16902041, -0.1964137, 0.17877853, 0.17453954, -0.11339361)); + target1 += mul(c2, 
float4x4(0.11865004, 0.013131073, 0.17317963, -0.2077911, -0.1116894, 0.09672745, -0.023348883, -0.1176519, 0.15893579, 0.22941695, 0.18798698, 0.059098385, 0.09498779, 0.10118143, 0.08737761, -0.016268898)); + target1 += mul(d2, float4x4(-0.025380889, 0.17163627, -0.014800655, 0.12669696, 0.050048903, -0.06513837, 0.020915661, 0.2144372, -0.17799327, 0.0068409992, 0.06751171, -0.16618991, 0.14637277, 0.010591964, -0.15909241, 0.02660789)); + target1 += mul(e2, float4x4(0.3178319, 0.15036377, -0.03386948, 0.13883169, -0.33842105, 0.061425313, -0.04195804, 0.22558802, 0.2250625, 0.060225345, -0.08467863, 0.0014776831, 0.080328, 0.03221249, 0.20838667, 0.11489719)); + target1 += mul(f2, float4x4(-0.0013924981, 0.28233197, -0.17997956, -0.10959627, -0.16253087, 0.016549526, -0.1571556, 0.017017027, -0.14697123, 0.0869202, 0.2104898, -0.15658243, 0.13424201, -0.022636503, -0.09512045, 0.0927298)); + target1 += mul(g2, float4x4(-0.038486905, -0.19215351, -0.2446516, -0.02958912, 0.06899297, 0.028667469, -0.05537665, 0.066711955, -0.0017354499, -0.07466053, 0.028587297, -0.042017035, 0.023596823, 0.0067433366, -0.14685915, 0.13400853)); + target1 += mul(h2, float4x4(0.0573442, 0.1424536, 0.19606829, 0.07141616, -0.032276712, 0.20030099, 0.16644277, 0.10393295, 0.27240822, 0.0071844175, -0.023368603, -0.14067268, -0.20310283, 0.039528254, 0.103837095, 0.08236034)); + target1 += mul(i2, float4x4(0.15616669, 0.3495403, -0.05678421, -0.069600284, -0.07361787, 0.079501756, 0.009530261, -0.032385882, 0.029831208, -0.095407076, 0.010261287, 0.15250465, -0.04868275, 0.058579214, 0.03779718, -0.10810775)); + target1 += mul(na1, float4x4(0.06492073, 0.018667994, -0.004712761, -0.032692235, 0.04027288, -0.114499666, -0.04327484, 0.13778907, -0.09373396, -0.08822919, 0.04796151, -0.057756703, -0.26161298, 0.07182931, 0.12998815, -0.14389744)); + target1 += mul(nb1, float4x4(0.19001032, 0.13091461, -0.2551175, 0.013365716, -0.031779066, 0.002531366, -0.13807543, -0.14165778, 
-0.2701911, -0.0890182, 0.34704998, -0.008494185, 0.16179956, -0.060182545, 0.060827415, -0.17249492)); + target1 += mul(nc1, float4x4(0.10665868, 0.15999752, -0.042796712, -0.14010513, -0.014244899, 0.017433831, 0.053657144, -0.0965679, 0.23623326, 0.0690172, 0.1290121, -0.025523739, 0.122357905, -0.18172716, 0.02829383, 0.10042929)); + target1 += mul(nd1, float4x4(-0.09273112, 0.09466892, -0.009225705, 0.16772579, 0.0813042, -0.16461512, 0.038097944, 0.19834967, -0.033650465, -0.12888893, 0.1414859, -0.021587005, -0.0047441716, 0.08880282, 0.020621201, 0.065779164)); + target1 += mul(ne1, float4x4(0.0051817205, 0.20322648, -0.077459775, 0.07461627, 0.1817634, -0.5371515, -0.29336745, -0.57652086, 0.035826538, 0.41058993, 0.21512514, -0.041881148, -0.2490056, -0.07172767, 0.20821427, -0.69866294)); + target1 += mul(nf1, float4x4(0.18961228, 0.027452804, -0.0075194626, -0.029665018, 0.28770384, -0.099777386, -0.12160496, 0.07690297, 0.30273837, 0.026466522, 0.18100439, -0.09078488, 0.2035407, -0.062081084, 0.06744994, -0.07512911)); + target1 += mul(ng1, float4x4(0.008473044, 0.07501521, -0.11242355, -0.039451122, -0.21818535, -0.07779562, 0.13194147, 0.084983595, 0.0770609, -0.034488454, 0.08823556, -0.07168295, 0.041894365, 0.0789253, 0.06191209, 0.013991105)); + target1 += mul(nh1, float4x4(0.10582237, 0.1514222, 0.10751824, 0.08231926, 0.23913008, -0.2673503, 0.036170945, 0.31463087, 0.026397424, -0.26629624, -0.07428361, -0.077513516, 0.0768238, -0.026638538, 0.12589583, -0.11521212)); + target1 += mul(ni1, float4x4(0.30389515, 0.18963532, 0.023015842, -0.10240883, 0.045651495, -0.036785256, -0.13346411, 0.16431254, -0.030950911, -0.03381929, 0.09413111, 0.03924852, 0.11044091, -0.10149653, 0.14114548, 0.07801978)); + target1 += mul(na2, float4x4(0.029622428, 0.14528686, -0.034057826, 0.010664312, 0.059213262, -0.29354423, -0.08448559, 0.10569036, -0.02988314, -0.016480735, 0.042203777, -0.028342744, 0.36807576, 0.09301971, 0.123721026, 0.07806503)); + target1 
+= mul(nb2, float4x4(0.04849538, -0.09201287, 0.10069803, -0.031749677, 0.18774022, -0.27789372, 0.05288653, 0.08097265, 0.006918896, -0.060978457, -0.113319606, 0.008844536, 0.021804892, -0.0011744015, -0.35720357, -0.24996938)); + target1 += mul(nc2, float4x4(-0.07147501, -0.09339197, 0.16154395, 0.3372506, -0.0004858638, -0.056553435, -0.12463908, -0.0047342298, -0.009141984, -0.13796125, -0.14035304, -0.104403175, -0.07054226, 0.12142519, -0.24971877, -0.1914648)); + target1 += mul(nd2, float4x4(-0.008194284, -0.027617034, 0.004994261, -0.07672895, 0.25697777, -0.18313397, 0.03266311, -0.029157834, 0.010476624, 0.12394092, -0.059660904, 0.08561672, -0.0008583816, -0.044442356, 0.28336492, 0.065344445)); + target1 += mul(ne2, float4x4(-0.3570137, -0.06802815, -0.10298613, -0.21256869, 0.3025278, -0.263425, 0.13547331, 0.038517762, 0.14951234, -0.16869017, 0.03293678, 0.21897063, -0.14688788, 0.21619378, -0.27550143, 0.048003722)); + target1 += mul(nf2, float4x4(0.15607022, -0.111073844, 0.2733694, 0.05423378, 0.25116092, -0.17350473, 0.13460433, 0.09602139, 0.17372625, -0.0024815476, -0.30154657, 0.0062206364, -0.0051755225, 0.04985103, -0.06310478, -0.30450678)); + target1 += mul(ng2, float4x4(0.057571005, -0.019051064, 0.054884393, 0.03993782, 2.6782007e-05, -0.05726912, 0.067192145, -0.08955987, -0.11937056, 0.15837386, -0.011670469, -0.06299701, -0.014917928, 0.23921679, 0.0054613873, -0.23099245)); + target1 += mul(nh2, float4x4(-0.035849575, -0.06785954, -0.15053692, 0.011964653, 0.1975448, -0.1633047, -0.024539666, 0.03170174, -0.12585635, -0.021171011, 0.15862562, 0.10296358, 0.3114039, 0.10010659, -0.09519227, -0.12945092)); + target1 += mul(ni2, float4x4(0.044433746, -0.058466546, -0.13258536, -0.033972915, 0.0037206819, -0.057343487, 0.13798106, 0.044445634, -0.22623023, 0.2408462, 0.048287082, -0.30717465, -0.13402344, 0.20024839, -0.026932377, -0.034217034)); + target1 += float4(-0.05988374, -0.23198523, -0.058251306, -0.038808554); + + float4 
target2 = mul(a1, float4x4(0.045249436, -0.040327657, -0.2667367, 0.0913868, 0.14961123, 0.07253207, 0.29162952, -0.11320944, 0.017569833, 0.012350104, 0.22532712, 0.025312115, -0.12193993, 0.037391737, 0.03220835, 0.12102545)); + target2 += mul(b1, float4x4(-0.020587588, -0.07043244, -0.28093454, 0.18336722, 0.08153308, -0.05914772, -0.15255487, 0.079236075, -0.4269835, -0.11470208, -0.19043571, 0.2723162, 0.0066251885, -0.17115718, 0.022036036, 0.07349558)); + target2 += mul(c1, float4x4(-0.09441315, 0.042170826, 0.071251415, -0.13891962, 0.10236482, 0.05356262, 0.0291025, 0.063867815, -0.14530063, -0.08727925, -0.0048300857, 0.06766869, -0.3481536, -0.10943503, 0.014951926, 0.11993114)); + target2 += mul(d1, float4x4(0.13420522, 0.095721036, -0.1756104, -0.09906728, 0.09808904, -0.27402034, -0.102161326, 0.40162942, 0.13465238, 0.20237032, 0.3192343, -0.061512157, -0.20711629, -0.09659007, 0.06838548, 0.30256763)); + target2 += mul(e1, float4x4(0.025805298, -0.0322599, 0.23653145, -0.2760735, 0.11291006, -0.10836205, 0.20742846, 0.06974535, -0.4191803, -0.10882523, 0.038603242, 0.22662747, -0.08845715, -0.26151156, -0.16670766, 0.008536192)); + target2 += mul(f1, float4x4(-0.085842185, -0.21239999, -0.032774646, 0.088163696, 0.038300447, -0.09510875, 0.10113864, -0.14712982, 0.14264707, -0.10895432, 0.03051617, -0.06791873, -0.35589013, -0.12884575, -0.09460007, -0.0879575)); + target2 += mul(g1, float4x4(0.19235751, -0.109611385, -0.037397474, -0.26632717, 0.07878826, 0.19749992, 0.0035685285, 0.11793927, 0.019899402, 0.085741036, 0.08433813, -0.018344546, -0.0901484, 0.08221562, 0.12735383, 0.12801875)); + target2 += mul(h1, float4x4(0.19123435, 0.007882246, -0.018564796, -0.09904253, 0.28052533, 0.6360808, 0.25001726, -0.30590564, 0.07646281, -0.34298185, -0.33293694, -0.036753535, 0.18719083, 0.22131144, -0.1420962, -0.0014709529)); + target2 += mul(i1, float4x4(0.23060241, -0.14145076, -0.113213465, 0.037221998, 0.22163334, 0.18520229, 0.2961799, 
-0.063605964, 0.022606356, 0.043340076, -0.3233993, -0.075055614, -0.0038865958, 0.19558622, -0.018503085, -0.22932632)); + target2 += mul(a2, float4x4(0.11712158, -0.03590364, 0.38039652, -0.019910801, 0.13338004, -0.07078425, 0.09404417, -0.27607328, -0.02205519, -0.013522961, 0.2924021, -0.16088538, -0.034280356, -0.063614614, -0.061583273, -0.22479968)); + target2 += mul(b2, float4x4(-0.05624079, 0.32659104, 0.47335497, -0.14091404, 0.14739423, -0.07122778, -0.009384643, -0.058900848, 0.06260307, -0.17574102, 0.3538743, 0.2842822, -0.18150197, 0.26806462, 0.24673693, 0.19710627)); + target2 += mul(c2, float4x4(-0.24837571, -0.01663848, -0.13093965, 0.30109972, -0.09680959, 0.074526474, 0.024111765, -0.012781737, -0.08591349, -0.100348584, 0.02363011, -0.02687084, -0.27630556, 0.14074354, -0.016993485, 0.084373675)); + target2 += mul(d2, float4x4(0.1543391, -0.2008408, -0.21885285, 0.2320177, 0.06669948, -0.05171086, -0.25833863, -0.14085051, -0.035878573, -0.1632403, 0.09782713, 0.22973235, -0.14022017, -0.018347954, -0.29652777, 0.10912002)); + target2 += mul(e2, float4x4(-0.050962634, -0.040519282, -0.04381614, 0.084133334, 0.21222316, -0.091010064, 0.13157965, -0.21375372, -0.021148674, -0.044127557, -0.11400533, 0.097688414, 0.31571037, -0.05167655, 0.27606225, 0.12169133)); + target2 += mul(f2, float4x4(-0.1329087, 0.14291021, 0.043337896, -0.25970098, -0.11379552, -0.040157612, 0.08379851, -0.24104865, 0.1593102, -0.031879216, -0.004603848, -0.019003935, -0.24769545, -0.17577063, 0.16019398, 0.04640235)); + target2 += mul(g2, float4x4(-0.11615644, 0.12189521, 0.12919527, -0.104224406, -0.10143574, 0.14024515, -0.02759362, -0.1467619, 0.09028311, -0.06510291, 0.061612967, 0.10227729, -0.08785846, 0.06464871, -0.05048917, 0.09055746)); + target2 += mul(h2, float4x4(0.34443164, 0.013906371, -0.0595573, 0.09354196, 0.12184454, -0.02698316, -0.06208632, -0.11266858, 0.004904335, -0.33987018, -0.2494041, 0.127125, 0.040493876, 0.0280356, -0.037431944, 
0.05823802)); + target2 += mul(i2, float4x4(-0.1762869, -0.20683959, -0.37788594, -0.1244979, -0.17202286, -0.038234763, 0.015924744, -0.014006752, 0.07097758, -0.25219876, -0.3164728, 0.022413896, -0.41423917, -0.03191542, 0.009464804, 0.0770316)); + target2 += mul(na1, float4x4(0.12442388, 0.031095076, 0.18799834, -0.18449762, -0.11995044, 0.11634828, -0.0055850362, 0.08558657, -0.025694892, -0.2854381, -0.32876188, 0.14690274, -0.1835963, -0.1786755, -0.44678628, 0.1678422)); + target2 += mul(nb1, float4x4(0.031241562, -0.1265462, 0.081369035, -0.1184643, 0.0010021052, -0.10810683, -0.039572187, 0.13850863, -0.010703417, -0.057981443, 0.30309856, 0.13869847, -0.16935349, 0.16969836, 0.045642667, 0.26460654)); + target2 += mul(nc1, float4x4(0.28779998, 0.04767888, -0.011856489, 0.114210494, 0.034624737, 0.19084676, -0.02740287, 0.035041407, -0.049002927, 0.10928203, 0.17362499, -0.1280889, 0.00077811617, -0.17594084, -0.18379052, 0.22303762)); + target2 += mul(nd1, float4x4(0.0008487252, -0.060438234, 0.109334275, -0.18768874, 0.13844973, 0.09226474, 0.18361697, -0.19385563, -0.29241335, -0.1033556, -0.3289991, 0.10027422, -0.09454755, -0.22817631, -0.2964217, -0.19499257)); + target2 += mul(ne1, float4x4(-0.057920385, 0.06342629, -0.048577324, 0.15952215, -0.061343953, 0.16471362, 0.1501856, 0.027373426, 0.01837245, -0.0732048, 0.09776471, 0.14817989, -0.112215854, 0.109101914, 0.058316242, 0.29969788)); + target2 += mul(nf1, float4x4(-0.12411656, -0.033170763, -0.08715826, 0.110862456, 0.1871076, 0.14550175, 0.23373431, 0.19281025, -0.37016305, -0.11924462, 0.026793748, 0.092801645, 0.04318573, 0.20969667, -0.39267823, 0.1938874)); + target2 += mul(ng1, float4x4(-0.15932916, 0.22217506, 0.007901788, -0.04037383, 0.09095982, -0.043115042, 0.098845564, -0.073432215, -0.14535685, 0.11504512, -0.07950504, -0.010718905, -0.050012022, -0.13089752, -0.3323894, -0.005423676)); + target2 += mul(nh1, float4x4(0.007320675, 0.21108273, 0.20758918, -0.04005568, -0.13234317, 
-0.15708306, 0.41804615, -0.09720499, -0.09623786, 0.2441289, 0.33276868, 0.17716111, -0.45670444, -0.026252905, -0.01958701, 0.24028622)); + target2 += mul(ni1, float4x4(-0.14936383, -0.023504466, -0.028479185, -0.053541556, -0.060263615, -0.087681144, 0.2435555, 0.08470686, -0.17713271, -0.2303349, 0.09337386, 0.039068084, -0.16263027, 0.034289114, 0.16604292, 0.10550447)); + target2 += mul(na2, float4x4(-0.16556105, 0.12211341, -0.0036831333, 0.13802956, 0.065256506, 0.03395266, -0.2296282, 0.21284704, 0.017770419, -0.1722762, -0.1741687, 0.10708671, 0.331979, 0.11924846, -0.09410989, -0.123036265)); + target2 += mul(nb2, float4x4(-0.096586555, -0.30475244, -0.24065268, 0.053860847, 0.19413544, 0.05542323, -0.06327867, 0.012265184, -0.08913778, 0.13779551, -0.099127166, 0.007493773, -0.07125554, -0.0011684593, -0.003005287, -0.094847135)); + target2 += mul(nc2, float4x4(0.21711998, -0.13086027, 0.07825239, -0.21121782, 0.055840425, -0.0019166623, -0.05480048, 0.019817038, 0.007626905, 0.14126389, 0.04515749, -0.029315706, 0.18555732, -0.114861906, -0.21993469, 0.031716693)); + target2 += mul(nd2, float4x4(-0.06716353, -0.11964145, 0.09711908, -0.061763637, -0.0948045, 0.14189975, 0.2810092, 0.2505306, 0.08872909, 0.086749084, -0.17528322, -0.048835423, 0.124959685, -0.12602286, 0.065660164, -0.06783225)); + target2 += mul(ne2, float4x4(-0.23066516, -0.0068310793, -0.0021060852, 0.09136854, 0.09919007, 0.2259628, -0.026603302, 0.1367709, -0.07940821, 0.14962214, 0.00652088, -0.3114987, -0.18900892, -0.20450105, 0.09329685, -0.19482759)); + target2 += mul(nf2, float4x4(0.095197074, 0.06346413, -0.05207484, -0.086378016, 0.19733003, 0.1448027, -0.02410627, 0.024829205, -0.20296144, -0.09551166, 0.022987023, 0.09035918, -0.15824226, 0.1350293, -0.06641893, 0.11739518)); + target2 += mul(ng2, float4x4(0.08381447, -0.13171835, -0.030271608, 0.14649504, 0.0007350431, 0.15303299, -0.001797464, 0.30294403, -0.07635094, -0.102541, -0.12176348, 0.053775523, 0.08070882, 
-0.035387367, -0.09521456, 0.22530125)); + target2 += mul(nh2, float4x4(-0.04650126, 0.12029137, 0.009236626, -0.1371486, -0.119391896, 0.20490645, 0.17123316, -0.015455403, 0.05842872, 0.14354227, 0.37586045, 0.054906923, 0.062954046, 0.07285954, 0.12260665, -0.08675996)); + target2 += mul(ni2, float4x4(0.22510684, -0.010087092, 0.005660375, 0.05069907, 0.10297958, 0.1411009, 0.09538159, 0.00922383, -0.31313825, -0.06449414, 0.109746836, 0.30148697, 0.35861742, -0.045380104, 0.09908991, -0.1933117)); + target2 += float4(0.012253057, 0.13434875, -0.10318777, -0.074252345); + + float3 target3 = tex5.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(-0.02302231, -0.035528302, -0.030674051, 0.029780716, 0.031591274, 0.045867007, 0.01335752, 0.037001595, 0.04351411, -0.11126892, 0.038589563, 0.06444906)); + target3 += mul(e2, float4x3(0.0047764573, -0.063372664, -0.065609895, 0.0478139, 0.025694113, 0.025097322, -0.1019169, 0.029989049, 0.050038517, 0.07504127, -0.017047737, -0.026222635)); + target3 += mul(ne1, float4x3(0.0024485083, 0.00640911, 0.008171829, -0.014622121, -0.06078096, -0.0800138, -0.0062360805, -0.014344496, -0.021332184, 0.117842786, -0.103745885, -0.13756834)); + target3 += mul(ne2, float4x3(-0.01942775, 0.08720701, 0.104858086, -0.05545872, -0.041375194, -0.035368554, 0.080331706, -0.021207837, -0.043905254, -0.12515299, 3.445463e-05, 0.018742712)); + + tex3[gxy] = target1; + tex4[gxy] = target2; + tex6[gxy] = float4(target3, 1); +} + +//!PASS 7 +//!DESC Conv-4x3x3x16 +//!IN tex3, tex4, tex6 +//!OUT tex1, tex2, tex5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass7(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 
-inputPt.y), 0); + float4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex3.SampleLevel(sam, pos, 0); + float4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex4.SampleLevel(sam, pos, 0); + float4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = 
max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(-0.04279202, -0.01698567, 0.18318103, -0.18172316, 0.04757184, 0.07232096, -0.054900512, 0.11956132, 0.048900753, 0.0006714882, -0.09200336, 0.16104606, 0.38940707, 0.2754208, -0.12735553, -0.30017206)); + target1 += mul(b1, float4x4(0.2469705, 0.103162065, 0.10321547, -0.1292231, 0.3013039, -0.018333653, -0.19897339, 0.122247696, 0.14719778, 0.003909129, -0.19585025, 0.03670547, -0.2132921, 0.33642963, 0.17569672, 0.07414473)); + target1 += mul(c1, float4x4(0.015335451, 0.15161209, 0.0447609, -0.042884503, 0.14257035, 0.07775234, -0.2064044, 0.03842874, -0.1660166, -0.19817057, -0.10740875, -0.123968095, 0.14156081, -0.2197906, -0.08622206, 0.4185408)); + target1 += mul(d1, float4x4(-0.33392438, -0.12483512, -0.062084857, 0.16336447, 0.09862199, 0.1659862, 0.034751434, -0.11968266, -0.017155796, 0.21001562, -0.053017724, 0.10386376, 0.07066254, 0.50014263, 0.31065208, -0.026068505)); + target1 += mul(e1, float4x4(-0.34320992, -0.030056434, -0.24118581, -0.024320357, 0.327435, -0.036838267, -0.19433706, 0.24561343, -0.1489437, 0.225435, 0.18421564, 0.021147838, 0.264245, 0.16846146, -0.51724315, 0.039252095)); + target1 += mul(f1, float4x4(-0.25945047, 0.12058094, 0.2889452, -0.061687145, -0.10309796, -0.19476385, -0.10393912, 0.16837607, -0.05198191, -0.036113493, -0.11847194, 0.16367626, 0.018113747, 0.059499823, 0.0062132217, 0.15846115)); + target1 += mul(g1, float4x4(0.094601326, 0.053219795, 0.027610637, 0.12041253, 0.21425363, 0.15754686, 0.08518286, -0.00661778, -0.021661628, -0.17554528, -0.014842315, 0.22240937, 0.15908821, -0.20964032, 0.21754523, 0.30307937)); + target1 += mul(h1, float4x4(0.13757955, 0.06684095, -0.03616685, -0.014618309, 0.04168136, -0.17148526, -0.16317028, 0.14210777, 0.102521434, -0.19108291, -0.14441934, 0.14435884, 0.24228935, -0.10589834, 0.24029285, 0.27317202)); + target1 += mul(i1, float4x4(-0.16239886, -0.073841535, 0.067964345, 
-0.11332664, 0.07695667, -0.047180675, -0.08260769, 0.09427637, 0.09471068, 0.012713836, 0.14605078, -0.062490974, -0.11498225, 0.04150893, 0.37402585, 0.21953487)); + target1 += mul(a2, float4x4(-0.07445113, -0.14220217, 0.09271495, -0.014715529, -0.37606132, -0.14938155, -0.024809113, 0.22279873, -0.011379667, -0.04545505, -0.033382278, 0.08971831, 0.016359061, -0.016230864, 0.052939463, -0.07754285)); + target1 += mul(b2, float4x4(0.10961948, 0.09230085, 0.061259165, 0.0015837378, 0.053883027, -0.22557226, 0.018400123, 0.43234614, 0.08967873, 0.06687854, -0.4389578, -0.01658211, -0.040707946, 0.0048945122, 0.1433802, 0.049759727)); + target1 += mul(c2, float4x4(-0.027641231, 0.026085567, 0.109188825, -0.19011945, 0.19309571, 0.0084956605, 0.05034047, -0.08674781, -0.008124587, 0.031490494, -0.0744263, 0.084508896, -0.007835403, 0.13120581, 0.0021786217, -0.025225073)); + target1 += mul(d2, float4x4(0.020191731, 0.24703082, -0.36845222, 0.0032569442, -0.1497622, 0.05968502, 0.09595371, 0.008410154, 0.119981945, -0.09983294, -0.19541258, -0.111814305, -0.25664008, 0.31031236, -0.23063917, -0.13823026)); + target1 += mul(e2, float4x4(-0.092747286, 0.23009373, -0.29804415, 0.05036082, 0.031480987, 0.18805481, 0.3676576, 0.06004687, 0.19841099, -0.058367446, -0.44229323, -0.19645047, 0.037667975, 0.12398346, -0.25753063, -0.26919344)); + target1 += mul(f2, float4x4(-0.019061154, 0.03841801, -0.28433323, 0.38128456, -0.059526864, 0.29960185, 0.014484517, -0.10234412, 0.05444907, -0.12615138, 0.14936689, -0.079120934, 0.028092088, 0.096715964, 0.0037780635, -0.12791039)); + target1 += mul(g2, float4x4(0.26949528, 0.015951393, 0.15355164, -0.030336212, -0.100286454, -0.052609976, 0.03197625, -0.092190474, 0.06131517, 0.18291938, -0.15216532, -0.026021928, 0.18581273, -0.10659101, 0.14806952, 0.20509768)); + target1 += mul(h2, float4x4(-0.2205839, 0.11654808, 0.43800604, 0.03188946, 0.13840868, 0.020377772, 0.038510147, 0.03779825, -0.23494276, 0.08624197, 0.036650848, 
-0.115041405, -0.03776705, -0.32108167, 0.0094707385, 0.37881464)); + target1 += mul(i2, float4x4(-0.031778246, -0.38020673, 0.16956653, 0.33444092, -0.042172886, -0.03465591, -0.17585713, 0.025507452, 0.07595919, -0.06807453, -0.100295454, -0.019174794, 0.07763043, -0.09321411, -0.05212223, 0.112239085)); + target1 += mul(na1, float4x4(-0.048172995, -0.012284629, 0.12846173, -0.13459995, 0.25443402, -0.013064909, 0.15480834, 0.14016332, 0.036635883, -0.049085367, 0.0506487, 0.26623604, -0.023176057, 0.012088936, -0.1844897, 0.040488705)); + target1 += mul(nb1, float4x4(0.2147455, 0.17323543, -0.2943051, -0.053386763, -0.023367947, 0.090753146, -0.011997397, -0.0626111, -0.13558747, -0.035944186, -0.014752113, 0.25506687, 0.055502877, 0.31465453, -0.16283247, -0.08967175)); + target1 += mul(nc1, float4x4(0.033773236, -0.09510872, -0.09313707, 0.046486538, -0.1699796, -0.11685979, 0.22197925, -0.013884658, 0.12514, -0.12129843, -0.09695589, -0.075202964, -0.12321221, 0.18949097, -0.03694664, -0.2306249)); + target1 += mul(nd1, float4x4(0.08668444, -0.22983012, -0.30873656, 0.07371376, 0.082137264, -0.014844924, 0.2283955, 0.24782042, 0.31113505, 0.14810014, 0.32804835, -0.12014127, -0.17742543, -0.15872951, -0.080107674, -0.16898526)); + target1 += mul(ne1, float4x4(0.29746926, 0.19479977, 0.13996765, -0.4268552, -0.16478531, 0.0835479, 0.45685142, -0.05510062, -0.1282004, 0.12359051, 0.34026766, -0.26152933, -0.13128015, 0.329812, 0.27172327, -0.06600192)); + target1 += mul(nf1, float4x4(-0.06552484, 0.19600633, 0.12407863, -0.13815112, 0.17426166, 0.040930413, 0.06495108, 0.034157254, -0.029772963, 0.015127817, 0.10718436, -0.13752984, -0.0205358, 0.1884735, 0.104591034, -0.020779913)); + target1 += mul(ng1, float4x4(-0.053475305, -0.13616458, 0.05487909, 0.13256747, -0.10030239, -0.12376705, 0.062755466, 0.03264356, 0.068466686, 0.05019395, -0.034875803, -0.17806669, -0.21720818, 0.25592342, -0.2685692, -0.27576914)); + target1 += mul(nh1, float4x4(-0.04562929, 
0.04225299, -0.22311088, -0.09517893, -0.19886662, -0.11944208, 0.11044239, -0.10464355, 0.037634842, 0.124069214, 0.0927385, 0.108838566, -0.088783056, 0.17008123, -0.1007014, -0.23137446)); + target1 += mul(ni1, float4x4(0.10306672, 0.027472405, -0.069015354, -0.14412996, 0.24068132, -0.10624665, -0.25597134, 0.05208812, -0.10230778, 0.006520562, -0.11931577, 0.26738268, -0.09168354, 0.13557245, -0.008878644, -0.22292739)); + target1 += mul(na2, float4x4(-0.09403718, 0.11993688, -0.036254726, -0.053109076, 0.18422048, 0.25203657, 0.10025996, -0.11272799, -0.22040273, -0.05758331, -0.07059054, -0.054108664, -0.20009018, -0.22061199, 0.057880517, -0.26669186)); + target1 += mul(nb2, float4x4(-0.08534496, 0.0027822452, -0.01112169, -0.13484463, -0.09446875, -0.057457812, -0.03910888, -0.2816038, -0.096015625, -0.03636662, 0.12532772, 0.092033, 0.038156748, -0.101240925, 0.024886698, -0.086328045)); + target1 += mul(nc2, float4x4(0.2349796, 0.19884427, -0.0734711, 0.08422328, -0.07201622, 0.020658491, 0.1331021, 0.039766714, 0.19280422, 0.13086005, -0.11339721, -0.14782044, 0.19341573, 0.16767374, -0.03593828, 0.18139753)); + target1 += mul(nd2, float4x4(-0.040663462, -0.15233721, 0.524604, 0.26603413, 0.07202415, 0.053382196, 0.030758869, -0.06144292, -0.010495834, 0.13868876, -0.020688854, -0.15551737, -0.2958513, -0.32805985, -0.25359175, -0.036683984)); + target1 += mul(ne2, float4x4(-0.06644081, -0.145321, 0.24945419, 0.031560224, 0.17245345, 0.23418438, 0.20341763, -0.2619872, 0.038787205, 0.16488725, 0.0019107185, 0.03820528, 0.04169643, -0.34155026, -0.11183654, 0.028614044)); + target1 += mul(nf2, float4x4(-0.028469078, 0.010781976, 0.05263661, -0.15337946, -0.20491667, -0.13879907, 0.13934538, 0.061196275, 0.056804053, 0.063193604, -0.2389496, 0.037072126, -0.058510017, 0.036215063, 0.3074709, 0.10517675)); + target1 += mul(ng2, float4x4(0.028534278, 0.0022668538, 0.04492863, -0.060705435, 0.06349762, -0.016823182, -0.09148226, 0.03930522, -0.083295114, 
0.14799853, -0.08089152, -0.21993661, -0.23298621, 0.05106244, -0.013708201, -0.16311577)); + target1 += mul(nh2, float4x4(0.05885827, 0.122300275, -0.16086812, -0.21892425, -0.07548077, 0.09286181, -0.027564062, -0.028723463, -0.0056181233, 0.23472206, -0.0049285595, -0.45054138, 0.07592325, -0.044704806, 0.019616256, -0.06956836)); + target1 += mul(ni2, float4x4(0.036423888, 0.20839189, -0.16420732, -0.15954947, -0.11311323, -0.24191359, 0.19845375, 0.084540576, -0.20946553, 0.09259613, 0.03234368, -0.056766506, -0.11992363, -0.06882079, -0.020428827, -0.093375795)); + target1 += float4(0.013113342, -0.2905848, -0.029724011, 0.1769613); + + float4 target2 = mul(a1, float4x4(0.093678355, -0.08574688, 0.007699401, -0.038818456, -0.10667588, 0.043627866, 0.23127791, 0.061317544, -0.32790044, 0.08618836, 0.009400048, -0.17129329, 0.23541448, -0.015561885, -0.11172365, -0.1190039)); + target2 += mul(b1, float4x4(-0.0052874424, 0.08136584, -0.12633958, -0.016064916, 0.14033778, 0.07755252, -0.26242834, 0.063312635, 0.06861756, 0.14867078, -0.2561066, 0.33325562, -0.106489345, -0.10068009, -0.039633382, -0.016305668)); + target2 += mul(c1, float4x4(-0.27784392, -0.14990395, -0.35981888, -0.2564094, -0.07480205, -0.026457628, 0.1027643, 0.19381845, -0.07160986, -0.15616457, -0.032070953, 0.32998616, 0.15383582, 0.16622585, -0.1435993, -0.02287804)); + target2 += mul(d1, float4x4(-0.09360053, 0.58019537, 0.02028909, 0.413114, 0.025173154, -0.030326266, -0.028177274, -0.12964654, -0.25432733, -0.06556034, 0.023097439, -0.09458851, -0.21772051, -0.10324596, -0.36674342, -0.14803977)); + target2 += mul(e1, float4x4(-0.1227467, 0.20252965, 0.2559927, 0.08719227, 0.030749539, -0.2526622, -0.25694713, -0.2960799, -0.34960067, -0.25393236, -0.28439638, 0.086787805, -0.34202877, 0.21933395, 0.23473133, 0.079260886)); + target2 += mul(f1, float4x4(-0.00147522, -0.16591258, -0.030617915, 0.10052425, -0.1822102, 0.038774874, -0.04285007, 0.07312042, 0.052175622, -0.33510515, 
0.027545406, 0.2995306, -0.08535316, 0.11144203, 0.27999434, -0.09770663)); + target2 += mul(g1, float4x4(-0.04394928, -0.26842886, -0.08354109, 0.04077001, -0.009221606, 0.0328837, 0.006459338, 0.08984004, -0.13035133, 0.20004508, 0.21950854, -0.12742348, 0.32386312, 0.085903555, -0.29273173, -0.056370437)); + target2 += mul(h1, float4x4(0.019171638, -0.1824711, -0.10899421, -0.16201603, 0.054712642, -0.020315547, -0.048609916, -0.068621606, -0.055706583, -0.25671515, -0.019494208, 0.08366393, 0.09531471, -0.05988052, -0.024995802, 0.019303525)); + target2 += mul(i1, float4x4(-0.08694609, 0.26762635, 0.10477892, -0.15392998, -0.059596587, -0.047562487, -0.25932398, -0.054960977, -0.00015596532, 0.07196634, -0.017385524, -0.18826845, -0.017969077, -0.27291682, -0.153906, -0.107691295)); + target2 += mul(a2, float4x4(0.17340474, -0.1285696, -0.04484238, 0.15782213, -0.06190358, 0.27896214, 0.28475145, -0.042519942, -0.19862229, -0.1354097, 0.14344497, 0.015599392, 0.18698554, 0.035121564, -0.018465763, 0.0010143917)); + target2 += mul(b2, float4x4(-0.13428356, -0.06612225, 0.19397905, 0.14209093, 0.1526626, 0.2617573, -0.15316434, 0.35452205, 0.05003259, 0.07679617, -0.008399171, -0.0062716682, 0.11833864, 0.1331285, -0.006803729, 0.22615404)); + target2 += mul(c2, float4x4(0.0020632436, -0.173174, -0.15404437, 0.05430569, 0.21100305, 0.39063898, -0.019479724, 0.17396629, -0.061121427, -0.13424753, -0.008459669, -0.04975768, 0.20599939, -0.11374013, -0.21116278, 0.063624285)); + target2 += mul(d2, float4x4(-0.0073831948, -0.12009769, -0.16402034, 0.054093774, 0.061061747, -0.009054565, -0.02815144, -0.17071937, -0.22791979, 0.073427565, 0.25161973, 0.1011713, -0.23804636, 0.13810354, 0.09063126, -0.23065178)); + target2 += mul(e2, float4x4(-0.31885087, 0.21730177, -0.20516786, 0.04075695, -0.2736768, -0.38779113, -0.19445951, -0.14024325, -0.11824961, -0.102919355, -0.17858729, -0.013441498, 0.16320607, -0.27105078, -0.00019549616, 0.024509901)); + target2 += 
mul(f2, float4x4(-0.16024838, -0.3132909, -0.15461555, 0.34874174, -0.0051668375, 0.1811257, 0.3384939, 0.16381103, 0.047184363, -0.20424844, -0.1330078, -0.13795874, 0.21890834, -0.08242861, 0.22677775, 0.031102268)); + target2 += mul(g2, float4x4(0.19408257, 0.016361775, -0.202373, 0.2245766, -0.008954751, -0.047279913, -0.09170596, 0.01567793, -0.0019059096, -0.07785436, 0.0756357, 0.09683383, 0.034215495, -0.030802004, -0.077977195, -0.1101297)); + target2 += mul(h2, float4x4(-0.1060503, -0.0044663083, -0.14942732, -0.11696249, -0.04550482, 0.11463188, 0.17801443, 0.07229662, -0.14176941, 0.02773344, -0.10770335, -0.08745911, -0.023052111, -0.17474785, 0.016645849, -0.059080444)); + target2 += mul(i2, float4x4(-0.050500304, -0.14716387, 0.04525464, 0.23543595, 0.08411192, 0.16031684, 0.1659825, -0.03595111, -0.012943453, 0.13354135, -0.051425032, -0.0075654723, 0.11174184, 0.1266808, -0.18799087, 0.10571744)); + target2 += mul(na1, float4x4(-0.15583408, 0.09837484, 0.19239932, -0.03557196, -0.05406335, 0.096456856, -0.13921897, -0.2212671, 0.28973594, 0.04017474, -0.25423512, 0.1522156, -0.10563249, -0.033190794, 0.101713456, -0.08922746)); + target2 += mul(nb1, float4x4(-0.0787607, -0.14545321, 0.099762656, -0.2824299, 0.10130184, 0.019948835, -0.1013831, 0.06604923, 0.089561954, 0.28344154, 0.05757009, 0.04981809, -0.15927236, 0.008129835, -0.04280382, 0.10653281)); + target2 += mul(nc1, float4x4(0.28149363, 0.019583186, 0.25983065, 0.30190885, 0.055435803, -0.01970755, 0.04546505, -0.027456624, 0.43886992, -0.032305803, -0.23557569, 0.12753153, -0.18509789, -0.073295385, 0.0083466545, -0.08271229)); + target2 += mul(nd1, float4x4(0.016040009, -0.20475672, -0.015803276, 0.18247975, 0.21178837, -0.041543446, -0.24716362, 0.10105528, 0.19479224, -0.06583694, -0.09192672, -0.037776746, 0.09636229, -0.12086331, 0.13989103, 0.014564729)); + target2 += mul(ne1, float4x4(0.19923596, -0.4132588, -0.4254784, -0.33433357, -0.16956097, -0.25086832, 0.23311833, 
-0.08976422, 0.06432824, -0.0071802614, 0.0033370545, -0.11073493, -0.46609998, -0.09332235, -0.27287352, 0.052513942)); + target2 += mul(nf1, float4x4(-0.06954148, -0.06908355, -0.01875471, -0.35067585, 0.038715206, 0.08843527, 0.28899097, -0.024983376, 0.05879495, 0.110363334, 0.055481512, -0.0046147215, -0.035302363, -0.2722019, -0.0829261, 0.21088009)); + target2 += mul(ng1, float4x4(-0.101971015, -0.18584369, 0.1469676, 0.025965, 0.07205807, 0.08838771, 0.08537094, 0.023344917, -0.106373414, -0.09254277, -0.25996596, 0.24570447, 0.00590166, -0.20074098, -0.05443169, -0.10562662)); + target2 += mul(nh1, float4x4(0.12980327, -0.16834956, -0.1635997, 0.23437372, -0.07374834, 0.0062907683, 0.17292136, 0.0018093853, 0.04122969, -0.025285576, 0.29646805, 0.13402736, -0.040267725, 0.0011441729, -0.18658921, 0.12006417)); + target2 += mul(ni1, float4x4(0.13221453, 0.15109141, 0.07707579, 0.05148666, -0.039716493, 0.12869143, -0.012840577, 0.10953536, -0.05721115, -0.120122276, -0.07632444, 0.32949027, 0.00022400127, 0.22217369, 0.2180494, -0.028773604)); + target2 += mul(na2, float4x4(-0.08405412, 0.11332542, 0.120847605, 0.00520135, -0.13689686, -0.1459117, -0.029643068, 0.16147274, 0.21844815, -0.036921967, -0.12862785, -0.15930249, -0.11265427, -0.17471205, 0.0026749703, 0.2048758)); + target2 += mul(nb2, float4x4(-0.03768306, -0.07585988, 0.046583172, -0.35557657, 0.012359812, -0.05498573, 0.19581361, -0.08186999, -0.008727976, -0.16623624, -0.03647879, 0.22760212, 0.048297524, -0.12502927, 0.08636729, -0.26437047)); + target2 += mul(nc2, float4x4(-0.19518375, 0.17423135, 0.19473018, -0.22721744, -0.25087392, -0.17043075, -0.021999557, -0.27388734, -0.096786864, -0.012226921, 0.16101876, 0.030362492, -0.017619403, -0.2494354, -0.07336028, 0.06842719)); + target2 += mul(nd2, float4x4(0.13816363, 0.14551367, -0.08497621, 0.15563537, -0.01600614, -0.010629245, 0.007773828, 0.2733634, 0.13066974, -0.2223056, -0.12664202, -0.19242655, -0.13211249, 0.065143794, 
0.23912583, 0.19819915)); + target2 += mul(ne2, float4x4(0.001870705, -0.0028601827, 0.14014813, 0.14659253, -0.037523735, 0.3726274, 0.13139205, 0.0112125, -0.16308945, -0.17571904, 0.12799808, -0.032106552, 0.013872656, 0.432307, -0.14197885, 0.24013121)); + target2 += mul(nf2, float4x4(0.117900506, -0.08039036, -0.17504077, -0.08337764, -0.0068703834, -0.07430392, -0.17125578, -0.3470726, -0.20989974, -0.019394008, -0.027336912, 0.18668686, 0.052886557, -0.023217537, 0.004054446, 0.055974416)); + target2 += mul(ng2, float4x4(-0.055653654, 0.08726097, 0.01206228, -0.25783783, -0.08736529, 0.19947968, -0.010166337, 0.36168414, 0.20298903, -0.15769973, -0.21389212, -0.19638214, -0.093130395, -0.067289785, 0.10245741, -0.14167903)); + target2 += mul(nh2, float4x4(0.04559992, -0.102125205, 0.21949212, -0.07308472, -0.15511832, 0.23785073, 0.04275021, 0.085007004, 0.079402514, 0.10851189, -0.151969, -0.29738536, -0.0776658, 0.1113102, -0.18987878, -0.045522977)); + target2 += mul(ni2, float4x4(0.073690206, -0.016468357, 0.122353435, -0.023995928, 0.095143944, 0.23051415, 0.17702249, 0.030164838, -0.09111423, -0.14219609, -0.19734482, -0.24854833, -0.0067356345, -0.1760497, 0.22637916, 0.119141534)); + target2 += float4(0.22705397, -0.029518934, -0.026397338, -0.08183741); + + float3 target3 = tex6.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.013106969, 0.010379314, 0.012753471, 0.07086715, -0.020893, -0.03968904, -0.06114372, 0.029510446, 0.035070244, 0.11180839, -0.087067656, -0.124039896)); + target3 += mul(e2, float4x3(-0.056521703, -0.001166792, -2.3704073e-05, 0.011961608, 0.01848977, 0.019861937, 0.012167056, 0.018613879, 0.020505793, 0.009734187, -0.0308419, -0.035206888)); + target3 += mul(ne1, float4x3(0.0048758825, 0.018046578, 0.014597015, -0.061724614, 0.040989272, 0.05644141, 0.070315465, 0.008318584, 0.0028647361, -0.11316492, 0.043919202, 0.07653594)); + target3 += mul(ne2, float4x3(0.031487904, -0.010548384, -0.009984509, 
-0.0022647562, 0.0043304027, 0.0029451603, -0.0063251094, -0.013420807, -0.011919729, -0.022760967, 0.019141173, 0.01782793)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex5[gxy] = float4(target3, 1); +} + +//!PASS 8 +//!DESC Conv-4x3x3x16, Conv-3x1x1x112 +//!IN INPUT, tex1, tex2, tex5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass8(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, 
-inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 target1 = mul(a1, float4x4(0.16737834, 0.35369134, 0.14049083, 0.017871622, 0.0058661173, -0.035960242, -0.039154284, -0.01920433, 0.0729212, -0.03617972, -0.42717552, -0.019914677, -0.30816802, -0.07726792, 0.2088459, -0.09198307)); + target1 += mul(b1, float4x4(-0.0991125, 0.11411345, 0.15300295, -0.09510225, 0.014268626, -0.42914182, -0.13365223, -0.19440699, -0.27214321, 0.085696176, 0.1527733, -0.21056797, -0.062475704, -0.023041902, -0.29080424, -0.54386055)); + target1 += mul(c1, float4x4(-0.30736786, -0.16801229, 0.07400606, -0.31128535, -0.11047924, 0.16556956, -0.33445996, -0.09190697, -0.06132585, -0.11021996, 0.014628762, -0.45183894, 0.08186993, 0.19378273, 0.113438204, 0.038364496)); + target1 += mul(d1, float4x4(0.24129803, 0.29174972, -0.1250327, 0.14254767, 0.0026774528, 0.1742466, -0.021835174, 0.01668921, 0.13646975, 0.313305, -0.23293279, -0.16737306, -0.059818722, 0.06404477, 0.108172625, 0.22065729)); + target1 += mul(e1, float4x4(-0.3504013, 0.20759478, 0.28683922, 0.2771802, 0.13761812, -0.21180478, -0.17020214, -0.21419087, -0.031916566, -0.040439468, 0.39206958, 0.715565, 0.46198523, 0.05055317, -0.07409331, -0.050633535)); + target1 += mul(f1, float4x4(0.122958206, 
0.0071205017, -0.21314384, -0.22197853, 0.016202174, -0.15960938, -0.14601983, -0.023609173, -0.07586023, 0.099936776, -0.0480375, -0.08681468, -0.14976887, -0.38979456, 0.16078879, -0.12263952)); + target1 += mul(g1, float4x4(0.1687149, 0.108331114, 0.10112296, 0.01738403, -0.06773097, -0.19410455, -0.09728116, 0.0013846151, -0.038603816, -0.05495021, 0.2453317, -0.40052003, -0.022453755, 0.045039784, 0.0474246, -0.2665161)); + target1 += mul(h1, float4x4(0.06805519, -0.052276067, 0.052459523, -0.0033053474, 0.13439268, -0.06845637, -0.20462433, -0.09088968, -0.00096404477, -0.35103628, 0.15096465, 0.3285226, 0.018747555, -0.06623108, 0.1754265, 0.3211156)); + target1 += mul(i1, float4x4(-0.04583627, 0.122267574, -0.44002235, -0.20039988, 0.039372742, -0.16505809, -0.26659602, 0.12207268, 0.03337428, 0.23131758, -0.009866899, 0.010381569, 0.29676, -0.020599596, 0.17816995, 0.32852224)); + target1 += mul(a2, float4x4(0.09469788, -0.12531966, -0.11786524, -0.3115985, -0.2213199, -0.012536277, -0.13176842, 0.14986996, 0.12069894, 0.2744789, 0.21674646, 0.46060535, -0.4101697, -0.55295914, 0.29993954, 0.114459395)); + target1 += mul(b2, float4x4(0.18347421, -0.29010707, 0.29127017, 0.087738656, 0.17509815, 0.03982794, 0.1731455, 0.38041735, 0.110374, -0.25045586, 0.36446962, 0.016104888, -0.012112869, 0.10154983, -0.45384112, -0.11416608)); + target1 += mul(c2, float4x4(-0.033837743, -0.020894403, -0.287127, -0.21196121, -0.03255823, 0.2599821, -0.38386443, 0.30563655, 0.39044768, -0.112917066, -0.021323297, 0.12623324, 0.06885038, -0.20750642, 0.07642818, -0.103580445)); + target1 += mul(d2, float4x4(0.1723114, -0.3726216, -0.21184283, 0.1761503, -0.24993578, -0.31068864, 0.19998416, -0.23127908, -0.052656204, -0.04243976, 0.4397144, 0.01863219, -0.04796025, -0.11009142, -0.0073631364, 0.2716381)); + target1 += mul(e2, float4x4(0.04202001, 0.27142277, -0.027491128, 0.27428457, -0.11009916, 0.39839938, -0.7223327, -0.124673314, 0.08123618, -0.11884722, -0.20375855, 
-0.7179687, 0.30648115, -0.28195357, -0.3350774, -0.29778734)); + target1 += mul(f2, float4x4(0.071278594, -0.09155223, 0.06417857, 0.08250104, -0.45117077, -0.023316784, 0.38917172, -0.19110887, -0.09265943, -0.2643835, -0.09707039, -0.33238646, -0.0818088, 0.17623149, -0.28457013, 0.13986786)); + target1 += mul(g2, float4x4(0.019971045, -0.046649583, -0.03036858, 0.07944429, 0.26344573, 0.054998036, 0.07139812, 0.21139374, 0.08021858, -0.025791258, -0.0423707, 0.25174072, -0.021300986, 0.13209766, 0.19120613, 0.3840775)); + target1 += mul(h2, float4x4(-0.11456406, -0.33503455, 0.21409267, -0.056933913, -0.12204284, -0.37379473, 0.33474764, 0.38634798, 0.12618992, 0.1353635, -0.22651522, -0.3160159, 0.18621005, 0.024818055, -0.11935204, 0.014005666)); + target1 += mul(i2, float4x4(0.1501391, 0.0014716414, -0.22049955, -0.10928345, -0.07085164, -0.08778668, 0.19251469, -0.4932493, 0.071784936, -0.06903646, -0.060333923, 0.020552203, -0.33637995, -0.22848415, 0.21518159, 0.23815839)); + target1 += mul(na1, float4x4(-0.04230713, -0.19312756, -0.0613665, 0.058912925, -0.17639293, -0.029920885, -0.027867602, -0.16602923, 0.10262268, -0.0743682, 0.15286638, 0.08042581, -0.042299524, 0.0022034592, 0.15304253, 0.049871147)); + target1 += mul(nb1, float4x4(0.004346093, -0.07895582, 0.02089975, 0.13429636, -0.1020282, 0.5270822, 0.017983409, 0.1531299, -0.02891241, -0.07050933, -0.18729019, 0.13855362, -0.11538968, 0.20733222, 0.1546878, 0.11550679)); + target1 += mul(nc1, float4x4(0.21800312, 0.20944421, -0.1817274, 0.022868395, -0.019241469, 0.038916696, 0.088702604, 0.1467791, 0.0048542274, 0.10344671, -0.0107803065, 0.23302868, 0.049728952, -0.016042534, -0.08694045, -0.0028224774)); + target1 += mul(nd1, float4x4(-0.1570157, 0.08688841, 0.03926086, -0.040503077, -0.052700017, -0.1432353, -0.04516745, -0.09649034, -0.053716175, 0.07059194, -0.07360609, 0.26307717, 0.121471435, -0.13640986, -0.1113535, -0.38560814)); + target1 += mul(ne1, float4x4(-0.014722592, 
-0.39773384, 0.28259715, -0.10905738, 0.07889424, 0.1415529, -0.15419348, -0.2064834, -0.15126482, -0.28288555, -0.0014232624, -0.26178944, -0.025823193, 0.008017357, -0.08547297, 0.26373458)); + target1 += mul(nf1, float4x4(0.2978961, -0.020236012, -0.101216674, 0.15498216, -0.0069343713, -0.088363856, 0.20511419, 0.23958007, 0.045810107, -0.19189738, -0.14137349, 0.04177724, -0.1394684, 0.0071990825, 0.06991723, -0.21052721)); + target1 += mul(ng1, float4x4(-0.05615232, 0.22506002, -0.12479586, -0.0070057763, 0.092545755, 0.096306436, 0.041890718, 0.1226944, -0.07541768, -0.08369033, -0.15144373, 0.09310172, 0.28388003, 0.09935607, 0.11299509, 0.0014283776)); + target1 += mul(nh1, float4x4(-0.005848455, 0.117699094, 0.23539856, 0.11006195, 0.10962903, 0.28139547, 0.18785141, -0.11635996, 0.057289902, 0.2370178, -0.29825503, -0.13706475, -0.3869794, 0.024066223, 0.36742347, 0.35919484)); + target1 += mul(ni1, float4x4(0.13744523, 0.09239356, 0.01173183, 0.119055405, -0.07841836, 0.0668925, 0.22598477, -0.016510552, 0.07971727, -0.17154713, 0.03333588, -0.13790733, 0.15421963, 0.2895701, -0.28440917, 0.015132756)); + target1 += mul(na2, float4x4(-0.054354303, 0.36663428, 0.02634933, 0.18688667, 0.0607547, 0.17321853, 0.086784445, -0.023283, 0.0027200899, 0.026914112, -0.07438439, 0.27042162, 0.09985293, 0.012430832, -0.20694605, -0.20363812)); + target1 += mul(nb2, float4x4(-0.42759168, 0.15540305, -0.18979609, 0.0073875943, 0.034251947, -0.34551802, 0.53327596, 0.17446762, -0.25879666, 0.2780996, 0.11094055, 0.17597, 0.13790102, 0.2615357, 0.09666047, 0.36155468)); + target1 += mul(nc2, float4x4(0.052614138, -0.1880028, 0.361331, 0.07957976, 0.12552904, -0.0042941784, 0.096562445, -0.041199915, 0.07412456, 0.16379668, 0.05464284, 0.050022952, -0.028281605, 0.09332573, 0.21379845, 0.21396561)); + target1 += mul(nd2, float4x4(-0.07546953, 0.16393837, -0.3060623, -0.64610606, -0.013715101, 0.18005042, 0.045286633, -0.21057944, -0.12779316, -0.10310629, 0.14360385, 
0.011625261, 0.05597252, 0.023864657, -0.00018915108, -0.24224915)); + target1 += mul(ne2, float4x4(-0.08550672, 0.2438917, -0.30383766, -0.2463794, 0.13835424, -0.079946786, -0.060197506, 0.051599402, -0.24983203, -0.06691107, -0.0041784844, 0.07539119, -0.030340329, -0.23565106, -0.17968354, -0.10262371)); + target1 += mul(nf2, float4x4(0.19315718, -0.045718513, 0.120446794, -0.225136, 0.22922774, -0.046026126, 0.11448238, 0.114267804, -0.22327735, -0.03368635, 0.29763463, 0.03673529, -0.0583939, -0.092253424, 0.045279544, 0.04475646)); + target1 += mul(ng2, float4x4(-0.062286656, -0.06241419, -0.23600577, -0.24818502, -0.058666106, 0.17710151, -0.1751668, 0.05758226, 0.18278669, 0.033297777, 0.046349872, 0.09178792, -0.0745512, 0.20019765, 0.037281513, 0.22204825)); + target1 += mul(nh2, float4x4(-0.24708512, -0.1318695, -0.24966322, -0.31206796, 0.079176836, 0.11837155, -0.12882641, -0.01013533, -0.009065797, 0.0789075, 0.016151598, 0.00020127615, 0.1450729, 0.10825556, 0.09322918, 0.07283566)); + target1 += mul(ni2, float4x4(0.2604332, 0.25550258, 0.07709474, 0.28426003, 0.10387355, 0.09152259, 0.18742633, -0.0073229484, -0.20327723, -0.26013616, 0.055792782, -0.1713302, 0.14862068, 0.06698207, 0.17608787, -0.11622757)); + target1 += float4(-0.20551574, 0.073114716, -0.21843387, -0.28057778); + + float4 target2 = mul(a1, float4x4(-0.18413043, -0.12355504, 0.2708789, 0.17259507, -0.069752574, 0.12640886, 0.01075919, -0.028221423, -0.020598855, -0.17259665, 0.16907778, -0.10040477, 0.017177016, 0.0176426, 0.23724149, 0.14657862)); + target2 += mul(b1, float4x4(0.16921899, -0.33950835, 0.37508205, 0.09996622, 0.13377811, -0.036743056, -0.11633877, -0.23046862, -0.009307903, 0.027441062, 0.054166224, 0.011627087, -0.22831611, 0.043198805, -0.12695734, 0.0062862337)); + target2 += mul(c1, float4x4(0.17216596, -0.15588646, -0.14179194, 0.12487524, 0.10507964, 0.124544986, -0.0046104924, -0.116668865, -0.006100901, -0.022074439, 0.03376759, 0.10498887, 0.109659016, 
-0.03567928, 0.29972833, -0.045950003)); + target2 += mul(d1, float4x4(-0.29127, 0.21912472, 0.16494286, 0.027708547, 0.043136686, 0.04409876, -0.07686145, -0.13180132, -0.16630307, 0.15650205, -0.005864527, 0.03916553, 0.15750135, 0.1705246, 0.21626697, 0.06906506)); + target2 += mul(e1, float4x4(0.055395894, 0.28228188, 0.114794776, 0.020619212, -0.031812593, 0.11964309, -0.24317431, -0.36277202, 0.54564184, -0.032843567, -0.118973784, -0.40999004, -0.118530475, 0.09256661, 0.06583871, -0.36627474)); + target2 += mul(f1, float4x4(0.17914769, 0.33976436, -0.11220768, 0.1325754, 0.40586957, 0.3064959, -0.19086123, 0.014164092, -0.17376979, -0.0037554938, 0.11771888, 0.44933778, -0.15937245, -0.10635065, 0.084963776, 0.14630255)); + target2 += mul(g1, float4x4(-0.3723194, 0.21509883, 0.020062352, 0.094394304, 0.030794155, -0.11394617, -0.09103134, -0.0042343247, -0.28981096, -0.061873477, -0.17772584, 0.36440176, 0.007828069, -0.012121627, 0.25862312, 0.24646287)); + target2 += mul(h1, float4x4(0.10368119, -0.06185447, -0.022830853, 0.10918094, 0.18888599, -0.09235343, -0.055134308, -0.2210923, 0.15334128, -0.3084707, 0.31606838, 0.39931116, 0.29489174, -0.24794856, -0.4799932, -0.2617589)); + target2 += mul(i1, float4x4(0.32550937, -0.17103608, 0.3257806, -0.23358762, 0.20370598, 0.13325407, -0.020303056, -0.105462655, -0.22264756, -0.034177396, 0.36885822, 0.20504399, 0.36375418, -0.26149705, 0.022433946, 0.15646128)); + target2 += mul(a2, float4x4(0.007481421, 0.005642636, -0.170087, -0.08915849, 0.6329519, 0.06880098, -0.20856442, -0.1801066, -0.1342754, 0.13643123, 0.26994216, -0.27503812, 0.018052012, 0.058687408, -0.19784917, 0.021157453)); + target2 += mul(b2, float4x4(-0.1486918, 0.12212738, -0.03104796, 0.08664756, 0.3464865, 0.27309546, -0.022896903, -0.32080007, -0.28113958, 0.74847424, -0.33735126, -0.04616876, -0.23119605, 0.4214322, -0.16457441, 0.09162191)); + target2 += mul(c2, float4x4(0.15863913, 0.1303683, -0.06339421, 0.06328312, -0.3100047, 
-0.33906308, 0.13805804, -0.14923394, 0.4997829, -0.14977637, 0.02265068, -0.04585939, 0.29802153, 0.3767994, -0.031849556, -0.051892217)); + target2 += mul(d2, float4x4(-0.04541847, -0.13645087, 0.14119779, 0.06409465, -0.29877988, -0.0009743694, 0.028256422, 0.14978185, -0.13014801, -0.24171488, -0.10782599, 0.010709664, 0.21880737, -0.34132662, 0.22972895, -0.07159475)); + target2 += mul(e2, float4x4(-0.1510528, 0.115773134, 0.036761034, -0.284284, -0.35684052, 0.16348189, -0.105475456, 0.08259931, -0.6489164, -0.033928663, -0.04243186, 0.25324553, -0.31829014, 0.066608824, -0.11131264, 0.51919967)); + target2 += mul(f2, float4x4(-0.06517726, 0.1933327, 0.044391852, -0.013346896, -0.3033368, 0.106350735, -0.1351003, -0.13414839, 0.11720078, -0.24844061, -0.2900742, -0.047861837, 0.42789885, -0.47915378, -0.09643217, -0.22915216)); + target2 += mul(g2, float4x4(0.109821886, 0.31451595, 0.13300805, -0.08792569, -0.023928089, -0.038061168, 0.17821129, 0.003772247, 0.14684688, -0.12646271, 0.16072205, 0.011095222, 0.09209181, 0.005167038, -0.08823252, 0.079890974)); + target2 += mul(h2, float4x4(-0.20074554, 0.39979288, -0.007316405, -0.047838025, 0.10849111, -0.22469573, -0.059183244, -0.13663793, 0.07881898, 0.105663374, -0.3152222, 0.08104766, -0.22965154, 0.118780024, -0.07886757, 0.073527716)); + target2 += mul(i2, float4x4(0.1304303, 0.023158893, -0.081089824, -0.15955788, 0.42183343, -0.12898655, -0.14028409, 0.011985, 0.3977131, -0.313598, -0.148818, -0.048350018, -0.13534498, -0.12760727, -0.014968193, 0.06646305)); + target2 += mul(na1, float4x4(0.18085147, -0.11859402, 0.117530234, -0.10420847, 0.1848264, -0.12192718, -0.18729533, -0.10098887, 0.011134682, -0.23658146, 0.12963286, 0.117404245, 0.054487415, -0.030003065, -0.32175776, -0.08044254)); + target2 += mul(nb1, float4x4(-0.07251758, 0.073430285, -0.22191651, 0.030512359, -0.029650904, -0.15816379, 0.0418705, 0.04776615, -0.014070836, -0.14669086, -0.009874937, -0.015444495, -0.2747725, 
-0.061624944, -0.11261252, 0.14757589)); + target2 += mul(nc1, float4x4(-0.09274913, 0.046194065, 0.05642919, -0.07803342, 0.23578037, 0.01224276, 0.015608659, 0.05847865, -0.091819406, -0.14424564, -0.034869857, 0.019276984, -0.031180726, -0.21905676, 0.100375675, -0.13659117)); + target2 += mul(nd1, float4x4(-0.072157644, -0.13294607, 0.24301524, 0.048643183, -0.04338094, -0.0021709928, -0.06530963, -0.22672611, 0.07479903, 0.08388352, -0.07460508, -0.14517406, -0.072923675, -0.26912874, -0.2769797, 0.054033212)); + target2 += mul(ne1, float4x4(-0.5648679, -0.28059873, -0.039906785, -0.39112374, -0.3841447, -0.20383365, 0.12607281, 0.16049421, -0.34394273, -0.022326993, 0.16646549, -0.23433913, 0.071224056, 0.048073303, 0.122035526, 0.14941359)); + target2 += mul(nf1, float4x4(-0.11803124, 0.114169255, 0.018188128, 0.0053847185, -0.07537228, -0.048262373, 0.073838905, -0.041833423, 0.044405136, -0.03813592, 0.076818384, -0.06015139, -0.085042655, -0.14306667, -0.21477652, 0.31548396)); + target2 += mul(ng1, float4x4(0.19307283, -0.014985916, -0.14332882, -0.05549754, 0.14551677, 0.11406769, 0.2744144, -0.031179624, 0.17578745, -0.11309805, 0.010072839, -0.07453384, -0.23163621, 0.19061968, 0.11016298, 0.108093746)); + target2 += mul(nh1, float4x4(0.23180474, -0.12522835, -0.03218773, -0.0031955864, -0.14057393, 0.07269213, -0.20883523, 0.09332164, -0.16037942, 0.25845763, -0.002303125, -0.014625506, 0.17063208, -0.11648214, 0.13988028, -0.024688654)); + target2 += mul(ni1, float4x4(0.043369994, 0.12473897, 0.108142346, 0.10268199, 0.16159926, -0.17804666, -0.007889351, 0.07232418, 0.26326916, 0.0474316, -0.41637155, -0.11879895, 0.14051722, 0.08747377, 0.1162202, -0.06443569)); + target2 += mul(na2, float4x4(0.0041097966, 0.109841965, 0.097240336, 0.08123332, -0.081065506, 0.12650634, 0.23450434, 0.09631333, 0.21942414, -0.108897425, -0.033703003, 0.047280088, -0.017764917, -0.058596086, -0.15305139, 0.09055131)); + target2 += mul(nb2, float4x4(0.26824722, 
0.014116421, 0.11844865, -0.156046, 0.057152968, 0.21287468, -0.3243975, -0.18181354, -0.07131152, -0.17860547, 0.18918999, 0.15399154, 0.20270234, 0.11524436, 0.05146645, -0.18196748)); + target2 += mul(nc2, float4x4(-0.2745638, -0.026905773, 0.045458756, 0.22942849, -0.21052304, 0.20649272, -0.03713028, 0.33655703, -0.12467089, -0.015030098, 0.15504798, -0.05647672, 0.18751477, 0.08505986, 0.04756538, -0.058810517)); + target2 += mul(nd2, float4x4(0.1737789, 0.06552432, -0.34797582, -0.05370679, -0.036056817, 0.085242435, -0.12802805, 0.03710984, -0.09883285, 0.08946925, -0.0446528, 0.07734006, -0.10973603, 0.262812, 0.14010249, -0.1543792)); + target2 += mul(ne2, float4x4(0.316673, -0.16414417, -0.23147403, -0.3080756, -0.056620106, -0.11389848, 0.0948114, 0.13236332, -0.40048537, -0.090742044, 0.12090404, 0.024549136, -0.19124876, -0.3007761, 0.16159211, -0.28620452)); + target2 += mul(nf2, float4x4(0.032962102, -0.05481415, -0.1185786, 0.18153866, -0.2105442, -0.03802839, 0.14060515, 0.072460145, -0.1523761, -0.11426362, 0.02610123, -0.053477813, -0.20768824, 0.04533907, 0.14381588, -0.041578818)); + target2 += mul(ng2, float4x4(-0.021694858, -0.028784249, -0.09928565, 0.07335764, 0.1315628, 0.11288982, 0.078681685, -0.1229723, -0.09618894, -0.07387309, 0.04340066, -0.036534667, 0.37295115, -0.08176548, -0.16579813, -0.13485877)); + target2 += mul(nh2, float4x4(0.45979, -0.289226, -0.15456465, 0.0117592, 0.22803205, 0.15497394, -0.38995707, 0.005227681, -0.20515667, 0.17184737, -0.069968715, -0.24724679, -0.048521046, 0.013277072, 0.049562644, -0.05522196)); + target2 += mul(ni2, float4x4(0.14561136, -0.18995416, 0.18104567, 0.063063085, -0.09728072, 0.018328888, -0.17258182, 0.069259025, 0.15187183, 0.16760696, -0.14086077, 0.013297849, -0.07579904, -0.09294852, -0.24227127, -0.048749007)); + target2 += float4(0.31939298, 0.03303962, -0.010749771, 0.084496394); + + float3 result = tex5.SampleLevel(sam, pos, 0).rgb; + result += mul(e1, float4x3(0.023055293, 
0.028219413, 0.024810018, 0.031653803, 0.050207954, 0.04504577, 0.03877294, 0.0280465, 0.025589157, 0.0019387804, 0.023891818, 0.016049948)); + result += mul(e2, float4x3(0.006562233, 0.03880659, 0.037682824, -0.021441424, -0.011277022, -0.012471097, -0.030526241, -0.013880651, -0.014213582, 0.0075785257, -0.0017350517, -0.0024610942)); + result += mul(ne1, float4x3(0.015097556, 0.020325955, 0.015611413, -0.014755199, -0.034323387, -0.032325987, -0.008603291, 0.010346807, 0.011044969, -0.004739154, -0.026397636, -0.01995132)); + result += mul(ne2, float4x3(0.0097906375, -0.015094543, -0.016887931, -0.0007786067, -0.0069163437, -0.008449091, 0.025534432, 0.018064791, 0.017047096, 0.00055667467, 0.001493328, 0.003636564)); + result += mul(max(target1, 0), float4x3(-0.042251963, -0.042396102, -0.040224236, -0.004492444, -0.0069470624, -0.0065821502, 0.062203273, 0.06213223, 0.053592753, 0.06424337, 0.07964681, 0.07316769)); + result += mul(max(target2, 0), float4x3(0.026366957, 0.02789826, 0.027239393, -0.006712127, -0.0035723334, -0.0032348586, -0.04960562, -0.062758155, -0.058574595, -0.02896146, -0.020999067, -0.021301663)); + result += mul(max(-target1, 0), float4x3(-0.013106142, -0.017057793, -0.014653614, -0.04254173, -0.043040022, -0.041918345, -0.011146975, -0.0043820064, -0.003768677, -0.0027743059, -0.0114479, -0.0082087545)); + result += mul(max(-target2, 0), float4x3(-0.10087762, -0.10447133, -0.1005168, -0.04165659, -0.04558967, -0.040086865, 0.0016493691, 0.0055392827, 0.0070476984, -0.018665023, -0.035552308, -0.03375731)); + result += float3(0.018580848, -0.022256816, -0.0266178); + result += INPUT.SampleLevel(sam, pos, 0).rgb; + + WriteToOutput(gxy, result); +} diff --git a/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl new file mode 100644 index 000000000..3e12a7feb --- /dev/null +++ b/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl @@ -0,0 +1,2173 @@ +// Anime4K_Restore_CNN_UL +// Ported from 
https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_UL.glsl + +//!MAGPIE EFFECT +//!VERSION 3 +//!OUTPUT_WIDTH INPUT_WIDTH +//!OUTPUT_HEIGHT INPUT_HEIGHT +//!SORT_NAME Anime4K_Restore_3 + + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex7; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex8; + +//!PASS 1 +//!DESC Conv-4x3x3x3 +//!IN INPUT +//!OUT tex1, tex2, tex3 +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 + +/* Pass1: first convolution pass. Each thread handles a 2x2 group of texels; for every texel it sums nine float3x4 products over a 3x3 RGB window of INPUT, adds a float4 bias, and stores the three float4 results in tex1, tex2 and tex3. blockStart is the origin of the thread group's block and threadId.x selects the thread's slot in it (Rmp8x8 presumably maps the 64 thread indices onto an 8x8 tile; confirm against its definition). */ void Pass1(uint2 blockStart, uint3 threadId) { + uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; /* first texel of the 2x2 group this thread writes */ + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; /* the first texel of the group is already outside the input */ + } + + float2 inputPt = GetInputPt(); /* NOTE(review): assumed to be the size of one input texel in UV space; confirm */ + + uint i, j; + + float3 src[4][4]; /* 4x4 RGB neighbourhood filled by the four gathers below */ + [unroll] + for (i = 0; i <= 2; i += 2) { + [unroll] + for (j = 0; j <= 2; j += 2) { /* one gather position per 2x2 quadrant of src */ + float2 tpos = (gxy + uint2(i, j)) * inputPt; + const float4 sr = INPUT.GatherRed(sam, tpos); + const float4 sg = INPUT.GatherGreen(sam, tpos); + const float4 sb = INPUT.GatherBlue(sam, tpos); + + // w z + // x y + src[i][j] = float3(sr.w, sg.w, sb.w); /* per the layout noted above, the first index of src steps horizontally and the second vertically */ + src[i][j + 1] = float3(sr.x, sg.x, sb.x); + src[i + 1][j] = float3(sr.z, sg.z,
sb.z); + src[i + 1][j + 1] = float3(sr.y, sg.y, sb.y); + } + } + + [unroll] + for (i = 1; i <= 2; ++i) { + [unroll] + for (j = 1; j <= 2; ++j) { /* (i, j) is the centre of the 3x3 window inside src */ + uint2 destPos = gxy + uint2(i - 1, j - 1); /* output texel that corresponds to window centre (i, j) */ + + if (i != 1 || j != 1) { /* destPos equals gxy when i == j == 1, and gxy was bounds-checked on entry */ + if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) { + continue; + } + } + + /* each target below: nine float3x4 products, one per window position, followed by a float4 bias */ float4 target1 = mul(src[i - 1][j - 1], float3x4(-0.28293434, -0.10095658, -0.013867814, 0.08509398, -0.31489053, -0.26828897, 0.01152665, 0.18905516, -0.23013242, -0.18878274, -0.17923735, -0.32707638)); + target1 += mul(src[i - 1][j], float3x4(-0.3519405, -0.12639853, 0.0981044, -0.23800656, -0.1666394, 0.2548722, -0.09458217, 0.17642984, -0.0016840132, -0.12355663, -0.13711694, 0.25234836)); + target1 += mul(src[i - 1][j + 1], float3x4(-0.14581299, -0.060752276, 0.06813433, 0.32616982, -0.29410994, 0.28217724, -0.2221963, -0.051627193, 0.10754401, 0.31993762, 0.25542948, -0.4268778)); + target1 += mul(src[i][j - 1], float3x4(0.2716687, -0.13160354, -0.056812827, -0.00881874, 0.3249303, 0.05037425, -0.117648534, -0.26370025, 0.032854702, -0.14214379, 0.10036965, 0.17808898)); + target1 += mul(src[i][j], float3x4(0.004323515, 0.37651265, -0.39865002, -0.18153298, 0.5224921, -0.11810103, 0.56151056, -0.063698344, -0.17272837, -0.053013492, 0.062254835, 0.28695017)); + target1 += mul(src[i][j + 1], float3x4(0.2776938, 0.22578415, 0.110299006, 0.27424663, 0.012712999, -0.22353122, -0.0010140019, 0.08163494, 0.3611274, 0.014346184, -0.26426178, -0.26777005)); + target1 += mul(src[i + 1][j - 1], float3x4(-0.09010997, 0.19958799, 0.22421049, 0.054506898, -0.11822318, 0.23656984, 0.11197124, -0.4646639, 0.17118955, 0.33748102, 0.20479581, 0.6810799)); + target1 += mul(src[i + 1][j], float3x4(0.2121316, -0.08664465, 0.2507115, -0.223455, 0.22042283, -0.20352642, 0.42714027, -0.5048447, -0.10270271, 0.11400399, -0.019575266, 0.40490857)); + target1 += mul(src[i + 1][j + 1], float3x4(0.091496244, -0.24679382, -0.3801941, -0.08482344, -0.17183328, -0.09308921, -0.059639163,
0.3321586, -0.19797249, -0.17941834, 0.015049101, -0.13793056)); + target1 += float4(-0.02313247, 0.016216148, -0.053347506, -0.023317637); + + float4 target2 = mul(src[i - 1][j - 1], float3x4(-0.44157687, 0.1715858, -0.11000502, 0.062367063, 0.21790773, 0.15507151, 0.14760862, -0.2598815, 0.14098467, 0.14019097, -0.26298222, 0.10975315)); + target2 += mul(src[i - 1][j], float3x4(0.15774319, -0.16769339, -0.49734345, -0.3935963, 0.115124024, -0.08045373, 0.55867237, 0.48593813, 0.058544844, -0.2705686, 0.3303555, 0.4181385)); + target2 += mul(src[i - 1][j + 1], float3x4(0.16588609, -0.013389144, 0.06600297, -0.09309111, -0.36321074, -0.13877828, 0.4099233, 0.20805255, 0.31892648, 0.16856939, -0.23898357, 0.11751563)); + target2 += mul(src[i][j - 1], float3x4(0.39999864, 0.46407622, -0.12249342, -0.09798957, 0.122675434, 0.18265116, 0.030651823, 0.14682484, -0.42969155, 0.2486042, 0.13566706, -0.13458017)); + target2 += mul(src[i][j], float3x4(-0.12757893, -0.19025628, -0.16728874, -0.10162156, -0.1577721, -0.174548, 0.29329458, 0.17963637, -0.43279588, 0.088979766, 0.06334896, -0.047701746)); + target2 += mul(src[i][j + 1], float3x4(-0.14359929, -0.12800618, -0.15429202, 0.034745168, 0.15794043, -0.086441815, -0.06520017, 0.26176664, -0.022253495, -0.34480432, -0.009120493, 0.08706416)); + target2 += mul(src[i + 1][j - 1], float3x4(-0.1994137, 0.070990525, 0.3388379, 0.37502727, -0.116911314, 0.2160554, -0.1831974, -0.04184975, 0.2545874, -0.083908126, -0.19057468, -0.13382773)); + target2 += mul(src[i + 1][j], float3x4(-0.46475947, -0.23414738, -0.036689937, 0.018558737, -0.32609373, 0.15265512, -0.055894423, -0.3676328, 0.24501368, 0.12390915, 0.13458043, -0.30162823)); + target2 += mul(src[i + 1][j + 1], float3x4(0.12621075, 0.046852987, 0.17333286, 0.18997045, 0.3245911, -0.28809196, -0.3660882, -0.5916272, -0.11456223, -0.030912774, 0.17037971, -0.12640971)); + target2 += float4(0.42778614, 0.054881692, -0.23388587, -0.031204376); + + float4 target3 =
mul(src[i - 1][j - 1], float3x4(0.18228084, 0.25933146, 0.1764313, 0.23183075, -0.061067093, 0.34710985, -0.1785006, -0.06471029, -0.23235676, -0.43409523, -0.06639704, 0.30396065)); + target3 += mul(src[i - 1][j], float3x4(0.31676784, 0.21897513, 0.06466065, 0.42289257, 0.12306216, -0.3928633, 0.09720577, -0.10426061, -0.030383142, 0.03775265, 0.34221298, 0.3827705)); + target3 += mul(src[i - 1][j + 1], float3x4(-0.13229136, 0.37214845, -0.07046923, -0.17644346, 0.5591967, -0.5409525, 0.08944645, -0.047717415, 0.3754216, 0.2979604, -0.14149979, -0.1743562)); + target3 += mul(src[i][j - 1], float3x4(0.07603316, 0.10389099, 0.07042061, 0.24759614, 0.05822713, 0.29799607, -0.21219468, 0.3884128, -0.010661014, -0.5209726, 0.20311587, -0.39393)); + target3 += mul(src[i][j], float3x4(-0.17486575, -0.22572622, -0.13514778, 0.12839775, -0.25754005, 0.13090849, -0.16364887, 0.37675568, -0.05928962, 0.049174402, -0.37935108, -0.14333783)); + target3 += mul(src[i][j + 1], float3x4(0.15858985, -0.47485206, 0.4509964, 0.3877553, -0.04848657, 0.22396515, 0.33325925, -0.20703658, 0.14929648, 0.25580746, 0.2795224, -0.0158565)); + target3 += mul(src[i + 1][j - 1], float3x4(0.030926622, 0.16219522, -0.26666775, -0.27920142, -0.2693319, -0.29130983, -0.2795281, 0.22597994, 0.32512712, 0.16784063, -0.0113234315, -0.10118217)); + target3 += mul(src[i + 1][j], float3x4(-0.32426193, 0.0072339224, 0.08070994, -0.07735796, -0.09247539, 0.23327915, 0.09039661, -0.11836084, -0.2726992, -0.16031814, -0.28027415, -0.029263943)); + target3 += mul(src[i + 1][j + 1], float3x4(-0.20109104, -0.43383566, -0.33850962, -0.13422257, 0.040343326, -0.0819253, 0.26943803, -0.46652415, -0.3474102, 0.41198114, 0.14404535, 0.076806836)); + target3 += float4(-0.032911904, -0.0050934837, 0.021853646, -0.17256187); + + tex1[destPos] = target1; /* the three float4 results for this texel go to the pass outputs */ + tex2[destPos] = target2; + tex3[destPos] = target3; + } + } +} + +//!PASS 2 +//!DESC Conv-4x3x3x24 +//!IN tex1, tex2, tex3 +//!OUT tex4, tex5, tex6 +//!BLOCK_SIZE 8
+//!NUM_THREADS 64 + +void Pass2(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + 
float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.08648221, 0.012940912, 0.0694797, 0.021795172, 0.19547985, 0.019256733, -0.099714816, 0.08773751, 0.06443286, 0.08462334, 0.02924696, -0.07673487, 0.061156925, 0.12037308, -0.04778231, 0.010492923)); + target1 += mul(b1, float4x4(0.00033161003, -0.19050376, -0.14713565, -0.20729654, -0.122199036, 0.0044957534, 0.19240627, -0.1515226, 0.05051369, -0.08790857, -0.05331543, -0.13356556, -0.019020412, 0.06989371, -0.07270814, 0.06541199)); + 
target1 += mul(c1, float4x4(0.086689815, 0.08965917, 0.38167384, 0.31010604, 0.04204608, 0.095588356, -0.22810745, 0.112243816, 0.016992478, -0.16491304, -0.08901814, -0.038421903, -0.00041658967, -0.03551529, 0.097966395, -0.06240607)); + target1 += mul(d1, float4x4(0.13955353, -0.27422932, 0.14655617, -0.07651906, -0.07335902, -0.05214342, 0.35741827, 0.043639295, 0.041774176, -0.08277867, -0.028840896, -0.14434154, 0.029840615, -0.1444494, 0.0417388, -0.05095746)); + target1 += mul(e1, float4x4(-0.06985613, 0.036354948, -0.22372876, -0.268865, -0.07852222, -0.20930836, 0.06419149, 0.19363879, -0.00020227236, 0.04876036, 0.16503128, -0.05324255, 0.06806321, 0.2646995, -0.04032264, 0.06421368)); + target1 += mul(f1, float4x4(-0.16930926, 0.10122267, 0.0043684123, 0.14429477, -0.026909696, 0.028725943, 0.20114274, 0.09308162, -0.21070556, -0.13116634, 0.12419461, 0.29118228, -0.052020956, 0.18702126, 0.049802206, 0.09010561)); + target1 += mul(g1, float4x4(0.08972355, -0.076947, 0.2612936, 0.1700236, 0.21013896, -0.033608798, -0.16835962, -0.17834496, 0.050899435, -0.031109938, 0.066931866, 0.20528825, -0.024127234, -0.23103325, -0.14034404, 0.036399297)); + target1 += mul(h1, float4x4(0.058897257, 0.24295428, 0.5273254, -0.075384885, -0.03951092, 0.01945271, 0.2180824, -0.10192471, 0.04884028, 0.10811269, -0.056086104, -0.0177891, -0.15046783, -0.02886977, 0.012827891, -0.06317297)); + target1 += mul(i1, float4x4(0.08919534, 0.0142748635, 0.010994716, -0.116783954, -0.04848956, 0.12802334, -0.42647192, -0.047026183, -0.105416335, 0.014229579, -0.16081032, -0.1652654, 0.04367904, -0.21464449, 0.019457433, -0.053815585)); + target1 += mul(a2, float4x4(-0.033339895, -0.30358142, 0.03728797, -0.019257398, 0.041582108, -0.042878296, -0.16212925, 0.015385118, 0.1854467, -0.14912623, -0.10073306, 0.029578004, 0.0026831278, -0.1968894, -0.1447477, 0.013980874)); + target1 += mul(b2, float4x4(0.097215526, -0.27501073, 0.14077966, 0.07402363, 0.14528856, 0.26862413, 
-0.23837885, -0.19667485, 0.052117366, 0.012212545, 0.1311111, -0.05480854, 0.02206756, -0.09732581, -0.095444106, -0.12949228)); + target1 += mul(c2, float4x4(-0.0737551, -0.35384682, 0.13346575, -0.12573321, 0.12401249, -0.19727409, -0.022039715, -0.36438647, -0.17826872, -0.097721264, 0.10780637, -0.06372213, 0.078226656, -0.2319627, 0.06871096, -0.35198233)); + target1 += mul(d2, float4x4(0.016558306, -0.10755727, 0.07563601, 0.10631563, 0.006885377, 0.1507541, 0.028258704, 0.1609311, -0.026250815, 0.033572774, -0.0988431, 0.19565049, 0.024507977, 0.16839874, -0.19923483, 0.08130833)); + target1 += mul(e2, float4x4(-0.061187252, 0.09036177, -0.12626763, 0.036544666, 0.10568191, 0.087079406, 0.08745061, -0.10461285, 0.15552549, 0.25184712, -0.026420163, 0.028266618, 0.2387882, 0.20997152, -0.08588654, 0.19732232)); + target1 += mul(f2, float4x4(-0.057315756, 0.04398266, -0.203559, -0.10253955, -0.0009475058, -0.0786754, -0.051641934, -0.4047696, -0.057758473, -0.04819636, 0.053755116, -0.13864025, -0.165071, -0.14622927, 0.16270354, -0.11281594)); + target1 += mul(g2, float4x4(-0.023654325, 0.113905154, 0.0714336, 0.11184515, 0.12235184, 0.081852525, 0.2880535, 0.1926254, -0.01012154, 0.08924707, -0.06123374, 0.33078554, -0.14329071, 0.043857813, -0.09043615, 0.029145587)); + target1 += mul(h2, float4x4(0.023949241, 0.10680816, -0.07771331, 0.0008638595, 0.00088304427, 0.00707631, -0.029150054, 0.20421802, -0.051493708, 0.3196773, -0.0046316544, -0.08402997, -0.0020283381, 0.092219375, -0.21898057, 0.043405924)); + target1 += mul(i2, float4x4(0.10192696, 0.22852643, 0.024926228, 0.004321374, 0.1759848, -0.05959089, 0.03108929, -0.04175589, -0.032808244, -0.002723809, 0.11427024, -0.11884058, 0.085039005, -0.11861457, -0.041716687, 0.0049884217)); + target1 += mul(a3, float4x4(-0.08531041, 0.031572983, -0.010317835, -0.058514126, -0.028372116, 0.3587181, 0.07155074, -0.018486004, 0.11271158, 0.12346037, 0.14474016, -0.091422975, 0.046279423, -0.19440787, 
-0.040767148, -0.11089926)); + target1 += mul(b3, float4x4(-0.118612625, 0.036904, 0.040823236, 0.0006029242, -0.055478334, 0.065328576, -0.26563334, 0.14299026, 0.039150115, 0.17624554, 0.085402936, -0.007749703, 0.045554906, -0.051315133, -0.0989155, 0.023874454)); + target1 += mul(c3, float4x4(-0.08904957, 0.13246936, -0.1362266, 0.075549126, 0.015976984, -0.078003414, 0.27895245, -0.1714908, 0.05061789, 0.05510105, -0.011142018, 0.13279557, 0.122630805, 0.12880847, 0.2334916, 0.450533)); + target1 += mul(d3, float4x4(0.13635479, 0.12008325, 0.13332775, -0.1923403, 0.061475966, 0.12471921, 0.1438346, -0.30003113, 0.16227812, -0.011259031, -0.15664785, 0.082009956, 0.15664162, -0.14316271, 0.10871211, -0.23067066)); + target1 += mul(e3, float4x4(-0.17403863, -0.100490384, -0.056628566, 0.056505267, 0.03132433, 0.02990612, -0.023741463, -0.2221617, -0.023024872, -0.17946845, -0.014884968, 0.09488175, -0.08467482, -0.18569513, -0.08882533, -0.096383005)); + target1 += mul(f3, float4x4(0.036448594, 0.008876679, 0.082974724, -0.07486944, -0.1466638, -0.17435108, -0.08396226, 0.05346215, -0.13232903, -0.07391497, 0.19908291, 0.059030067, -0.017662045, 0.020650625, -0.20734224, 0.20043914)); + target1 += mul(g3, float4x4(-0.03861711, 0.06406407, 0.05915599, -0.0029750194, 0.046107147, -0.23294666, 0.019285874, 0.11214502, -0.05762434, -0.043726444, 0.010243058, -0.013164875, 0.033796065, -0.027231356, 0.18135343, -0.06158567)); + target1 += mul(h3, float4x4(-0.0061139134, 0.08773726, 0.05263668, -0.017488463, -0.021532185, -0.06330985, 0.03339514, 0.29500782, 0.19531941, -0.0625388, -0.0988155, 0.029160276, -0.14122078, -0.18272889, 0.035498794, -0.09119196)); + target1 += mul(i3, float4x4(0.21229745, -0.13745296, -0.02434639, 0.018458553, 0.1591066, 0.057361145, -0.034690984, -0.06146371, -0.2245296, -0.14576864, 0.053850707, -0.08887415, -0.17651638, 0.14863127, -0.07008009, 0.009406358)); + target1 += mul(na1, float4x4(-0.09558068, 0.08203744, 0.09736194, 
-0.08479601, -0.07671097, -0.13729817, 0.15081742, -0.107025385, -0.13094948, -0.11489214, 0.08040859, 0.18286897, -0.06001431, -0.16890974, -0.034702767, 0.06418509)); + target1 += mul(nb1, float4x4(-0.057768498, 0.121774144, 0.09370627, 0.04913383, 0.07690142, 0.0735232, 0.22072591, 0.023539443, 0.05777623, 0.32322824, 0.3281115, 0.08541682, 0.027571213, 0.08204197, -0.036617927, -0.11496138)); + target1 += mul(nc1, float4x4(0.10667931, 0.060036145, 0.17550562, 0.0036066761, -0.1475781, -0.0125017725, 0.1585272, -0.30022824, 0.020694837, 0.041336745, 0.34374732, 0.11649956, 0.0702352, 0.10661717, -0.018115027, 0.066765435)); + target1 += mul(nd1, float4x4(-0.031910367, 0.08394783, -0.12302906, 0.080788575, 0.056613773, 0.13485114, 0.14046827, -0.0015214924, -0.27299604, 0.043092493, 0.0110908365, 0.0120844785, 0.13837345, 0.14274547, -0.07037318, 0.073410094)); + target1 += mul(ne1, float4x4(0.11933822, -0.019749384, -0.14604573, 0.23067194, 0.07458434, 0.19018115, -0.09794594, -0.028165665, 0.34246337, -0.15636346, 0.2909177, 0.049812693, 0.002857417, -0.15300918, 0.28885588, -0.017372042)); + target1 += mul(nf1, float4x4(0.017289294, -0.034632802, 0.21390542, -0.010042412, -0.041615892, -0.08253338, -0.30123362, -0.19299945, 0.25370637, 0.093409844, -0.09362771, -0.17982802, -0.031628486, -0.09360746, 0.13314822, -0.034462616)); + target1 += mul(ng1, float4x4(0.106429726, 0.016680025, -0.0529926, -0.17085713, -0.22584449, -0.07722329, 0.30886117, 0.10528744, 0.010045352, 0.099818386, -0.1433606, -0.24887395, -0.04677741, 0.113051936, -0.062035765, -0.03359467)); + target1 += mul(nh1, float4x4(-0.10257249, -0.16084161, -0.058020744, 0.096825816, 0.19502446, -0.12214713, 0.078723475, 0.124732524, -0.15987179, -0.110849984, 0.1198203, 0.018647604, 0.114340924, -0.027776286, -0.07801131, 0.022275787)); + target1 += mul(ni1, float4x4(0.20298952, -0.069290765, 0.018832127, -0.17501442, 0.32367796, -0.20510589, 0.15283914, 0.16110845, 0.23468657, 0.12490908, 
0.0031354423, 0.33064207, -0.089915626, 0.16466871, -0.2302326, 0.008596628)); + target1 += mul(na2, float4x4(0.07267445, 0.13096274, -0.22805755, -0.03723183, 0.055223588, -0.005618507, -0.022076515, -0.07149474, -0.121041514, 0.22917031, 0.066897914, -0.07756685, 0.024709817, 0.009276744, 0.014564059, 0.057168677)); + target1 += mul(nb2, float4x4(0.12088462, -0.21629183, -0.01628627, 0.13085558, 0.09959188, -0.26857927, 0.099242754, -0.014361698, -0.06972168, 0.01484912, -0.14507025, 0.036060054, 0.010170308, -0.038049888, -0.009749429, 0.02627631)); + target1 += mul(nc2, float4x4(0.014381424, -0.08200522, 0.016342247, -0.05138454, -0.048430134, 0.14594877, -0.3555319, 0.031946313, 0.10650339, 0.18046476, -0.24730566, 0.1373742, -0.119868465, 0.09006262, -0.043326948, 0.14803706)); + target1 += mul(nd2, float4x4(0.041357175, 0.018529125, -0.10729679, -0.048498925, -0.10630771, -0.1541114, -0.033143718, -0.22726057, -0.07868649, -0.13653874, 0.17372914, -0.12425049, 0.20172423, -0.13046068, 0.043416902, 0.022640392)); + target1 += mul(ne2, float4x4(0.0996741, -0.2517156, 0.19852571, -0.0021035601, 0.10721118, -0.05211148, -0.19747794, -0.09467657, -0.023530629, -0.3026388, 0.18776762, -0.083951, -0.23070371, -0.29687774, 0.19042933, 0.082099915)); + target1 += mul(nf2, float4x4(-0.14246128, -0.11360154, 0.042512555, 0.050347224, 0.15101464, 0.096761174, -0.09809747, 0.18949512, 0.08265103, 0.10818854, -0.06522224, 0.20080575, 0.04467876, 0.16536511, -0.17993684, 0.00630444)); + target1 += mul(ng2, float4x4(-0.06804489, -0.08932311, 0.11452633, -0.1371827, -0.038583722, -0.044566132, 0.11590918, 0.06928946, 0.09499521, -0.28891554, -0.039033752, -0.24065344, -0.008447823, -0.22869939, -0.1481265, -0.14827704)); + target1 += mul(nh2, float4x4(-0.040538706, -0.16607454, 0.066053875, -0.13771453, -0.26502058, -0.090013534, 0.10899838, 0.035818875, -0.025965845, -0.38602746, 0.11832495, -0.05114795, -0.0024577992, -0.131609, 0.031598363, 0.03701916)); + target1 += 
mul(ni2, float4x4(-0.058901474, -0.32447273, -0.009171959, 0.0660178, -0.060969505, 0.032002755, 0.1673554, 0.08129589, -0.027818324, 0.11499822, 0.047595307, 0.1351946, -0.10076986, 0.109632365, -0.15808961, -0.082471184)); + target1 += mul(na3, float4x4(0.11876087, 0.01871506, -0.15440665, 0.030330261, 0.0027066949, -0.14246738, 0.07165973, 0.01423915, -0.06284659, -0.21748444, -0.09141415, 0.0077323355, 0.0007271684, 0.1327971, 0.021298295, 0.029493187)); + target1 += mul(nb3, float4x4(-0.10310914, -0.07170663, -0.2685449, -0.15668112, 0.10965974, -0.027063346, 0.15944144, -0.16771634, -0.08454698, -0.12480185, 0.17647612, 0.17139068, -0.09694541, 0.14676706, 0.1353608, 0.11373892)); + target1 += mul(nc3, float4x4(-0.0808586, -0.02986483, 0.23335268, -0.05220655, -0.21456684, 0.089947656, -0.2306551, 0.23438993, -0.26377395, -0.00432009, -0.002377239, -0.0024554976, -0.11019007, -0.0772975, -0.119338326, -0.42517295)); + target1 += mul(nd3, float4x4(0.10294163, -0.024664093, 0.019653833, 0.034689307, 0.05632113, -0.31289428, -0.08254052, 0.13217352, 0.15772913, -0.09128828, -0.012524978, -0.06561359, -0.13107683, 0.23463258, -0.18762761, 0.22209615)); + target1 += mul(ne3, float4x4(0.06301127, 0.12489633, 0.025658585, -0.09527329, 0.009095258, -0.2463554, -0.0047031543, 0.119088694, 0.14065374, 0.19576642, 0.26679346, 0.03845879, 0.13757762, 0.10764672, -0.046270162, -0.11690896)); + target1 += mul(nf3, float4x4(-0.04519331, 0.002319274, -0.0704514, 0.029526047, 0.13251334, 0.2868927, -0.06838931, 0.11092056, 0.15345666, 0.16911614, 0.06869024, -0.03073747, 0.065442406, 0.018865792, 0.081815384, -0.20803072)); + target1 += mul(ng3, float4x4(0.06323602, -0.010242011, 0.10209268, -0.04929, -0.006704608, 0.30939466, 0.1744392, -0.10929571, -0.493058, -0.13673118, -0.12486283, -0.41315997, -0.036514506, 0.0937269, -0.16995876, 0.15627468)); + target1 += mul(nh3, float4x4(-0.13764359, -0.028645532, 0.006338959, 0.058005866, 0.08327983, 0.17576805, 0.17758359, 
-0.16738725, -0.26176876, 0.07402525, -0.02212828, 0.16919926, 0.3348425, 0.032946147, 0.09707724, 0.10009711)); + target1 += mul(ni3, float4x4(-0.2091657, 0.068191156, 0.09357867, -0.05217846, -0.1104997, 0.05062617, 0.016883407, -0.053662494, 0.24314725, 0.19810323, -0.065943204, 0.25030002, 0.08373754, -0.16690144, 0.03141188, -0.101124324)); + target1 += float4(-0.08736043, 0.2861529, -0.005863071, -0.004482026); + + float4 target2 = mul(a1, float4x4(0.04347682, -0.042527717, 0.057372455, 0.25276724, -0.057298373, 0.16023909, 0.21286428, -0.022668337, -0.21247427, -0.14335708, 0.19040126, 0.08368367, 0.0033008587, 0.03252031, -0.1777948, -0.082336985)); + target2 += mul(b1, float4x4(-0.12568378, -0.08425814, 0.004957988, -0.12844385, 0.055799566, 0.21515216, -0.20364483, -0.05265174, -0.011742827, -0.053792574, -0.15443824, 0.007910115, -0.045762774, 0.03763922, 0.014743974, -0.07495264)); + target2 += mul(c1, float4x4(0.095623426, 0.118021496, -0.3646953, 0.22952312, -0.06988015, 0.07823983, 0.10331074, 0.18235193, 0.10575183, 0.017832384, 0.099051595, -0.16202737, -0.065919116, -0.027154202, 0.19286686, -0.41187564)); + target2 += mul(d1, float4x4(-0.18027067, -0.15459284, -0.5194294, 0.15895993, -0.19545266, 0.11350413, 0.08665067, 0.053280413, -0.07407145, -0.04798788, -0.13345626, 0.1258462, 0.047066033, -0.040642574, 0.08591159, -0.10039696)); + target2 += mul(e1, float4x4(-0.080157764, 0.22366004, 0.17739227, 0.033781976, -0.045201484, -0.047641475, -0.07896631, -0.08679443, 0.10642969, 0.06992287, 0.041175313, -0.16435929, 0.15798622, -0.004883945, 0.08247824, -0.056977544)); + target2 += mul(f1, float4x4(-0.05262759, 0.015417186, 0.108641304, 0.005705979, -0.013303744, -0.016400715, -0.24967128, -0.13471037, 0.07906222, 0.07200451, 0.12428817, -0.05694691, -0.022635266, -0.08490837, -0.01682493, 0.08025121)); + target2 += mul(g1, float4x4(0.4013514, -0.09885115, -0.13964225, 0.0066076894, 0.035656366, -0.061563164, 0.005582264, 0.03445424, -0.0898461, 
0.07694695, -0.06430643, 0.26156837, -0.045181878, -0.16155554, -0.008806556, 0.023297746)); + target2 += mul(h1, float4x4(0.014314168, 0.03040408, 0.079562426, 0.104040965, 0.15035652, -0.11237077, -0.04587703, 0.0664186, 0.011188344, 0.27792045, 0.03491885, 0.047752786, -0.02133782, 0.19199622, 0.03265004, 0.112835735)); + target2 += mul(i1, float4x4(-0.22900657, 0.19636537, 0.024062308, 0.004805258, 0.19197865, -0.26876372, 0.22812407, -0.13273205, 0.1163973, -0.016603881, 0.11751584, 0.07571751, -0.016665185, -0.020726109, -0.15382238, -0.05721929)); + target2 += mul(a2, float4x4(0.047688257, -0.04328092, 0.00020037305, -0.0030449564, 0.21016575, -0.3156827, 0.109759815, -0.012477153, -0.037765514, -0.19186607, 0.11098016, -0.122981705, 0.030443244, -0.27449754, 0.12108516, 0.14917934)); + target2 += mul(b2, float4x4(-0.015644934, 0.017102094, 0.068102054, 0.11661995, -0.13552219, 0.030102659, 0.14208834, 0.034298997, 0.06434777, -0.16380474, -0.10679716, -0.052865673, 0.03549326, -0.116048254, 0.16329505, 0.19959521)); + target2 += mul(c2, float4x4(-0.007844256, -0.033616025, 0.040885374, 0.0077286726, -0.057888485, 0.05796843, 0.0665138, -0.189592, -0.02662338, 0.022530284, 0.08647752, 0.054335136, 0.031057479, 0.03635868, 0.0933932, 0.064375274)); + target2 += mul(d2, float4x4(0.15531783, -0.21395409, -0.124851726, 0.049151056, -0.17787859, 0.07594992, 0.048780512, 0.0029584337, 0.013994473, -0.34576252, -0.05831177, 0.030209891, 0.009173122, -0.32105917, 0.026620382, 0.27054143)); + target2 += mul(e2, float4x4(0.031326182, 0.11699003, -0.1819442, -0.30510914, -0.21830374, 0.06375399, -0.11343298, 0.20248312, -0.032249533, 0.1300983, -0.23744828, -0.03899525, 0.095936954, 0.075583026, -0.18192224, 0.016086053)); + target2 += mul(f2, float4x4(-0.24321534, 0.1016422, 0.084550686, -0.007922614, -0.16052304, -0.09632171, 0.09476528, 0.03964334, -0.00061841257, 0.11085015, 0.16789092, 0.058375813, -0.021924267, 0.26049414, -0.04622306, 0.03622448)); + target2 += 
mul(g2, float4x4(0.05655466, -0.10016316, -0.026551498, 0.12944251, 0.06387257, 0.08759442, -0.040214762, -0.05403373, -0.001911277, -0.045361456, -0.29783988, -0.11533991, 0.07864674, -0.03580795, 0.09282203, 0.18479614)); + target2 += mul(h2, float4x4(-0.019557253, -0.01953009, 0.1073159, -0.077327915, -0.3287939, 0.08561906, -0.16314861, -0.14830309, -0.031493217, -0.050918207, 0.13767132, -0.25257835, 0.029513458, 0.1548974, -0.048502877, 0.0022710229)); + target2 += mul(i2, float4x4(0.0022606663, 0.048681643, -0.06014017, 0.23443368, -0.086114794, 0.017463014, -0.073657446, -0.0013138334, -0.053271778, 0.29075313, 0.07355574, 0.14009497, -0.15303768, 0.21335968, -0.17516625, 0.03268628)); + target2 += mul(a3, float4x4(-0.012742161, -0.041635115, 0.168062, -0.028525194, -0.030566072, -0.027266532, 0.0359287, -0.07139233, 0.061290823, -0.04036332, 0.04897623, 0.13846754, 0.039383594, 0.12339301, -0.026180696, -0.0051744552)); + target2 += mul(b3, float4x4(-0.03748404, -0.026544569, 0.11102617, -0.22780292, 0.06731992, -0.15827416, 0.09802122, 0.11640033, 0.00039111794, 0.072100006, -0.053455148, 0.06592366, -0.09381082, 0.13634324, -0.08554314, 0.016439624)); + target2 += mul(c3, float4x4(0.10113021, 0.08261971, -0.16603, -0.009958334, 0.03756299, -0.004461027, 0.08559942, -0.012674885, -0.03848595, 0.002108679, 0.021565402, -0.046234082, 0.04603834, 0.09276165, -0.29686695, -0.015194743)); + target2 += mul(d3, float4x4(0.053909358, 0.0835715, -0.116176985, 0.22114189, 0.17204702, -0.17098549, -0.08065474, -0.015051904, 0.14268506, -0.117853105, -0.0038547963, -0.099558994, -0.12031682, 0.11549271, 0.0201697, 0.093561895)); + target2 += mul(e3, float4x4(-0.056914307, 0.18547982, -0.09208387, -0.00943169, -0.024476565, 0.020612689, 0.04417863, 0.14231037, -0.05794176, 0.19624077, -0.10561953, -0.1312564, -0.09621997, -0.055228855, -0.06481115, 0.07939849)); + target2 += mul(f3, float4x4(-0.09013716, -0.12869088, -0.14419042, -0.021643816, -0.123301044, 0.1077149, 
-0.058566347, 0.010407963, 0.009403472, -0.07660888, 0.09947006, -0.07434618, -0.014246012, -0.24914171, 0.0034662948, -0.05013118)); + target2 += mul(g3, float4x4(-0.070962735, 0.06716404, -0.15136454, 0.02027541, -0.107001044, 0.50334495, -0.039790098, 0.08286825, -0.0010944081, 0.1031829, -0.011431386, -0.08257687, -0.18531963, -0.14856398, 0.024649108, 0.047142852)); + target2 += mul(h3, float4x4(0.049574193, 0.07180735, 0.047850125, -0.051012892, -0.00040669146, 0.4140869, -0.088046245, -0.036824025, -0.03582775, 0.26769164, -0.06151275, -0.09666011, 0.2566442, -0.09799407, 0.097338095, -0.026725585)); + target2 += mul(i3, float4x4(0.1490444, -0.06516709, 0.10439169, -0.034240134, -0.041965652, -0.2079741, -0.09079767, 0.15088585, 0.022063766, -0.07552733, 0.0012785956, -0.16747397, 0.10525993, -0.09890853, 0.10660105, 0.21784192)); + target2 += mul(na1, float4x4(0.07042895, 0.16030453, 0.0030912263, -0.027933247, -0.3086125, -0.28822276, -0.400802, 0.2096595, 0.08857404, 0.34754908, -0.15951826, -0.35737038, -0.038460553, 0.007917597, 0.2774085, -0.08004489)); + target2 += mul(nb1, float4x4(-0.038472448, -0.0174679, -0.107170366, -0.037775494, -0.054595813, -0.21341673, 0.21892805, 0.12125601, 0.058354914, -0.35335168, -0.21329384, -1.0650489, 0.059367847, -0.02849481, 0.001276761, -0.30784246)); + target2 += mul(nc1, float4x4(-0.050561953, 0.0007092989, -0.13955325, -0.07106547, 0.12613517, -0.0822321, 0.14023048, -0.20781253, 0.0041748453, 0.157751, -0.14171253, -0.9330524, -0.0035482922, -0.17769572, -0.1528532, -0.32141888)); + target2 += mul(nd1, float4x4(-0.040014382, 0.24272937, 0.12577556, -0.10304328, 0.12054429, -0.14819793, -0.46691173, 0.12551397, 0.21042542, 0.040414993, 0.2664476, -0.0624471, -0.10776527, 0.03234498, -0.14870068, -0.05700082)); + target2 += mul(ne1, float4x4(-0.15521951, -0.099391945, -0.31356367, -0.006449893, 0.059501357, 0.16860132, 0.2637131, -0.035344128, -0.20164591, -0.0771766, 0.22611247, -0.40267792, -0.060890198, 
0.060215253, 0.093219444, -0.3483)); + target2 += mul(nf1, float4x4(0.03416117, -0.1827499, -0.15668888, -0.10794011, -0.075220324, 0.12177839, -0.07486823, 0.21677534, -0.039297394, -0.14563735, 0.05120258, -0.00035666916, 0.12478138, 0.04741504, 0.2288785, -0.17462626)); + target2 += mul(ng1, float4x4(-0.02980817, 0.087366745, 0.043035574, 0.040445086, 0.07882225, 0.030239558, -0.117186725, 0.19092828, -0.037465222, -0.10581845, -0.055081632, -0.15845117, 0.07946355, 0.14760616, -0.022140944, 0.11649563)); + target2 += mul(nh1, float4x4(-0.19723393, 0.024121622, -0.27199838, 0.07334678, -0.07288629, 0.17650653, -0.22066317, -0.13322048, 0.0069257803, -0.24415702, 0.09925061, 0.33271804, 0.0033860113, -0.18174358, -0.13197216, -0.018403139)); + target2 += mul(ni1, float4x4(-0.093481295, -0.28051332, -0.032411367, -0.14152545, 0.18546024, 0.26412115, 0.07146612, 0.036084935, -0.27073604, -0.010888752, -0.13251275, 0.052145492, -0.0332615, 0.06561024, -0.12152722, 0.25903332)); + target2 += mul(na2, float4x4(-0.14281613, 0.07859564, 0.0066864006, -0.15937181, -0.12278831, 0.311999, 0.025959859, 0.02308115, -0.03229773, 0.2645761, -0.13995989, 0.10817364, 0.07908819, 0.42388916, -0.17739546, 0.10429196)); + target2 += mul(nb2, float4x4(0.2201895, -0.2196956, 0.14305998, -0.3301203, 0.16685095, 0.09164033, 0.031294953, -0.05854433, -0.06691493, 0.1518185, 0.038523998, 0.05256842, -0.047954578, 0.1683237, 0.0048684916, -0.10664451)); + target2 += mul(nc2, float4x4(-0.026817175, -0.029176721, 0.24391933, 0.017680334, 0.15134846, -0.15139282, 0.29651865, 0.12128057, 0.044055674, 0.023059618, -0.054705862, -0.025505943, -0.019943522, -0.032058105, -0.30078474, 0.28300348)); + target2 += mul(nd2, float4x4(-0.15246257, 0.16519837, 0.030530507, 0.0019738604, -0.09898821, -0.10236442, -0.15473707, 0.1960111, 0.08083462, 0.1931143, 0.053789698, 0.063627414, -0.10000871, 0.1890801, -0.039166793, -0.035554815)); + target2 += mul(ne2, float4x4(-0.008138058, -0.090632096, 
0.09218409, -0.1870409, 0.006966406, -0.036867052, -0.1109265, 0.15594107, -0.06334745, -0.025499493, 0.16426682, 0.024393357, 0.0060975226, 0.08250694, -0.022282967, -0.09879987)); + target2 += mul(nf2, float4x4(0.06807879, 0.127161, -0.20435798, -0.11276813, -0.035021268, -0.019755092, -0.17415504, 0.060618974, 0.12325889, -0.12290322, -0.05086793, 0.14947659, 0.023935383, -0.032783996, -0.029157335, -0.006670329)); + target2 += mul(ng2, float4x4(-0.14423427, 0.07715571, 0.06842541, -0.24895051, -0.06428334, -0.07863047, 0.23238844, 5.274231e-05, 0.048996497, 0.17647398, 0.413201, -0.31975266, -0.030216858, 0.04867342, -0.30262446, -0.15375552)); + target2 += mul(nh2, float4x4(0.23534048, 0.092139505, 0.012503786, 0.116008915, -0.0898572, -0.17778875, 0.16141091, 0.3644637, 0.043014687, -0.031378243, 0.11754703, -0.38509452, 0.1001422, 0.036844354, -0.0051652407, 0.036642574)); + target2 += mul(ni2, float4x4(0.08065526, -0.14093323, -0.027013494, -0.112644374, -0.019306205, -0.10695108, -0.21220952, -0.039872676, -0.09730943, -0.47728395, -0.28284085, -0.07133749, -0.04755162, -0.14241156, -0.01632541, -0.009647049)); + target2 += mul(na3, float4x4(0.07490686, -0.06242466, 0.15567005, -0.16337247, -0.2887383, 0.2881797, -0.121348776, 0.060069725, -0.03536951, -0.24556357, -0.35177758, -0.11175104, -0.0073047564, -0.06645475, 0.014323825, 0.058212377)); + target2 += mul(nb3, float4x4(0.03256386, -0.05097925, 0.27179804, -0.09543428, 0.161455, 0.023938831, 0.10773267, -0.10486564, 0.076764554, 0.06358945, -0.18258472, 0.08324786, 0.06467844, -0.20269682, 0.046431858, -0.08359799)); + target2 += mul(nc3, float4x4(-0.086718775, 0.029116197, -0.020623617, -0.010007143, -0.0062927944, 0.028177656, -0.07210879, 0.06786677, 0.023476062, 0.17860489, -0.06256401, 0.061757386, -0.046495005, -0.055532746, 0.15595034, 0.12336579)); + target2 += mul(nd3, float4x4(0.08569872, -0.03291618, 0.18875046, -0.080043204, 0.19672358, 0.0756269, 0.02688733, 0.16277955, -0.060868777, 
-0.037449554, 0.020366343, -0.28260133, 0.30251002, -0.08898951, 0.002503838, -0.031098645)); + target2 += mul(ne3, float4x4(0.09120409, -0.04983847, 0.07688438, 0.008763123, -0.09732479, 0.21332602, -0.13068666, -0.030675085, 0.31382635, 0.0012199014, -0.18128653, 0.30740625, -0.100602135, 0.08708379, 0.112137444, -0.03682313)); + target2 += mul(nf3, float4x4(0.0709511, -0.04224951, -0.05609049, -0.0006408909, -0.030565612, -0.012263292, -0.009747451, -0.07244236, 0.054749947, -0.01405017, 0.009567654, -0.074202195, -0.06860078, 0.13089342, -0.06874847, -0.03219275)); + target2 += mul(ng3, float4x4(0.1576853, -0.2683739, -0.025735255, -0.06460345, 0.075857066, -0.59675205, 0.11202596, 0.14385986, -0.06844365, -0.23115703, 0.12929395, -0.12881753, 0.009042129, 0.105781116, -0.055749435, -0.081277415)); + target2 += mul(nh3, float4x4(0.13527077, -0.03984972, 0.018804315, 0.12699783, -0.17789197, -0.30242765, 0.09397843, 0.090828404, -0.059823766, 0.044621762, 0.25259614, -0.19707985, -0.13368398, 0.20000716, -0.009788325, -0.20149179)); + target2 += mul(ni3, float4x4(-0.041884087, -0.059512906, -0.0896845, 0.06103581, 0.110947184, 0.10910047, -0.0047273464, 0.079314105, -0.121069044, 0.10926088, 0.13192393, 0.13567427, 0.109372094, 0.06015443, 0.100631915, -0.224153)); + target2 += float4(0.022030555, -0.05006568, 0.014002339, 0.023597209); + + float4 target3 = mul(a1, float4x4(-0.09202538, -0.081250995, 0.13399354, -0.09287109, 0.075870514, -0.046435528, 0.06888035, 0.07559372, 0.047911238, 0.1541559, 0.016089845, -0.020714905, 0.034469247, 0.09413617, -0.06726056, 0.04964387)); + target3 += mul(b1, float4x4(0.22596729, 0.02889021, -0.048012562, 0.14605793, -0.086510226, 0.09049988, -0.0024043226, 0.07370351, -0.02844908, 0.056516882, -0.12932102, -0.080092, -0.014557861, 0.2417015, 0.24414025, -0.08637478)); + target3 += mul(c1, float4x4(-0.08709868, -0.15894723, 0.051107977, -0.007953947, -0.005816434, 0.15406336, -0.08382943, 0.06931645, 0.10049424, -0.10653088, 
0.2009932, 0.15972902, 0.02209797, -0.008090025, 0.058555678, 0.044184227)); + target3 += mul(d1, float4x4(-0.14687128, 0.08516212, -0.090116605, -0.053017177, -0.09254908, -0.043845087, -0.02666236, -0.12203544, -0.043807525, 0.14893356, -0.11529748, -0.06253818, -0.010695381, -0.10081673, -0.0314329, -0.044264063)); + target3 += mul(e1, float4x4(0.021610646, -0.16695172, -0.31326374, 0.05392923, 0.12519042, 0.12159836, -0.07893999, -0.10245254, 0.10427483, -0.042931017, -0.18065664, 0.01107328, 0.110220656, -0.06329314, -0.044132728, -0.004572783)); + target3 += mul(f1, float4x4(0.01665856, 0.121704906, -0.2353256, 0.16223833, 0.04024997, -0.01792505, 0.14950873, -0.06683434, 0.004776299, 0.011929818, 0.07254882, 0.03820532, 0.31055966, 0.08748786, 0.0073042163, 0.2684048)); + target3 += mul(g1, float4x4(-0.23074506, -0.06215829, 0.053791784, 0.22733828, -0.11443747, -0.15169612, 0.040388454, -0.007505497, 0.005672369, 0.0026797412, -0.001197972, 0.007488197, -0.0024618902, 0.10131061, -0.07500523, -0.013001146)); + target3 += mul(h1, float4x4(-0.0776098, -0.060467657, 0.063401155, -0.3178554, 0.046797205, -0.10740315, 0.02085142, 0.101416804, -0.1198098, -0.02295822, 0.039581314, -0.048711125, -0.06259446, -0.11206371, -0.0053890026, -0.070524804)); + target3 += mul(i1, float4x4(-0.12901165, 0.21051991, -0.1142095, 0.22749256, -0.023643937, -0.046942696, -0.060973406, -0.057919096, -0.22156318, -0.051061176, 0.0916328, 0.012217941, -0.17102586, -0.18390712, 0.006507473, -0.029991195)); + target3 += mul(a2, float4x4(-0.2522444, -0.03696223, -0.18561353, 0.13687257, 0.073648125, 0.13678576, 0.16931336, 0.00949838, -0.038437508, -0.059626862, 0.05821261, -0.07623236, -0.08685592, -0.17067757, 0.174131, -0.025060346)); + target3 += mul(b2, float4x4(0.104338415, -0.096368395, -0.029887693, 0.032492615, 0.041827764, 0.24553889, 0.099045165, 0.059192423, 0.023159435, -0.043454442, 0.10354106, 0.17867453, -0.1752651, 0.16507833, -0.09264873, 0.038281262)); + target3 += 
mul(c2, float4x4(0.06404952, 0.014349881, -0.08079635, -0.18684097, -0.021107968, 0.1474591, 0.02128032, 0.052345317, 0.19520657, -0.18109623, 0.12578261, 0.034501765, -0.1369868, -0.05843081, 0.16561405, -0.06775279)); + target3 += mul(d2, float4x4(0.08673276, 0.14922544, 0.12579706, 0.12474029, -0.06912261, -0.104719676, 0.27239847, -0.13122962, -0.05688415, 0.1428628, 0.00895786, -0.032757584, 0.019906566, -0.17429581, -0.10528849, 0.13250664)); + target3 += mul(e2, float4x4(0.1025883, 0.16903317, 0.24479683, 0.08272392, -0.12168113, 0.09135378, 0.06919754, -0.24658537, 0.014526622, 0.08442609, -0.30363482, -0.03433778, 0.037446275, 0.030086113, -0.07519447, -0.068841174)); + target3 += mul(f2, float4x4(0.024311058, -0.08233637, -0.16022089, -0.1597245, 0.050970588, -0.10577119, -0.1112992, -0.052199256, -0.0849103, -0.3776085, -0.21930903, -0.20542654, -0.01871536, 0.10911211, 0.07675561, -0.024964388)); + target3 += mul(g2, float4x4(0.12411877, -0.00519536, 0.0480481, -0.10641975, -0.0010129698, -0.049957395, 0.0066010677, -0.07925235, 0.1930976, 0.5361102, -0.056495357, -0.05665149, -0.1270014, 0.041294765, -0.15627688, 0.018746065)); + target3 += mul(h2, float4x4(0.13720295, 0.085025266, 0.05471863, 0.038614765, -0.06960719, 0.16281144, -0.21186842, -0.1941425, 0.095628515, 0.084828205, 0.02530074, 0.11415585, 0.10537103, -0.0586968, 0.019073522, -0.055825945)); + target3 += mul(i2, float4x4(-0.21141429, 0.01108361, -0.14758278, 0.08792016, -0.0016714301, -0.0030396983, -0.12766738, -0.08827425, -0.07848207, -0.13752016, 0.013766901, 0.09635439, -0.079080686, -0.14922711, 0.06670641, -0.080326416)); + target3 += mul(a3, float4x4(0.20643076, -0.00499668, 0.23666923, -0.17106888, 0.12709226, 0.00981184, 0.028967496, 0.016210513, 0.12393452, 0.0043048155, 0.05266705, -0.094970286, 0.005504978, -0.050391, 0.10117381, 0.09549521)); + target3 += mul(b3, float4x4(0.04931849, -0.0065390305, 0.08863048, -0.0947855, 0.15617795, -0.17475569, 0.10392811, 0.035971895, 
0.03656791, -0.12339292, 0.010653483, 0.08514984, 0.15630373, 0.15763232, -0.012078789, -0.026336702)); + target3 += mul(c3, float4x4(0.13140163, 0.07304222, 0.03644733, 0.09648337, -0.017975705, -0.072331324, 0.0029975558, -0.021666657, -0.020042133, 0.044821594, 0.037660487, 0.09642576, 0.06416202, 0.014092053, -0.043693382, -0.051554378)); + target3 += mul(d3, float4x4(-0.23793697, -0.0014973939, -0.08946259, 0.067851745, -0.019646896, -0.19535433, 0.10289966, 0.0010244731, -0.20782173, 0.0020514326, -0.16879739, 0.17888409, -0.124513365, -0.07472942, -0.0588901, -0.2092017)); + target3 += mul(e3, float4x4(0.060483094, 0.059208773, 0.08345, 0.0010649676, -0.23659356, 0.3603475, 0.0053207604, -0.03345199, 0.020284697, -0.01113311, 0.11211144, 0.053414755, 0.1895607, -0.15760773, -0.23431808, 0.043709636)); + target3 += mul(f3, float4x4(0.080154695, -0.064768635, -0.12550141, -0.08824165, -0.07509624, -0.0713246, -0.22137038, 0.0921876, -0.025354594, -0.24898566, -0.028864942, -0.16679515, -0.08982522, 0.029950809, -0.06993633, 0.12565832)); + target3 += mul(g3, float4x4(-0.20841017, 0.06321075, -0.04099131, 0.07732559, -0.08110228, 0.20876545, -0.11388175, 0.27826598, -0.15344119, 0.09446656, 0.2735643, 0.079110265, -0.043845385, 0.029875547, 0.12783948, -0.10298459)); + target3 += mul(h3, float4x4(0.08580364, -0.08134692, -0.085382804, -0.09634259, -0.07509618, -0.12689087, 0.05720452, -0.1819075, 0.11217614, -0.16592574, -0.101749554, -0.018963661, 0.14723873, 0.12904182, -0.052782595, 0.05793788)); + target3 += mul(i3, float4x4(-0.0056530046, 0.05674741, 0.014994733, 0.11958239, 0.16446747, -0.049534798, -0.016570516, -0.21063349, -0.07496503, 0.0055008507, 0.11419655, 0.048011355, -0.04684853, 0.042691138, 0.09421025, 0.12923399)); + target3 += mul(na1, float4x4(-0.083864704, 0.07605092, -0.047560036, 0.16445905, -0.029962407, 0.18134072, -0.22724763, 0.023675185, -0.03332916, -0.04249084, 0.15973917, 0.007322849, -0.087714255, -0.153021, 0.030236037, 
-0.100231044)); + target3 += mul(nb1, float4x4(-0.17441258, -0.028744312, 0.05915575, -0.11824928, -0.04179886, -0.14449957, 0.04891911, -0.21351086, 0.3303812, 0.07433166, 0.503379, 0.2470829, 0.1322803, -0.04928455, -0.15583721, 0.106110215)); + target3 += mul(nc1, float4x4(-0.08065278, -0.00050983805, 0.027161239, 0.12555373, 0.017745659, 0.0479513, 0.10691591, -0.13202804, 0.38873398, 0.046141643, 0.07307728, 0.13692193, 0.18681903, 0.11005239, 0.15744549, 0.21892804)); + target3 += mul(nd1, float4x4(0.03978365, -0.023494922, -0.039753728, 0.27451408, 0.02140033, -0.013376269, 0.028383363, 0.059702866, -0.0071658283, -0.13848262, -0.1019017, -0.16829433, -0.018539641, 0.013991451, 0.099338084, -0.05775615)); + target3 += mul(ne1, float4x4(-0.065350726, 0.11001335, 0.11902446, -0.21104746, 0.095098086, 0.02739781, -0.26015705, 0.22157612, -0.15288728, 0.2722011, 0.27105704, -0.24145271, -0.051725585, 0.06605028, -0.012332871, -0.17540309)); + target3 += mul(nf1, float4x4(-0.2189158, -0.05287219, -0.04915249, -0.05357751, -0.12871711, -0.0061132344, -0.1406079, -0.18074436, -0.14702965, -0.22242828, 0.08177444, 0.3396842, -0.2632696, -0.06403873, -0.008123073, -0.030273361)); + target3 += mul(ng1, float4x4(0.11255844, -0.057998642, -0.07679987, 0.049385145, 0.13984528, -0.07007145, 0.11060764, 0.12331489, -0.05268373, -0.15397486, 0.054913905, -0.1393604, 0.020389834, -0.17137636, 0.067205, 0.084197655)); + target3 += mul(nh1, float4x4(0.27258077, -0.10924528, -0.1159478, 0.05647175, 0.13014089, 0.12746723, 0.0045503005, 0.07131271, 0.081193194, 0.018001271, -0.056847095, 0.19587554, -0.018607333, 0.1416207, -0.03856229, -0.0888815)); + target3 += mul(ni1, float4x4(0.0946241, 0.059010573, 0.013680293, -0.042248886, -0.2995221, -0.095081195, 0.06510416, 0.043059137, 0.10425443, -0.1222804, -0.16180466, -0.3628854, -0.01679748, 0.112195894, -0.004974211, -0.055885002)); + target3 += mul(na2, float4x4(0.11798436, 0.1390635, 0.142733, -0.16162498, 0.034902234, 
-0.13497733, 0.097894885, 0.10681201, -0.047284793, 0.015005336, -0.09031815, 0.12383599, -0.091548845, -0.013705567, 0.049403854, 0.18155518)); + target3 += mul(nb2, float4x4(0.1806166, 0.08396095, -0.17600271, -0.029499372, 0.17163202, 0.18944095, -0.1755662, -0.008431973, -0.057935216, 0.1584788, -0.059633583, -0.1950766, -0.03091734, -0.045874756, -0.0051801866, -0.20533004)); + target3 += mul(nc2, float4x4(0.004201836, -0.15968263, 0.015041736, 0.17407048, -0.03530788, 0.09062685, 0.050316375, -0.058444653, -0.12015508, 0.11712405, -0.031137828, -0.049205493, 0.05515115, 0.06733773, 0.03607973, 0.05056488)); + target3 += mul(nd2, float4x4(0.006330765, -0.17457847, -0.021863922, -0.16448942, 0.059458453, 0.1486118, -0.22728927, 0.0058831032, -0.00180954, -0.34799471, -0.017039202, 0.03939159, -0.033589013, 0.32948977, 0.087067194, -0.113632225)); + target3 += mul(ne2, float4x4(0.042377464, -0.030939378, -0.08917448, 0.2585585, -0.28696018, -0.04419827, 0.0057377038, 0.08444518, -0.009464956, -0.03967168, 0.05095106, -0.04785119, -0.05805417, -0.07269471, -0.18795604, -0.23612237)); + target3 += mul(nf2, float4x4(-0.026615486, 0.1219551, 0.17111751, 0.12014681, -0.10403522, 0.13139823, 0.28612077, -0.17874514, 0.030061528, 0.31433544, 0.16948178, 0.10126, 0.0582159, -0.13620348, -0.026327167, 0.11529438)); + target3 += mul(ng2, float4x4(-0.10999408, -0.1642254, -0.09659326, -0.085699454, 0.05962901, -0.07562989, 0.042366143, -0.1533413, -0.09869005, -0.21281542, 0.020441674, 0.17866766, -0.26933256, 0.049314983, 0.10039448, -0.13316467)); + target3 += mul(nh2, float4x4(-0.22610307, -0.0013520997, 0.16817398, 0.037943725, -0.067527935, -0.15105802, -0.0973126, -0.05843863, 0.19214404, 0.092337616, -0.024034662, -0.007926626, -0.32222804, 0.082673185, 0.069847725, 0.027493093)); + target3 += mul(ni2, float4x4(0.0014049035, -0.058899652, 0.060463455, -0.052001078, 0.19716045, 0.12879235, -0.026990427, 0.23919769, 0.0034248075, -0.0157977, -0.06720619, -0.013757762, 
-0.101808615, 0.029667001, 0.07381132, 0.092393965)); + target3 += mul(na3, float4x4(0.053514812, 0.14120969, -0.056737684, 0.017708244, -0.05407678, 0.103361025, -0.0924985, 0.053643283, -0.28559983, -0.12866977, -0.06750911, 0.027970003, 0.06481888, 0.06773354, -0.07627304, -0.07058017)); + target3 += mul(nb3, float4x4(0.10564813, 0.1891429, -0.085196435, 0.0073824013, 0.0039014777, 0.14679071, 0.09327677, -0.030248597, 0.18063113, -0.3115451, 0.06560229, -0.03190648, -0.1619295, -0.112393744, -0.10004008, 0.0023948452)); + target3 += mul(nc3, float4x4(-0.033827845, -0.12089327, 0.042195093, 0.025078757, -0.044261515, 0.09103579, -0.19070679, -0.1600237, 0.13683122, -0.072529055, 0.062436976, -0.29964364, -0.114442796, -0.047068417, -0.07223064, 0.05781626)); + target3 += mul(nd3, float4x4(-0.04086473, 0.029395554, 0.05157983, 0.013322953, -0.001428512, -0.103283875, 0.15795463, 0.21691218, 0.23493949, -0.18836173, 0.28818855, -0.07839693, -0.043874815, -0.011829423, 0.0825803, 0.18832965)); + target3 += mul(ne3, float4x4(0.087384604, 0.2075869, 0.012306303, -0.06356627, -0.019742407, -0.256092, -0.089735925, 0.026248232, -0.22160976, -0.4420786, 0.033200428, -0.1376953, -0.3315224, 0.17343274, 0.3179911, 0.012785637)); + target3 += mul(nf3, float4x4(-0.14358811, 0.052979786, 0.13841373, 0.07362653, 0.050186664, 0.11735455, 0.0032370305, -0.16536471, -0.005521641, 0.1040989, -0.07086791, 0.13729815, 0.0840539, 0.06547088, 0.22857827, -0.2079967)); + target3 += mul(ng3, float4x4(-0.11850976, -0.026047882, 0.00785038, -0.19955018, 0.040088244, -0.10139797, 0.08621738, -0.26192454, 0.3888625, 0.33236128, 0.1412189, 0.10097289, 0.07574426, -0.15459102, -0.1557534, 0.03405655)); + target3 += mul(nh3, float4x4(-0.15693793, -0.03326048, 0.110803954, 0.07044277, 0.1380442, -0.029729376, -0.26033366, 0.040598683, -0.23744181, 0.043091178, 0.18325818, 0.05989088, 0.099216335, -0.012825024, 0.20831011, -0.08420897)); + target3 += mul(ni3, float4x4(0.031240137, -0.034582928, 
0.0022927374, -0.06525183, -0.15711913, -0.04604516, 0.0605175, 0.15128267, 0.072712876, -0.015489105, -0.20996843, -0.24177326, 0.053063773, -0.08747667, 0.24771367, 0.1244199)); + target3 += float4(0.07754665, -0.09230884, 0.019135362, 0.035482828); + + tex4[gxy] = target1; + tex5[gxy] = target2; + tex6[gxy] = target3; +} + +//!PASS 3 +//!DESC Conv-4x3x3x24 +//!IN tex4, tex5, tex6 +//!OUT tex1, tex2, tex3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass3(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex4.SampleLevel(sam, pos, 0); + float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = 
tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex5.SampleLevel(sam, pos, 0); + float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex6.SampleLevel(sam, pos, 0); + float4 f3 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.15677336, 
0.18937011, -0.15614599, 0.15203404, 0.098624565, 0.023782162, -0.045496363, -0.014783688, 0.07303875, -0.075132, -0.019847363, -0.088889055, -0.11558432, -0.08860719, 0.16452459, -0.018188732)); + target1 += mul(b1, float4x4(0.026749048, -0.0376324, -0.0994071, -0.00093872234, 0.014682955, 0.008369919, -0.046362195, -0.21044572, -0.013911088, -0.117338374, 0.14585997, -0.11355687, 0.04094843, -0.11326298, 0.08555518, 0.076577775)); + target1 += mul(c1, float4x4(0.04918652, 0.10098061, -0.097193845, 0.011482707, -0.015221698, -0.06306758, 0.09985586, -0.0011515089, -0.09592504, 0.11805872, -0.053774815, 0.093555175, 0.11237289, -0.20694147, 0.255737, 0.0149322525)); + target1 += mul(d1, float4x4(0.06269537, -0.28116295, 0.1405942, 0.00218229, -0.012810465, 0.11574089, 0.060055815, -0.14248852, 0.03755387, 0.03748404, 0.04481931, 0.086039774, -0.0707909, -0.053917676, -0.009349141, -0.06623982)); + target1 += mul(e1, float4x4(-0.002837983, -0.0649247, -0.14890024, 0.0011222209, 0.12083026, -0.16136795, -0.04910086, 0.060653802, 0.020444075, 0.0024171378, 0.06839313, -0.21157807, -0.1678213, -0.27503422, 0.0063047423, 0.03292154)); + target1 += mul(f1, float4x4(0.14229529, -0.002042125, -0.022892606, 0.08743759, 0.035437252, -0.12997083, -0.1851374, 0.33951423, -0.037205234, 0.03710803, 0.018455725, -0.052581675, -0.16795224, -0.14008522, 0.011014682, 0.07038518)); + target1 += mul(g1, float4x4(0.105874196, -0.21320704, -0.08445409, 0.052140422, -0.13498448, -0.0737051, -0.027274717, -0.06932614, -0.017584193, -0.13111684, -0.049095873, 0.08269069, -0.017520722, -0.08716905, 0.25897968, -0.1412353)); + target1 += mul(h1, float4x4(-0.016677873, -0.024665434, -0.11711789, 0.16085778, 0.017375777, 0.15644072, 0.11040864, 0.23371918, 0.10210983, 0.0039968346, -0.007850634, -0.026810693, 0.08863099, 0.094195805, 0.10420045, -0.19671428)); + target1 += mul(i1, float4x4(-0.016842589, -0.15904509, -0.038347725, 0.1279937, -0.00045717083, 0.13132372, -0.13027431, 
-0.058826704, -0.0029436084, 0.008283112, 0.10262298, -0.05013397, -0.02922706, 0.14453132, 0.18946488, -0.0966266)); + target1 += mul(a2, float4x4(-0.00050655927, 0.2318558, 0.025141997, -0.058849655, 0.05127902, -0.056867033, -0.06191942, -0.028451841, 0.038166817, -0.14328304, 0.06050816, -0.12157533, 0.058556214, -0.13964172, 0.026282474, 0.03329027)); + target1 += mul(b2, float4x4(-0.06520211, 0.21877246, 0.017677024, -0.053116243, -0.018621214, -0.0063418522, -0.10306368, -0.07627847, -0.0035643768, -0.05579889, 0.07386847, -0.0084178485, 0.005625732, 0.10204069, -0.08501438, -0.013451101)); + target1 += mul(c2, float4x4(-0.067369066, 0.17327416, 0.062035594, -0.1340041, 0.10289677, -0.0868232, 0.023330351, -0.072417624, -0.12027732, 0.11592929, 0.05090798, -0.06895359, -0.04391116, 0.18919718, 0.064172365, -0.051173057)); + target1 += mul(d2, float4x4(-0.022913774, -0.021000199, -0.01890946, -0.079307556, -0.16522343, -0.3152304, -0.21007383, 0.01858985, 0.003152245, -0.009094366, -0.023845399, -0.06635666, 0.041294664, 0.12883614, -0.06389087, 0.005710572)); + target1 += mul(e2, float4x4(0.032583844, 0.16247992, 0.06764235, -0.2240413, -0.15760922, 0.20196813, 0.13201368, 0.106440805, -0.070570394, -0.19261852, 0.28010008, -0.0048360736, -0.14080645, -0.02105434, 0.023814693, -0.13861166)); + target1 += mul(f2, float4x4(0.071627796, 0.20605852, -0.2676727, -0.39509574, 0.22782667, 0.13424493, 0.08930976, 0.13314968, 0.045536704, -0.06271722, 0.01703984, 0.13352728, -0.07089344, 0.14776441, 0.11804898, -0.027061034)); + target1 += mul(g2, float4x4(-0.011638248, -0.016760292, 0.0593982, -0.100421235, 0.030956578, 0.13813019, 0.022237146, -0.091211095, 0.010232882, 0.0010010025, 0.16789608, -0.030847551, 0.027778173, -0.005418129, -0.16441783, 0.07580936)); + target1 += mul(h2, float4x4(0.08137598, -0.008976606, 0.00023393384, -0.19671111, -0.0068668523, 0.097364455, -0.0026000517, -0.11201763, 0.047109667, -0.043774106, 0.12344897, -0.13232613, 0.026984906, 
-0.13614078, 0.06604853, 0.10752554)); + target1 += mul(i2, float4x4(0.00047561026, 0.12248177, 0.05146918, -0.3956014, -0.12263068, 0.22729336, 0.03597535, 0.09500604, 0.06894016, 0.061162107, 0.13561803, -0.047466908, -0.0013999783, -0.068306796, -0.031758398, -0.046261873)); + target1 += mul(a3, float4x4(0.12310386, -0.046108138, -0.08357388, 0.02034243, 0.0024922634, 0.029359696, -0.04329755, -0.034257423, 0.08229037, -0.11810178, -0.1079754, 0.13327998, -0.09608102, -0.26294786, -0.056677792, -0.1958781)); + target1 += mul(b3, float4x4(0.007982684, 0.020604203, -0.12702446, -0.02264998, -0.034644246, -0.00025684707, 0.037761245, -0.0041598473, -0.047972955, 0.039201785, -0.016598722, -0.044081174, 0.11861525, 0.01239671, -0.12192053, 0.08865015)); + target1 += mul(c3, float4x4(-0.0018564354, -0.07618631, -0.09212719, 0.092056714, -0.16783315, 0.08645543, 0.24669226, -0.023520375, -0.04045034, -0.0023428998, -0.01612943, 0.014919031, 0.16028026, -0.020104371, -0.16949941, 0.18713622)); + target1 += mul(d3, float4x4(0.19490379, -0.07592651, -0.200843, 0.07704469, -0.02736559, -0.054601975, -0.07240532, -0.03120134, -0.038438305, -0.12783389, -0.057655185, -0.009752765, 0.07110615, 0.033978693, -0.023724876, 0.11998657)); + target1 += mul(e3, float4x4(0.18834178, 0.23053586, -0.14430945, 0.32287082, -0.32185385, -0.15306619, -0.1573794, 0.005030648, 0.06912159, 0.009656687, -0.20743106, 0.03814172, 0.104378454, -0.07221508, -0.11348173, -0.019581677)); + target1 += mul(f3, float4x4(-0.017694198, 0.028853144, 0.1263284, 0.1820403, -0.05317991, -0.057951134, -0.04575081, 0.05769411, -0.11807033, 0.06413361, 0.06063185, 0.19433405, 0.0032539407, 0.021501997, -0.14744627, -0.095206425)); + target1 += mul(g3, float4x4(-0.0463219, -0.13988416, 0.07200895, -0.13444373, -0.2447483, -0.024709478, -0.08591721, -0.09281996, -0.046719797, -0.11321926, -0.061532497, -0.0044461554, -0.03174407, -0.0056026108, 0.0056006387, 0.08828445)); + target1 += mul(h3, 
float4x4(0.060374547, 0.062058832, -0.0390557, -0.047456663, -0.2227052, -0.03193117, -0.025358196, 0.08565629, 0.03657194, 0.13427348, -0.09266081, 0.23655434, 0.024580589, 0.01999063, -0.038653534, -0.023600115)); + target1 += mul(i3, float4x4(-0.0522313, 0.079263784, 0.10858985, -0.031472187, 0.072964184, -0.065342486, -0.03705779, 0.12809205, 0.09141905, 0.042783994, -0.028724866, -0.08221137, 0.13597457, 0.029334683, -0.12261823, -0.0052482346)); + target1 += mul(na1, float4x4(0.018523648, -0.21706165, -0.14580801, 0.038885653, -0.030849187, -0.06640324, 0.0011639405, 0.097421385, -0.10876752, 0.14631185, 0.014579094, 0.13907033, 0.1310694, -0.1287285, 0.03553917, 0.025316685)); + target1 += mul(nb1, float4x4(0.22148734, 0.01278849, -0.1596892, 0.17187239, -0.04219283, -0.064526156, 0.011610614, -0.0094766095, 0.028804665, 0.16347663, -0.09309108, 0.07097134, -0.014338763, 0.051742412, 0.059907336, -0.17768253)); + target1 += mul(nc1, float4x4(-0.06295463, -0.118564956, -0.016017804, 0.050398786, -0.07136999, 0.25657415, -0.035830878, -0.084443375, 0.12151532, -0.089734256, -0.064030536, 0.048108097, -0.01340212, -0.16572993, -0.093480445, 0.088874646)); + target1 += mul(nd1, float4x4(-0.059600584, -0.0052702287, 0.029479535, 0.20121074, -0.07113247, 0.1561413, 0.25110185, -0.060266465, -0.34369025, 0.14528714, 0.060928173, 0.008688357, 0.034280702, -0.004796254, 0.15269074, 0.056567237)); + target1 += mul(ne1, float4x4(0.05273782, -0.10539872, -0.07192354, -0.083380386, 0.097994, -0.20134969, -0.5062206, 0.30952695, -0.041553877, -0.055801403, -0.037597038, -0.13394146, 0.027271803, 0.17738731, 0.3336375, -0.0035211574)); + target1 += mul(nf1, float4x4(0.009962762, 0.11503034, 0.027571376, -0.018972939, 0.057955634, -0.039739445, -0.0676937, 0.09477686, 0.17910802, -0.28064108, -0.12184129, -0.028407406, 0.056930028, 0.024252843, 0.08959171, -0.027298026)); + target1 += mul(ng1, float4x4(-0.010729545, -0.048747167, 0.03880723, -0.006755044, -0.011909068, 
0.008659933, 0.0800407, -0.040333465, -0.25750905, 0.29087406, 0.04864783, 0.118413374, -0.03514928, -0.17206238, 0.2095635, 0.039926212)); + target1 += mul(nh1, float4x4(0.0073815766, -0.030507097, 0.13367772, 0.04863103, -0.067190245, 0.039960794, -0.013012274, 0.15617093, -0.33983988, -0.05671963, 0.22061184, -0.03684452, 0.06304772, -0.08322253, 0.1117871, -0.2006011)); + target1 += mul(ni1, float4x4(0.119437724, -0.009319272, -0.07218167, -0.20269917, 0.10248017, -0.009564983, -0.016272334, -0.042979773, 0.11264571, -0.15697405, 0.015802475, 0.11154868, -0.073011585, -0.07225136, 0.15061282, 0.027214698)); + target1 += mul(na2, float4x4(0.03921657, -0.0154446345, -0.01855873, -0.15813923, 0.11489257, -0.10245685, 0.090572976, -0.072605945, -0.069270656, 0.05171411, 0.045471992, -0.028802622, -0.19419885, 0.18310049, 0.06882923, -0.0005851153)); + target1 += mul(nb2, float4x4(0.04575681, -0.020910552, 0.051311508, -0.0004904971, 0.04239284, 0.024153773, 0.030940467, -0.107036866, -0.099398546, 0.30524835, 0.03902779, -0.05217122, 0.14969619, 0.084496036, -0.14226931, -0.07463564)); + target1 += mul(nc2, float4x4(0.05297294, 0.15384737, -0.0069473814, 0.055046722, 0.11697162, 0.2424236, 0.021053674, -0.004738062, 0.014129249, -0.2909751, -0.048418947, 0.014277387, 0.053296436, -0.12475984, 0.07531274, -0.022512587)); + target1 += mul(nd2, float4x4(-0.04752641, 0.0006545224, -0.00589135, -0.026285272, -0.043745905, 0.24044664, 0.027723765, -0.023630425, 0.00869218, 0.028710615, -0.013863237, 0.0809765, 0.06708566, 0.013517718, 0.0012386752, -0.022743834)); + target1 += mul(ne2, float4x4(-0.12600644, 0.0116939265, 0.0491542, 0.06871389, -0.2096317, 0.050711762, -0.0455067, -0.11994795, -0.05030036, 0.20621927, 0.10951404, -0.05465143, 0.09614336, -0.22954291, 0.15239881, 0.04559428)); + target1 += mul(nf2, float4x4(0.020940155, 0.16499193, 0.17525958, -0.051628407, -0.3068143, -0.14576466, 0.00672593, -0.1308778, 0.00072586804, -0.067010164, -0.093788825, 
0.005219908, -0.020126363, -0.083521724, -0.0650657, 0.01836861)); + target1 += mul(ng2, float4x4(0.072675996, 0.10010303, -0.1263988, -0.13888146, 0.13648619, 0.09535094, -0.0038582503, 0.10240531, -0.0014882578, -0.21053605, 0.16676606, -0.024605514, -0.06614438, 0.09575527, 0.116414934, -0.18538997)); + target1 += mul(nh2, float4x4(-0.013467567, 0.11274834, 0.07675635, -0.054812886, -0.024862224, 0.044424616, -0.12858495, -0.120611496, -0.1295857, -0.029304063, -0.06629468, -0.22211547, 0.12577437, -0.015624684, -0.10307795, 0.09404936)); + target1 += mul(ni2, float4x4(0.11430831, 0.11486887, -0.06219608, -0.018371167, 0.091516815, 0.0041821343, -0.043150745, -0.11775014, 0.07794832, -0.01944774, -0.031383686, 0.077408955, -0.124252975, 0.062118705, 0.009199536, 0.06538969)); + target1 += mul(na3, float4x4(0.22154011, -0.098727904, -0.08378975, -0.04167056, 0.019208338, -0.02245709, 0.13298267, -0.104098395, 0.053671844, 0.12845491, -0.003814564, 0.0665341, -0.07084713, 0.26803628, 0.09472736, 0.16825765)); + target1 += mul(nb3, float4x4(-0.21349828, -0.14917591, 0.12592548, -0.12721801, 0.086323306, -0.15409322, 0.07365807, 0.00620922, -0.0280901, 0.0957864, 0.10711525, 0.1165179, -0.08383744, 0.14757137, 0.024865197, -0.17536579)); + target1 += mul(nc3, float4x4(-0.044920437, -0.00016428503, 0.035227478, -0.026525848, -0.17628764, 0.044141084, 0.025941433, 0.18698089, 0.0069334265, 0.097304195, -0.08945912, -0.007168394, -0.054236215, -0.2604089, -0.14738831, -0.074961744)); + target1 += mul(nd3, float4x4(-0.043119818, -0.012245711, 0.030121213, -0.0032237277, -0.033457555, 0.052158665, 0.046546284, -0.0047129868, -0.08133807, 0.037123546, 0.08634659, 0.120436855, -0.02609943, 0.11368193, -0.06750012, 0.0007624448)); + target1 += mul(ne3, float4x4(-0.20511842, 0.1999221, 0.099944666, -0.14691514, 0.012555328, -0.22190604, 0.12456348, 0.05391116, 0.031001683, -0.33920962, 0.13921735, 0.101068705, 0.28788915, 0.13809694, -0.10081831, -0.05679542)); + target1 += 
mul(nf3, float4x4(-0.019705083, 0.08693377, 0.06884471, 0.032386675, 0.10256849, 0.22142375, 0.07398588, 0.03336761, 0.19134827, 0.12654771, -0.39008364, -0.29602188, -0.04149512, 0.018968705, 0.080247656, 0.0480814)); + target1 += mul(ng3, float4x4(0.09539717, -0.10946926, -0.048939522, 0.030059233, -0.17243776, 0.021580435, 0.15642153, -0.10282692, -0.020257011, 0.060849674, 0.040640093, 0.05628088, -0.11358645, -0.16440971, 0.1787329, -0.02685428)); + target1 += mul(nh3, float4x4(0.14034219, 0.21827984, -0.16170599, 0.03681219, -0.051667843, 0.019152328, 0.033406716, -0.025032328, 0.13413768, -0.09349573, 0.10037219, -0.0105256345, -0.17372406, -0.07619186, 0.068273135, 0.088958755)); + target1 += mul(ni3, float4x4(-0.015460073, -0.04781314, -0.008159705, 0.117226824, -0.20293492, 0.019126927, 0.1074034, -0.10307512, 0.1356002, 0.108166546, -0.1275016, -0.023100886, -0.09334954, -0.14509954, 0.1668647, 0.13371155)); + target1 += float4(0.004647682, -0.04675001, -0.041206088, 0.07870823); + + float4 target2 = mul(a1, float4x4(-0.0116784945, -0.25090152, -0.17868316, 0.036498535, 0.015182224, 0.2023079, 0.011117134, 0.15237965, -0.015316299, 0.088544175, -0.06572522, -0.08057326, -0.22271864, -0.30610234, -0.12208543, -0.22944431)); + target2 += mul(b1, float4x4(-0.11143165, -0.077543005, -0.061455075, -0.037597977, -0.0023224957, -0.10979736, -0.034990564, -0.008420816, -0.094636045, -0.030254573, -0.06455877, -0.020989688, 0.018324712, -0.3669934, -0.350233, 0.037510827)); + target2 += mul(c1, float4x4(0.104956195, 0.015602951, -0.051957965, 0.13510315, 0.010418954, -0.054195777, 0.018231759, 0.045083612, 0.09856977, -0.10220956, -0.029939203, 0.01315078, -0.29208857, 0.0017958464, 0.08760539, -0.09646556)); + target2 += mul(d1, float4x4(0.046938017, 0.08242743, 0.13486576, -0.087577604, 0.1157099, 0.101392664, 0.14847688, 0.037801757, 0.018798033, -0.25906846, 0.097656235, -0.009259822, 0.10044328, 0.33434513, -0.15681681, -0.07497045)); + target2 += mul(e1, 
float4x4(0.113606565, 0.15215175, 0.056206945, 0.03135906, -0.06457102, 0.028175417, -0.06261949, -0.015601963, -0.048961632, 0.07163545, 0.0147160115, 0.037389677, 0.092339285, 0.26372424, 0.1122662, -0.058904216)); + target2 += mul(f1, float4x4(-0.21457312, 0.1408831, -0.08713026, -0.06950515, 0.006483218, 0.028784987, -0.02613041, -0.06227427, 0.024932534, -0.02103815, 0.080908604, 0.078669965, 0.19956729, -0.035375793, -0.046653055, 0.07523847)); + target2 += mul(g1, float4x4(-0.11979529, -0.15300119, -0.06692378, 0.0982862, -0.05148871, -0.16330053, -0.045053672, 0.022939514, -0.013373179, 0.38319084, 0.11172139, -0.07044107, 0.09208871, -0.07254955, -0.03284103, 0.05421524)); + target2 += mul(h1, float4x4(-0.09024579, 0.022398917, -0.084611446, 0.1254619, -0.0028836168, -0.092541836, -0.06697658, -0.09709128, 0.10234711, -0.1247404, 0.031691026, 0.0087786, -0.09046125, 0.059536055, 0.2076767, 0.15046969)); + target2 += mul(i1, float4x4(-0.18603326, 0.0022851937, -0.10218833, 0.18102962, 0.030617537, -0.005931309, -0.06299933, -0.13356128, -0.03377612, -0.009710565, -0.10352098, -0.20960933, 0.10586698, 0.018833099, 0.16208176, -0.048466753)); + target2 += mul(a2, float4x4(-0.004165509, -0.112526424, -0.1481008, -0.09386717, 0.017359056, -0.16117403, 0.065114655, 0.15273894, 0.0850914, -0.6033039, -0.102531776, -0.09553129, 0.06812466, -0.17199127, 0.009345428, -0.117129266)); + target2 += mul(b2, float4x4(0.19360402, -0.2172338, -0.025270093, 0.041762922, -0.06813442, -0.1315374, -0.03864256, -0.083543435, -0.14600715, -0.10248121, -0.039856248, 0.034162194, -0.06736031, 0.07872902, -0.06577812, -0.07003804)); + target2 += mul(c2, float4x4(0.2596632, -0.06779467, -0.061247632, 0.09280383, 0.15697475, -0.06379218, 0.117600165, 0.19564915, -0.043823496, 0.2113048, 0.1236739, 0.05126704, 0.0036669953, 0.059754487, -0.031676155, 0.07585315)); + target2 += mul(d2, float4x4(0.2750924, 0.07154958, -0.043717247, 0.11531165, 0.07236982, 0.112787254, 0.024018776, 
-0.0073595895, 0.037517104, -0.06963889, -0.13254988, -0.1347438, 0.08744426, 0.036659624, -0.010376286, -0.0011054546)); + target2 += mul(e2, float4x4(0.21909392, -0.15014122, -0.1724268, -0.11459151, 0.07886104, -0.039391857, -0.086656936, -0.18109863, 0.13549148, 0.24947289, -0.11073447, -0.012388639, -0.06299071, 0.094953805, -0.018513478, 0.11858225)); + target2 += mul(f2, float4x4(0.14019133, 0.289657, -0.13005698, 0.08418524, -0.15852125, 0.2049765, -0.18946235, -0.03330375, 0.057983503, 0.17226145, -0.16830897, -0.047264732, 0.027640691, -0.010081246, 0.14454436, 0.081710726)); + target2 += mul(g2, float4x4(0.1674246, 0.28778687, 0.19290589, 0.086525135, 0.09838388, 0.1437797, 0.18871532, -0.31380877, -0.13105413, -0.15920939, -0.049839422, 0.025027066, -0.042670842, -0.07288023, -0.03385935, 0.03853164)); + target2 += mul(h2, float4x4(0.26396382, -0.09383774, 0.10738164, 0.058519054, 0.01883401, 0.023963995, -0.09510717, 0.25038752, 0.004994643, 0.26613802, 0.11163109, -0.09746982, -0.08012294, 0.092731714, 0.024274494, 0.040725235)); + target2 += mul(i2, float4x4(0.024282128, 0.07086445, 0.04124535, -0.04565769, -0.043728314, -0.15438943, 0.06610379, 0.07666126, -0.046235953, 0.04901646, -0.045347054, -0.0908177, 0.03715751, -0.09512116, 0.024934331, 0.019330366)); + target2 += mul(a3, float4x4(-0.0610446, -0.00039494174, 0.11040924, 0.09711379, -0.033694427, 0.042628422, 0.04497454, -0.08639888, -0.006714255, -0.1956921, -0.07696048, -0.1440855, -0.036684107, 0.08872227, -0.014518533, -0.081829615)); + target2 += mul(b3, float4x4(0.03242377, 0.2742694, 0.15646815, 0.12491848, -0.097658925, 0.04652564, -0.20971832, -0.22238888, -0.045453016, -0.10306553, -0.14868681, -0.03697577, 0.037367497, 0.106009305, 0.0006840817, -0.06331295)); + target2 += mul(c3, float4x4(-0.09252423, -0.260707, 0.060529877, 0.1422387, 0.13040084, 0.060533516, -0.15988415, 0.093058884, -0.063219644, 0.16596133, -0.0858158, 0.0010563346, -0.05912638, -0.14902595, -0.0055698613, 
-0.19287406)); + target2 += mul(d3, float4x4(0.050616026, 0.027293183, 0.1349355, 0.06430556, -0.0017233352, 0.05913591, 0.111860454, 0.05829484, -0.036098555, 0.065207146, -0.049812254, -0.14549483, -0.12424656, 0.1472102, 0.031858474, 0.017159335)); + target2 += mul(e3, float4x4(0.018377563, 0.13093959, 0.15379103, 0.12314944, 0.040771928, -0.066829674, -0.05734121, 0.105038896, 0.29102528, -0.015173645, -0.004220056, -0.13141808, -0.20211789, 0.16278313, 0.09339586, -0.06485214)); + target2 += mul(f3, float4x4(-0.000521399, -0.3693901, 0.17483166, 0.16742888, -0.06343791, 0.042411476, 0.13772172, 0.064281724, -0.034507953, 0.03691756, 0.13490774, 0.10946845, 0.12370677, -0.03205938, -0.02645649, -0.15375873)); + target2 += mul(g3, float4x4(0.023370143, 0.11848177, 0.005112462, 0.026092546, 0.034971926, -0.08103188, -0.20400497, 0.06226299, -0.060475063, 0.035214186, -0.13627078, 0.045491677, -0.07321337, -0.10956125, 0.056908336, -0.0032308386)); + target2 += mul(h3, float4x4(0.076967224, 0.117254384, 0.03186256, 0.2218116, 0.05217254, -0.13943173, 0.058474854, 0.13177274, -0.019476373, 0.14138101, -0.012791203, 0.12705484, -0.013589421, -0.10622012, -0.0021916716, 0.015177393)); + target2 += mul(i3, float4x4(-0.061352234, -0.032728117, -0.16315818, 0.08222588, 0.013996033, 0.057500184, -0.11674498, -0.10170402, -0.03012877, -0.14447689, 0.032117244, 0.11841102, -0.0070680035, -0.15353645, 0.14097273, -0.12609388)); + target2 += mul(na1, float4x4(-0.1366668, 0.022588843, -0.06960645, -0.019482136, 0.008831277, 0.005849642, -0.042811397, -0.10104664, -0.21647254, -0.055100426, 0.10582604, 0.091224626, 0.16348936, -0.04480947, -0.08394584, 0.14027816)); + target2 += mul(nb1, float4x4(-0.05215042, -0.22153285, -0.07402603, -0.1395589, -0.26351386, 0.060670085, 0.12676051, 0.0018233472, 0.09564221, -0.14353068, 0.23205271, -0.026433198, -0.04914892, 0.09260728, 0.016136972, -0.037016835)); + target2 += mul(nc1, float4x4(-0.09228144, 0.028619789, -0.011197684, 
0.043782573, 0.061469227, 0.019487167, 0.046048775, -0.060745444, -0.24178508, -0.11117579, 0.1313642, -0.20273723, 0.081280276, -0.015113924, -0.008701512, 0.038079187)); + target2 += mul(nd1, float4x4(-0.092076614, -0.14906341, -0.013150191, -0.1445046, 0.023577487, -0.088496424, -0.03039066, -0.028063597, 0.033408202, 0.105900854, -0.098281376, 0.09988187, -0.04934366, 0.1647861, 0.15974896, 0.0484809)); + target2 += mul(ne1, float4x4(0.043313354, -0.079856, -0.29574707, -0.23970212, -0.23463657, -0.061711017, -0.12481534, 0.21037807, -0.010700073, 0.14672308, 0.15071099, -0.03755617, 0.072450034, 0.083081506, -0.001196162, -0.055120632)); + target2 += mul(nf1, float4x4(0.20737736, 0.008907195, -0.11623631, -0.038137514, 0.037122898, -0.10322798, -0.065684326, -0.010471773, -0.12765402, -0.117699586, -0.012870391, 0.071912766, -0.03260932, 0.12864828, -0.035069928, -0.08712889)); + target2 += mul(ng1, float4x4(-0.05578123, 0.056912176, 0.01512389, -0.14807466, -0.012101421, 0.10860546, 0.034598228, 0.07160875, 0.15761101, -0.4777804, -0.24159615, -0.006523453, -0.28167522, -0.14714232, -0.1693888, -0.111417554)); + target2 += mul(nh1, float4x4(0.25981572, 0.1301148, -0.01769167, -0.10818973, 0.16135831, 0.024396034, -0.06722463, -0.032221332, -0.12383674, 0.038760092, 0.052030306, 0.077312715, -0.007761604, -0.12031171, 0.018808518, -0.103885494)); + target2 += mul(ni1, float4x4(0.048577465, 0.025990447, -0.07106119, 0.15832591, 0.019197416, 0.044232063, -0.030652093, 0.011447957, 0.18041368, -0.28076535, 0.022676598, -0.15350787, -0.1514482, -0.2362105, 0.14161605, 0.030001758)); + target2 += mul(na2, float4x4(0.2541123, 0.050012548, 0.1707829, 0.025630053, 0.078972176, 0.17645672, -0.020095231, 0.03378738, -0.1328695, 0.04409738, -0.23381121, -0.013347802, -0.049448222, 0.07035769, 0.105488785, 0.08659344)); + target2 += mul(nb2, float4x4(0.10455444, 0.28242826, 0.16516706, -0.046555575, 0.13230863, 0.07463435, -0.14748469, 0.11881527, 0.2279376, 0.14795774, 
0.21520549, -0.05650647, 0.11728158, 0.048864357, 0.040869843, 0.1442246)); + target2 += mul(nc2, float4x4(0.21203394, -0.06565692, 0.03824069, 0.011281014, -0.033808656, 0.12499576, -0.13186213, -0.043884885, 0.017813649, 0.18413112, 0.046354674, -0.05213395, -0.051737677, -0.07141214, 0.03402196, -0.06220277)); + target2 += mul(nd2, float4x4(0.05735565, -0.12864622, 0.051514987, 0.03940558, -0.08701596, -0.1948226, 0.034218855, -0.03742723, 0.15607446, 0.0327541, 0.04040029, 0.0028771486, -0.08412264, -0.016660625, -0.058885157, 0.09373861)); + target2 += mul(ne2, float4x4(0.069591254, 0.018901952, 0.008289076, 0.08653302, -0.009072406, -0.11095817, 0.20987292, 0.016384758, 0.05693833, -0.118542574, 0.11310585, 0.073924355, 0.10250452, -0.043420166, -0.07558694, -0.10898524)); + target2 += mul(nf2, float4x4(-0.030319573, -0.3339516, -0.0689396, 0.01270701, 0.2504168, -0.08088952, 0.048351087, 0.013527536, -0.04373888, -0.27049688, 0.052563794, 0.010002367, 0.038096514, 0.0740455, -0.17847466, -0.1106183)); + target2 += mul(ng2, float4x4(-0.041473575, 0.036192052, -0.20958827, 0.09255741, 0.043088675, -0.07332803, -0.1566315, 0.19757885, 0.04752265, 0.14642613, 0.021630943, -0.105035484, 0.015669389, 0.015701298, 0.124771506, 0.028875854)); + target2 += mul(nh2, float4x4(-0.0017878636, 0.06815434, 0.03952396, 0.0008930589, 0.10052908, -0.010179957, 0.090537265, 0.26063922, -0.027913721, -0.27610707, -0.0935186, 0.103001356, -0.013015698, -0.13290603, -0.036786307, -0.120041944)); + target2 += mul(ni2, float4x4(0.008112194, 0.101246096, 0.10216113, 0.012162128, 0.16638301, 0.03442679, -0.013482703, 0.22639573, -0.106342115, 0.16007386, 0.1562559, 0.031520694, -0.04781568, 0.061812893, 0.063238494, -0.112484284)); + target2 += mul(na3, float4x4(-0.07636386, 0.02620731, -0.04784259, -0.0068134456, -0.098476306, -0.25026417, -0.26229498, 0.07999187, 0.08054805, -0.13999973, 0.038135037, -0.017274393, -0.07507948, -0.19170132, -0.111937724, -0.07482982)); + target2 += 
mul(nb3, float4x4(-0.102867655, 0.041831665, -0.26580745, 0.072875075, 0.122495115, -0.24738726, 0.01103763, 0.010455935, 0.10415628, 0.071636476, 0.24413374, 0.036024485, -0.14325532, -0.028743692, 0.09872556, 0.019074876)); + target2 += mul(nc3, float4x4(-0.08356808, 0.031134086, -0.0018714333, 0.052166995, -0.050258227, 0.015659487, -0.010771479, -0.094513185, 0.120308846, -0.16520835, 0.24742663, 0.0097768335, -0.26430902, 0.00096495246, -0.010277926, -0.03203841)); + target2 += mul(nd3, float4x4(-0.08886612, 0.045868922, -0.23351108, -0.11945227, -0.08114231, 0.1866038, -0.014666174, 0.10560594, 0.23003237, -0.031111564, 0.08909732, -0.004926665, 0.14808343, 0.012070922, 0.26077467, -0.13846008)); + target2 += mul(ne3, float4x4(0.02067818, 0.010505095, 0.1236986, 0.004310499, -0.23058774, 0.4539795, -0.1107521, 0.2687594, -0.088774115, 0.08556259, -0.28480148, 0.16472621, 0.22381066, 0.04922506, 0.03720699, -0.019955777)); + target2 += mul(nf3, float4x4(0.02878623, 0.08478639, 0.2798358, 0.08889886, 0.094446555, 0.022878725, 0.04060367, 0.008747965, 0.074154414, -0.36745515, -0.22710432, -0.17041051, 0.16977836, 0.18033457, -0.1422643, -0.06097858)); + target2 += mul(ng3, float4x4(-0.1882957, 0.07039768, 0.012633585, 0.0782871, 0.03383675, -0.07504364, -0.006248557, -0.0551174, 0.075581536, 0.091343425, 0.07753647, -0.0019186279, -0.016886314, 0.16758795, -0.060557626, -0.16569303)); + target2 += mul(nh3, float4x4(-0.13320294, -0.055567943, 0.05735829, 0.12787667, 0.04922832, -0.012577599, -0.13878204, -0.014323274, 0.06648109, -0.026210563, 0.019616883, -0.27789673, 0.051355522, -0.13060455, 0.039109703, 0.036932684)); + target2 += mul(ni3, float4x4(-0.10139845, -0.22758122, 0.044597298, 0.07907936, -0.025654264, -0.10633203, 0.04071873, 0.22363085, 0.12398309, 0.36964926, 0.21903247, -0.3217227, 0.030226095, 0.07867376, 0.045920413, 0.102684624)); + target2 += float4(-0.06939391, 0.017302405, 0.023963664, -0.011060264); + + float4 target3 = mul(a1, 
float4x4(-0.12172707, 0.08510432, 0.016999101, -0.03837886, -0.071940385, -0.028869554, -0.073142946, -0.018426571, -0.16583674, 0.02999741, -0.045404267, 0.07544135, -0.015742308, 0.051709145, 0.07165505, 0.15298915)); + target3 += mul(b1, float4x4(-0.18608806, -0.08503095, -0.05690552, 0.20230335, 0.03255425, -0.07374758, 0.02050966, -0.0322938, 0.029025763, 0.045261286, 0.040862788, 0.0007141505, -0.040648397, -0.09871272, 0.06639088, -0.10357326)); + target3 += mul(c1, float4x4(0.1160622, -0.021342635, -0.039825406, -0.19480887, 0.13462403, -0.06567422, 0.04279539, -0.012501501, -0.06882412, 0.24730788, -0.11261373, 0.15826169, -0.1942516, -0.011018759, -0.006282914, 0.15791936)); + target3 += mul(d1, float4x4(-0.24771467, -0.029817501, -0.0072410326, 0.0049591805, 0.002406374, 0.06705227, 0.0746882, -0.021962378, 0.02235974, -0.09111428, 0.046035543, -0.05091351, 0.12882613, -0.0052345973, 0.20476472, -0.035007346)); + target3 += mul(e1, float4x4(0.07206948, 0.007837054, 0.004716684, 0.032783184, -0.1640229, 0.09656901, -0.024538686, -0.13850725, 0.0020381159, -0.119971916, -0.03598378, 0.098396435, 0.11248338, 0.013638009, -0.13411912, -0.091735974)); + target3 += mul(f1, float4x4(0.012680958, 0.0073848446, -0.15104567, -0.086190425, 0.017306415, -0.12165865, -0.030102974, -0.06412363, -0.048320986, 0.066044435, -0.037102707, -0.05550032, -0.022057295, -0.016380537, -0.023064991, 0.04324733)); + target3 += mul(g1, float4x4(0.014645644, 0.029250145, -0.19020447, 0.06094981, 0.06021305, 0.033002753, -0.08270684, -0.13078806, -0.078915745, 0.03234919, 0.0033177685, 0.025673114, -0.10040817, -0.11726593, 0.26478398, -0.021515043)); + target3 += mul(h1, float4x4(-0.03930199, -0.007856709, -0.010699159, -0.03138408, -0.25258276, -0.051078923, -0.17284779, 0.115362965, 0.20981595, -0.12642711, -0.07527823, -0.21674243, -0.05171349, -0.032929346, -0.11959963, 0.021577986)); + target3 += mul(i1, float4x4(-0.12679584, -0.00971076, -0.2065375, -0.10207124, 0.1189984, 
0.13061368, 0.048184898, 0.009846873, 0.08049477, -0.052818604, 0.024915429, -0.089877605, 0.028596658, -0.049394336, 0.15412825, -0.25427133)); + target3 += mul(a2, float4x4(-0.042340282, 0.15739791, -0.0058195787, 0.11638454, -0.29605922, 0.04940588, -0.12277728, 0.06556332, -0.15141304, -0.007342225, -0.015176599, 0.19668026, -0.029852653, 0.1131092, 0.06274694, 0.19488528)); + target3 += mul(b2, float4x4(0.17317021, 0.12034029, 0.023154281, -0.035767153, 0.023895182, 0.08562897, 0.010849429, 0.15511833, -0.071655706, 0.06762927, 0.110938646, -0.11194944, 0.088547744, 0.01826857, 0.10635028, 0.00079735904)); + target3 += mul(c2, float4x4(0.1724684, 0.072277844, -0.07157608, 0.014533819, 0.21083286, -0.10260293, -0.042641845, -0.022131564, 0.15609416, -0.012785209, 0.1689822, 0.08156936, -0.05814626, 0.12873544, 0.013016528, 0.07162671)); + target3 += mul(d2, float4x4(0.10265145, -0.15034834, -0.020390334, 0.051008113, 0.13483785, -0.036995072, 0.10197256, 0.07332627, 0.24034818, 0.041877862, 0.101294585, -0.038894523, -0.036132984, -0.09265928, -0.056219723, -0.02888855)); + target3 += mul(e2, float4x4(0.2652024, -0.01230703, 0.23594856, 0.0742723, 0.09739247, 0.0483161, 0.023852533, 0.17482124, -0.09551598, 0.07907358, 0.09280555, 0.27893403, -0.016893778, -0.15504459, 0.07111864, 0.17860727)); + target3 += mul(f2, float4x4(0.009993413, -0.034769267, 0.06733924, -0.026964549, 0.30227652, 0.0139632225, 0.049200308, -0.07578955, 0.061411507, 0.1924837, -0.008919774, -0.02543576, 0.08537961, 0.01291466, 0.07587885, -0.19892685)); + target3 += mul(g2, float4x4(0.079757795, -0.021056721, -0.119849935, -0.1829519, 0.25801504, 0.08255822, 0.09422877, -0.26859275, -0.17237917, 0.030880162, -0.073090166, 0.045552216, -0.15178613, 0.046667624, 0.05506945, 0.120318785)); + target3 += mul(h2, float4x4(0.13899504, 0.2106589, 0.09166694, -0.06926149, 0.13418478, 0.017007234, 0.027100448, -0.062565625, -0.021934774, 0.067251615, -0.10328445, 0.033577222, -0.050557505, 
-0.035202354, -0.062489368, -0.02470738)); + target3 += mul(i2, float4x4(0.15340589, 0.11806747, 0.20874004, 0.048173226, -0.05472843, 0.084544346, -0.043854542, -0.07571899, 0.036645986, 0.05016359, -0.074323095, -0.2529282, 0.13572234, -0.008771343, 0.11274458, 0.18037859)); + target3 += mul(a3, float4x4(0.021645557, 0.08299124, -0.051362146, 0.09342637, 0.0665058, 0.09216755, -0.0164684, 0.07281118, -0.0053016874, 0.032470454, 0.004089323, 0.009884544, -0.0046753073, -0.037279285, 0.12613527, 0.022236153)); + target3 += mul(b3, float4x4(-0.06745298, -0.15038055, 0.11176774, -0.06209666, 0.017843692, 0.09113945, 0.10990877, -0.021071523, -0.111020654, 0.066645324, 0.04690986, -0.011084726, -0.15171939, 0.084783286, 0.24798997, -0.042696327)); + target3 += mul(c3, float4x4(-0.05915715, -0.22595185, 0.061333664, -0.0924661, -0.013238295, 0.12872066, 0.076126665, 0.18921073, 0.01155994, 0.092524104, 0.07423282, 0.09467482, 0.070056126, -0.06073076, 0.030242696, -7.544676e-05)); + target3 += mul(d3, float4x4(0.110107556, 0.0036358358, -0.013859793, 0.008409858, -0.021337144, -0.2092404, 0.054274913, 0.013595842, 0.058993395, 0.029181428, 0.15061715, -0.046964824, 0.044353873, -0.036482453, 0.22763032, -0.018364066)); + target3 += mul(e3, float4x4(0.20778932, -0.049483854, 0.24778971, -0.3266631, -0.11545233, -0.093305275, -0.4550674, 0.2352049, 0.0052719507, -0.045975342, -0.35826904, -0.058102172, -0.096291795, -0.11218896, 0.23879842, -0.03641578)); + target3 += mul(f3, float4x4(-0.109331824, 0.00814177, -0.08803353, 0.06688425, -0.09283131, 0.031705324, 0.040918272, 0.18237656, -0.07152109, 0.12277652, -0.059865803, -0.06869673, 0.11195339, -0.1325457, 0.1912906, -0.08553347)); + target3 += mul(g3, float4x4(-0.10984097, 0.15747224, -0.019459615, 0.24969575, -0.01159421, -0.027474519, -0.004108195, -0.062133413, -0.06384389, -0.08368246, 0.0023778875, 0.13171864, -0.05652675, 0.14332311, -0.15735596, 0.20150533)); + target3 += mul(h3, float4x4(0.078031205, 
-0.12403856, 0.04191835, -0.16050112, 0.11339027, 0.074540265, -0.15324953, -0.093895815, -0.0614043, -0.013293006, -0.12348063, 0.026803058, -0.1773178, -0.083579265, -0.054864556, 0.296814)); + target3 += mul(i3, float4x4(-0.053263642, -0.048648115, -0.010281689, 0.20099847, 0.190146, -0.0023872026, -0.010445226, -0.04350378, -0.017980015, -0.04147092, -0.08261166, -0.031094978, -0.046422567, 0.120881446, -0.054973155, -0.058380593)); + target3 += mul(na1, float4x4(-0.16745642, 0.07924586, -0.16717474, 0.06620602, 0.16495655, 0.0293633, 0.07890249, -0.30954084, 0.03467237, -0.20190205, 0.0014116743, -0.32280523, -0.14156029, -0.06447037, -0.21021147, 0.0687274)); + target3 += mul(nb1, float4x4(-0.04360317, 0.14327015, -0.06630513, -0.09011326, -0.0919624, -0.09085504, 0.024597472, 0.23315085, 0.039139662, -0.17370877, 0.048785537, -0.10094988, 0.010336257, -0.016844554, -0.05375775, -0.041789643)); + target3 += mul(nc1, float4x4(-0.04296336, -0.093379766, 0.005651271, -0.090673715, 0.021506978, -0.08289978, 0.16281237, -0.0939677, -0.10273288, -0.22043118, 0.062697254, -0.027947478, -0.08711271, 0.0077892793, -0.10296665, 0.049631704)); + target3 += mul(nd1, float4x4(-0.09388834, -0.02609863, -0.043841925, -0.020223266, -0.023729876, 0.07854283, -0.19361661, -0.02297985, -0.003995974, 0.03295993, -0.07480908, -0.03279157, 0.20216386, -0.06685853, -0.22405225, -0.22138701)); + target3 += mul(ne1, float4x4(-0.041702025, 0.03686083, 0.051558632, 0.08093031, 0.0004725686, 0.0050831046, -0.31346506, 0.24020754, -0.012426937, 0.24121699, 0.0522848, 0.0524269, 0.0041041574, 0.20183508, 0.30658904, -0.099001035)); + target3 += mul(nf1, float4x4(0.0057143304, 0.07863334, 0.030834159, -0.20045337, -0.14132334, -0.019685036, -0.041891463, 0.04859716, -0.19865768, -0.16805026, -0.21894583, 0.08326542, 0.1381732, 0.06524222, 0.14627486, 0.105718866)); + target3 += mul(ng1, float4x4(-0.06811638, -0.07022535, -0.08053529, -0.019539276, -0.0013508294, -0.067808755, 0.14990425, 
-0.020371182, 0.2161962, 0.012578056, -0.07941276, -0.29615018, -0.11092915, 0.10959083, -0.38344857, -0.04684961)); + target3 += mul(nh1, float4x4(0.05912716, -0.007058617, 0.0053731226, -0.20157285, -0.0039983774, 0.1626744, -0.15158534, -0.0880334, -0.095339596, -0.102986366, 0.16870484, 0.37301186, 0.046958193, -0.018308617, 0.2801249, -0.1583765)); + target3 += mul(ni1, float4x4(0.03710428, 0.12427524, -0.15491271, 0.0521613, -0.104145944, -0.11358381, -0.11450005, -0.03948202, -0.022532975, 0.013648349, -0.05297846, -0.05551, 0.012648896, 0.013729304, 0.004389595, 0.033111174)); + target3 += mul(na2, float4x4(0.092548154, 0.12822087, 0.03935411, -0.03887123, 0.18817197, -0.010538254, -0.13670439, -0.073919185, 0.020497803, 0.030874884, 0.023953672, 0.0029225757, 0.1144403, -0.08691024, 0.05340699, -0.10702303)); + target3 += mul(nb2, float4x4(0.1613281, 0.05971506, 0.042405322, 0.005931725, -0.09373433, -0.06380234, -0.064201795, -0.014180793, 0.0671638, -0.01367733, 0.14260428, -0.11077721, -0.045686133, 0.056600757, -0.15297161, -0.005997308)); + target3 += mul(nc2, float4x4(0.24641256, 0.06483951, 0.060505014, -0.009762036, -0.04572455, 0.03593092, 0.03415938, -0.14721255, -0.107680336, 0.09697482, 0.016876915, 0.18656448, 0.016999245, -0.08490942, -0.040251363, -0.074220374)); + target3 += mul(nd2, float4x4(0.25207043, 0.11133333, 0.13421617, -0.10310646, -0.22712758, 0.11617119, 0.06397493, -0.011858522, -0.115762815, -0.050787542, 0.06386407, -0.1579078, -0.12903711, 0.084837236, 0.07354705, 0.02250288)); + target3 += mul(ne2, float4x4(0.14158289, 0.07666087, -0.20075443, -0.010602763, -0.02820616, 0.0944957, 0.15453936, -0.15856305, 0.1749605, -0.12841891, -0.017792901, -0.10751241, -0.059640024, 0.13478336, -0.35048804, -0.20975049)); + target3 += mul(nf2, float4x4(0.18300997, 0.0895379, 0.084789746, 0.092567876, -0.16524926, 0.1414963, -0.15058212, 0.13400394, -0.113864176, -0.05660036, -0.0001961134, 0.14347304, 0.16637255, -0.18054125, 0.009827294, 
0.21254125)); + target3 += mul(ng2, float4x4(0.11330536, 0.020117162, 0.049111363, 0.059246156, -0.17288256, -0.07703511, -0.064532675, 0.10420442, 0.100950584, -0.11876045, 0.013643637, -0.060119864, 0.16402918, -0.0701684, 0.10797075, 0.15408994)); + target3 += mul(nh2, float4x4(0.034557853, -0.09076456, -0.06957025, 0.11215256, 0.09526117, -0.0033204784, -0.11551807, -0.03458551, -0.025462642, 0.0434891, 0.3050603, 0.053797644, 0.10751034, 0.060085565, 0.15370789, -0.2315563)); + target3 += mul(ni2, float4x4(-0.046833776, -0.006102459, 0.1123578, 0.24187551, 0.03283197, -0.11041104, 0.20806998, 0.008368949, -0.1924367, 0.03361783, -0.045319956, -0.08859883, -0.2011492, 0.0912345, 0.048245467, -0.005335901)); + target3 += mul(na3, float4x4(-0.18253306, -0.0011128648, -0.044692483, -0.057080504, -0.05725425, -0.19065356, -0.03155062, 0.06648306, -0.014216424, -0.0038765708, -0.017490484, -0.15456702, -0.010514629, -0.08982491, 0.10435141, 0.030280044)); + target3 += mul(nb3, float4x4(0.01791952, 0.1946834, 0.16822097, 0.18846266, -0.075084575, -0.10975577, -0.12906383, 0.20190994, 0.10143081, -0.2725471, -0.035883784, -0.22165625, -0.15959083, -0.34200552, 0.15872408, -0.021841785)); + target3 += mul(nc3, float4x4(0.029525736, 0.04896955, -0.011629367, 0.011558814, 0.00933636, -0.12728998, 0.0053133606, 0.019774856, 0.099030845, -0.27376446, -0.08325353, -0.20274483, -0.26426545, -0.17067485, -0.14366214, -0.21118636)); + target3 += mul(nd3, float4x4(-0.009527981, -0.033085525, -0.00047734487, -0.040472545, 0.071459636, 0.0954099, -0.060635693, 0.036283012, 0.1324083, 0.050335824, -0.2460094, -0.04979816, -0.09456389, 0.09053007, 0.11540641, -0.21168198)); + target3 += mul(ne3, float4x4(0.004067291, 0.1497142, 0.100381024, 0.083456755, 0.10807039, -0.05651095, 0.021606952, -0.005951023, -0.067543074, 0.21499002, -0.021271145, 0.20417792, 0.05860774, 0.20977509, -0.10931411, 0.16582364)); + target3 += mul(nf3, float4x4(-0.05491801, 0.0055349297, 0.03950427, 
0.007250093, -0.062947564, -0.14126986, -0.06730335, -0.034683496, -0.03981397, -0.21181524, 0.21769942, -0.103150204, -0.17016284, 0.048786215, -0.014319224, 0.17676318)); + target3 += mul(ng3, float4x4(-0.14126709, -0.032334052, 0.05638739, 0.11381126, 0.30596843, -0.12634167, 0.23541147, 0.08096712, 0.09152563, 0.18567194, -0.25563926, -0.21220013, -0.10782045, -0.044764172, 0.14415121, 0.10968688)); + target3 += mul(nh3, float4x4(-0.034708634, -0.037528913, -0.0846457, -0.24652602, -0.09284069, -0.103932016, 0.09996971, 0.04605858, 0.06597961, 0.06697364, -0.028432503, -0.032057744, 0.052634656, 0.02281619, 0.17896608, -0.1521084)); + target3 += mul(ni3, float4x4(-0.0043455027, -0.07276675, 0.03043292, 0.07712516, -0.20799218, -0.25933886, -0.11458076, -0.0025673904, 0.08385744, 0.33315855, -0.035151098, -0.19899674, -0.005009251, 0.056176793, 0.045722242, 0.17721124)); + target3 += float4(-0.020202361, -0.0016936217, 0.023388062, 0.10373034); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex3[gxy] = target3; +} + +//!PASS 4 +//!DESC Conv-4x3x3x24 +//!IN tex1, tex2, tex3 +//!OUT tex4, tex5, tex6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass4(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = 
tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + 
float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(-0.2582688, 0.116867825, 0.009512264, -0.0022509228, 0.13270317, 0.019233711, 0.014508687, 0.01733284, -0.121534936, 0.2637504, -0.16833198, 0.08360115, 0.09262769, -0.09723933, -0.08402722, -0.06326682)); + target1 += mul(b1, float4x4(0.32656944, 0.035490595, 0.014057071, 0.08615446, -0.001598092, 0.16362181, -0.10130158, 0.16792357, 0.03340437, 0.037359558, 0.09397945, 0.11016778, 0.08567979, 0.31809476, 0.085573055, -0.15427281)); + target1 += mul(c1, float4x4(0.16257697, -0.03590016, -0.19049743, -0.13342945, 0.013655946, -0.11739747, -0.008941973, 0.015134444, -0.17258401, 0.17935902, 0.06434015, -0.06638789, 0.17013264, -0.171608, 0.07526482, 0.29814368)); + target1 += mul(d1, float4x4(-0.14037174, -0.060715932, 0.012513121, 0.05294183, -0.05479372, -0.13937469, 0.01836811, -0.133735, -0.29546124, -0.14349708, 0.14202882, -0.03247825, -0.054209106, 0.002391278, -0.024334526, -0.10866433)); + target1 += mul(e1, float4x4(-0.098666176, 0.009357217, 0.14404769, -0.03864725, -0.21861532, 0.24275939, 0.3084927, -0.17814654, -0.06785066, -0.20976599, -0.010328756, -0.0075252843, -0.1265569, -0.3896638, -0.07620251, -0.17581807)); + target1 += mul(f1, float4x4(-0.028447198, 0.088148355, -0.11362386, 0.032440383, -0.017401151, 0.2062452, -0.1613577, -0.07957526, 
0.31136703, -0.06775296, -0.019393584, -0.063142054, -0.12292114, 0.010548703, 0.03203177, -0.053964596)); + target1 += mul(g1, float4x4(0.108504035, -0.20656614, -0.04412517, -0.047383796, 0.038306333, -0.20189808, -0.07821153, -0.0229348, 0.10628414, -0.015934726, -0.08728048, -0.17359804, 0.17790003, 0.085666224, -0.11872538, -0.007298351)); + target1 += mul(h1, float4x4(0.024346102, -0.0066076764, -0.011155871, -0.057157155, -0.04878886, 0.121565156, 0.094774745, -0.021847744, 0.04866778, 0.07184023, 0.26012063, -0.07480458, -0.29240155, 0.12562081, 0.01449463, -0.028680477)); + target1 += mul(i1, float4x4(-0.12557116, 0.034923933, -0.095903516, -0.03958003, 0.26028237, -0.017168928, -0.13332075, 0.15662631, 0.065815985, -0.035664845, 0.045483954, -0.015463682, -0.093050554, 0.17345443, 0.069853716, 0.012629484)); + target1 += mul(a2, float4x4(-0.06156731, 0.07782055, -0.10174533, -0.020296015, -0.11969389, -0.060097698, 0.13305716, 0.16102178, 0.024139002, -0.02605331, -0.07594407, 0.19671421, -0.12202574, 0.14988048, 0.015957702, -0.04196926)); + target1 += mul(b2, float4x4(-0.34706548, 0.043015823, 0.13185433, 0.10132207, 0.007556987, -0.043371882, -0.08854469, 0.1748955, -0.1481482, 0.031284038, 0.120617144, 0.21384451, -0.08435913, -0.049537454, 0.049118094, 0.01525446)); + target1 += mul(c2, float4x4(0.09368386, -0.057292625, -0.17107973, 0.102038346, -0.21283975, 0.29275435, -0.039638165, -0.14761256, -0.0026279686, -0.1902631, -0.14120182, 0.26573882, 0.0017522989, -0.06337007, 0.14134108, -0.015992256)); + target1 += mul(d2, float4x4(0.04090445, -0.15472308, 0.0086197965, -0.08812333, 0.079468906, -0.16199878, 0.15031399, -0.03220131, -0.08283918, 0.18892156, -0.11201425, 0.143803, 0.027449837, -0.15672483, -0.09222757, -0.0074415365)); + target1 += mul(e2, float4x4(0.10325783, 0.01752857, 0.10529392, -0.04568797, 0.017125184, -0.18414256, 0.109236374, -0.05950773, -0.07963555, 0.22193272, 0.009846993, -0.028046092, -0.28534588, -0.040712982, 
-0.018419487, 0.040993705)); + target1 += mul(f2, float4x4(-0.07601499, 0.14913873, -0.11738921, -0.21686155, -0.09468833, -0.10593258, -0.13899745, -0.08376532, -0.21147677, 0.0016611695, -0.12994987, 0.06078483, 0.007183634, 0.22829083, 0.054238643, 0.025317933)); + target1 += mul(g2, float4x4(-0.020357948, -0.06775977, 0.04134854, -0.19611607, 0.21193837, 0.19103523, 0.1623303, -0.07516307, 0.09373488, -0.18499903, 0.122855246, 0.06162072, -0.06930552, 0.040520284, 0.066090606, 0.06882486)); + target1 += mul(h2, float4x4(0.07091698, 0.027023822, 0.014318926, -0.096747, 0.2213003, -0.26515988, 0.027153777, -0.06498218, -0.1544758, -0.072314575, 0.060353238, 0.0008735325, 0.10359162, -0.040275127, 0.03365087, 0.067658685)); + target1 += mul(i2, float4x4(-0.010807538, -0.032808676, 0.0016953531, -0.07662512, 0.0726062, -0.018007128, -0.10622275, -0.25853804, 0.059124377, 0.1262254, -0.093686275, 0.013412181, 0.17268743, -0.0634091, -0.2380408, 0.061805595)); + target1 += mul(a3, float4x4(0.0589103, -0.13791196, -0.054214116, -0.10432153, -0.009462091, -0.06466445, -0.10792851, 0.0046791825, 0.034062322, 0.0810174, 0.112342946, 0.14306374, 0.0536091, -0.056520145, -0.14358906, 0.1730281)); + target1 += mul(b3, float4x4(0.102546036, -0.0005907261, 0.06815491, 0.054100085, 0.012063651, 0.13010375, 0.076584436, 0.10106609, 0.07464082, 0.12651648, -0.13567902, 0.12329812, 0.036417592, -0.030062713, -0.07439, -0.06734716)); + target1 += mul(c3, float4x4(-0.06956145, -0.0320128, 0.0069283135, 0.0010382348, -0.15168677, -0.07246775, -0.1870489, 0.081376776, -0.12240719, 0.040261835, -0.114711486, 0.11216043, 0.039739948, 0.064421944, -0.11448801, -0.11656052)); + target1 += mul(d3, float4x4(-0.029262811, 0.07973898, 0.014937532, 0.17416446, -0.13320738, 0.09951435, -0.09681337, 0.24465284, 0.0027678797, 0.054772142, 0.11334623, -0.062660255, 0.06494805, -0.014957246, -0.016339006, 0.0065059843)); + target1 += mul(e3, float4x4(-0.19118161, 0.24356417, -0.17327957, 
0.06050448, -0.097790115, -0.38453653, 0.045624297, 0.04574299, -0.15803054, -0.5270604, -0.04556698, -0.13112716, -0.026057608, 0.13840397, -0.04413626, -0.06273916)); + target1 += mul(f3, float4x4(0.029510414, -0.005691187, 0.05228498, 0.028585492, 0.18082422, -0.032805815, 0.007563971, 0.08991763, 0.105824, 0.02457178, 0.055056915, -0.060770642, -0.011407322, -0.11525285, -0.04518266, -0.04449915)); + target1 += mul(g3, float4x4(0.14025277, -0.18081227, 0.014395497, -0.09138814, -0.09448127, 0.2532618, 0.08094696, 0.050620202, 0.040627994, 0.17808948, 0.0933771, -0.04734779, -0.025526097, 0.0038422223, 0.05230542, -0.101145774)); + target1 += mul(h3, float4x4(-0.07215562, -0.058965042, 0.038303573, 0.0009963732, -0.059399143, 0.15957262, 0.035185594, 0.0719169, 0.08515627, 0.09775558, 0.13178122, -0.0837824, 0.014349278, 0.038491696, 0.071876876, 0.0345376)); + target1 += mul(i3, float4x4(0.040965024, -0.030738113, -0.05919069, -0.14155431, 0.09109957, -0.099060595, -0.10192779, 0.033825647, 0.11551892, -0.04282345, 0.020072978, 0.035168435, 0.10797329, -0.0584945, -0.024158757, -0.03585887)); + target1 += mul(na1, float4x4(0.11656172, -0.03488785, 0.090906724, -0.0032958854, 0.11268224, 0.070826046, 0.008982598, -0.14222313, 0.0025792273, -0.07585458, -0.021171344, -0.10144507, 0.24918565, 0.004032981, 0.032430686, -0.012328044)); + target1 += mul(nb1, float4x4(-0.22021858, 0.06875914, 0.004574366, -0.0694593, 0.11509186, -0.25873652, -0.08872615, -0.024206636, 0.15076822, -0.14054653, -0.045519873, -0.04547437, -0.22077747, -0.054121707, 0.049612578, 0.10545096)); + target1 += mul(nc1, float4x4(-0.069911204, 0.078573205, -0.073091984, 0.015637126, -0.23398215, 0.12185918, 0.08496631, -0.063231654, 0.14004779, 0.07965737, 0.14457273, -0.057528477, -0.0971965, 0.10445598, -0.054162677, -0.11529022)); + target1 += mul(nd1, float4x4(-0.12595661, 0.16308525, 0.09465576, -0.05046868, 0.1799443, 0.115778774, -0.13534002, 0.09609113, 0.107355125, -0.07263705, 
-0.04365324, 0.10355821, -0.023942605, 0.026093582, 0.009621531, 0.06096017)); + target1 += mul(ne1, float4x4(0.1272364, -0.07220049, 0.041847665, 0.17912698, -0.03009012, 0.06394436, -0.03263169, -0.04573203, -0.07620046, 0.42576316, 0.042653862, 0.13744949, 0.23633486, 0.10078774, -0.121353894, 0.12101121)); + target1 += mul(nf1, float4x4(0.03558598, -0.1297437, -0.05971473, 0.17683595, 0.1725135, 0.052228056, 0.08043958, -0.09891566, 0.03620246, -0.07612062, 0.0671727, 0.037559096, -0.14037324, 0.021277385, -0.04257818, 0.17619017)); + target1 += mul(ng1, float4x4(-0.11092632, -0.00013030393, 0.12967736, -0.22887622, -0.08721344, 0.054407217, 0.07632402, -0.08394438, -0.071129434, 0.11594225, -0.058196247, 0.020942273, -0.123769015, -0.114318974, 0.03252267, 0.07218774)); + target1 += mul(nh1, float4x4(-0.11842664, -0.044281907, 0.07725646, -0.09330976, -0.028858917, -0.10954367, 0.04575166, -0.026068112, -0.06559436, -0.2284913, -0.19561197, -0.0016185943, 0.11867088, -0.038570896, 0.08526274, 0.019519364)); + target1 += mul(ni1, float4x4(0.0822196, -0.0037142867, 0.08382291, -0.013849318, -0.13749887, 0.044966772, 0.04564233, -0.00618037, -0.052107867, 0.033819627, -0.03494537, 0.024765901, -0.10504158, -0.028348709, -0.0089757275, 0.030026745)); + target1 += mul(na2, float4x4(0.053351242, 0.056979094, -0.060212657, 0.14301975, 0.17891912, -0.032538075, 0.011639607, 0.035919394, 0.04533616, -0.12939154, -0.041703038, 0.0071665174, -0.19303554, 0.018363694, 0.08923668, 0.020215489)); + target1 += mul(nb2, float4x4(0.038452573, 0.1614918, -0.022068001, 0.0030016324, -0.2680856, 0.21928017, 0.085351996, 0.049881425, 0.058913168, -0.044736963, 0.016097903, 0.21123125, 0.079624146, -0.16535924, 0.06877731, 0.1305827)); + target1 += mul(nc2, float4x4(0.05783186, -0.219528, 0.0816723, 1.3595931e-05, -0.02902699, -0.12913156, -0.40516803, -0.028480045, 0.12000909, 0.081304125, 0.053406257, -0.08878543, 0.02251961, 0.12547138, -0.20464425, -0.05598181)); + target1 += 
mul(nd2, float4x4(-0.15702735, 0.21000047, 0.08434562, 0.27938238, -0.03068116, -0.004006084, 0.19768693, 0.066732645, -0.055060755, -0.16314429, 0.028655436, 0.021063909, -0.028578848, -0.008238495, 0.12807982, -0.0071345936)); + target1 += mul(ne2, float4x4(-0.17309058, -0.18169925, -0.14182782, 0.107684694, -0.1117235, 0.19443877, 0.101682656, 0.030993309, -0.12313995, -0.048883304, -0.11149261, 0.12847972, 0.28405818, 0.20219465, 0.015797788, 0.123306856)); + target1 += mul(nf2, float4x4(-0.07962997, 0.06323938, 0.045708194, 0.0020409136, -0.0022456956, 0.010837137, 0.014872806, -0.060870074, 0.13772255, 0.005320253, 0.05848208, 0.14984395, -0.037590872, -0.07464743, -0.16873243, 0.019905593)); + target1 += mul(ng2, float4x4(0.13775061, 0.032707028, 0.13456069, 0.05904891, 0.046821773, -0.22715594, 0.056300808, -0.15724476, -0.07337338, 0.19666758, -0.013393664, 0.04086994, 0.12254266, -0.08695188, -0.11076954, -0.15678991)); + target1 += mul(nh2, float4x4(0.07177161, 0.01181348, -0.07497793, -0.085427515, 0.039396375, -0.0035293372, 0.20881353, -0.057439566, 0.15257393, 0.16040947, -0.027684899, 0.16330487, -0.054777898, 0.07572324, -0.03833461, -0.017093522)); + target1 += mul(ni2, float4x4(0.000963837, -0.00780663, -0.023343472, 0.18377425, 0.32722053, -0.08156815, -0.11247523, -0.12714005, 0.18326895, -0.16434003, 0.052783884, 0.2168339, 0.03372009, 0.024008008, -0.1949321, -0.11585071)); + target1 += mul(na3, float4x4(0.07887302, -0.043003492, -0.16841368, 0.023287356, -0.15838705, 0.21706697, 0.16976407, 0.11461476, -0.062454503, 0.08966307, 0.10723603, -0.029792916, -0.03903073, -0.06255455, 0.025979951, -0.09530182)); + target1 += mul(nb3, float4x4(-0.0917689, 0.12646815, -0.11529587, 0.06925059, -0.18619959, -0.05243984, 0.16720963, -0.07121025, -0.04476961, 0.0074207215, 0.16076323, -0.14866208, 0.042807475, -0.08767046, -0.005694572, -0.11727041)); + target1 += mul(nc3, float4x4(-0.0062040854, -0.00097002264, -0.058491956, -0.035364915, 0.040115915, 
-0.10968144, 0.046607487, 0.23429875, -0.11210956, 0.034507494, -0.07195393, -0.16490693, 0.047223017, -0.044811487, -0.11060463, -0.14174072)); + target1 += mul(nd3, float4x4(-0.14469296, 0.0862561, 0.027785733, 0.005940194, -0.0062618204, -0.015266768, -0.067160904, -0.17241345, -0.060631767, 0.024863401, 0.056833714, -0.063885145, -0.14061876, -0.042549785, 0.036430426, 0.14348027)); + target1 += mul(ne3, float4x4(0.3022943, -0.19899924, 0.19672908, -0.09840718, 0.14039348, 0.105976574, -0.14415087, -0.06547584, 0.3070416, 0.40989116, 0.009514016, 0.018336622, 0.08806178, 0.07710675, -0.03551256, -0.04064369)); + target1 += mul(nf3, float4x4(0.16016869, -0.12516344, -0.011240568, -0.1443897, -0.009084668, -0.1618983, 0.06672594, -0.30417737, -0.09547601, -0.09057253, 0.08657728, 0.036226142, -0.0022018533, 0.12780087, 0.0029589643, 0.12111095)); + target1 += mul(ng3, float4x4(-0.1765741, 0.03653064, -0.03139237, 0.057462048, 0.16041194, -0.2303424, -0.11946362, -0.1788824, 0.098096356, -0.18419504, 0.021373387, -0.1157983, 0.079671614, -0.03361971, 0.06394305, -0.0101026185)); + target1 += mul(nh3, float4x4(-0.01576709, 0.11476761, -0.041474868, 0.13242105, -0.056526344, 0.024517184, -0.21629438, -0.010624098, -0.0053918827, -0.19187245, -0.12927179, -0.08489797, 0.055730473, -0.043147404, -0.03800261, 0.048107833)); + target1 += mul(ni3, float4x4(-0.0014053301, -0.046847776, 0.004571536, 0.18300104, -0.053145096, 0.057801194, 0.2322556, 0.22864385, 0.0040904162, -0.037985127, 0.041369, -0.065972395, 0.16685532, -0.091719486, -0.1425869, -0.10230388)); + target1 += float4(0.00803133, -0.020707153, 0.0056995153, -0.052884795); + + float4 target2 = mul(a1, float4x4(-0.12893085, -0.12928686, 0.12365234, -0.021265296, 0.15424967, -0.0063038417, -0.027432516, -0.10297197, 0.118751466, -0.058228746, -0.10025376, 0.0027489034, 0.0073948866, 0.040659092, 0.08120041, -0.12702137)); + target2 += mul(b1, float4x4(-0.02242042, 0.114516795, -0.042158883, -0.14150862, 
-0.18976203, 0.109531336, 0.03548168, -0.1681465, -0.13782959, 0.07437085, -0.045712702, -0.09431652, -0.0029079607, 0.05180383, 0.07098421, -0.2149384)); + target2 += mul(c1, float4x4(0.3218102, 0.0013506162, 0.12795919, -0.10901241, -0.08859676, -0.06861104, -0.014102381, 0.0051467894, -0.16305672, 0.022653125, -0.019810826, -0.05701206, 0.1842382, -0.074959196, -0.07368022, -0.046023685)); + target2 += mul(d1, float4x4(-0.099247254, -0.2161521, -0.095611826, -0.0179061, -0.0067561218, 3.99846e-05, 0.01254028, -0.056954045, -0.0075805853, -0.082335606, -0.053469665, 0.25761604, -0.049429264, -0.08763215, 0.051362507, -0.030518934)); + target2 += mul(e1, float4x4(0.13518652, 0.05463841, -0.07654066, 0.023629244, -0.23324661, 0.04781438, -0.20902736, 0.10330495, -0.16452856, 0.235407, -0.022236459, 0.036046103, -0.08613043, -0.012954787, 0.043111194, 0.021807853)); + target2 += mul(f1, float4x4(0.11316856, -0.027803158, -0.026492868, -0.0030439082, 0.063926555, -0.09612654, -0.22492981, -0.13748476, 0.06954571, -0.008035041, -0.04846681, -0.23352449, -0.06676289, 0.13268302, 0.037954323, -0.0342029)); + target2 += mul(g1, float4x4(-0.18148762, -0.06975972, -0.21924862, -0.03831989, 0.09057307, -0.06784279, 0.05716139, 0.032582354, 0.32728904, 0.03561464, -0.06930132, 0.13582717, -0.04723415, 0.053298444, -0.1580453, 0.029922115)); + target2 += mul(h1, float4x4(-0.13381054, 0.06294187, 0.04273711, -0.089835554, -0.042215306, 0.04515037, -0.01970211, 0.07447383, -0.12915656, 0.087721184, 0.122159, 0.17817122, 0.05233303, 0.053456925, -0.22769327, 0.17450784)); + target2 += mul(i1, float4x4(0.062324032, 0.056449406, 0.070776984, 0.070366256, 0.15072031, -0.20342071, 0.118405774, -0.11357599, 0.23603258, -0.17724364, 0.028237892, 0.07491812, 0.015638597, 0.20543055, -0.05863285, 0.06565301)); + target2 += mul(a2, float4x4(-0.07647028, 0.2292153, 0.019423103, -0.06965646, -0.107311614, -0.19989595, -0.06673964, -0.027954143, 0.0017375473, -0.048038438, 0.052211836, 
-0.042501964, -0.1372413, -0.2437919, -0.15933524, -0.07229055)); + target2 += mul(b2, float4x4(-0.023719285, 0.05654754, 0.09026341, 0.020072227, -0.12716366, -0.013687293, -0.1312343, -0.06847118, 0.016806766, -0.10526531, -0.011248162, 0.12535807, -0.12538499, -0.042496204, -0.076355785, -0.0017766576)); + target2 += mul(c2, float4x4(0.039450683, -0.049502935, -0.009162741, 0.015372251, -0.14449993, -0.06564991, -0.093242005, -0.018039258, -0.2410318, 0.020259766, -0.040783074, -0.05092842, -0.023994599, -0.037968505, 0.052206438, -0.10967312)); + target2 += mul(d2, float4x4(0.13721816, -0.1571525, 0.09432105, 0.023277072, -0.073701076, -0.13941942, -0.02705892, 0.06508469, -0.17687775, -0.07433723, -0.11237514, -0.015321937, -0.31670073, -0.09665636, -0.11843665, -0.030077526)); + target2 += mul(e2, float4x4(-0.09092922, 0.088340946, 0.1001261, 0.05962185, 0.07731374, -0.09623944, -0.03218285, 0.04484794, -0.10394964, 0.111483194, -0.07343945, 0.15182221, 0.27208853, 0.024986237, -0.058641106, -0.039870527)); + target2 += mul(f2, float4x4(0.03685333, -0.014777545, -0.0064948527, 0.060336027, -0.04251398, -0.004589828, -0.025893224, -0.075040996, 0.007964778, 0.22512783, -0.033568367, 0.052608117, 0.2143682, 0.21318182, -0.06253117, -0.055562623)); + target2 += mul(g2, float4x4(0.07906376, -0.015447189, -0.045265637, 0.066810004, 0.07202818, -0.07874254, -0.071680374, 0.009017687, 0.07042464, 0.016754108, 0.017237889, 0.0106343115, -0.042138606, -0.11085673, 0.14738452, -0.10718694)); + target2 += mul(h2, float4x4(-0.07745664, 0.16073377, -0.01899363, 0.07030874, 0.058903817, -0.065876774, 0.020186676, 0.09385477, 0.14517148, 0.053237557, -0.16942556, -0.04716224, 0.13748227, 0.17071299, 0.12176032, 0.07409275)); + target2 += mul(i2, float4x4(0.09208682, 0.029487375, -0.057159107, 0.025398627, 0.12468226, 0.034707896, 0.010541767, -0.032418035, 0.11508723, 0.050812677, -0.08127881, 0.0052238777, 0.15403835, -0.17993934, 0.071115926, 0.0059663)); + target2 += 
mul(a3, float4x4(-0.053597223, -0.00758354, -0.011711322, 0.12876037, -0.022196915, 0.045487616, 0.02135921, 0.010447794, 0.063635394, 0.09686383, -0.05077074, 0.072695896, -0.02443565, -0.045984466, -0.025993166, -0.08304488)); + target2 += mul(b3, float4x4(0.1321831, 0.017644621, 0.16513684, 0.0659792, 0.09676037, -0.07867503, 0.04669573, -0.04401741, 0.23034973, 0.10561144, -0.1184282, 0.13691261, -0.18894893, 0.21760973, 0.08807475, -0.19776659)); + target2 += mul(c3, float4x4(-0.053137053, -0.07991928, -0.09902317, 0.017081713, -0.021857716, 0.011578801, -0.0009752623, 0.043588534, 0.11997389, 0.0027668865, -0.09973271, 0.065404624, -0.07151649, -0.017840967, -0.0188252, -0.14957094)); + target2 += mul(d3, float4x4(0.13721272, 0.04459704, -0.0069692475, 0.07410797, -0.13855937, 0.021286163, -0.04160423, -0.05980007, 0.027626112, 0.092742406, -0.032267787, -0.00358655, 0.12470872, 0.09738248, 0.06565896, -0.1076945)); + target2 += mul(e3, float4x4(0.12965658, -0.110055126, -0.08762725, 0.031792786, 0.11524638, -0.09530289, 0.07955128, 0.0049232226, 0.07190261, -0.010207877, -0.26513076, 0.045152593, -0.16932993, 0.091321826, 0.11550899, -0.100929074)); + target2 += mul(f3, float4x4(-0.1674921, 0.0907835, -0.033396322, -0.03168371, 0.013580539, 0.047018647, 0.028963672, 0.04756761, -0.08714202, -0.2602012, -0.12279786, 0.18663418, -0.07781514, -0.013219039, 0.006731288, 0.005795019)); + target2 += mul(g3, float4x4(0.01206949, -0.047031406, -0.060451232, 0.027200127, -0.1178311, 0.14014901, 0.25840858, -0.14889579, -0.11640469, -0.01811908, -0.09255012, -0.08351582, 0.086520575, -0.021090247, 0.08717082, 0.043429427)); + target2 += mul(h3, float4x4(0.020278929, -0.15339202, 0.041678756, 0.07180138, -0.0635027, -0.088976234, -0.04092133, 0.07997308, -0.134963, -0.015960857, -0.060887713, -0.07916197, 0.20483045, -0.12640053, 0.10478231, 0.04803776)); + target2 += mul(i3, float4x4(-0.03549656, 0.033666074, 0.20228225, -0.096664, -0.00096604426, 0.20793179, 
0.09613217, -0.053552672, 0.051677585, -0.018252494, 0.07543575, 0.006295734, 0.046456967, -0.16520908, 0.0120992735, -0.015491354)); + target2 += mul(na1, float4x4(0.09486195, 0.0862073, 0.04189838, 0.0026638226, 0.09820532, 0.1007168, -0.022186898, -0.05491984, -0.13535279, 0.046514615, 0.09563633, 0.021364952, -0.23145446, 0.05070801, -0.022965223, -0.18874952)); + target2 += mul(nb1, float4x4(0.05885208, -0.022751214, -0.015712557, 0.157172, 0.05131988, -0.09524327, -0.045114886, 0.05928359, -0.001745961, -0.035245676, -0.010552595, -0.06321781, -0.15489094, 0.017822266, -0.06018634, 0.06429225)); + target2 += mul(nc1, float4x4(0.1243866, 0.014742004, -0.07896682, 0.2792386, -0.08055696, -0.0067778644, 0.0407617, 0.1389886, -0.02221008, 0.07494927, -0.11067403, 0.026464086, -0.009520921, 0.015791653, 0.021943323, 0.12500213)); + target2 += mul(nd1, float4x4(-0.08929889, 0.09244356, 0.130978, -0.03720041, 0.07869226, 0.13067861, 0.104627624, -0.01922214, 0.03561331, -0.031736456, 0.15136853, 0.0128885005, -0.16457924, -0.028147755, 0.13005957, -0.07908654)); + target2 += mul(ne1, float4x4(-0.020705838, 0.0936515, -0.026146421, 0.030703338, 0.032063864, 0.14091234, -0.021708539, -0.056303035, -0.007502981, -0.1276548, -0.15350288, -0.04722333, -0.049264792, -0.016106946, 0.035777904, 0.10648118)); + target2 += mul(nf1, float4x4(0.16387826, -0.059457906, 0.009808255, 0.030755969, 0.05709708, 0.0025975339, 0.021356652, -0.023887865, -0.15327913, -0.03702513, -0.041953377, 0.0049483287, 0.1434395, 0.08557114, -0.07722993, 0.22481233)); + target2 += mul(ng1, float4x4(-0.20757784, -0.05194353, -0.17085314, -0.12557504, -0.056353815, 0.06583933, 0.005532102, -0.0040489454, 0.23847903, -0.08254601, -0.20940065, 0.1251241, 0.14838001, -0.12861559, -0.04664337, 0.07232125)); + target2 += mul(nh1, float4x4(-0.010124613, -0.07096996, -0.1366236, 0.0018079067, -0.041023795, 0.12729517, 0.24600507, -0.07845422, 0.31226948, -0.023518091, -0.0023672595, 0.058046557, 0.1718256, 
-0.05916957, 0.0067618093, 0.08826252)); + target2 += mul(ni1, float4x4(-0.0013852714, -0.02530485, 0.12499248, -0.047640886, 0.06515882, 0.009700978, -0.005210036, -0.0332508, -0.135034, 0.07050036, 0.06152617, 0.02243357, 0.20835938, 0.041327897, 0.047491845, -0.017284496)); + target2 += mul(na2, float4x4(-0.2511675, 0.2016235, -0.22534974, -0.29850873, -0.014898309, 0.034321953, -0.14487329, 0.029454721, 0.05068056, -0.09661999, 0.00070758525, 0.06925706, -0.19870853, -0.0871149, 0.13158658, -0.09995704)); + target2 += mul(nb2, float4x4(-0.22352318, -0.073506966, -0.11625505, 0.0049028546, 0.029848805, -0.06952766, -0.043236732, 0.13255614, 0.093998544, 0.17581578, -0.0004033081, -0.12263665, -0.17329359, -0.11587317, 0.059647266, -0.02954624)); + target2 += mul(nc2, float4x4(-0.057583325, 0.056015383, 0.11960743, 0.033696633, -0.14805156, -0.10933173, -0.08482661, 0.07473009, 0.040999115, -0.0995941, -0.005304712, 0.04729056, -0.09739792, 0.07000572, -0.12560466, 0.023240168)); + target2 += mul(nd2, float4x4(-0.1967497, 0.093729794, -0.05857918, -0.12817049, -0.034558292, 0.016039368, -0.12012142, -0.017481307, 0.0391479, -0.10992257, 0.015143992, 0.01391454, 0.051010676, 0.012996939, 0.041216355, 0.08623047)); + target2 += mul(ne2, float4x4(0.21069938, -0.066038206, -0.015458416, -0.097732425, 0.051942978, -0.03459923, -0.05756448, 0.14080645, 0.055423364, -0.06490901, -0.07402898, -0.16263707, -0.07290088, -0.058713708, 0.06723124, 0.069584474)); + target2 += mul(nf2, float4x4(0.09618103, 0.055036288, 0.09001422, 0.027986465, -0.018399306, -0.07295329, 0.06687392, 0.06653489, -0.06524778, -0.11760177, -0.004764932, -0.10559294, 0.16195896, -0.22127731, -0.0060094665, -0.0073161777)); + target2 += mul(ng2, float4x4(-0.006081162, 0.09074974, 0.1387847, -0.012516454, 0.040442165, 0.024901407, 0.019887343, -0.012545043, 0.040630046, 0.06390039, -0.088361576, -0.07775115, -0.016567666, -0.048221476, 0.00507668, 0.00015517596)); + target2 += mul(nh2, 
float4x4(0.27623588, -0.29454315, -0.09558771, 0.016047282, 0.12541397, 0.06766668, 0.012096932, -0.051367834, -0.20859776, -0.20424904, 0.1920475, -0.12987578, 0.08319857, -0.05495395, 0.043287907, -0.027431363)); + target2 += mul(ni2, float4x4(0.1666435, -0.10736637, -0.039772738, 0.06555994, 0.06329126, -0.004524732, 0.027252503, -0.018687485, -0.0827318, -0.17353283, -0.17264223, 0.0050896755, 0.08507919, -0.19379872, 0.14229794, -0.0837528)); + target2 += mul(na3, float4x4(0.10103022, 0.2500691, 0.11863092, 0.04184915, 0.07104669, 0.11822421, 0.040399753, -0.05503637, -0.03777729, -0.0552892, -0.0367129, -0.07652974, -0.06387571, 0.09680754, 0.030113626, 0.07385613)); + target2 += mul(nb3, float4x4(0.21662953, -0.047714498, -0.100133225, 0.14122888, -0.053247962, -0.13878773, 0.043139406, 0.10316825, -0.050836936, -0.1023108, 0.07342308, -0.013418398, 0.1517183, -0.038232815, 0.16094449, 0.18475303)); + target2 += mul(nc3, float4x4(0.10745382, 0.14385694, 0.16242811, -0.022071859, -0.06788635, 0.09044915, -0.09642871, -0.032185104, -0.15011486, 0.06751199, -0.0030307414, 0.045759566, 0.17598514, 0.069681115, 0.18387364, 0.15741494)); + target2 += mul(nd3, float4x4(0.0355877, -0.01989782, -0.021107944, 0.1195755, 0.04636706, 0.15067361, -0.03446434, 0.091468826, -0.054333266, -0.091928974, 0.077975504, 0.051997006, -0.2611878, 0.012728117, 0.038493883, 0.062820844)); + target2 += mul(ne3, float4x4(-0.09769422, 0.0486323, -0.09317317, -0.09185559, -0.30752286, -0.11381268, -0.053577766, -0.17922285, -0.14485466, 0.10500625, 0.22108263, -0.12928547, 0.33743355, 0.13309081, 0.13873322, 0.05503852)); + target2 += mul(nf3, float4x4(-0.19131194, -0.10878378, -0.04047478, -0.024106042, -0.25611252, 0.10455126, -0.0774767, -0.005242356, 0.14342257, 0.096795335, 0.11119688, -0.06816075, 0.045405596, 0.11205132, 0.22008072, 0.010171907)); + target2 += mul(ng3, float4x4(0.03641146, 0.025730135, 0.088947766, 0.09581084, 0.18514295, 0.05196274, -0.09955554, 0.043848306, 
0.09665611, -0.05949442, -0.037989084, 0.043330964, -0.046047594, 0.090160884, 0.06574573, -0.018593606)); + target2 += mul(nh3, float4x4(-0.26031247, -0.05067085, -0.07451936, -0.01263683, 0.13966191, -0.25842324, -0.115060754, -0.08976801, 0.028517777, 0.045588367, 0.2297454, 0.023451945, -0.13475016, 0.048971854, 0.04935944, -0.10817461)); + target2 += mul(ni3, float4x4(-0.044189412, 0.12302195, 0.05076291, -0.072933994, 0.22576593, 0.12513146, -0.020687684, -0.0017186786, 0.056137685, 0.07280331, -0.0060697175, 0.017558591, -0.19459185, -0.08931442, 0.03579924, -0.00051510497)); + target2 += float4(-0.088215575, 0.02001751, -0.0013112888, -0.0031276105); + + float4 target3 = mul(a1, float4x4(0.055708, -0.15470836, -0.18314275, -0.018972168, 0.0008025653, -0.04802735, 0.0037216125, -0.008888557, -0.044309124, 0.1032128, -0.09535111, 0.1075431, -0.061698865, -0.136952, -0.08298975, -0.03202739)); + target3 += mul(b1, float4x4(0.047130957, -0.13275343, 0.10046242, 0.14484632, -0.18798989, -0.01724291, -0.095696434, -0.06524662, -0.12395302, -0.057923865, 0.013821919, -0.19095008, -0.10312008, -0.067719445, 0.03039217, 0.002102062)); + target3 += mul(c1, float4x4(0.07914871, 0.03840256, -0.11512143, -0.19842817, -0.17087726, -0.117287606, 0.26407588, -0.028159037, -0.16280699, -0.1019244, 0.026774779, -0.06759367, 0.0024644772, 0.033856, -0.007847236, 0.028765628)); + target3 += mul(d1, float4x4(-0.07034455, 0.076142974, -0.22090098, -0.0905723, -0.06417895, 0.119223125, -0.26432338, -0.04371924, 0.16288432, 0.026691884, -0.017952124, 0.08947346, -0.1286289, -0.01910609, 0.04351911, 0.0340226)); + target3 += mul(e1, float4x4(0.14330725, 0.090986304, -0.1424256, 0.054584663, 0.043702085, -0.08414303, 0.001994348, -0.022233546, 0.03748274, 0.12121618, 0.26035795, 0.13496856, 0.3061306, 0.019047879, -0.043746773, 0.18116328)); + target3 += mul(f1, float4x4(-0.051031455, 0.0696392, 0.04753365, -0.20600007, 0.08226225, -0.055646114, 0.15932508, 0.0419586, -0.11326543, 
0.027461074, -0.041595474, -0.10200617, 0.004414234, -0.085846625, 0.1470303, 0.15096648)); + target3 += mul(g1, float4x4(0.101050586, 0.15982646, 0.008072791, -0.11342946, 0.08270196, 0.08548463, 0.042926773, 0.06380147, 0.11114159, 0.07615307, -0.01628438, -0.082144625, 0.029875848, -0.020052845, 0.014533401, -0.027843053)); + target3 += mul(h1, float4x4(-0.0279601, -0.09164763, 0.11475252, 0.04266532, 0.17664109, -0.044317525, 0.038787685, 0.00897195, -0.065523826, 0.013996353, -0.109297335, -0.029989313, -0.025986332, -0.09013683, 0.24884683, 0.06528543)); + target3 += mul(i1, float4x4(-0.09584907, -0.15118982, -0.015254367, -0.12179126, -0.12146391, 0.15733819, -0.033256296, -0.061760996, -0.036719803, 0.16471127, 0.18006523, -0.056930948, 0.03617248, 0.07113426, -0.069748655, -0.081067815)); + target3 += mul(a2, float4x4(0.1271724, -0.082678355, 0.07997786, 0.06285082, 0.02332232, 0.05007377, -0.094914205, -0.06553253, -0.10122091, 0.012112823, -0.11796572, 0.021247976, 0.0654767, -0.091576956, 0.08175131, -0.010552305)); + target3 += mul(b2, float4x4(0.12505153, -0.037628997, -0.022449989, 0.06686099, -0.25006896, 0.13324498, 0.041733105, 0.2241118, 0.024380242, 0.09950468, 0.078383565, 0.11634127, 0.077024244, -0.07780778, 0.07760342, 0.06282892)); + target3 += mul(c2, float4x4(-0.13915282, 0.16686817, 0.030251533, -0.0035493453, -0.13203144, 0.033648454, 0.0024875028, -0.0007983041, -0.105395414, 0.1536483, 0.050240528, 0.11495208, -0.026644144, -0.05793395, -0.12098678, -0.065910175)); + target3 += mul(d2, float4x4(0.02292821, 0.030319002, -0.1293214, -0.0096194055, -0.01278381, -0.00087727525, 0.19325659, 0.025518872, -0.05107456, -0.14991362, -0.05873866, 0.12859605, -0.20932005, -0.11987684, -0.051870637, 0.001319446)); + target3 += mul(e2, float4x4(-0.022754941, 0.043839425, -0.08278873, -0.21222612, 0.0015371124, -0.010085336, 0.09510605, 0.07335702, -0.106798455, -0.12928678, 0.015216733, 0.031399984, -0.07811234, -0.119671986, 0.17570181, 
0.029809073)); + target3 += mul(f2, float4x4(-0.11764911, -0.16164766, 0.08784963, -0.019233093, -0.076887585, -0.058506478, 0.08077385, -0.16966046, -0.24188527, -0.07365656, 0.09544133, 0.19833234, 0.09107925, -0.020520048, -0.05825717, -0.09854415)); + target3 += mul(g2, float4x4(0.03600886, -0.029253786, 0.048200432, 0.022130603, 0.13826382, -0.13885193, 0.20007242, 0.14829256, -0.017307537, 0.03851602, 0.020379594, 0.07832595, -0.07762187, 0.096413285, -0.079333976, -0.0061714468)); + target3 += mul(h2, float4x4(0.0413019, -0.07368758, 0.13919644, -0.12122368, -0.029388634, 0.10483587, -0.051654328, 0.015226432, -0.04520832, -0.026331404, 0.20372365, 0.06359042, -0.013045257, -0.10666548, 0.08962036, 0.20432319)); + target3 += mul(i2, float4x4(0.013157089, -0.034036867, 0.0819, 0.014009891, -0.03467534, -0.12812413, 0.18123335, -0.0781033, -0.2039025, -0.16503748, 0.02498213, 0.023839379, -0.13192852, -0.09351754, -0.045935795, -0.088439226)); + target3 += mul(a3, float4x4(0.17598471, -0.16652712, 0.04906223, 0.07156945, -0.019004462, -0.07228772, -0.030515088, 0.12137358, 0.049442984, 0.003075852, 0.0820677, 0.09503947, 0.15167919, 0.03480622, 0.055544864, 0.108532205)); + target3 += mul(b3, float4x4(0.06424813, 0.0047392054, -0.06604298, 0.065024786, -0.027760155, 0.013289014, -0.05930856, -0.22680816, -0.12812522, 0.046711236, 0.11081086, 0.12093126, 0.08999833, 0.09398781, -0.00391463, -0.013292052)); + target3 += mul(c3, float4x4(0.078218855, -0.096875966, -0.1891451, -0.075190805, 0.045807663, 0.038455345, 0.1420045, 0.1738224, 0.06848118, 0.18028922, -0.07149378, -0.16228504, -0.15232347, -0.032611012, -0.07023075, -0.12920822)); + target3 += mul(d3, float4x4(0.04663347, 0.0988432, 0.052362353, -0.112998225, -0.20248835, -0.19879234, 0.11022756, 0.10454231, -0.13743615, 0.047722638, 0.06637239, 0.016583467, 0.11989917, 0.0125074675, 0.053077225, -0.006272926)); + target3 += mul(e3, float4x4(-0.08468045, 0.047544964, 0.04363399, 0.086961746, 0.08489796, 
0.12409043, -0.13015386, 0.10092822, 0.14706169, -0.102444105, -0.074901864, -0.11254591, 0.029065747, 0.14046147, 0.07324801, -0.015313643)); + target3 += mul(f3, float4x4(-0.0032504771, -0.025116406, -0.027151806, 0.04037948, -0.029422142, 0.053333733, 0.050427776, 0.2249123, -0.040938333, 0.05139012, -0.021061108, -0.21729107, 0.020586135, 0.04293995, 0.01888572, -0.15284136)); + target3 += mul(g3, float4x4(-0.050343722, -0.08038014, 0.033975042, -0.078313686, -0.025870735, -0.10589425, 0.11806239, 0.11905227, -0.030429581, -0.10916684, -0.08828011, -0.032881964, 0.005728985, -0.14882843, -0.058584355, 0.07463933)); + target3 += mul(h3, float4x4(-0.16999933, -0.027314415, 0.07264002, -0.013310814, -0.12945375, 0.016093813, -0.09084507, -0.12522581, 0.075081155, -0.012983989, 0.11086466, -0.020709865, -0.034555092, -0.13049836, -0.069538176, 0.120410606)); + target3 += mul(i3, float4x4(-0.041815765, -0.1464541, -0.112602025, -0.17897187, 0.023695359, -0.007984221, -0.09087018, 0.03442271, 0.03562612, -0.022015946, -0.0067399153, 0.038907483, -0.11839428, -0.029512445, 0.032437507, -0.13424557)); + target3 += mul(na1, float4x4(0.071081854, 0.064600624, 0.06933874, -0.00823228, -0.06739624, -0.05190142, -0.0063528903, -0.0056084343, -0.00883983, -0.1393001, 0.053884078, 0.024325706, 0.05893945, -0.075403966, 0.21418992, 0.099977955)); + target3 += mul(nb1, float4x4(-0.08398666, 0.06117285, 0.018424282, 0.13809077, -0.07201819, 0.051259644, -0.04685134, -0.017006194, 0.05818578, -0.11379136, -0.07999673, 0.23295905, 0.007356084, -0.020284122, 0.01972096, -0.13002637)); + target3 += mul(nc1, float4x4(-0.06733669, 0.13325273, -0.0074489512, -0.052333828, 0.10027424, 0.065753184, -0.14192791, 0.09388921, -0.01242138, -0.14718066, -0.014753866, -0.065210566, 0.0699064, 0.06399467, 0.022925656, 0.06504557)); + target3 += mul(nd1, float4x4(0.101876445, 0.060120665, -0.0039521665, 0.12171173, 0.08321828, -0.008348968, 0.21899523, 0.058748752, 0.05547674, 0.16084124, 
-0.30695668, -0.10121366, 0.038653154, -0.044442136, -0.13552639, -0.019972218)); + target3 += mul(ne1, float4x4(-0.07638072, 0.050575085, 0.07061123, -0.18657742, -0.012248586, 0.019414622, 0.03041808, 0.033964135, -0.17578666, -0.023182971, -0.08965867, -0.13880058, -0.16309536, 0.17266575, -0.17651099, -0.24348558)); + target3 += mul(nf1, float4x4(-0.14318372, -0.002566858, -0.08960772, -0.025085822, -0.002079447, 0.010120887, -0.09830438, -0.11765062, 0.022343377, -0.025783114, -0.029105041, -0.1690584, 0.054205775, 0.02676286, 0.016028486, 0.120592885)); + target3 += mul(ng1, float4x4(0.14526334, 0.09275921, -0.12105369, -0.038859725, -0.10460921, -0.07294215, -0.15117784, -0.009182169, -0.0074104583, -0.12306472, 0.10073853, -0.08833498, 0.12785646, 0.0477829, -0.03402452, -0.07908741)); + target3 += mul(nh1, float4x4(-0.025889793, 0.014548265, 0.029771648, -0.07727682, 0.041268997, 0.08237273, -0.07722456, -0.036970172, 0.09158823, 0.044813015, -0.019759692, -0.112869464, -0.04357199, -0.07405958, -0.124406114, 0.20240584)); + target3 += mul(ni1, float4x4(-0.08556598, -0.01543713, 0.026491836, 0.018786263, 0.0418143, 0.0678302, -0.11946711, 0.09875955, 0.032350425, 0.007956311, -0.017798368, 0.1994804, -0.027886698, -0.17802258, 0.099619284, -0.011239122)); + target3 += mul(na2, float4x4(-0.36927477, 0.0397264, 0.14609286, 0.065389656, -0.017865075, 0.113564, 0.14015609, 0.054612216, -0.0342091, -0.030581282, -0.0124170035, 0.03166654, 0.0691441, 0.032685474, -0.16473754, -0.10027306)); + target3 += mul(nb2, float4x4(-0.027898287, 0.037473463, -0.10177491, -0.15948737, -0.08981485, 0.0764328, -0.06419195, -0.085592985, -0.015740823, -0.052377183, 0.07003385, -0.065375, 0.051523235, 0.04340368, 0.10867685, -0.16211551)); + target3 += mul(nc2, float4x4(0.007090963, -0.02692243, 0.05383495, 0.14827509, -0.105507806, 0.17903765, 0.13615972, 0.0051062405, 0.08153507, 0.05720539, 0.08144471, 0.0929691, 0.09873174, 0.015049897, 0.23769383, 0.22297786)); + target3 
+= mul(nd2, float4x4(-0.08985236, -0.076104425, -0.01007519, 0.034048676, -0.0079994, -0.033355482, 0.16036998, -0.053786088, -0.093155414, 0.05777472, -0.13322827, -0.0813691, 0.24432959, 0.08388064, -0.04998493, -0.021753525)); + target3 += mul(ne2, float4x4(-0.016286949, -0.013190527, 0.053851254, 0.046217382, -0.21881466, 0.07689005, -0.12487547, -0.10310683, -0.02934103, -0.084740095, -0.054879915, -0.06519303, -0.15657778, 0.029417856, -0.13291313, -0.103854224)); + target3 += mul(nf2, float4x4(0.11695019, 0.0132304765, -0.07342763, 0.051626842, -0.115028076, 0.060695976, 0.030592902, 0.07832676, -0.033096768, -0.010105935, -0.0968592, -0.17071666, -0.10127668, -0.026590502, 0.05544078, -0.22503363)); + target3 += mul(ng2, float4x4(0.053587623, 0.013554916, 0.0018153706, 0.0050241053, 0.007109888, 0.049959134, -0.05311281, -0.09651782, -0.15021992, 0.041716605, 0.031055149, -0.04614386, 0.1668338, -0.15733725, 0.05505452, -0.04836756)); + target3 += mul(nh2, float4x4(-0.077188395, -0.058547955, 0.03399098, 0.09912107, -0.03275195, -0.13739568, -0.08232234, 0.06831293, -0.070714585, -0.046675168, -0.11615044, -0.119989395, -0.03131107, -0.09919153, 0.003835856, -0.014355857)); + target3 += mul(ni2, float4x4(-0.036215, 0.018938174, -0.2277618, -0.13956094, -0.07911919, -0.063870676, 0.08332067, 0.061556723, 0.038459476, 0.15356061, 0.007937132, 0.049789228, -0.0977846, -0.06580731, -0.092308916, 0.12081035)); + target3 += mul(na3, float4x4(0.2513099, 0.2640892, -0.073300436, 0.0054640956, 0.021276288, 0.117054164, -0.10756317, -0.10598032, -0.045152083, 0.08731703, -0.18050396, -0.047249332, -0.073264845, 0.2116926, -0.114557505, -0.037215512)); + target3 += mul(nb3, float4x4(0.050166927, -0.04862805, 0.12805791, 0.0045228424, 0.056160565, 0.16115089, -0.07979352, -0.13011862, 0.05441418, 0.05797822, -0.13112345, -0.025642958, 0.05028941, -0.03776722, -0.030840462, 0.1557417)); + target3 += mul(nc3, float4x4(-0.13133498, 0.18729036, 0.09921492, 0.08116472, 
-0.045803983, 0.26691306, -0.074901216, 0.27606857, -0.008125972, 0.042414363, 0.13946676, 0.08842948, 0.08357318, -0.03671059, -0.16490772, 0.1321214)); + target3 += mul(nd3, float4x4(-0.065409325, -0.0521094, -0.16489594, 0.13398097, 0.059531994, 0.12008558, -0.3398136, 0.1359767, 0.19906406, -0.07998507, 0.030024389, 0.07742193, -0.17542136, -0.009348887, -0.07117329, 0.03772329)); + target3 += mul(ne3, float4x4(-0.058133047, -0.16653563, -0.0063957074, -0.095268235, -0.17482235, 0.059023783, 0.122984484, -0.34188032, -0.20109126, 0.18325296, 0.14055713, -0.10793852, 0.011646871, -0.061308336, -0.061341055, -0.021440659)); + target3 += mul(nf3, float4x4(0.078113094, -0.09492607, 0.08023962, -0.12604296, 0.109075874, -0.0154309245, 0.06649317, 0.06254269, 0.07463966, -0.073904, 0.05772617, 0.26408893, -0.006501864, -0.07582579, -0.10127933, -0.12402614)); + target3 += mul(ng3, float4x4(-0.042008914, 0.09461804, -0.072341286, 0.080054514, 0.14365824, 0.04930919, -0.099516146, -0.008121477, -0.0093559455, 0.10470606, 0.02927817, 0.021877058, -0.054930143, 0.060183078, -0.0445749, -0.01106447)); + target3 += mul(nh3, float4x4(-0.0011625461, -0.0009088538, -0.023627708, 0.027977956, -0.11017806, -0.26268825, -0.011429036, -0.03145088, 0.020097682, -0.029126195, -0.06067577, 0.069737315, -0.059665915, 0.0012559243, 0.010016551, -0.09414456)); + target3 += mul(ni3, float4x4(0.11869016, 0.20854239, 0.0059952354, -0.05854996, -0.019913383, 0.111083195, -0.110878445, -0.09330779, -0.09355048, -0.023232793, -0.028993065, -0.016969083, -0.046021197, 0.120301165, -0.016181333, 0.121419206)); + target3 += float4(0.13923971, 0.015290389, 0.012198976, 0.04480318); + + tex4[gxy] = target1; + tex5[gxy] = target2; + tex6[gxy] = target3; +} + +//!PASS 5 +//!DESC Conv-4x3x3x24 +//!IN tex4, tex5, tex6 +//!OUT tex1, tex2, tex3, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass5(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = 
GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex4.SampleLevel(sam, pos, 0); + float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex5.SampleLevel(sam, pos, 0); + float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = 
max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex6.SampleLevel(sam, pos, 0); + float4 f3 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.027190452, 0.0060910345, -0.008547152, 0.17320672, 0.06733503, -0.08989388, -0.11381129, -0.13119508, 0.17610823, 0.14008744, 0.11026499, -0.21357119, -0.12159518, 0.06601897, -0.034462526, -0.06805842)); + target1 += mul(b1, float4x4(0.032029126, -0.17226543, -0.041954145, 0.0048979674, 0.07860925, 0.014572411, 0.028136868, 0.023380699, 0.08869984, 0.066781156, 0.054681987, -0.2045243, -0.08229035, 0.034414835, -0.059059203, 0.123423755)); + target1 += mul(c1, float4x4(0.06395383, -0.17036091, -0.09632937, 0.012491044, 0.023212979, 0.0016467012, -0.14969939, 
-0.0054716296, -0.023756625, -0.17073572, 0.052645937, -0.046952818, -0.16187616, 0.016573654, -0.14689016, 0.01019834)); + target1 += mul(d1, float4x4(0.08193712, -0.07631574, -0.034434203, -0.014776324, 0.042278692, -0.1091839, -0.10186231, -0.08016388, -0.036329824, -0.27691782, -0.060328513, -0.21892257, 0.039156485, -0.015808448, 0.063398294, -0.045008957)); + target1 += mul(e1, float4x4(-0.1413053, -0.04867498, -0.06696859, -0.19319332, 0.06924486, 0.10097274, 0.027635809, -0.25744498, 0.043045916, 0.0080625275, -0.078129664, 0.07637907, 0.08766779, 0.009869328, -0.04087825, -0.107835)); + target1 += mul(f1, float4x4(0.03251173, -0.088434696, -0.17404701, -0.047607604, 0.19409397, -0.011666368, -0.055492543, -0.06779062, 0.18695107, 0.12933761, 0.009486838, 0.1311912, -0.115678646, -0.15206106, -0.0692949, -0.2093353)); + target1 += mul(g1, float4x4(-0.024145309, -0.049262546, -0.13907287, 0.079473436, -0.042634737, -0.08339864, 0.10169023, -0.035110317, -0.07373649, -0.013395292, 0.040008895, -0.10978444, -0.11845739, -0.037593327, -0.06392299, -0.16472307)); + target1 += mul(h1, float4x4(-0.004245749, -0.017990965, -0.16623773, 0.058491312, 0.09169293, 0.095187806, -0.13777736, -0.058859553, 0.12717004, -0.21097647, 0.022213815, -0.060391422, 0.24919353, 0.027743122, -0.046835132, 0.05116896)); + target1 += mul(i1, float4x4(-0.031152543, -0.006675389, -0.20609254, 0.059274126, 0.057716113, 0.010372987, -0.09142726, 0.21968524, 0.1961135, -0.123708576, 0.16263476, 0.0062686265, 0.014965539, -0.007153107, -0.11750436, -0.1819159)); + target1 += mul(a2, float4x4(-0.0060456856, 0.19447032, 0.020056425, 0.11960106, -0.32920054, 0.015612619, 0.26585084, 0.10356409, -0.14553185, 0.00058173627, 0.05271928, -0.1452066, -0.060218733, -0.020830099, -0.10317562, 0.052465137)); + target1 += mul(b2, float4x4(-0.27812362, 0.058981895, 0.08322605, -0.0032075725, -0.15221997, 0.09520731, 0.04914796, 0.11785509, 0.013318352, -0.10878859, -0.15916938, -0.18263555, 
-0.05563399, 0.014653972, 0.14075124, -0.057639994)); + target1 += mul(c2, float4x4(-0.0041990946, 0.0977939, -0.10445638, 0.020671595, -0.051427394, -0.026315004, -0.17141542, -0.19342242, 0.18054874, -0.15474714, 0.13021101, 0.11164268, 0.09080831, 0.036626425, -0.082300276, 0.04107306)); + target1 += mul(d2, float4x4(-0.039793264, 0.14146407, 0.09102857, 0.03839708, 0.3213411, -0.037526935, 0.26050022, 0.05215784, 0.09104371, 0.1189446, 0.1516196, -0.06040828, 0.06444251, 0.03769561, -0.05992374, -0.09555435)); + target1 += mul(e2, float4x4(-0.3158521, -0.09743379, -0.16136461, 0.12563957, -0.047199205, 0.14175804, 0.26343465, 0.26441336, -0.08041752, 0.12452204, 0.00063982303, -0.13609244, 0.2354998, 0.00049649493, 0.015294863, -0.2654468)); + target1 += mul(f2, float4x4(-0.08709678, 0.15577738, 0.05169841, 0.07911614, -0.024321338, -0.015250634, -0.021416046, -0.081399545, 0.0089286, -0.2259574, -0.05061959, 0.065474294, -0.030742366, -0.03538435, -0.055524804, 0.15507819)); + target1 += mul(g2, float4x4(0.045065995, 0.023564292, -0.037309248, 0.06847233, 0.056869928, 0.028326921, -0.17528678, 0.12857448, 0.035632227, -0.032293174, 0.104832776, 0.017997067, -0.114497125, 0.16921379, 0.12497218, 0.036903612)); + target1 += mul(h2, float4x4(0.075956464, 0.09397675, 0.052031025, -0.105377, -0.12632053, 0.024217378, -0.07852874, 0.11461346, -0.04082505, -0.108691104, -0.04474934, -0.29607844, 0.034042932, 0.12287652, -0.052040536, 0.041936204)); + target1 += mul(i2, float4x4(-0.038337763, -0.018111536, 0.06151811, 0.05389662, -0.028443024, 0.08706589, -0.073154494, 0.05447222, 0.07653834, -0.19515261, -0.037622564, 0.08052142, -0.045269065, -0.0609327, -0.100833364, 0.10981602)); + target1 += mul(a3, float4x4(0.094026454, -0.0031063687, -0.21620432, 0.13547292, 0.20105883, -0.025618935, 0.11542153, 0.10962974, 0.113429956, -0.14227262, 0.0060875076, -0.14874603, 0.09162232, -0.053849343, 0.04125156, 0.032826412)); + target1 += mul(b3, float4x4(0.013978522, 
-0.13269992, -0.07810451, 0.070542224, -0.04335991, 0.13381198, -0.027735049, -0.15146035, 0.22838825, -0.064607605, 0.09653002, -0.12548994, 0.13875695, -0.07963269, 0.17691031, -0.09219512)); + target1 += mul(c3, float4x4(-0.3725075, -0.10551151, -0.015794966, 0.11881437, 0.032990977, -0.08120358, -0.028089223, 0.07270803, 0.09375988, -0.19002074, 0.042594276, -0.14296396, 0.058286652, 0.027516257, -0.06983339, -0.21678405)); + target1 += mul(d3, float4x4(-0.07584593, -0.030345742, -0.102612115, -0.008622554, 0.19179675, -0.007445088, -0.0055725924, 0.045661647, 0.15045294, 0.05527889, -0.16074698, -0.11140143, -0.10332519, 0.0775829, 0.3479224, -0.09605363)); + target1 += mul(e3, float4x4(0.24224567, -0.10463845, -0.004708288, -0.037463564, -0.174914, -0.12728058, -0.09033664, -0.07400692, -0.14376171, 0.047589123, 0.12197598, 0.10113545, 0.27015212, -0.034403134, 0.1424642, 0.160263)); + target1 += mul(f3, float4x4(-0.13663313, -0.1106191, 0.011357531, -0.22931215, -0.019929864, -0.10682277, -0.055398542, 0.066238664, -0.085308366, 0.04024022, 0.12161912, 0.08610841, 0.09498895, -0.06681962, 0.13027692, -0.0019338574)); + target1 += mul(g3, float4x4(-0.03641036, -0.011318962, 0.110239714, 0.11487314, -0.0893917, 0.15007862, 0.027590204, 0.09350642, 0.024954673, 0.12835681, 0.03920746, 0.09515919, -0.1465032, -0.030845147, -0.1298204, -0.13092597)); + target1 += mul(h3, float4x4(-0.053689882, -0.013590492, 0.14078104, -0.02906744, -0.028918952, -0.05751785, -0.15884842, -0.26478568, 0.13566354, 0.12888497, -0.07389985, -0.10991238, -0.04350177, 0.056619987, -0.007795586, 0.20150684)); + target1 += mul(i3, float4x4(-0.24407062, 0.21552294, -0.00949639, 0.06383184, -0.021686498, -0.3234789, 0.00095171423, 0.16604368, 0.21007693, -0.23288599, 0.14941412, -0.23804995, -0.041001838, 0.122981116, -0.08457904, 0.31631222)); + target1 += mul(na1, float4x4(-0.03347639, -0.11116802, -0.024119927, -0.13334364, -0.06425279, 0.034693595, -0.042770308, -0.17312396, 
-0.067923695, 0.016072923, -0.11040154, -0.17093144, 0.0015578474, -0.29394698, 0.107074894, 0.27303827)); + target1 += mul(nb1, float4x4(-0.0611658, 0.019790849, 0.06787951, 0.10454345, -0.015665758, 0.0151002975, 0.03526049, -0.103849605, 0.18519226, 0.13797036, -0.061827153, 0.049401954, -0.14499283, -0.019294523, -0.059974186, 0.08248854)); + target1 += mul(nc1, float4x4(-0.10331019, 0.013611227, 0.06224777, 0.051212363, 0.07831132, 0.10166972, 0.06203761, -0.18489413, 0.15709174, 0.10225166, -0.047563914, 0.07839388, 0.111176215, -0.17445758, -0.025798218, 0.039074145)); + target1 += mul(nd1, float4x4(-0.0126109915, 0.1351571, -0.036555156, 0.010697993, -0.13778222, 0.03346138, -0.0049093324, -0.15003881, -0.03876987, 0.07914351, 0.047344975, 0.11449459, 0.063460924, -0.08697232, 0.10283146, 0.051968753)); + target1 += mul(ne1, float4x4(0.23186366, -0.06041623, -0.16257766, 0.24217394, -0.023535172, -0.101410136, -0.108250454, 0.107450925, 0.034496274, -0.028800279, 0.021022853, 0.03616355, 0.02028369, -0.08332956, 0.10570706, 0.09971033)); + target1 += mul(nf1, float4x4(0.04147743, 0.015145005, 0.120189026, -0.068185546, 0.046765327, 0.06456099, -0.1020187, 0.021370325, -0.040851895, -0.03208752, 0.048594363, -0.1198498, 0.068069115, 0.041555826, -0.17036118, -0.01932193)); + target1 += mul(ng1, float4x4(0.056585032, 0.08170861, 0.16936389, 0.12775362, -0.06250441, 0.003437123, -0.1626591, -0.044595372, 0.05609032, -0.013985337, 0.12408558, -0.023731874, 0.06669848, 0.015816472, 0.02028663, 0.15866788)); + target1 += mul(nh1, float4x4(0.08446122, 0.18007189, -0.029043732, -0.011163938, -0.07911146, -0.08956735, 0.01947308, -0.14794883, 0.006629651, 0.038349632, -0.00968828, -0.025770634, -0.0773972, 0.005243162, -0.024193848, 0.13965817)); + target1 += mul(ni1, float4x4(0.11081664, 0.014651672, 0.17688385, -0.105908446, 0.10568161, -0.0114132725, -0.07771328, -0.07368131, -0.08784887, 0.000283126, -0.062638454, 0.10225453, 0.03358641, 0.022887172, 
-0.05419985, 0.13735344)); + target1 += mul(na2, float4x4(0.10541027, 0.020751795, -0.09398483, -0.005489149, -0.29769272, 0.23499025, -0.006691222, -0.053000394, 0.010389082, 0.17603737, -0.00460357, 0.022672169, 0.184428, -0.05348439, -0.056355994, -0.09495365)); + target1 += mul(nb2, float4x4(0.0008888126, -0.07352942, -0.115427524, 0.039416842, 0.035075482, 0.064889066, -0.0403974, -0.16294649, 0.15031078, 0.15975513, 0.050580446, 0.17225175, -0.15042374, 0.1044681, -0.020698681, 0.02006514)); + target1 += mul(nc2, float4x4(-0.04267897, 0.013600698, -0.06688994, 0.06905151, 0.0050800233, 0.074999094, -0.013612523, 0.24658114, 0.09293767, -0.025656242, -0.12935342, -0.053077035, -0.10818674, 0.10712919, 0.10325497, 0.026742944)); + target1 += mul(nd2, float4x4(0.057898734, -0.079083994, -0.014326936, -0.012377722, -0.081788406, 0.15159677, 0.009859493, -0.17867896, -0.15591973, 0.052071776, 0.08789029, -0.07519902, -0.05066772, -0.062322497, 0.115281776, 0.036021948)); + target1 += mul(ne2, float4x4(0.18813054, 0.08132526, 0.13596503, -0.048313983, 0.38620186, 0.2359013, 0.037454955, -0.1447747, 0.067145094, -0.0005996448, 0.1840271, 0.05323988, -0.23532471, -0.0116497595, 0.2535536, 0.061556816)); + target1 += mul(nf2, float4x4(0.0129419975, -0.17229463, -0.09436541, 0.10180941, 0.11799404, 0.031389806, -0.07010608, 0.0046768254, 0.10469505, 0.17582805, -0.22139175, -0.14195564, -0.02746759, 0.1141511, -0.029968468, 0.07361169)); + target1 += mul(ng2, float4x4(-0.0769514, 0.017098518, 0.082954735, 0.025435448, -0.21867949, -0.07731593, 0.031622138, -0.013084908, 0.053551342, 0.08035211, -0.06418101, -0.14921196, 0.18860011, 0.029326573, -0.0472363, -0.011997928)); + target1 += mul(nh2, float4x4(-0.01178925, -0.07107687, -0.09878797, 0.1556755, -0.055202577, -0.040342607, -0.1087109, 0.22202995, -0.02957374, 0.063299805, -0.0226507, 0.09204488, 0.08155232, -0.022691648, 0.061842438, -0.003388257)); + target1 += mul(ni2, float4x4(-0.0058287196, -0.013047009, 
-0.15424606, -0.056314673, -0.06388496, 0.0222499, -0.11188726, 0.2635107, -0.05954232, 0.1667741, -0.12295786, -0.15182652, 0.1224556, -0.1186777, -0.011522621, -0.09436076)); + target1 += mul(na3, float4x4(0.07150499, -0.07419667, 0.16062357, -0.13254762, -0.010069923, 0.09393101, 0.035834856, -0.043301247, 0.059349176, 0.015473052, 0.06563933, -0.013041895, 0.029431, 0.11289305, 0.08899771, 0.16794808)); + target1 += mul(nb3, float4x4(-0.113425404, 0.14999859, 0.06650979, 0.036482334, 0.018955054, -0.10026139, 0.11925662, 0.114249855, 0.06869671, 0.052254554, -0.004852112, 0.0565278, 0.078193806, 0.05062573, 0.03250799, 0.19846839)); + target1 += mul(nc3, float4x4(0.021927554, -0.1345216, -0.0016766218, -0.13956897, -0.045278247, -0.0069249924, 0.006003127, 0.07814754, 0.10342034, 0.06784387, -0.069491945, 0.19103162, 0.14311132, -0.022440588, -0.06932795, 0.030535521)); + target1 += mul(nd3, float4x4(-0.04036147, 0.054757025, 0.017254664, -0.12124264, -0.1816484, 0.15580839, -0.09062968, -0.0048705437, -0.029410018, 0.038827926, 0.057098128, -0.018173074, -0.10805557, -0.14378877, -0.2585165, 0.172119)); + target1 += mul(ne3, float4x4(-0.1310388, 0.18337108, 0.19657819, -0.010367786, -0.04445844, -0.24680386, -0.04328972, -0.0399127, 0.12341645, -0.08352961, 0.011123786, -0.083505794, -0.09089909, 0.060027592, -0.23706149, 0.03521439)); + target1 += mul(nf3, float4x4(0.01557783, 0.010480741, 0.0434283, 0.16624042, -0.15881334, -0.04636994, -0.0038111496, 0.03575316, -0.08781109, 0.12979223, 0.06802427, 0.08255704, 0.37816545, -0.058951244, -0.102753684, 0.1256413)); + target1 += mul(ng3, float4x4(-0.10425998, -0.071307346, -0.11617004, -0.13080333, 0.1492051, 0.054852143, 0.07140254, -0.064901225, 0.0023687668, 0.012650793, -0.1390397, -0.09889024, 0.19282119, -0.04274883, 0.1678261, 0.10092644)); + target1 += mul(nh3, float4x4(0.052412614, -0.016467815, -0.08627941, 0.21175376, -0.037298422, 0.009408156, 0.09253116, 0.22531977, -0.09862147, 0.012014097, 
-0.00088612316, 0.10639377, 0.21262354, -0.36476177, 0.1831788, -0.18416084)); + target1 += mul(ni3, float4x4(0.10780807, -0.049085826, -0.035806093, 0.089742415, -0.121957704, -0.07614303, 0.1122783, -0.1417334, -0.11307489, -0.099186234, -0.09983688, -0.08203866, 0.18696213, -0.10846918, 0.022843426, 0.17075616)); + target1 += float4(-0.10820368, 0.052109707, 0.02658453, -0.089495786); + + float4 target2 = mul(a1, float4x4(-0.06560893, -0.038288042, -0.0021071879, -0.030108955, 0.145761, 0.0029613946, 0.051950503, -0.015247062, 0.44679, 0.114423126, -0.006614156, -0.085114725, -0.17392384, -0.1525023, 0.00087433326, -0.0061209374)); + target2 += mul(b1, float4x4(-0.038765047, 0.023672441, 0.07686677, 0.1169065, 0.057648882, -0.04956052, 0.18272647, 0.074001, 0.0148019185, -0.17424357, -0.15635398, -0.11640745, -0.044930972, 0.17733482, -0.118420936, 0.0034517103)); + target2 += mul(c1, float4x4(-0.03843906, 0.14669247, -0.0016725688, -0.05404641, -0.010653548, -0.14568646, 0.01552742, 0.0075000613, -0.11138789, 0.12747082, -0.0019283098, 0.15637173, 0.17695609, 0.11176842, 0.037749417, 0.038456965)); + target2 += mul(d1, float4x4(0.011113179, -0.033781096, 0.10000893, 0.09236021, 0.05682521, 0.047795758, 0.082160555, -0.06516607, 0.021327825, 0.123461336, 0.16531587, -0.017066834, -0.17193775, 0.0088722, 0.11325116, -0.008696895)); + target2 += mul(e1, float4x4(-0.1559535, -0.027437076, -0.06791055, 0.0076806503, -0.105000794, -0.013547857, 0.044852357, -0.072031856, 0.03666842, -0.09417821, 0.044465255, -0.021518283, 0.075612575, 0.12548204, 0.0053096185, -0.081135504)); + target2 += mul(f1, float4x4(-0.032854624, -0.04636654, 0.08900102, -0.006676651, -0.17161772, -0.11203611, -0.08199468, -0.09992361, 0.20184253, -0.1002281, -0.1186801, 0.07690125, 0.10468101, -0.034323484, 0.05079439, 0.05624683)); + target2 += mul(g1, float4x4(0.098402895, 0.21312171, -0.09616754, -0.0022171456, 0.13993289, 0.020528518, 0.14474267, -0.10080646, -0.1283229, 0.1904186, 
-0.040573347, -0.14794436, 0.054999832, -0.11960501, -0.061369505, 0.09603712)); + target2 += mul(h1, float4x4(-0.10725682, 0.06215029, 0.089609645, 0.018108908, 0.021400819, 0.031146, -0.22904995, -0.01076689, -0.105205126, 0.012291847, -0.048588227, -0.049485933, 0.114158444, -0.091215335, -0.027073242, -0.11835295)); + target2 += mul(i1, float4x4(-0.102791235, -0.029520744, -0.19900851, -0.029541757, -0.031764254, -0.008002707, -0.017105635, -0.07239135, 0.14740342, 0.05648717, 0.077909015, -0.14993371, 0.120271415, -0.10764749, 0.024895139, -0.06620364)); + target2 += mul(a2, float4x4(0.23614062, 0.17541821, -0.008834044, 0.18276002, 0.0081810225, 0.08408151, -0.13527961, -0.018539876, 0.014361589, -0.027012244, -0.17484863, -0.019362496, -0.037048925, 0.094974704, 0.018246485, 0.109574154)); + target2 += mul(b2, float4x4(-0.1533575, 0.19374342, -0.027817149, 0.16140993, -0.06192059, 0.045258347, -0.09625185, -0.026630063, -0.0050361003, 0.020038875, 0.17793919, 0.059639167, 0.079904884, 0.03772698, 0.07656081, 0.21176697)); + target2 += mul(c2, float4x4(0.03496418, -0.07980854, -0.022122597, -0.15199453, -0.029270291, 0.02720027, 0.10541389, -0.020044396, 0.031097332, 0.00533792, -0.07936573, 0.0767852, -0.052802965, 0.044324324, 0.1331397, 0.09737042)); + target2 += mul(d2, float4x4(-0.09404921, -0.12238693, -0.15260863, -0.037168942, 0.101774864, -0.12818033, -0.19276977, 0.060901154, 0.3669953, -0.08837079, 0.09483071, 0.0039528203, 0.114874505, 0.11380748, -0.0675627, 0.099314205)); + target2 += mul(e2, float4x4(-0.18921007, 0.11088719, -0.03879293, 0.24393363, 0.024074616, -0.055593442, -0.038904842, 0.093477115, -0.074254654, 0.023504809, 0.0015475574, 0.06922074, -0.02201723, 0.04952918, -0.12691462, -0.04520855)); + target2 += mul(f2, float4x4(-0.015887981, 0.13304926, -0.006745367, 0.08113083, 0.14956935, -0.115906075, -0.14784655, 0.030012615, 0.031657662, -0.065392576, 0.26881677, 0.060661886, -0.022231037, -0.04828739, 0.09894193, -0.14562485)); + 
target2 += mul(g2, float4x4(-0.047161587, -0.017991489, -0.0075016962, -0.034034126, -0.061112147, 0.13156408, 0.16217458, 0.076580904, 0.1459869, 0.11071404, -0.043128885, 0.0338223, 0.21686563, 0.008266244, 0.058333807, 0.02561811)); + target2 += mul(h2, float4x4(-0.018609803, 0.0234848, 0.040451016, -0.08435358, -0.009784489, -0.008065147, -0.053126886, 0.011366649, -0.084467, -0.1788947, -0.12264094, -0.18014608, 0.059439298, 0.03542411, 0.078848965, -0.13048537)); + target2 += mul(i2, float4x4(0.078216806, 0.013697004, -0.15663616, -0.049786724, -0.13391373, -0.08318028, 0.06794668, 0.09373982, -0.083461255, 0.061056722, -0.2251907, -0.06139379, -0.20027658, -0.09285312, 0.039336286, 0.09701935)); + target2 += mul(a3, float4x4(-0.16103904, -0.102670334, 0.0012198326, -0.22724585, 0.23467462, 0.044629287, 0.0045051533, 0.08221795, 0.13965432, -0.025059564, 0.009324332, 0.17598952, 0.10017599, 0.043154277, 0.09106905, 0.004035487)); + target2 += mul(b3, float4x4(-0.044398602, -0.02080209, 0.07439402, -0.0837648, -0.09127961, -0.16654146, -0.028559506, 0.063172385, 0.02517883, -0.2839795, -0.011589502, -0.07898659, -0.013581755, -0.18534079, -0.0017158306, 0.105475046)); + target2 += mul(c3, float4x4(0.104462, 0.27500334, -0.16876803, -0.067298174, -0.011149543, 0.026384255, -0.10175635, -0.2548854, -0.1283541, -0.16410558, 0.07503598, -0.02121285, -0.0064750114, -0.09670444, 0.08300398, 0.19831792)); + target2 += mul(d3, float4x4(-0.009554492, -0.095104635, 0.08615534, -0.10154481, 0.11020224, -0.1011952, 0.061394565, 0.050413556, 0.19796023, 0.11560851, 0.033866078, 0.23405328, -0.0060241343, -0.050427623, -0.18293521, -0.031680096)); + target2 += mul(e3, float4x4(0.058735132, 0.026442906, -0.23102848, -0.07569987, -0.26244682, -0.20584835, 0.2259608, 0.06885029, 0.035959512, 0.075910114, -0.17818634, 0.053924832, -0.0046540634, -0.02363428, -0.0501489, 0.07347372)); + target2 += mul(f3, float4x4(-0.0733894, 0.10715639, 0.28019708, 0.100572936, -0.07274408, 
0.072782665, -0.056028996, 0.06478587, -0.031222489, 0.043191776, -0.10039772, -0.21392053, -0.04606884, -0.16641788, 0.0065926304, 0.055378567)); + target2 += mul(g3, float4x4(-0.118616246, -0.13528953, -0.19563872, 0.23483656, 0.02614144, 0.19605434, -0.05274385, -0.08863971, 0.16891058, 0.1366527, 0.09084148, 0.100328505, 0.034491546, 0.08647768, 0.21777217, -0.049174547)); + target2 += mul(h3, float4x4(0.1357159, -0.012445991, 0.3096013, 0.181176, -0.010390439, 0.14459321, -0.10700577, -0.011389145, 0.09287424, 0.07787938, -0.096365124, 0.017783955, -0.09306514, 0.15694624, -0.14705794, -0.13922045)); + target2 += mul(i3, float4x4(0.13941582, 0.19728883, -0.151456, 0.10526561, -0.09251345, 0.11684088, 0.1303061, 0.14257613, -0.20296581, 0.00048331724, 0.2851077, -0.20377511, -0.057946853, 0.031233812, -0.15364504, -0.009259494)); + target2 += mul(na1, float4x4(-0.098066, -0.08288004, -0.06673981, -0.06435033, 0.034342356, 0.015804073, 0.023787297, 0.10401755, -0.19141194, -0.16482951, -0.0056575392, 0.0093797995, -0.28313008, 0.0048112553, -0.017099613, 0.02518723)); + target2 += mul(nb1, float4x4(-0.030270405, -0.038700357, -0.013410372, -0.004442315, -0.12467148, 0.08281559, -0.1605282, 0.069578275, 0.10012911, 0.01924674, -0.021857055, 0.07991313, 0.00801384, 0.13677774, 0.013247758, 0.03188123)); + target2 += mul(nc1, float4x4(-0.17157516, -0.08176375, -0.089773096, -0.0405298, -0.085242964, -0.03426719, 0.054874644, 0.066589154, 0.04864499, -0.18212035, -0.11903994, 0.04277644, -0.24286698, 0.14560008, 0.1412366, -0.049351584)); + target2 += mul(nd1, float4x4(-0.0020793858, 0.13244559, 0.022845006, -0.056293562, 0.025595138, 0.12697968, 0.0062493416, 0.10955782, -0.02731004, -0.04970028, 0.0558574, 0.013929665, -0.030912375, -0.07561133, -0.31270868, 0.027562078)); + target2 += mul(ne1, float4x4(0.072941735, 0.021501537, -0.0630067, -0.10351342, 0.0041823885, 0.13891226, -0.070387594, 0.052334826, -0.003547599, 0.19354597, -0.020180183, -0.037713047, 
0.06751014, -0.17405544, -0.020440113, 0.25509283)); + target2 += mul(nf1, float4x4(0.005987273, -0.08264425, -0.019549685, -0.06343352, -0.005718748, 0.05226893, 0.07570872, -0.030717341, -0.18217428, -0.0039694863, 0.1455871, -0.0977504, -0.15671553, -0.006649227, -0.1283491, 0.100330345)); + target2 += mul(ng1, float4x4(-0.057930637, -0.114826396, 0.06898038, -0.13852106, 0.024047598, 0.20633829, -0.12503678, 0.022534683, -0.18774416, -0.31502175, -0.10984795, -0.018557208, 0.17580375, 0.25652558, 0.22530238, -0.0028108188)); + target2 += mul(nh1, float4x4(0.023331782, -0.01088776, -0.0052380436, 0.00686383, 0.026780738, 0.03749848, 0.22947483, -0.103271484, 0.012644287, -0.0142970905, 0.098855376, 0.0055474946, 0.032439362, 0.027143423, -0.14876749, -0.06213873)); + target2 += mul(ni1, float4x4(-0.03750828, 0.010431886, 0.17416674, -0.090744555, -0.17330858, 0.013979898, 0.03489776, -0.13337487, 0.00858403, -0.037750907, -0.17109399, 0.08273273, -0.14204618, -0.009869641, -0.013496473, 0.076338045)); + target2 += mul(na2, float4x4(-0.043562744, -0.18440323, 0.011339632, -0.14345059, -0.08992258, -0.10230683, -0.10468143, 0.34146136, 0.15978895, -0.0051261852, 0.061601657, 0.09483878, -0.007760578, -0.018336317, 0.044910427, -0.09316569)); + target2 += mul(nb2, float4x4(0.1253627, -0.12310892, 0.016166732, 0.027448155, 0.13965616, -0.13030767, 0.17542621, 0.061852284, 0.16997853, 0.0056183804, -0.18704928, -0.019231116, -0.08086044, 0.09974395, -0.01429541, 0.03184063)); + target2 += mul(nc2, float4x4(0.04526007, 0.030035531, 0.03181006, 0.22173904, -0.1355034, -0.1948648, 0.06783468, 0.038674995, -0.046629447, -0.03462297, 0.09421528, 0.048745953, 0.16898066, 0.13283801, -0.14163011, -0.23105736)); + target2 += mul(nd2, float4x4(0.07269096, -0.06190773, -0.038986176, 0.102121696, 0.14298806, 0.23800415, 0.1370508, 0.0034182875, 0.009464909, 0.073990576, -0.028228868, 0.047769118, -0.11799714, -0.07566264, -0.025975682, 0.06592005)); + target2 += mul(ne2, 
float4x4(0.1140849, 0.0011444123, 0.13536933, -0.045905575, 0.050907966, -0.065915674, 0.034910467, -0.2681743, 0.10803704, 0.12069119, -0.12347737, -0.06318596, -0.06862493, 0.014980036, 0.22914106, 0.0003237674)); + target2 += mul(nf2, float4x4(-0.09530222, -0.11337397, 0.014516241, 0.0709293, -0.122670494, -0.17343688, -0.09817145, 0.0427696, -0.0035809735, 0.0970125, -0.35413933, -0.13195236, 0.07348421, 0.11037325, 0.056015544, -0.011848703)); + target2 += mul(ng2, float4x4(-0.05069634, -0.032064505, -0.03238415, 0.1735258, 0.25210074, 0.10959535, -0.2741513, 0.13719772, 0.1066583, 0.20128429, -0.008766815, -0.11834798, 0.057237767, 0.017930366, 0.021861222, -0.025086008)); + target2 += mul(nh2, float4x4(-0.000881232, -0.05960106, -0.08985197, 0.14067702, 0.018204128, 0.09699959, -0.05949243, 0.059911992, 0.027270103, 0.06743677, 0.38237867, -0.058599375, -0.047956746, 0.11374969, -0.14632292, -0.005532837)); + target2 += mul(ni2, float4x4(-0.0312775, 0.0031963694, 0.08149806, 0.13988096, -0.0040519754, 0.035389222, 0.0864673, 0.18592173, 0.03735674, -0.054272953, 0.18598364, -0.13443853, 0.085672796, -0.049046505, 0.0057935636, 0.017542645)); + target2 += mul(na3, float4x4(-0.04916441, 0.015665755, 0.08576695, 0.17165792, -0.13008267, 0.04201376, -0.2670682, 0.119378634, -0.100484766, -0.0887232, 0.049034663, -0.039614394, 0.02695341, -0.04374321, -0.106656834, 0.023938615)); + target2 += mul(nb3, float4x4(0.03373819, 0.004977311, -0.0040103244, 0.13545765, 0.06599036, -0.09659661, 0.22132197, -0.116552144, 0.100918315, -0.022979576, 0.07052367, 0.04172229, 0.17585796, 0.05118707, -0.08703159, 0.055033304)); + target2 += mul(nc3, float4x4(-0.18900026, 0.019988917, 0.07693406, 0.28435934, 0.12686001, -0.14701878, -0.09573673, -0.17312722, 0.15025325, 0.12911554, -0.09475629, 0.016428819, 0.082817025, -0.11946521, -0.0013731157, -0.09071587)); + target2 += mul(nd3, float4x4(0.0797976, 0.11099694, -0.05467964, 0.014629147, -0.09720358, 0.04712591, 0.015981004, 
-0.05535863, 0.03645818, 0.041274335, 0.10671675, -0.11314873, 0.036964905, 0.17811853, 0.08903187, 0.0095582185)); + target2 += mul(ne3, float4x4(0.11976107, 0.004657432, -0.06258394, -0.022577194, 0.17443101, 0.1387175, 0.059126876, 0.032149844, 0.1430801, 0.002375262, -0.12749809, 0.08837332, 0.06466934, 0.13617098, 0.04582338, 0.068308234)); + target2 += mul(nf3, float4x4(0.022942754, -0.09855706, 0.049297135, 0.096298546, 0.1906194, 0.11273925, -0.22720218, 0.003925555, 0.0028442615, -0.12138431, 0.09074982, -0.030113788, 0.00383381, -0.09112362, -0.27005482, 0.022827866)); + target2 += mul(ng3, float4x4(-0.19426541, 0.009114653, 0.11889596, -0.057239886, -0.03998725, -0.1694043, -0.20197673, 0.041406937, 0.020746358, 0.22414313, -0.1622876, -0.11014813, -0.09325455, -0.08461812, -0.021865716, 0.008194336)); + target2 += mul(nh3, float4x4(0.021359676, -0.022532789, -0.10541426, -0.24901268, 0.030835157, -0.034806997, 0.10264721, -0.006528542, -0.03765987, 0.069545716, 0.25284502, 0.04730265, -0.012214816, -0.053018507, 0.13373806, -0.037745554)); + target2 += mul(ni3, float4x4(-0.09582438, -0.18056035, -0.09869147, 0.11321111, -0.10706152, -0.037460733, 0.121544324, -0.11290087, 0.18490471, -0.06921383, -0.19518846, 0.10960292, -0.06263085, 0.13362981, -0.08682174, -0.053608853)); + target2 += float4(-0.019858388, -0.049763262, 0.034831703, -0.12479427); + + float4 target3 = mul(a1, float4x4(-0.1652761, 0.13780159, 0.09095229, -0.043444302, -0.06450598, 0.04212247, 0.069517806, 0.09327406, -0.033491675, -0.14936084, 0.009638944, 0.11837384, 0.02686685, 0.037584316, -0.09761867, -0.026200296)); + target3 += mul(b1, float4x4(-0.12561406, 0.12076126, 0.028275209, -0.08543192, -0.099475406, -0.0822321, 0.0920009, 0.06756713, -0.10781483, -0.12923865, 0.032576296, 0.3534597, 0.03224445, -0.015600879, -0.025559058, -0.027278373)); + target3 += mul(c1, float4x4(0.07211016, 0.054111533, 0.13363571, -0.010288602, -0.20603329, 0.0047039236, -0.04776343, 0.25487995, 
-0.10845931, 0.0972547, -0.10519721, -0.0073581343, -0.10403583, -0.06662798, 0.041069936, -0.11237198)); + target3 += mul(d1, float4x4(-0.011475162, 0.062792905, 0.091312, 0.30339372, -0.11382581, 0.06737181, 0.07341503, 0.16007973, 0.001011511, -0.11274179, -0.006656744, -0.034754373, 0.08876155, 0.014858809, 0.08583179, 0.010586847)); + target3 += mul(e1, float4x4(0.095108636, 0.0049300413, -0.15713759, -0.049208567, 0.14641964, -0.1558201, 0.115891516, -0.06733412, -0.07573838, 0.29731378, 0.108890355, 0.043476757, 0.06507369, 0.035861496, -0.03979463, 0.0009747037)); + target3 += mul(f1, float4x4(0.04926235, -0.037529353, 0.079898834, -0.14147292, -0.08446753, -0.06169593, 0.047313344, 0.26457137, -0.035472378, -0.073560245, 0.14341679, -0.022741733, -0.1525431, -0.01243139, -0.011166588, -0.20521918)); + target3 += mul(g1, float4x4(-0.016135108, 0.011612018, 0.14412925, -0.02519369, 0.09124221, 0.05163101, -0.13721077, 0.028859738, -0.10101291, -0.14688651, 0.15746878, -0.124548726, -0.04213581, -0.01224665, 0.17707069, 0.012810498)); + target3 += mul(h1, float4x4(-0.17663126, -0.07370428, 0.043691028, -0.006832302, -0.050157465, -0.030904332, 0.061489057, -0.009296911, 0.03220379, -0.047700413, -0.029812776, 0.16822562, 0.041632306, 0.11511152, 0.09653043, -0.055198412)); + target3 += mul(i1, float4x4(0.13367188, 0.03333002, 0.008851994, -0.012191224, -0.045508027, 0.08612423, 0.06786381, 0.15179649, -0.031041663, -0.059014346, 0.15675054, -0.08772905, 0.09033015, -0.08435604, 0.07550108, -0.14843665)); + target3 += mul(a2, float4x4(0.14639384, 0.16561817, -0.03261034, -0.03337392, 0.14970617, -0.11748068, -0.12750028, -0.10566866, 0.16191705, -0.08984127, 0.06803522, 0.008120483, 0.10923837, 0.0364358, -0.13485567, 0.14291629)); + target3 += mul(b2, float4x4(-0.02444568, 0.21520157, 0.05191823, 0.17272551, -0.047668163, -0.09192939, -0.020734387, -0.016689759, -0.21506861, -0.038079426, 0.099174924, 0.010456613, -0.20138906, -0.0112631135, 0.08758567, 
-0.045137912)); + target3 += mul(c2, float4x4(0.060797717, 0.03514636, -0.05460338, -0.095668696, -0.08528851, -0.07811166, 0.12541622, -0.036730994, -0.14369172, -0.010652937, 0.0060692867, -0.1785254, 0.14972189, -0.13451393, -0.04655055, 0.16085984)); + target3 += mul(d2, float4x4(0.05367569, 0.20912962, 0.018910028, -0.10154244, 0.03168856, 0.06779478, -0.088652916, 0.016729023, 0.10557536, -0.099209085, 0.14797546, -0.18952388, 0.07048445, 0.102708265, -0.14564602, 0.12568687)); + target3 += mul(e2, float4x4(-0.049337912, -0.12502758, -0.09065302, 0.19880529, 0.26680514, -0.003136209, -0.11733151, -0.11684242, -0.04335924, 0.30764192, 0.2855104, 0.04156867, -0.08121212, 0.23999381, -0.019614706, 0.027516816)); + target3 += mul(f2, float4x4(-0.04837136, -0.0049304874, 0.006328469, 0.013705871, 0.067017764, -0.03406703, 0.053161882, 0.24689339, -0.02929922, 0.06797918, 0.015713276, -0.17147881, 0.04482974, 0.07526465, 0.019844312, -0.18729854)); + target3 += mul(g2, float4x4(0.030257802, 0.010643463, -0.11703066, -0.015162744, -0.074236035, 0.01591241, 0.061938114, -0.08404092, 0.111995466, -0.13485448, 0.21688463, -0.110088274, 0.079335205, -0.2474801, -0.03824567, -0.018190503)); + target3 += mul(h2, float4x4(-0.11581714, -0.004117979, 0.033883266, -0.13720983, 0.029020213, -0.08154189, -0.0020539986, 0.11715364, 0.17582226, 0.0916048, 0.0750543, 0.06601126, 0.038681798, -0.03606899, 0.08065586, 0.0019443193)); + target3 += mul(i2, float4x4(-0.037615683, 0.12732984, 0.042441927, -0.008004603, 0.11336218, -0.042417236, 0.044717386, -0.13728632, 0.038264424, 0.17234874, -0.02492702, 0.120399185, 0.024329247, 0.024983741, -0.1845697, -0.07284304)); + target3 += mul(a3, float4x4(0.2704137, 0.15812507, 0.060361683, -0.07266647, -0.15354276, -0.04938148, 0.11895455, -0.12520859, -0.07866695, 0.06199223, 0.02046756, 0.16162948, 0.037545823, -0.08195345, -0.02782581, -0.1247714)); + target3 += mul(b3, float4x4(0.058098216, 0.1090351, 0.036994565, -0.14390574, 
-0.02314059, -0.067219526, -0.08998296, 0.12025692, -0.1035221, 0.05190676, -0.0240437, 0.06639121, -0.039624542, 0.002958745, 0.019561864, 0.12834862)); + target3 += mul(c3, float4x4(0.2211613, -0.1103558, -0.0464588, 0.06874506, -0.32631674, 0.11210603, 0.051548798, -0.34436032, -0.11639206, 0.12327613, 0.051884107, -0.03575669, 0.035892785, -0.06696002, -0.15486757, 0.11983755)); + target3 += mul(d3, float4x4(0.021447798, 0.010329525, 0.013789607, 0.119596116, -0.05871373, 0.055229582, 0.20033267, 0.03858596, -0.10166856, 0.0006909935, 0.0964782, 0.095391914, 0.013319357, -0.13142642, 0.1100771, 0.050889898)); + target3 += mul(e3, float4x4(-0.16984001, -0.16002657, -0.060783282, -0.17456883, 0.2011064, -0.14940733, -0.15602681, 0.14061591, 0.18068549, -0.00217099, -0.024712907, 0.037761874, -0.07138531, -0.0016056405, 0.11756802, 0.18380354)); + target3 += mul(f3, float4x4(0.07733175, -0.17642827, 0.07976922, -0.051280692, 0.16156857, 0.032522928, -0.095040165, -0.0583928, 0.038923588, -0.043146443, -0.10355574, 0.1974055, 0.04354748, 0.09425934, 0.026754672, 0.23734866)); + target3 += mul(g3, float4x4(-0.13585593, 0.14902504, -0.27107853, 0.13296895, -0.2865579, -0.074112825, 0.1409574, -0.0003253808, 0.1733374, -0.16919981, 0.03372848, 0.21644552, -0.00050592434, -0.037268158, 0.1148079, -0.13287376)); + target3 += mul(h3, float4x4(0.005142486, 0.0867682, -0.09227092, -0.10524167, 0.07520852, 0.015542765, 0.016817883, -0.0733789, 0.20560083, -0.1119311, 0.17374502, -0.107678846, -0.09381425, 0.14690572, 0.022286026, -0.19862098)); + target3 += mul(i3, float4x4(-0.20393431, -0.045187343, 0.0095105795, 0.052588273, -0.14538154, 0.18569797, -0.031874318, -0.15881945, -0.08170196, 0.052769475, -0.15122755, 0.090783544, 0.21360469, 0.04577172, 0.05163147, 0.07916663)); + target3 += mul(na1, float4x4(0.14100257, -0.03398819, -0.052019518, -0.08121586, 0.008056087, -0.0931302, -0.19780545, 0.16904305, -0.13034676, 0.08930879, -0.0112331435, 0.029833045, 0.03981243, 
0.12613662, -0.2159093, 0.035136405)); + target3 += mul(nb1, float4x4(0.09830958, 0.10535925, -0.08584078, -0.04632737, 0.0022527708, -0.031659063, -0.101096116, 0.063173816, -0.06613251, 0.118981436, -0.003423647, -0.105914734, -0.07703021, -0.07204621, -0.0748016, -0.11777416)); + target3 += mul(nc1, float4x4(0.053663094, 0.07884249, -0.17141959, -0.012647486, 0.08073693, -0.076323204, -0.17775054, 0.10244291, 0.14563464, 0.14345805, -0.18157926, 0.18835878, -0.026068632, 0.023138894, -0.0019046182, -0.00012485609)); + target3 += mul(nd1, float4x4(0.1348711, -0.04699952, 0.15993118, -0.23344111, 0.026501887, -0.14297141, -0.113242336, 0.080124736, -0.03513346, 0.10361922, -0.0922229, 0.07750678, 0.12542203, 0.12729637, -0.092106655, 0.055520497)); + target3 += mul(ne1, float4x4(0.083170444, -0.06302187, 0.0084091, -0.04599831, -0.035450544, -0.19657601, -0.07282212, 0.1447326, 0.11383889, -0.21189907, -0.045117173, -0.07391879, -0.11269967, -0.08903234, -0.032466423, 0.22887331)); + target3 += mul(nf1, float4x4(0.067729145, 0.06700018, -0.18447827, 0.03988203, 0.05277088, 0.033052627, -0.11088279, -0.02169712, 0.019287307, 0.06812, 0.04875055, 0.111010365, -0.14138764, 0.027063884, -0.05214136, 0.16399074)); + target3 += mul(ng1, float4x4(0.004932597, 0.1045028, -0.16486417, 0.010725656, 0.06950409, -0.121699296, 0.010512686, 0.14147647, 0.019202268, 0.17767008, 0.011134318, 0.063502066, -0.13067701, 0.108099535, -0.114125356, -0.046774942)); + target3 += mul(nh1, float4x4(0.15779556, 0.07332346, 0.063827224, 0.008358174, 0.0496721, -0.030757044, -0.050408855, 0.12898293, 0.023491597, 0.045543656, -0.07800668, 0.037886333, 0.17256846, 0.07125766, 0.029893918, -0.02450649)); + target3 += mul(ni1, float4x4(-0.18544081, -0.033090588, -0.05919492, -0.0003458201, 0.14915435, -0.037259944, 0.011946766, -0.16243212, 0.0882922, 0.093222775, -0.11737426, -0.003943405, 0.019537527, 0.0077801496, 0.1317979, -0.09169945)); + target3 += mul(na2, float4x4(-0.091774754, 
0.012059926, 0.03165443, 0.14858909, 0.3944464, -0.014972357, -0.12189733, 0.26198938, -0.27252647, -0.026880303, -0.06978548, -0.013632001, -0.0032966428, -0.18498091, -0.0004948639, -0.12478541)); + target3 += mul(nb2, float4x4(-0.02833149, -0.050442036, -0.041132275, -0.07840716, 0.04005613, 0.17621154, -0.13607822, 0.1762098, 0.05282825, 0.0016353457, 0.006173704, -0.067321114, 0.13982886, -0.03623519, -0.087992206, -0.047710747)); + target3 += mul(nc2, float4x4(0.03881576, -0.08746933, -0.011487434, 0.12498892, -0.0017975342, 0.018888952, -0.18913451, 0.08337154, -0.090970725, 0.117090665, 0.1504768, -0.070024244, -0.019629575, -0.091753945, -0.0092930645, -0.15750532)); + target3 += mul(nd2, float4x4(0.017022166, -0.12516023, -0.12154394, 0.11974826, -0.09612418, -0.115943454, 0.24888757, 0.06153447, 0.056513205, -0.11116729, 0.029329464, 0.08975961, 0.10630068, -0.1328722, -0.06946471, -0.13333926)); + target3 += mul(ne2, float4x4(-0.034902636, 0.2483038, 0.14978237, -0.07164234, -0.012161076, 0.023050508, 0.06598259, -0.043513447, 0.10375706, -0.20177342, -0.123048, -0.035172284, -0.07363312, 0.18172532, 0.09612206, 0.19234397)); + target3 += mul(nf2, float4x4(0.029563665, -0.029694784, -0.101416424, -0.030606827, -0.070010245, 0.045257732, 0.05966623, 0.09107148, 0.03758803, 0.026623867, -0.071266346, 0.094123766, -0.059981044, 0.09513772, -0.08400028, 0.02511076)); + target3 += mul(ng2, float4x4(-0.037089724, -0.06322222, 0.1061242, 0.008586227, 0.13214453, 0.035300348, -0.15787113, 0.07151468, -0.12539263, -0.09025181, 0.18832791, -0.033440433, -0.06625288, -0.1530654, -0.005935112, -0.18216603)); + target3 += mul(nh2, float4x4(0.027623197, -0.04890818, -0.061262466, 0.015195151, 0.32218042, 0.19153431, -0.08007639, -0.11445247, 0.00393679, -0.06705804, -0.12879996, -0.1423812, -0.06090306, 0.0036856222, 0.0069346135, 0.043838803)); + target3 += mul(ni2, float4x4(-0.016647626, -0.08680245, -0.060714565, -0.06387184, 0.18913822, 0.10105815, -0.026422933, 
-0.039242256, -0.06503463, -0.03521194, 0.049169898, -0.06533137, -0.03167689, 0.015587601, -0.08370448, -0.021492135)); + target3 += mul(na3, float4x4(-0.12721944, 0.028729077, 0.10713755, -0.09260985, -0.047840588, 0.022301238, 0.11309327, -0.06745379, -0.004154309, 0.10523564, -0.04239449, -0.017029425, 0.10899646, 0.1546228, -0.07669311, 0.2672058)); + target3 += mul(nb3, float4x4(-0.056850802, -0.05440277, 0.0018135635, 0.09396988, 0.14010292, 0.08741186, -0.12758048, -0.08599669, -0.018672993, 0.05172455, 0.008185248, 0.111759275, -0.06955318, 0.14772479, 0.008665618, 0.0352044)); + target3 += mul(nc3, float4x4(-0.059702516, 0.058782764, -0.12532151, -0.096861176, 0.35831934, 0.0013884759, 0.30706376, -0.101967454, 0.095553055, 0.05883552, 0.06424327, 0.054175656, -0.1484007, 0.13297899, -0.01961164, 0.15321216)); + target3 += mul(nd3, float4x4(0.09578697, -0.20968121, 0.04902802, -0.030943176, -0.009951699, -0.05341875, -0.063387014, -0.0825744, -0.09769999, -0.075733155, 0.14749058, 0.12551898, 0.24074706, 0.16208081, -0.21561289, -0.062474046)); + target3 += mul(ne3, float4x4(0.0017662761, -0.088773146, 0.0043133483, 0.32119426, -0.13667256, 0.043542203, -0.045929775, -0.09663573, -0.136664, -0.19760157, -0.07579348, -0.04397654, 0.15027492, 0.08591492, -0.03781643, -0.1743205)); + target3 += mul(nf3, float4x4(-0.12654322, 0.028860493, 0.12822515, 0.049503203, 0.30117163, -0.03055389, -0.0582901, 0.0019550966, -0.0038878717, 0.0043905065, -0.12589069, -0.22796634, -0.10635117, 0.16903181, 0.16951965, 0.027410017)); + target3 += mul(ng3, float4x4(-0.059951358, -0.20652413, 0.056598257, -0.1811566, 0.2165428, -0.14381465, 0.20429386, 0.025329571, -0.19378977, -0.055971343, -0.0010970832, 0.08035063, 0.077368416, 0.078627735, 0.07322149, -0.14884202)); + target3 += mul(nh3, float4x4(0.041847393, -0.12735637, 0.014505967, 0.10192219, -0.13889207, -0.015992412, -0.17310154, 0.12131598, -0.13452062, -0.00036142246, -0.14270298, 0.14636193, 0.059705302, 
0.051249746, 0.015804589, -0.11418885)); + target3 += mul(ni3, float4x4(-0.043562837, -8.029936e-05, -0.007859607, -0.08610097, -0.021267893, -0.011354754, -0.17890069, -0.0485164, -0.1679154, 0.11548207, -0.060171373, -0.24584498, 0.008396757, 0.1078782, 0.12012115, 0.07315681)); + target3 += float4(-0.067701444, -0.05630008, 0.022760866, -0.034229018); + + float3 target4 = mul(e1, float4x3(0.068483055, 0.036389243, 0.04961808, 0.05059915, 0.033048775, 0.029426659, 0.07465462, -0.012659731, -0.024048671, 0.02224484, 0.012289658, 0.008910066)); + target4 += mul(e2, float4x3(-0.10449372, 0.019832065, 0.035194747, 0.039656557, -0.028246421, -0.032626413, 0.10093569, 0.021039873, -0.0120673925, -0.047074273, -0.041248, -0.019464392)); + target4 += mul(e3, float4x3(-0.05256942, 0.0127243735, 0.012813261, -0.03551604, 0.040801138, 0.04893271, -0.0016839011, -0.018044796, -0.027161835, -0.060873054, 0.012360936, 0.020700796)); + target4 += mul(ne1, float4x3(-0.116182, -0.04271438, -0.046686683, -0.09575506, -0.030078743, -0.024359861, -0.04794246, 0.0044337297, 0.013972317, -0.023228236, 0.015726948, 0.0070847897)); + target4 += mul(ne2, float4x3(0.13986528, -0.016787121, -0.015848925, -0.04900687, -0.027417973, -0.027077334, -0.047319725, -0.021533312, -0.018427303, -0.06136185, -0.0051562944, -0.032072)); + target4 += mul(ne3, float4x3(0.070715815, 0.012814227, -0.0003389576, 0.012182037, -0.014952754, -0.019349998, -0.03254603, 0.012881403, 0.016392775, 0.059158217, 0.0055793705, -0.003696545)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex3[gxy] = target3; + tex7[gxy] = float4(target4, 1); +} + +//!PASS 6 +//!DESC Conv-4x3x3x24 +//!IN tex1, tex2, tex3, tex7 +//!OUT tex4, tex5, tex6, tex8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass6(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + 
float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); 
+ float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.092447594, -0.10328636, -0.12202365, 0.27040935, 0.052717082, 0.018614411, -0.08485268, -0.07617377, -0.008931799, 0.051284462, 0.051496644, 0.026522819, 0.09565774, 0.18421015, 0.26325333, -0.12989432)); + target1 += mul(b1, float4x4(0.03988519, 0.042028125, -0.07100362, 0.03045228, 0.068984345, 0.03516445, 0.05874817, -0.028063854, 0.5054902, -0.16185366, 0.12543231, 0.07206758, 0.31235528, 0.03843813, 0.1501265, -0.08274924)); + target1 += mul(c1, float4x4(-0.11169874, -0.06681513, -0.00651678, 0.0010351768, 0.051753096, 0.053674143, 0.11657592, 0.12309117, -0.040198836, -0.007768111, 0.10881242, -0.14587292, 0.17091802, -0.087406136, -0.057882708, 0.0078790905)); + target1 += mul(d1, 
float4x4(0.26830226, -0.01915989, -0.18262567, 0.2194732, 0.13879527, -0.031352315, 0.15241407, 0.0994905, -0.057112038, 0.17008875, 0.037308767, 0.09374541, -0.3188967, 0.01450157, -0.18610804, -0.0793318)); + target1 += mul(e1, float4x4(0.0060915435, 0.06979378, -0.046237, -0.27248916, 0.09547359, -0.07666023, 0.09364251, 0.026975514, 0.16541278, 0.042641494, -0.02498914, 0.15121445, -0.0013431904, -0.06427887, 0.18217684, 0.26087397)); + target1 += mul(f1, float4x4(-0.20825194, -0.11043138, 0.02976852, -0.105722494, 0.0008496603, -0.065933526, 0.06687892, 0.025230588, 0.18294227, -0.03581215, 0.14366323, 0.101520695, 0.25154486, 0.055622917, -0.012970234, 0.054395743)); + target1 += mul(g1, float4x4(0.21373472, -0.030288193, 0.06773853, 0.07427125, -0.0103815105, 0.016129585, 0.038576525, 0.037529152, -0.20739938, -0.05778662, -0.05940614, 0.02449663, 0.23593283, -0.05812938, -0.039888572, -0.057957932)); + target1 += mul(h1, float4x4(0.387659, 0.1274861, 0.28752464, -0.05272344, -0.014581121, 0.0040657013, -0.06632645, -0.107276425, 0.03762339, 0.2742528, 0.028725976, -0.054044764, -0.04273324, -0.06317463, 0.0060703703, 0.053600952)); + target1 += mul(i1, float4x4(-0.1596047, -0.1561146, 0.109226674, -0.0052362215, 0.16038993, 0.10755746, -0.030864978, -0.36270598, 0.17078364, 0.09184639, 0.23489448, 0.026559642, 0.04388386, -0.061411064, 0.028113337, -0.045337155)); + target1 += mul(a2, float4x4(-0.111932576, 0.0021055648, -0.12106931, 0.019196665, 0.033925258, -0.13593148, -0.068236336, 0.107576296, 0.0415075, -0.2336552, -0.052428674, 0.07777366, 0.00816918, 0.2065682, -0.08628869, 0.15342048)); + target1 += mul(b2, float4x4(-0.021824878, -0.04840494, -0.116642684, 0.045604706, 0.008168658, -0.04534853, 0.11214711, -0.10829524, -0.043486122, -0.24905528, -0.07315474, 0.14727196, -0.07264179, 0.065202385, -0.0019039236, -0.08028288)); + target1 += mul(c2, float4x4(0.08439612, 0.008386524, -0.030988367, 0.09697018, -0.049302116, 0.20326442, -0.018234255, 
-0.20189443, 0.042629667, -0.1409463, -0.050773926, -0.29503027, -0.07123911, -0.046633366, 0.07981456, 0.10374346)); + target1 += mul(d2, float4x4(0.03868367, -0.05526043, -0.106714435, -0.14639367, 0.038107764, 0.069904044, 0.0744559, 0.13862458, 0.09222159, -0.14277418, -0.19073294, -0.03296828, -0.10584655, 0.13311721, -0.24290293, -0.008493607)); + target1 += mul(e2, float4x4(-0.15074006, 0.094411716, -0.058070287, -0.10475867, 0.127535, 0.047796316, 0.033599593, 0.055493813, 0.17686792, -0.23935609, -0.27880296, -0.12433512, 0.049884334, 0.0651521, 0.009873332, -0.039633323)); + target1 += mul(f2, float4x4(0.025122408, 0.16321969, -0.06588295, 0.09563756, -0.115063086, -0.061710395, 0.073383145, 0.09976373, 0.09290709, -0.042226892, -0.22798967, -0.14234817, -0.089538574, 0.022935519, 0.09885692, -0.050982323)); + target1 += mul(g2, float4x4(0.09486296, 0.04397677, 0.04075486, 0.056717344, -0.04711896, 0.04990853, -0.16473778, 0.13175704, 0.12485286, -0.18850122, -0.13122937, -0.102840684, -0.16874318, 0.05348968, -0.017259317, 0.07717163)); + target1 += mul(h2, float4x4(-0.059502125, -0.13897286, -0.03801125, 0.17431264, 0.11680923, -0.12560965, -0.0911302, -0.19165933, -0.121053115, 0.06541917, -0.06419728, -0.19364956, -0.13833821, 0.03234477, -0.09979964, 0.17789067)); + target1 += mul(i2, float4x4(0.067596145, 0.25704458, 0.19766523, 0.108859204, 0.09887382, 0.052284334, -0.07278858, 0.122003525, -0.030752266, -0.04871386, -0.05135825, -0.3072661, -0.033045944, -0.098459914, 0.10718348, -0.13164413)); + target1 += mul(a3, float4x4(0.020737967, 0.24545951, -0.044812705, 0.03566297, 0.095929176, -0.07487561, 0.20496303, 0.037086472, 0.038242895, 0.088189796, 0.021153267, -0.09462902, 0.026548525, -0.21922965, 0.050257247, -0.048741706)); + target1 += mul(b3, float4x4(0.040332116, 0.043284092, 0.24138524, -0.02451653, -0.13059705, 0.0343388, -0.07902276, -0.009631078, -0.0848101, 0.010842163, 0.086510465, -0.012446626, 0.005316944, -0.22108673, 0.14004333, 
0.15579557)); + target1 += mul(c3, float4x4(0.022010755, 0.004139463, -0.017926715, 0.04037725, 0.016520657, 0.009780203, -0.14736284, -0.014491211, 0.057596914, -0.23008622, 0.21133287, -0.053522564, -0.18740861, -0.106346205, 0.10276541, 0.043288257)); + target1 += mul(d3, float4x4(0.10575789, 0.019061945, -0.026198203, 0.20347466, 0.07900247, 0.102640145, 0.08666188, -0.05840282, 0.058876745, 0.14216799, -0.11816214, 0.14975895, 0.09833406, -0.1061385, 0.08465644, 0.09426659)); + target1 += mul(e3, float4x4(-0.13777718, -0.28986838, 0.07906812, 0.059411187, 0.09088133, 0.23517007, -0.20900714, 0.011920497, 0.14009877, 0.19299953, -0.028272772, 0.06418091, 0.118590616, -0.111001015, -0.055573206, 0.085596696)); + target1 += mul(f3, float4x4(-0.124967046, -0.23403575, -0.085109934, 0.094934925, 0.15895598, 0.08125505, -0.2215677, 0.10778676, -0.12129276, -0.0019275933, 0.14121452, -0.07975474, -0.057002395, -0.052832086, -0.1850646, -0.100982465)); + target1 += mul(g3, float4x4(0.0710814, 0.20992099, 0.07493418, -0.109678715, -0.18531376, -0.039698873, -0.110102035, 0.16468482, 0.08024999, -0.09387882, -0.13551506, 0.11087316, -0.10608426, -0.13655968, 0.01102362, -0.060193118)); + target1 += mul(h3, float4x4(-0.015583674, -0.06961451, 0.14489253, -0.27566335, -0.17987481, -0.027696218, -0.23948374, 0.028104413, 0.27821308, 0.08043316, -0.05241405, -0.0027138551, -0.13761862, 0.0038414828, 0.010716796, -0.21286957)); + target1 += mul(i3, float4x4(-0.22588563, 0.040290482, -0.13179918, -0.15576197, 0.058554877, 0.10720413, 0.11312613, -0.004625868, 0.03558514, -0.023398632, -0.2564193, -0.045098998, -0.0012908503, 0.01255389, -0.018089779, -0.1334803)); + target1 += mul(na1, float4x4(-0.040578995, 0.14333616, 0.023703935, -0.24532415, -0.017356034, 0.05467018, -0.13556047, -0.051645495, 0.08613384, -0.18583167, 0.023360416, -0.12590869, -0.06778763, -0.06438733, 0.025624113, 0.07671888)); + target1 += mul(nb1, float4x4(0.042797543, 0.076091446, 0.082091615, 
0.014681128, -0.09378036, 0.062476482, 0.026251588, 0.16627216, -0.15255791, 0.17601879, 0.042653207, 0.039376315, 0.029179158, -0.0095602125, 0.0705857, 0.011434591)); + target1 += mul(nc1, float4x4(0.012922825, 0.13863216, -0.09220861, -0.005267679, 0.12863027, 0.08068719, -0.07179554, -0.13297969, 0.04991335, -0.01473723, -0.028486373, 0.26253343, -0.052293234, -0.16709994, 0.013800583, 0.060783714)); + target1 += mul(nd1, float4x4(-0.17575453, -0.036046885, 0.17919157, -0.18988807, -0.18178074, -0.058441214, -0.07271548, -0.008791415, 0.18230358, 0.07766667, -0.066274896, -0.15386371, 0.06161233, 0.003612807, 0.20308098, -0.020216005)); + target1 += mul(ne1, float4x4(-0.05010378, 0.018410517, -0.050254025, 0.012066753, -0.12485184, -0.1916662, -0.1278125, 0.06593962, 0.11824467, 0.07994578, 0.05962518, -0.20991555, -0.114382625, 0.07509197, -0.19671203, -0.4580128)); + target1 += mul(nf1, float4x4(0.17728399, -0.15649322, -0.15205286, 0.22968316, 0.037434835, 0.021075314, -0.090972036, -0.17058647, 0.19727467, -0.013115808, -0.08461909, 0.010409278, 0.04355671, 0.08082593, 0.013779581, -0.08425518)); + target1 += mul(ng1, float4x4(-0.31590196, 0.107831545, -0.12198127, 0.00977694, -0.16240558, -0.038805872, 0.037051022, 0.10276969, 0.26788524, -0.072160736, 0.03843579, -0.08990598, -0.04897058, -0.019324914, 0.06016647, -0.015361721)); + target1 += mul(nh1, float4x4(-0.16626236, -0.07336449, -0.11358449, 0.08885961, -0.044137727, 0.057762783, 0.08864482, 0.029383648, -0.08608859, -0.17586444, 0.094455965, -0.054391533, -0.18796252, 0.009314891, -0.014734876, -0.02058656)); + target1 += mul(ni1, float4x4(0.12067889, 0.3618014, -0.17719771, 0.2175122, 0.12890387, 0.20503749, 0.19662304, 0.17338246, 0.1733569, -0.057952117, -0.016951751, -0.057121612, -0.014850513, -0.05018768, 0.20244005, 0.016323887)); + target1 += mul(na2, float4x4(-0.13357711, 0.12105561, -0.030620668, 0.005170665, 0.044319738, 0.12768681, 0.15325043, 0.027631996, -0.080610365, 0.03741198, 
-0.017102083, -0.0035679936, -0.2243731, 0.16709204, 0.023224674, 0.11311707)); + target1 += mul(nb2, float4x4(0.02376095, 0.027235378, -0.009955967, -0.049886744, -0.08411108, 0.10339928, -0.02877354, 0.12704167, -0.13884954, 0.089170545, -0.0039057198, -0.16050623, -0.05318099, -0.10950255, -0.11412448, 0.042694647)); + target1 += mul(nc2, float4x4(-0.20557326, -0.16362014, -0.090093814, 0.10406815, 0.08791842, 0.013667629, 0.099605836, -0.1062854, -0.07108554, -0.10362472, -0.0647173, 0.12420133, -0.082551, 0.07107792, -0.17423603, -0.048405636)); + target1 += mul(nd2, float4x4(-0.1954154, -0.027208658, -0.03684051, 0.1338225, -0.084645554, 0.06871324, -0.0778811, 0.025083596, -0.19436808, -0.097009145, -0.036444522, -0.17200048, 0.013402397, -0.23984545, -0.018724974, -0.005078688)); + target1 += mul(ne2, float4x4(0.21297796, 0.023222866, -0.069507584, -0.07308915, -0.18444547, 0.016984317, -0.016325353, 0.11981142, -0.12647548, -0.074321784, 0.27461126, -0.111357704, 0.13917843, -0.035653792, 0.052209657, 0.2077564)); + target1 += mul(nf2, float4x4(-0.13399822, 0.013458072, 0.031183472, 0.24100806, 0.025842719, -0.1878651, 0.14646488, -0.12074156, -0.15135823, -0.18367149, 0.14775206, 0.06404863, 0.06884799, 0.19008774, -0.094522566, 0.087253615)); + target1 += mul(ng2, float4x4(-0.2991564, 0.15301964, -0.028454246, 0.10222737, -0.14888696, -0.021354329, -0.26517984, 0.17276473, 0.021648446, -0.17384106, 0.071495906, -0.16509262, -0.029774027, 0.17916657, -0.036435083, 0.1344122)); + target1 += mul(nh2, float4x4(0.043782394, -0.111460604, -0.094103605, -0.024549566, -0.09227317, 0.009563868, -0.11380084, 0.14710943, 0.1623694, -0.2684087, 0.08932176, -0.025791056, 0.10586864, -0.2849578, -0.049896624, -0.07046415)); + target1 += mul(ni2, float4x4(0.06390326, -0.16954753, -0.24643445, -0.06667138, 0.0153694395, 0.1391578, 0.033687413, -0.18783121, -0.061314933, -0.19441758, -0.033504955, 0.1402065, -0.082206115, 0.16466151, -0.07656087, 0.14898944)); + target1 
+= mul(na3, float4x4(-0.1266701, 0.036555164, -0.4070397, -0.085509166, 0.045745134, -0.0494443, -0.07149184, -0.05286605, -0.022561546, -0.091546714, -0.12706481, 0.1923914, 0.26536146, -0.07096412, -0.16030753, -0.21569426)); + target1 += mul(nb3, float4x4(-0.097307466, 0.15349665, 0.015644126, -0.22425117, 0.21123715, 0.022773454, 0.23383828, -0.07435915, 0.07146555, -0.02743282, 0.14647867, -0.0041729338, 0.12715502, 0.11781688, -0.061080795, 0.0026166402)); + target1 += mul(nc3, float4x4(-0.010103422, -0.087011784, -0.12507296, -0.009202013, -0.0016642559, 0.12229101, 0.012257156, 0.09069687, 0.17266563, 0.04349975, 0.0065761553, -0.071280204, 0.03610506, 0.18303613, -0.02108923, -0.06867508)); + target1 += mul(nd3, float4x4(-0.13150483, -0.060967755, 0.0055990918, 0.037484363, -0.02158257, -0.024784425, 0.23109616, -0.120935716, 0.20638125, -0.072126925, 0.062352557, -0.004980783, 0.19314887, 0.13248818, -0.23808232, 0.014506469)); + target1 += mul(ne3, float4x4(0.18638828, -0.065645434, -0.20713033, 0.09149545, -0.24210495, -0.06484725, 0.08750317, 0.1802478, 0.3541541, -0.06987437, -0.1159385, -0.028150197, -0.23300691, -0.09201996, -0.121867135, -0.13276023)); + target1 += mul(nf3, float4x4(0.09099928, -0.039182268, -0.1400286, 0.010247891, -0.010239972, -0.18701951, -0.1772805, 0.01631285, -0.09500139, 0.2590885, -0.09521566, 0.05752499, -0.1184693, 0.04186501, 0.27024126, 0.08569921)); + target1 += mul(ng3, float4x4(-0.0729032, 0.10695013, -0.18894811, 0.06616699, 0.05852647, 0.03802247, 0.024427114, 0.022371208, 0.28009695, -0.022878911, 0.04645292, 0.060003202, 0.1053563, 0.027735699, 0.007826481, 0.14397411)); + target1 += mul(nh3, float4x4(-0.15458257, 0.12910113, -0.11843165, 0.14065553, -0.19225205, 0.059665926, 0.2690873, -0.1308205, 0.071195096, 0.07672256, 0.1497483, 0.21867657, 0.15143347, -0.16467342, -0.13924904, 0.098136105)); + target1 += mul(ni3, float4x4(0.05049889, 0.069295354, 0.017172134, 0.048614368, -0.19597568, -0.029311683, 
-0.190372, -0.025514813, -0.24531111, -0.041956335, 0.24628574, 0.15919869, 0.051921643, 0.09549575, 0.025514983, 0.13909552)); + target1 += float4(-0.012342477, -0.20862316, 0.08788906, -0.0010707981); + + float4 target2 = mul(a1, float4x4(-0.08156944, 0.10573189, 0.012908232, 0.1657589, -0.038043138, -0.2873211, -0.2046161, -0.09311608, 0.3097668, -0.08111585, -0.17932127, -0.02586952, 0.18931806, -0.13793743, -0.13352883, 0.06681123)); + target2 += mul(b1, float4x4(0.02374499, 0.14342955, 0.2563405, -0.029666856, 0.17285998, -0.1035698, -0.11706357, 0.11584379, 0.21326663, 0.06683621, -0.11183301, 0.092254475, -0.1014067, 0.03412136, -0.040375732, 0.13439587)); + target2 += mul(c1, float4x4(-0.114404246, 0.05252966, 0.00047894646, -0.028747892, 0.0105511965, 0.078781754, 0.029926287, 0.14559107, -0.12780708, -0.08478812, -0.2247857, -0.19385272, -0.13657221, 0.18088628, 0.15612762, 0.037660476)); + target2 += mul(d1, float4x4(0.05799563, 0.059148345, -0.09769129, 0.07772796, -0.09202486, -0.06425981, -0.016873274, 0.0030002298, 0.11275395, -0.08546416, -0.2876964, 0.023335997, -0.010972625, -0.032576468, -0.086281575, -0.070443906)); + target2 += mul(e1, float4x4(0.32762548, -0.06770343, 0.03179402, -0.04613723, -0.06790421, 0.44522998, 0.119118124, -0.11980204, 0.038128957, 0.17468919, 0.076030836, 0.14512211, 0.17252928, -0.047734894, -0.06045679, -0.08920573)); + target2 += mul(f1, float4x4(-0.015262433, 0.15428601, 0.06972416, -0.16334222, -0.08347724, 0.18573803, -0.11517264, -0.0009774134, -0.16686407, -0.10733252, -0.12523252, 0.050293542, 0.11212284, -0.009658616, -0.058349714, -0.014115335)); + target2 += mul(g1, float4x4(-0.056932453, 0.18084419, 0.02166639, 0.13523088, 0.011073456, -0.045516286, 0.003297358, -0.057280444, -0.018760536, -0.15718092, -0.11770054, -0.03166016, -0.19774522, 0.0755463, -0.20558798, 0.15830164)); + target2 += mul(h1, float4x4(0.19655597, 0.03901344, -0.051660974, 0.19494548, 0.034315336, -0.04597924, -0.056954715, 
-0.19345726, -0.11985197, 0.006047848, 0.12791121, -0.019705713, -0.01501477, 0.117168285, 0.025459006, 0.13246241)); + target2 += mul(i1, float4x4(-0.0023640324, 0.0349994, 0.009396353, 0.0936661, 0.100842424, -0.114130996, 0.038058087, 0.12808813, -0.054103322, 0.027919596, -0.10685234, -0.07498883, -0.06130471, -0.12066764, 0.0029782685, 0.059720848)); + target2 += mul(a2, float4x4(-0.098447025, -0.011071975, 0.16054775, -0.08671137, -0.13293275, 0.05532158, 0.14407343, 0.19340874, -0.20346253, 0.11525113, 0.1687311, 0.098785535, 0.03027443, -0.054430522, 0.022521, 0.19343728)); + target2 += mul(b2, float4x4(-0.084854074, 0.06853468, 0.06792569, 0.029366238, 0.06035099, -0.05761756, -0.033579275, -0.062136766, 0.1649456, 0.049637973, 0.2630636, -0.02261985, -0.18047638, -0.071598716, 0.14448155, -0.055889398)); + target2 += mul(c2, float4x4(-0.024849698, 0.088840574, 0.1503109, -0.004984663, -0.16879597, -0.26041916, -0.3362258, 0.20055196, -0.13901941, 0.042401403, 0.18325137, 0.1716765, -0.016100548, 0.11664664, -0.07838003, -0.16286951)); + target2 += mul(d2, float4x4(-0.16242248, 0.22381666, -0.017743299, 0.07717547, 0.048560552, -0.20423977, 0.30301192, 0.00976561, -0.2708939, -0.092156336, 0.038034424, 0.06372939, 0.06721783, -0.023243327, 0.119849995, 0.15898646)); + target2 += mul(e2, float4x4(0.10859177, -0.05935216, -0.015591001, -0.053253412, 0.071014024, 0.43206415, 0.04865775, 0.069328085, -0.09695977, 0.19359045, 0.016935471, 0.0028954153, -0.08338698, 0.041919734, 0.032975465, 0.11067615)); + target2 += mul(f2, float4x4(0.32948914, -0.04703423, -0.075494416, -0.06948022, -0.18574949, 0.15096106, 0.0067734853, -0.16238153, -0.21330655, 0.25306207, 0.08089956, 0.08108933, 0.056989696, 0.05212022, 0.15835905, 0.00077813526)); + target2 += mul(g2, float4x4(-0.011273352, 0.26307768, -0.04307922, 0.21710183, -0.3902529, -0.46155867, 0.015115735, -0.05384065, -0.07163729, 0.0793938, -0.0985122, 0.06594441, 0.09647775, 0.05617775, 0.07099344, 
-0.16353689)); + target2 += mul(h2, float4x4(-0.040731885, 0.14055543, -0.07012667, 0.07207971, -0.004641172, -0.06394655, 0.091212526, -0.00019208786, -0.07705868, 0.040352806, -0.07397878, 0.051934645, -0.010726301, 0.23407605, 0.12093579, -0.0406116)); + target2 += mul(i2, float4x4(0.041406166, -0.22172481, 0.22162893, 0.02442143, 0.10592917, 0.1968317, -0.14774016, 0.011944242, -0.12373062, 0.114184484, -0.090167396, 0.022542128, -0.1554341, 0.1371109, 0.13077694, -0.020479746)); + target2 += mul(a3, float4x4(0.123823315, -0.3012641, -0.2841784, 0.014021941, 0.10990905, 0.2764256, -0.075963184, -0.10125788, -0.007879674, -0.08643855, -0.038958456, 0.07453782, -0.48677143, -0.03276048, -0.03156215, -0.09289601)); + target2 += mul(b3, float4x4(-0.10992206, -0.05435893, 0.11743695, 0.17674956, 0.13509355, -0.17421335, -0.100946076, -0.10648024, 0.14750971, 0.21357685, -0.107157655, -0.017665314, 0.2106041, 0.124202386, 0.24976057, -0.09088304)); + target2 += mul(c3, float4x4(-0.26258346, -0.03037757, 0.13096122, 0.13691814, 0.11316644, -0.14852227, 0.008399919, 0.04381969, 0.030872608, 0.45056874, -0.04014858, -0.012530115, 0.21238118, -0.1332986, -0.101533614, 0.077671215)); + target2 += mul(d3, float4x4(0.101686284, -0.21485107, -0.109051324, 0.047709018, 0.018496532, 0.030967599, -0.07855083, 0.05204436, 0.0077558183, 0.080045685, -0.09668984, 0.17999001, -0.15804431, -0.042034358, -0.21375516, 0.001163862)); + target2 += mul(e3, float4x4(-0.14624378, 0.42138338, 0.028315686, -0.20134708, -0.010074609, -0.046433613, -0.050019633, 0.08432513, -0.079346046, -0.27917975, -0.19784799, 0.25092122, 0.21972348, -0.0084989555, 0.11432945, -0.0727637)); + target2 += mul(f3, float4x4(-0.22297074, 0.20484488, 0.17720158, 0.0022023271, -0.034587737, 0.0004995375, -0.027270092, -0.08549106, -0.07970776, 0.14142907, -0.039514165, 0.08021129, 0.262039, 0.08684183, 0.08106768, -0.088322006)); + target2 += mul(g3, float4x4(0.19230787, -0.019139988, 0.100881554, 0.0622476, 
-0.0073597133, -0.007861123, -0.09819001, -0.035048965, 0.1649283, 0.096261285, -0.0899776, -0.03930426, -0.044506907, 0.20075877, -0.049743377, -0.0076403967)); + target2 += mul(h3, float4x4(0.0043743993, 0.20346396, 0.1655524, -0.025431981, -0.02454905, -0.04476991, 0.020741275, -0.12993908, 0.026805034, -0.0037405565, -0.17931041, 0.09257133, 0.13752705, 0.07889819, -0.037251562, -0.002646608)); + target2 += mul(i3, float4x4(0.038870014, -0.37619725, 0.046597917, -0.15463144, 0.054383356, -0.2925491, 0.0640225, -0.00486844, -0.0016340262, 0.10840749, 0.0993287, 0.17394166, 0.08594391, -0.030945132, 0.025646068, -0.06640845)); + target2 += mul(na1, float4x4(-0.01649855, -0.068216905, -0.027988954, -0.12154563, 0.022097806, -0.1290429, 0.10954417, 0.13157494, -0.1745968, -0.04658394, -0.053029858, -0.0759596, -0.04430781, -0.041724976, -0.056713972, -0.14473973)); + target2 += mul(nb1, float4x4(0.06543556, 0.092009485, -0.08451462, 0.052707452, -0.06780165, -0.088456, -0.025358824, -0.12258837, -0.10129489, -0.059306916, -0.14748581, 0.014620428, -0.038939722, -0.10054172, 0.09494565, -0.07793254)); + target2 += mul(nc1, float4x4(-0.05932573, 0.013406356, 0.26368266, 0.18454649, -0.03142332, -0.01590683, -0.06236948, 0.11061398, 0.025253339, -0.030919848, 0.064894855, 0.13248478, -0.030221257, -0.0986045, -0.034824356, -0.16913392)); + target2 += mul(nd1, float4x4(0.0015110603, 0.2025821, 0.004228453, 0.08477586, -0.03797453, -0.04194356, 0.18174535, -0.06626136, -0.13344109, -0.22612168, 0.02602776, 0.016666876, -0.027019914, 0.119900815, -0.06250115, -0.070262626)); + target2 += mul(ne1, float4x4(-0.14976665, 0.03257234, -0.14965177, 0.073865525, 0.062913194, 0.05034122, 0.03676157, -0.018906, 0.04145618, -0.111236595, -0.20951095, -0.060131762, -0.16541055, -0.08913449, 0.044624332, -0.08443667)); + target2 += mul(nf1, float4x4(-0.21176168, -0.015680272, 0.25104785, 0.28819278, 0.068234585, -0.067152865, 0.18975581, -0.024222756, 0.09343949, 0.107427366, 
-0.08206377, -0.07970111, -0.10268362, -0.02063304, 0.007915588, -0.1344096)); + target2 += mul(ng1, float4x4(0.061288554, -0.017783957, 0.1759008, -0.096834674, -0.17838398, 0.22331426, -0.027759569, -0.0883247, -0.05435304, -0.099557355, 0.026310958, 0.18467775, 0.07900235, -0.017400427, 0.1453773, 0.033763483)); + target2 += mul(nh1, float4x4(-0.06601715, 0.19832757, 0.10341119, 0.015197309, -0.13140027, 0.06353335, -0.033154953, 0.14772332, 0.053612914, -0.018467115, -0.1992033, 0.17353232, 0.16321027, -0.09609656, -0.12580357, -0.052030507)); + target2 += mul(ni1, float4x4(-0.09335505, 0.099851064, 0.12890811, 0.13102262, -0.07580953, -0.11255671, -0.18570407, -0.14529274, -0.05160979, 0.06461672, -0.038672008, -0.00841868, 0.0029629876, -0.13739161, -0.29193023, -0.081763566)); + target2 += mul(na2, float4x4(0.23590541, 0.009043033, 0.06940084, 0.13891594, -0.010488754, 0.029098868, 0.07929391, -0.07250032, -0.13742201, -0.18533885, 0.2531767, -0.009061109, -0.027644258, 0.10404188, 0.012537389, 0.10293872)); + target2 += mul(nb2, float4x4(0.19354686, 0.15574348, 0.31874457, 0.024332082, 0.06383042, 0.048204664, -0.073850416, 0.032850295, -0.34514645, -0.054682292, -0.054835007, 0.012525943, -0.031569667, -0.093528986, 0.077636436, 0.080878824)); + target2 += mul(nc2, float4x4(-0.061584793, 0.003138571, 0.25193092, 0.09340434, 0.17664844, 0.010498078, 0.18399622, -0.23279727, -0.12833218, 0.15312086, -0.10134878, -0.0025951387, 0.07395745, -0.059028395, 0.1285172, 0.13659331)); + target2 += mul(nd2, float4x4(0.1286127, -0.08862414, 0.123132095, -0.11186987, 0.04064812, 0.1295343, -0.08698302, -0.054833192, -0.06911518, 0.1468998, 0.14806904, 0.0002644252, -0.102448784, 0.0064156754, 0.111383334, -0.07292957)); + target2 += mul(ne2, float4x4(0.05504673, -0.076037504, 0.11776747, -0.07890708, 0.077408485, -0.117229365, 0.0197986, -0.12881358, -0.121706314, 0.008088911, -0.025189465, -0.06471935, 0.111992925, -0.08574453, -0.18029808, 0.057162132)); + target2 += 
mul(nf2, float4x4(-0.09641628, -0.08636256, 0.07254762, -0.1108583, 0.06322016, 0.04606108, 0.015605975, -0.023462018, 0.077079624, 0.12611854, -0.026314614, -0.021778936, -0.080265954, -0.028592844, 0.1361638, 0.16848429)); + target2 += mul(ng2, float4x4(0.14155127, 0.013242842, 0.04764719, -0.12724996, -0.05762018, 4.4798093e-05, 0.31255975, -0.52083194, -0.18550456, 0.109841965, 0.1860627, 0.11478285, -0.36154944, -0.12439295, 0.3006208, 0.032344274)); + target2 += mul(nh2, float4x4(-0.11564562, -0.034078646, 0.16126357, -0.1936752, -0.2330871, -0.13876866, 0.088089384, -0.021154383, -0.091547124, 0.091753796, 0.18144718, 0.1774146, 0.007724317, 0.097580045, -0.15106232, -0.04128832)); + target2 += mul(ni2, float4x4(0.071651496, 0.18003649, 0.10129018, -0.16904286, -0.2137536, -0.1308051, 0.13850693, 0.04569891, 0.09158717, 0.1749203, -0.032127034, 0.06019649, 0.12735014, -0.19949023, 0.003664079, -0.050514087)); + target2 += mul(na3, float4x4(-0.009363578, 0.083391, -0.08583937, -0.008416162, -0.024429835, 0.008918877, -0.15991227, -0.035743445, -0.040119864, 0.20200913, -0.09585724, 0.039848186, 0.2914714, -0.13199879, -0.04198891, 0.049873233)); + target2 += mul(nb3, float4x4(0.14203294, -0.12218405, -0.1336784, -0.011557518, -0.10419894, -0.047520764, 0.012323197, 0.01812075, -0.15906301, 0.057789516, -0.108339556, 0.035662923, 0.008705645, -0.017022535, -0.11589909, 0.030071909)); + target2 += mul(nc3, float4x4(-0.15126535, 0.116061516, 0.26665378, -0.11970062, -0.192801, 0.021354547, -0.253131, 0.12830788, -0.17019245, 0.06896555, -0.0015308838, -0.0076949615, 0.031619042, -0.14708556, -0.11876281, -0.053292263)); + target2 += mul(nd3, float4x4(-0.14085393, 0.15730241, 0.10422539, 0.025466066, 0.10541659, -0.0012975787, 0.041553672, 0.059082996, -0.154172, 0.08198402, 0.09771777, -0.068264395, 0.047784068, -0.11348507, 0.004380174, -0.089181446)); + target2 += mul(ne3, float4x4(0.04478754, -0.18557417, 0.13422509, 0.15747893, -0.009310171, -0.0116828615, 
-0.0116161555, -0.0065923473, -0.028874157, 0.17116025, -0.15008302, 0.0864679, -0.10439667, 0.09480786, -0.14620537, -0.12444)); + target2 += mul(nf3, float4x4(-0.10271061, 0.037290677, 0.16068509, -0.0020577735, -0.26431653, 0.0316218, 0.13216278, 0.039026607, 0.114048995, -0.08055903, -0.25474527, 0.03769183, 0.11541464, -0.13846509, -0.23404308, 0.059910618)); + target2 += mul(ng3, float4x4(0.03207741, -0.057938, -0.083276935, -0.08009412, 0.11193717, -0.07672049, -0.16157848, -0.11298354, -0.17304356, 0.08984146, -0.050554533, 0.15308471, -0.05547862, -0.15691018, 0.07320868, -0.042120814)); + target2 += mul(nh3, float4x4(0.048134506, -0.10295267, 0.051832333, -0.13681562, 0.103027515, -0.06026332, 0.06881206, -0.015670486, 0.28807607, 0.03059088, 0.034055263, 0.017337816, 0.05512398, 0.075067505, -0.036354467, 0.06471895)); + target2 += mul(ni3, float4x4(-0.085566096, 0.014341178, -0.08384431, -0.051138613, -0.13172193, -0.10944131, 0.052603673, 0.10315314, 0.13149905, -0.10674123, -0.007911778, -0.028487006, 0.13898246, -0.018405652, 0.04242993, -0.10391517)); + target2 += float4(0.06731381, -0.14791869, -0.15826754, -0.069372416); + + float4 target3 = mul(a1, float4x4(-0.0017213221, -0.15371315, -0.092273064, -0.10798677, 0.009334791, 0.22254497, -0.097098924, 0.029816378, 4.457267e-05, -0.1057864, 0.4134007, 0.14368671, -0.004629636, 0.17854625, 0.2903048, -0.06277739)); + target3 += mul(b1, float4x4(-0.046712447, 0.119774394, -0.117091574, 0.09618261, -0.10770648, 0.124485455, 0.075216, -0.28377417, -0.24061379, -0.09114137, 0.23112294, 0.12123567, 0.025058655, 0.093606554, 0.10327309, -0.024526346)); + target3 += mul(c1, float4x4(0.019105028, 0.06630737, 0.032209937, 0.09685681, -0.018223759, 0.04791892, -0.008235882, -0.29300943, 0.25300565, -0.2488416, 0.08808891, 0.23057054, 0.07350692, -0.106139764, -0.063049704, -0.059718538)); + target3 += mul(d1, float4x4(0.0455073, -0.051755026, -0.11883914, 0.20130287, -0.131154, 0.017220428, 0.12068244, 
0.070289314, -0.12415149, -0.22242554, 0.08771896, 0.0035022376, 0.24336605, 0.08416074, 0.028170893, -0.03845105)); + target3 += mul(e1, float4x4(0.03242001, 0.102102384, -0.17709577, -0.0109795965, 0.08089789, -0.021498924, 0.06255124, -0.042419348, 0.108601704, -0.05202687, -0.12712812, -0.17035247, 0.17001751, -0.045719698, 0.09703396, 0.037530866)); + target3 += mul(f1, float4x4(-0.09127368, 0.18729141, 0.11323561, 0.12806842, -0.058737166, 0.1974935, -0.1213344, 0.26005578, -0.041523788, -0.0029840702, 0.14748086, -0.10480214, -0.06823255, 0.045274846, 0.078861825, 0.088076524)); + target3 += mul(g1, float4x4(-0.10629749, -0.023263903, -0.082174115, -0.121970475, 0.21234329, 0.0262291, 0.1745219, 0.07722097, -0.12979622, -0.046668485, -0.0027060192, -0.07948489, -0.1455228, -0.1722979, -0.11220583, -0.15050055)); + target3 += mul(h1, float4x4(0.04207767, -0.08237373, 0.07580429, -0.02124768, 0.12718296, 0.053528596, -0.09762217, -0.0045613465, -0.04504155, 0.18147692, -0.13206507, 0.118414916, 0.03825585, -0.23475614, -0.06268228, 0.086768724)); + target3 += mul(i1, float4x4(0.034695115, 0.07061876, 0.04965704, 0.17847943, -0.1437011, 0.15886799, -0.201469, -0.063395016, -0.1750345, 0.11911144, -0.188721, 0.08700757, 0.14036323, -0.08573763, 0.10530263, -0.07726266)); + target3 += mul(a2, float4x4(0.21503586, -0.18479058, 0.0074815084, 0.09756983, 0.037916277, -0.17987613, 0.11589862, -0.028243838, -0.20950282, 0.026752079, 0.10840585, 0.15400405, 0.08625402, -0.07633785, 0.0017439253, -0.072862245)); + target3 += mul(b2, float4x4(0.008905137, 0.106612414, -0.07793345, 0.15220572, -0.0028391609, -0.10614796, -0.17509677, 0.09583197, 0.18518968, 0.005445739, 0.12949161, 0.07129458, 0.06554234, -0.1308029, -0.029664468, 0.010993508)); + target3 += mul(c2, float4x4(-0.054151967, -0.21677336, 0.17064962, 0.06138102, -0.06272079, -0.11186543, -0.02262431, 0.27793702, 0.019080682, 0.121934734, -0.08267019, -0.08607981, 0.10281368, -0.015739575, 0.07353178, 
0.10465199)); + target3 += mul(d2, float4x4(0.11974522, 0.044251468, -0.15450975, -0.075565055, -0.04790616, -0.031326365, 0.27381012, -0.094721034, -0.11900706, -0.06368458, 0.10776822, 0.18564561, 0.089738145, -0.0016327037, 0.18722743, 0.09222095)); + target3 += mul(e2, float4x4(-0.02468192, -0.16873443, -0.02480979, -0.13937175, -0.13027008, 0.15577625, -0.01477261, 0.07563496, -0.00062903174, 0.071869016, 0.17108877, 0.00066113746, -0.29290298, 0.07078572, -0.054790854, 0.09035019)); + target3 += mul(f2, float4x4(0.066045515, -0.11800159, -0.0750722, -0.08316888, -0.08140103, -0.107804835, 0.1621138, 0.16997898, -0.04444603, 0.28161287, -0.28550264, -0.17914039, -0.15597315, 0.15387748, -0.047001313, -0.042532828)); + target3 += mul(g2, float4x4(0.025888437, 0.13297214, -0.07546064, -0.06647902, 0.017062671, -0.2597112, 0.13725336, 0.10858415, -0.1160102, 0.13422437, 0.1592752, 0.15240288, 0.03929169, 0.2020017, 0.07010354, 0.028547695)); + target3 += mul(h2, float4x4(-0.0703738, 0.13582481, -0.036476467, -0.096972756, -0.12283295, 0.13071987, -0.056827262, -0.023500688, -0.0075902776, 0.06296815, -0.049109932, 0.16880427, 0.29702982, -0.01992682, 0.013997502, -0.070870094)); + target3 += mul(i2, float4x4(0.108744465, -0.09422798, 0.13146311, -0.250233, 0.016463336, -0.12794453, 0.03931633, 0.17450981, 0.11661872, 0.12163951, -0.1192709, -0.05398837, -0.24910302, 0.19006594, -0.1857664, -0.1205357)); + target3 += mul(a3, float4x4(-0.054634392, 0.052315067, 0.05044536, -0.05177968, 0.21537638, -0.014019764, -0.06632539, 0.030889641, -0.18629341, -0.04575244, -0.07509494, 0.09061459, -0.0686147, -0.1872925, -0.08178069, -0.17149752)); + target3 += mul(b3, float4x4(-0.08697341, 0.15311632, 0.06298225, -0.17094718, -0.0854164, 0.037885193, -0.048915166, -0.010449174, 0.030081013, -0.02462675, -0.105993316, -0.100794375, -0.05364704, -0.120219246, 0.16426747, -0.016683623)); + target3 += mul(c3, float4x4(0.1442815, -0.2285766, 0.14395493, -0.01616554, -0.054909255, 
-0.06734717, 0.044498604, -0.07669548, 0.06888753, 0.2329823, -0.2728349, -0.06917594, 0.049095903, 0.0144689595, -0.08170211, -0.21154584)); + target3 += mul(d3, float4x4(-0.0032911033, -0.30628094, 0.01655303, -0.12639484, -0.043794096, 0.12097294, 0.10301277, 0.0323829, -0.20977376, -0.2598986, -0.032757662, 0.062723145, 0.065447785, -0.10534467, -0.061504886, -0.25371954)); + target3 += mul(e3, float4x4(-0.062172186, -0.12031234, -0.05312447, -0.07274714, -0.044065587, 0.060389437, -0.011823414, 0.08889303, 0.010290733, -0.056499645, -0.012554047, 0.13659821, 0.062492277, -0.1463726, -0.30616954, -0.048617195)); + target3 += mul(f3, float4x4(-0.05244876, 0.056097146, -0.06787384, 0.09076766, -0.09579352, -0.0066260016, 0.15201993, 0.03254239, 0.021516487, 0.15981875, -0.1432654, 0.17569521, 0.12658277, -0.1530729, -0.14634636, -0.00258191)); + target3 += mul(g3, float4x4(0.19284594, -0.24125227, -0.06610495, -0.22473419, 0.19109339, 0.20509472, 0.022192668, 0.13134679, -0.16711204, 0.03866372, 0.040778622, 0.004792002, 0.06713585, -0.11313002, -0.0494123, 0.16455573)); + target3 += mul(h3, float4x4(0.08695826, 0.03544317, -0.22323117, 0.10693563, -0.060470764, 0.14525974, -0.12502834, -0.10161133, -0.29323998, -0.14850102, 0.0802706, 0.14540558, 0.07584563, -0.105335936, -0.10063164, -0.16825674)); + target3 += mul(i3, float4x4(-0.09106831, -0.054964047, -0.0060697296, 0.1795092, -0.031979155, -0.17847598, 0.02053048, -0.09066955, -0.27984852, 0.11892948, 0.24315885, 0.18758732, 0.16902542, -0.21777025, -0.012130184, -0.060705084)); + target3 += mul(na1, float4x4(0.059577208, 0.060833983, 0.10868721, 0.11276571, -0.2327309, -0.11088089, 0.20807125, -0.021718912, 0.030323144, -0.10312503, -0.22234069, 0.16634466, 0.19398251, -0.0545838, -0.13059108, 0.017868554)); + target3 += mul(nb1, float4x4(-0.07514213, 0.10887309, 0.1218314, -0.18563306, -0.008527813, -0.20459747, -0.030698426, 0.0844588, 0.23686919, 0.03104538, 0.08527714, -0.09642553, -0.08534072, 
0.06419827, -0.12806654, -0.11365306)); + target3 += mul(nc1, float4x4(-0.039864887, -0.25141066, 0.13011548, -0.13584746, -0.013512096, -0.17277367, 0.08957357, 0.24380256, -0.033397153, -0.012431397, 0.082527, 0.020838374, 0.016154792, -0.29341805, -0.015195005, 0.022471353)); + target3 += mul(nd1, float4x4(-0.11212281, 0.08150235, 0.0055854055, -0.28806004, -0.09078987, -0.05241604, -0.09806806, -0.2560824, 0.043018572, 0.013310293, -0.018843893, 0.049140453, 0.17483246, 0.12305487, -0.096557006, 0.0123909665)); + target3 += mul(ne1, float4x4(0.09532439, 0.15352365, 0.20087242, 0.08491758, -0.24605502, 0.16663635, -0.13709177, -0.12777333, 0.02181133, 0.036698326, -0.003161005, 0.05891433, -0.055862445, 0.29106724, -0.17064662, -0.14393678)); + target3 += mul(nf1, float4x4(0.0058135563, -0.22420937, 0.07235329, -0.124738544, 0.08238468, -0.2015809, -0.03386368, -0.17470017, 0.057452828, -0.06164105, -0.13776, -0.09869882, -0.0026272335, -0.20054811, 0.019651942, -0.2600821)); + target3 += mul(ng1, float4x4(-0.17325936, -0.05762174, -0.06450132, 0.050736707, 0.045916766, 0.00402603, -0.08697255, 0.12957326, -0.17539512, 0.087370165, -0.004544662, -0.073203914, -0.010898469, 0.12600337, -0.012520381, 0.034228735)); + target3 += mul(nh1, float4x4(-0.10941816, 0.0907973, -0.0004870752, -0.0067486484, -0.0726075, 0.2144327, -0.055393726, -0.023118004, -0.14722143, -0.15563087, -0.06595914, -0.048578046, -0.030177968, 0.20142747, 0.01779709, 0.01655237)); + target3 += mul(ni1, float4x4(-0.08580983, -0.026037404, -0.077059925, -0.087288134, 0.004400565, -0.011133582, 0.17784919, 0.23502137, 0.047681976, -0.11357638, -0.0896771, 0.0067448434, -0.10454412, 0.17173828, 0.02538007, 0.012261617)); + target3 += mul(na2, float4x4(-0.1899917, 0.035758197, 0.09290593, -0.321715, 0.0062465663, 0.0014386866, 0.016894078, -0.115979955, -0.0027755008, 0.06348923, 0.03340955, -0.24005453, 0.049253695, -0.038937677, 0.11952727, 0.0399283)); + target3 += mul(nb2, float4x4(-0.0768814, 
-0.070920505, 0.32928568, -0.09117129, -0.030737674, -0.10276032, 0.008501685, -0.092094645, -0.119966194, 0.08019844, 0.06642611, -0.061083883, 0.11307649, -0.031231074, -0.032001212, 0.13963008)); + target3 += mul(nc2, float4x4(-0.07274599, 0.0010301028, 0.045785096, -0.010552021, -0.13573211, 0.271882, -0.22248295, -0.28493458, 0.024056, 0.14095017, 0.065386854, 0.06830046, 0.039510656, -0.09839122, 0.20431511, 0.09510801)); + target3 += mul(nd2, float4x4(0.015967855, -0.18058023, 0.18704537, 0.18511131, 0.08232382, 0.0142269125, -0.045059025, 0.09668988, 0.062527284, 0.15584159, -0.19181041, -0.09103482, 0.07462716, 0.08690921, -0.006602257, -0.048261993)); + target3 += mul(ne2, float4x4(0.06590294, 0.03255081, 0.27418908, 0.12957683, -0.056972653, -0.13130698, 0.116743594, -0.021665238, -0.049696703, 0.1355714, -0.034948308, 0.013496893, 0.08264742, -0.040836275, 0.066302836, -0.008282482)); + target3 += mul(nf2, float4x4(-0.031672716, 0.062036, 0.0670039, 0.118378155, 0.16932462, 0.19176582, -0.14296779, -0.07521962, 0.08186631, 0.13872068, 0.2050204, 0.23874411, -0.05187021, -0.14518432, 0.17769787, 0.13543007)); + target3 += mul(ng2, float4x4(0.23216663, -0.07822891, 0.19363302, 0.14644198, 0.23314826, 0.16843605, 0.14231025, 0.39938375, 0.012976297, 0.04872197, -0.056092817, -0.06786196, -0.13020758, -0.16039686, -0.08942605, 0.06917485)); + target3 += mul(nh2, float4x4(0.13809198, -0.07787285, -0.0032761474, 0.08901838, 0.06670918, 0.23262213, 0.19812497, -0.29459605, -0.16106832, -0.089955695, 0.018862866, 0.027937569, -0.068481594, 0.0515106, 0.0076716254, -0.020717952)); + target3 += mul(ni2, float4x4(0.15160611, -0.056448795, -0.01282516, -0.060768176, -0.13858989, 0.070536785, -0.036451727, -0.007100553, -0.06416002, 0.1640014, -0.012680492, 0.089894645, 0.089873075, -0.12290447, 0.07415422, 0.051840447)); + target3 += mul(na3, float4x4(0.049169756, 0.012065099, 0.044702023, 0.41471246, -0.22039439, 0.26710343, 0.03259032, -0.0010071819, 0.122387365, 
0.016845915, -0.04162581, 0.16303158, -0.018624788, -0.018498175, 0.119111605, 0.066239804)); + target3 += mul(nb3, float4x4(0.1304685, -0.015543399, 0.09727904, 0.025493689, 0.11235736, -0.024798019, 0.24016461, 0.05678371, 0.29092878, 0.008495527, -0.08145035, 0.1277052, 0.09728953, -0.064336315, 0.018896975, -0.0052928496)); + target3 += mul(nc3, float4x4(-0.22020516, 0.17298244, 0.08216116, 0.13081113, -0.058733664, 0.14459507, 0.1042437, 0.10113822, -0.012354008, 0.21633418, 0.059657548, 0.14173268, 0.026709042, -0.10159428, 0.14287837, 0.16256075)); + target3 += mul(nd3, float4x4(-0.03602925, 0.19763114, 0.14659521, 0.079257175, -0.048765395, -0.04763924, -0.023928326, -0.07900388, 0.13704984, 0.08109074, -0.017959716, 0.0065745655, -0.052421648, -0.03608805, 0.06062624, 0.11137132)); + target3 += mul(ne3, float4x4(0.10591948, 0.0052649123, 0.18899056, 0.0075388527, 0.035225954, -0.062119495, 0.022104654, -0.10452858, 0.03833499, 0.26919907, -0.078174464, 0.0016594962, 0.09164568, -0.05362235, 0.047250915, -0.031277195)); + target3 += mul(nf3, float4x4(0.0244364, -0.06794058, -0.021393122, -0.053156774, 0.15241314, -0.09962311, -0.03456499, -0.016867915, 0.1597494, -0.12681212, -0.010430228, 0.00086353114, 0.027244834, 0.08854933, 0.1284529, -0.05862663)); + target3 += mul(ng3, float4x4(-0.12345045, -0.044616744, -0.04131162, 0.13541003, -0.047810026, -0.12005011, 0.010486988, -0.021923149, 0.11812008, 0.17721419, -0.032736443, -0.15231252, -0.13128845, 0.07795993, 0.047232933, -0.07249807)); + target3 += mul(nh3, float4x4(0.08612666, 0.02928595, 0.24572, 0.1079535, 0.06905186, -0.040503707, 0.08792316, 0.13987797, 0.14096849, -0.026072232, -0.024833977, -0.031660788, -0.07927557, 0.03298344, -0.08978443, 0.112841055)); + target3 += mul(ni3, float4x4(0.15270372, 0.07552049, 0.09564199, -0.13284975, 0.003842602, -0.029099604, 0.0003256477, -0.09769279, 0.12788263, -0.10107807, 0.10767, 0.23706906, -0.059877742, 0.09791839, 0.04538287, 0.16307582)); + target3 
+= float4(0.07341823, -0.019611815, -0.09007808, -0.022756629); + + float3 target4 = tex7.SampleLevel(sam, pos, 0).rgb; + target4 += mul(e1, float4x3(0.022627862, -0.020713277, -0.009454221, -0.04352193, 0.058409747, 0.07186154, -0.009326966, 0.034919802, 0.04204233, 0.025182368, -0.039986387, -0.04990386)); + target4 += mul(e2, float4x3(0.0116241425, -0.039915055, -0.050241623, -0.0076204035, 0.050215762, 0.059038218, -0.006659752, -0.0054298495, -0.003807067, 0.011085346, -0.009443587, -0.009128077)); + target4 += mul(e3, float4x3(0.0453952, 0.004603456, 0.006256434, -0.104142666, 0.05726496, 0.069169044, -0.10102446, -0.034291938, -0.013720296, -0.035107866, -0.008388971, -0.0068969135)); + target4 += mul(ne1, float4x3(-0.038070124, -0.015017457, -0.015852718, 0.0607464, -0.052079927, -0.07268223, 0.008773512, -0.026051786, -0.027285712, -0.022916751, 0.048140153, 0.064897746)); + target4 += mul(ne2, float4x3(-0.01670857, 0.012646949, 0.03353705, 0.038032394, -0.044542246, -0.06310885, 0.002600519, -0.00824961, -0.008912322, 0.023435717, 0.021788329, 0.008603494)); + target4 += mul(ne3, float4x3(-0.02889454, -0.0058613745, -0.010699256, 0.12959917, -0.046572708, -0.06832117, 0.028117642, 0.020422146, 0.00869695, 0.035915125, 0.009355984, 0.005175107)); + + tex4[gxy] = target1; + tex5[gxy] = target2; + tex6[gxy] = target3; + tex8[gxy] = float4(target4, 1); +} + +//!PASS 7 +//!DESC Conv-4x3x3x24 +//!IN tex4, tex5, tex6, tex8 +//!OUT tex1, tex2, tex3, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass7(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + 
float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex4.SampleLevel(sam, pos, 0); + float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex5.SampleLevel(sam, pos, 0); + float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex6.SampleLevel(sam, 
pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e3 = tex6.SampleLevel(sam, pos, 0); + float4 f3 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex6.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(-0.053204395, 0.2134829, 0.12336964, -0.10227736, 0.13940702, -0.124413736, 0.3020443, -0.2065515, -0.004734049, 0.037971064, -0.17321284, 0.041885074, 0.077058956, 0.12063891, -0.010338445, 0.06337065)); + target1 += mul(b1, float4x4(0.12816934, 0.14948028, -0.09161687, 0.009573578, 0.22003245, 0.044031654, 0.090882175, -0.14265673, 0.06734865, 0.05421324, 0.11106335, -0.020738617, 0.02484326, -0.059336618, -0.009157065, 0.0821956)); + target1 += mul(c1, float4x4(-0.02057381, -0.053952582, -0.05662845, 0.043356568, 0.2431925, -0.117109254, -0.03546069, 0.32747653, -0.0656724, -0.10274332, -0.026182862, 0.16777003, -0.038789105, -0.011600223, -0.06111373, -0.045530178)); + target1 += mul(d1, float4x4(-0.11627616, -0.2680533, 0.010153158, 0.04263144, -0.046353284, -0.05806104, 0.08532106, 0.02319678, -0.12570818, 0.0359389, 0.020782439, 0.10452313, 0.06330789, -0.0086953, -0.03920925, 0.06789389)); + target1 += mul(e1, 
float4x4(-0.08820413, -0.13917038, -0.049961973, 0.10507677, 0.25912637, 0.048801307, 0.13123387, 0.055866715, -0.055367444, 0.1428978, -0.040858068, 0.20058946, 0.0673469, -0.17162299, 0.15529002, 0.41366217)); + target1 += mul(f1, float4x4(-0.081712715, 0.04338456, -0.0368015, -0.0018422191, 0.16511263, -0.21779254, 0.065223925, 0.4804269, 0.26078546, -0.038037203, -0.2898542, 0.2068737, 0.101655796, -0.12456843, -0.11357212, -0.005879897)); + target1 += mul(g1, float4x4(-0.074044555, 0.07722422, 0.062057327, -0.039013617, 0.12760206, -0.18111233, -0.01114239, 0.1514668, -0.008963988, 0.23631106, 0.18362597, 0.14166053, -0.046458114, 0.16774492, 0.17774823, -0.008998563)); + target1 += mul(h1, float4x4(0.09820194, -0.054974817, -0.015640004, -0.037923157, 0.22821093, -0.03986652, -0.0074655996, 0.04587354, 0.05650628, 0.112482674, 0.023865355, 0.24882393, -0.011221855, 0.13942584, 0.003652544, -0.06288897)); + target1 += mul(i1, float4x4(-0.31229278, -0.10419711, -0.004614452, -0.032103445, -0.00018427879, 0.027711036, 0.028399462, 0.082576215, -0.056645207, 0.038272534, -0.011554511, 0.33454514, -0.21628743, 0.11849716, -0.23067485, -0.087079175)); + target1 += mul(a2, float4x4(-0.14960206, 0.29916358, -0.36191732, -0.096665345, -0.08732554, -0.10081626, 0.10593716, -0.0143145975, 0.12768494, 0.3251397, 0.23868982, -0.08632128, -0.07138096, -0.029475177, 0.07199368, -0.0016260111)); + target1 += mul(b2, float4x4(0.17022541, 0.19862384, 0.0029171365, 0.07225595, 0.08387519, -0.051419877, 0.16522466, -0.04951881, 0.07093068, 0.34544435, 0.08639415, -0.0077871718, 0.07875624, -0.10820802, 0.015711969, 0.1371948)); + target1 += mul(c2, float4x4(0.11947513, 0.03204784, -0.22552966, 0.05517582, 0.13209006, -0.06262761, 0.0719108, -0.083935544, -0.17171475, 0.07105399, 0.013485666, -0.13865131, -0.20124301, -0.10171288, -0.17265166, -0.1650513)); + target1 += mul(d2, float4x4(-0.038657106, -0.11968214, -0.04953467, 0.03988426, 0.18497725, 0.00012608049, -0.014361117, 
0.016538745, 0.053768195, 0.21468902, 0.22507563, 0.13274029, 0.09316226, 0.10554355, 0.13079438, -0.020738615)); + target1 += mul(e2, float4x4(0.3934315, -0.14415179, 0.022628346, 0.067308314, 0.06434691, -0.09336087, -0.067665786, 0.05017148, -0.06534398, -0.048088152, -0.037155427, 0.1489594, -0.054163337, 0.2329102, -0.105613016, 0.0012456856)); + target1 += mul(f2, float4x4(0.24050267, -0.0067265374, -0.0153115215, 0.06555275, 0.19129738, 0.0043795216, 0.063948326, -0.13967972, -0.40650475, 0.09109113, 0.07856194, -0.13390535, -0.08199262, 0.17485364, -0.090266995, -0.012882164)); + target1 += mul(g2, float4x4(-0.387764, -0.15284535, -0.269682, 0.063642666, 0.08651869, -0.23153405, -0.10131002, 0.0043905224, 0.220928, 0.17752749, -0.01569877, -0.0686579, 0.21019012, 0.20529252, 0.06952716, -0.058749653)); + target1 += mul(h2, float4x4(-0.293644, 0.036391325, -0.07392813, -0.086678274, 0.2078697, -0.11507264, 0.028548734, -0.16409987, 0.17409426, 0.1885014, 0.084329076, -0.15027794, 0.20641033, 0.06187141, -0.03875406, 0.0032009226)); + target1 += mul(i2, float4x4(0.10790136, 0.1387389, -0.1781791, 0.21425287, 0.12715636, -0.063490026, 0.09555745, -0.10528784, 0.12758408, 0.29311177, 0.0432301, -0.021469813, 0.021922017, 0.082767464, 0.15348153, 0.12735313)); + target1 += mul(a3, float4x4(0.0062385295, 0.11732651, 0.06049321, -0.07607647, 0.17820913, 0.06216857, 0.05036523, 0.008527562, -0.05745378, 0.065337434, -0.04389796, 0.032172143, -0.08650831, -0.13604137, 0.050570212, 0.011036989)); + target1 += mul(b3, float4x4(0.016900355, 0.14422971, -0.106490955, -0.052399695, 0.13446756, 0.07712888, 0.0058913217, 0.07991085, 0.038670607, -0.25514704, 0.12148176, 0.17061579, 0.11421595, 0.022622943, 0.058726758, -0.17090438)); + target1 += mul(c3, float4x4(0.055515286, -0.19921277, -0.0012379233, 0.064982586, 0.26003027, -0.026233593, 0.07716586, -0.025661616, -0.11324887, -0.0035626758, 0.017872687, -0.10889948, -0.09775516, 0.07376668, -0.07696171, -0.2438295)); + 
target1 += mul(d3, float4x4(0.032405633, -0.05084789, -0.088054694, -0.10841894, -0.0075752116, 0.13531004, -0.1457409, 0.13204673, 0.0792082, 0.12976237, -0.07244278, -0.11369213, 0.06102383, -0.23130623, 0.0485402, 0.06685668)); + target1 += mul(e3, float4x4(-0.13683872, -0.053872824, -0.06719165, -0.070855714, 0.019770421, 0.18132222, 0.027324507, -0.04910738, 0.17011392, 0.057926424, 0.0857354, -0.14427422, -0.066373795, 0.09973484, 0.02194641, 0.17209244)); + target1 += mul(f3, float4x4(-0.07172457, -0.09989123, 0.06346084, 0.007205204, -0.18027657, 0.007516025, -0.0042022206, -0.0091036465, 0.18030393, -0.009558301, 0.12717903, -0.02116024, 0.14172006, 0.012544988, -0.16633627, 0.13234323)); + target1 += mul(g3, float4x4(-0.026680972, 0.26901576, -0.053663265, 0.0021016174, 0.032445803, 0.037003934, 0.05414299, -0.035497934, -0.10569329, 0.050672166, -0.01144387, 0.05000742, -0.057444472, 0.0010797186, 0.018822541, -0.04636653)); + target1 += mul(h3, float4x4(0.135361, -0.058395687, -0.033542126, 0.09484118, -0.07793999, 0.013546507, 0.11820586, 0.14490362, -0.016325314, -0.0062904614, 0.12631275, 0.1394393, -0.049356613, -0.02528993, 0.26334915, -0.032557055)); + target1 += mul(i3, float4x4(0.077839315, -0.052373778, 0.036136296, -0.05023568, -0.07987715, 0.018897712, -0.17742547, 0.18015353, 0.2571155, 0.058656774, 0.013118142, 0.12145675, 0.14177194, 0.099529505, -0.028370513, 0.25136563)); + target1 += mul(na1, float4x4(0.0747753, -0.15949982, 0.076973855, 0.080785476, 0.25431648, -0.120426156, 0.059631538, 0.13541599, -0.006538664, 0.06348775, -0.15413675, -0.011688718, -0.0877202, -0.07138076, -0.20553613, 0.17151853)); + target1 += mul(nb1, float4x4(-0.24562076, -0.31801596, 0.2534939, -0.054888077, 0.23713852, -0.23484352, 0.015403321, 0.28927258, 0.02333135, 0.115237035, 0.051989716, -0.0774211, -0.17619006, -0.042421665, -0.17778155, -0.16379887)); + target1 += mul(nc1, float4x4(-0.15642986, -0.0426825, 0.075349115, -0.13867629, 0.112977736, 
0.06540842, 0.0059138774, 0.090976134, 0.102575876, -0.07702354, -0.060852207, -0.07358783, -0.030642396, -0.12437998, 0.19073227, -0.008556629)); + target1 += mul(nd1, float4x4(-0.009600349, 0.19660307, 0.06310739, -0.091261774, 0.1383758, -0.10920792, 0.01987075, 0.10960847, -0.03973851, -0.05378361, -0.053934645, -0.062070217, 0.017768001, -0.109798394, -0.27830756, 0.14825441)); + target1 += mul(ne1, float4x4(0.2253333, 0.04887524, 0.007540527, -0.21392706, 0.28378952, -0.22518088, -0.09280502, 0.25905597, 0.1558124, -0.06532809, -0.052613363, -0.038770456, -0.09479437, 0.39384437, 0.09516288, -0.29169223)); + target1 += mul(nf1, float4x4(0.023066722, -0.20169239, 0.025786614, 0.12992494, -0.0011414116, -0.0023400988, 0.13305776, -0.017615285, -0.06834794, -0.06084079, -0.10924924, 0.039389268, -0.0040167933, 0.049587116, 0.07590412, 0.31464538)); + target1 += mul(ng1, float4x4(-0.1917511, -0.008846332, 0.0914183, -0.06694468, 0.054535903, 0.19732447, 0.17194839, 0.12368525, -0.11447456, -0.10244315, -0.082908966, -0.103707045, 0.06248975, -0.14130668, -0.068753496, 0.23984621)); + target1 += mul(nh1, float4x4(-0.10043509, 0.036193024, 0.017117409, 0.15630378, 0.29531795, -0.20785378, -0.17022829, 0.010861576, -0.052274987, -0.050172083, -0.09687743, 0.025382213, 0.1061047, -0.019923043, 0.1905993, 0.31907213)); + target1 += mul(ni1, float4x4(-0.023860455, 0.013424604, -0.055340413, -0.006086705, 0.26867437, -0.18745743, 0.11919189, 0.05196282, -0.09836886, -0.10949307, -0.064731866, -0.14198364, 0.46431017, -0.14794265, 0.025133874, 0.38547024)); + target1 += mul(na2, float4x4(0.06934901, -0.20738873, 0.14471452, 0.03087651, 0.18033424, 0.16282603, -0.050284263, -0.041595727, -0.11747435, -0.04275445, -0.20998137, -0.056565028, -0.050009515, 0.13573733, -0.08438032, -0.07363902)); + target1 += mul(nb2, float4x4(-0.1109324, -0.08281566, 0.080020756, -0.07565862, 0.16276588, 0.13186535, 0.17810473, 0.051175643, -0.1470848, -0.08119655, 0.22341052, -0.14562707, 
-0.22091609, 0.08912351, 0.062519215, -0.17822169)); + target1 += mul(nc2, float4x4(-0.02652961, -0.050731696, 0.06761707, -0.070221156, 0.11255305, 0.15729706, 0.18315557, -0.0030489026, 0.08721225, -0.04417, -0.044907395, -0.0631245, -0.010991895, 0.14397791, -0.016412318, -0.016923137)); + target1 += mul(nd2, float4x4(-0.12462993, 0.14335859, 0.08130342, -0.16543365, 0.010432147, 0.019978197, -0.017498186, 0.03631899, 0.057306956, -0.06078837, -0.015008236, -0.24389061, -0.10250533, 0.31660014, 0.33440468, -0.12124798)); + target1 += mul(ne2, float4x4(-0.27909592, 0.21149877, 0.050259847, -0.24782999, 0.07350583, -0.03168507, -0.0206597, 0.07860909, -0.07629377, 0.1713701, 0.24176298, -0.25509474, 0.002090829, 0.051905315, 0.25929084, -0.09076089)); + target1 += mul(nf2, float4x4(-0.13923247, -0.083095506, -0.12958083, 0.008588576, 0.068224825, 0.094012275, 0.1395537, 0.0690222, 0.13958463, -0.02742012, 0.13905828, -0.04970139, -0.0629641, -0.15277445, 0.016491361, -0.13742869)); + target1 += mul(ng2, float4x4(-0.0027394858, -0.07178526, 0.07668042, -0.16290356, 0.10704169, 0.27434343, -0.003009555, -0.0124241095, 0.031501733, -0.10345558, -0.12258338, -0.055458266, 0.08220533, 0.16282788, 0.22585614, -0.04099274)); + target1 += mul(nh2, float4x4(-0.18252786, 0.032287426, 0.03831364, 0.03279567, -0.015436468, 0.16594371, -0.022859711, 0.014286839, -0.020073507, -0.06752274, 0.04850366, -0.03098202, 0.055985507, 0.030877378, -0.12457596, 0.012876079)); + target1 += mul(ni2, float4x4(-0.2959125, 0.12508816, -0.05321822, -0.1051829, 0.16586393, 0.07608049, -0.042397983, -0.0069031697, 0.13237686, -0.07125681, 0.021239927, 0.17826323, -0.14433292, 0.013577087, -0.14554563, -0.2040924)); + target1 += mul(na3, float4x4(0.33643177, -0.09343892, 0.05079197, -0.008774256, -0.002809458, -0.07406135, -0.33292174, 0.026698712, 0.3655136, 0.07260544, 0.3903461, -0.025114482, 0.038028333, 0.104210675, -0.4062275, -0.078964405)); + target1 += mul(nb3, float4x4(0.19767492, 
-0.1537188, 0.049587816, 0.23333088, -0.3893781, -0.011501175, -0.1826917, -0.12794746, -0.06709039, 0.015785962, -0.18090555, -0.11386157, -0.12038564, 0.011559484, -0.12779875, -0.14214684)); + target1 += mul(nc3, float4x4(-0.15774208, 0.24946158, -0.040942013, -0.1251321, -0.3509982, 0.07450445, -0.14480934, -0.20172012, -0.11019966, -0.07905495, -0.1572328, 0.12654895, 0.119401105, -0.12334677, 0.10720092, -0.06545273)); + target1 += mul(nd3, float4x4(-0.037104636, 0.33563337, 0.20923309, 0.028749982, 0.13854796, -0.13161437, 0.038462456, -0.14479184, 0.15403077, -0.04880203, 0.13780783, 0.06471987, 0.2944117, 0.13432993, -0.31482598, -0.06599348)); + target1 += mul(ne3, float4x4(0.54742974, 0.121937156, -0.07866791, 0.07451098, -0.03663172, -0.1554786, 0.059384037, -0.004000904, -0.04610048, -0.10617931, -0.18522029, 0.03238723, -0.085027255, -0.07754074, 0.22321595, -0.22000736)); + target1 += mul(nf3, float4x4(0.34576082, 0.054670934, -0.006112889, 0.08788217, -0.11128527, 0.016721481, 0.0025457302, 0.10134559, -0.08420967, 0.077211045, 0.04456844, 0.15408081, 0.08043456, -0.03195054, 0.068368874, -0.0011692513)); + target1 += mul(ng3, float4x4(-0.109538294, 0.035212234, -0.068712965, -0.09868468, -0.12186257, 0.122597136, -0.06546314, -0.024811305, -0.018210687, 0.09266877, -0.091002055, -0.05117649, 0.076985, 0.08579534, -0.14370322, -0.08178749)); + target1 += mul(nh3, float4x4(-0.21291538, 0.03441726, -0.01899837, -0.15328759, -0.17070505, 0.151839, 0.15083382, -0.08944362, -0.3224203, 0.012464086, 0.08693216, 0.014108278, -0.13456593, 0.008793197, 0.14650744, -0.04115599)); + target1 += mul(ni3, float4x4(0.12686576, 0.033990897, -0.0039116694, -0.12522134, 0.066877596, 0.09016868, -0.05867825, 0.08331187, -0.018720012, 0.10592668, 0.050558716, 0.35772276, -0.09896201, 0.057353813, -0.106769, 0.028894106)); + target1 += float4(-0.124429956, -0.023968874, -0.009741961, 0.000734556); + + float4 target2 = mul(a1, float4x4(-0.056590553, 0.03216381, 
-0.0666051, 0.19334152, -0.0050108447, -0.22589503, -0.057469424, -0.09344944, -0.1051364, -0.25752833, -0.035817955, -0.29675537, -0.1419535, -0.11206299, -0.005250591, -0.02839156)); + target2 += mul(b1, float4x4(-0.113020144, 0.028738707, 0.052538726, -0.039978653, 0.052219037, 0.057554238, 0.104583465, -0.03326389, 0.12732053, -0.09863676, -0.19774933, 0.10953924, 0.052640375, -0.2623868, -0.055126745, -0.12773202)); + target2 += mul(c1, float4x4(-0.17464705, -0.082161404, -0.18110912, 0.07796715, 0.04916518, 0.11231854, -0.086312726, -0.034675486, -0.19010356, 0.032855187, -0.013579661, 0.37123898, -0.014220876, -0.006728799, 0.08287457, -0.1138056)); + target2 += mul(d1, float4x4(0.13857616, -0.09273926, 0.13864596, 0.18886924, -0.011879785, 0.32183805, -0.051207457, 0.037754197, -0.09221778, -0.02035246, -0.17649348, 0.020960717, -0.07177013, 0.09179843, 0.080085315, 0.122304566)); + target2 += mul(e1, float4x4(-0.16989891, -0.08335691, 0.084998704, 0.11291987, -0.3019433, 0.0076751867, 0.093596675, 0.06530408, 0.1206327, 0.091008104, 0.109547265, 0.25353962, 0.036133915, 0.093532056, 0.061501086, 0.0021566728)); + target2 += mul(f1, float4x4(-0.017881159, -0.13595797, 0.01136082, 0.16003034, 0.10847896, 0.19483434, 0.26643255, -0.13653097, -0.02909977, 0.0048497478, -0.07825304, 0.19495782, 0.051259015, 0.06378301, -0.25297102, 0.12415515)); + target2 += mul(g1, float4x4(0.1937498, -0.054339543, 0.010112153, 0.1686902, -0.010859902, 0.017609913, 0.13538137, 0.21478494, -0.15561095, 0.03826493, 0.030638125, 0.15134248, 0.02018713, 0.09653892, 0.012655936, 0.12929274)); + target2 += mul(h1, float4x4(0.10884013, -0.059027947, 0.09222052, 0.08509775, -0.23504566, 0.10800187, 0.35871732, -0.27244377, 0.1780951, -0.09118458, -0.08485235, 0.18791482, 0.12209446, 0.0061277915, -0.011919617, -0.258573)); + target2 += mul(i1, float4x4(0.08261666, -0.107749484, -0.15589459, 0.23786806, -0.25947818, -0.07595851, 0.19160344, -0.024088206, -0.008799499, -0.17963524, 
-0.25323853, -0.026271267, 0.108688876, -0.21407057, -0.3583868, 0.09666366)); + target2 += mul(a2, float4x4(0.13808286, -0.04138869, -0.16940956, 0.3419983, 0.055550236, -0.020949477, -0.0067749163, -0.19835842, 0.030675124, 0.075373225, 0.12566806, -0.04334421, -0.102529705, 0.04508018, 0.23232533, 0.0019694006)); + target2 += mul(b2, float4x4(0.15215543, -0.016466457, -0.088040456, 0.17388342, 0.04182113, 0.18802759, 0.064585775, -0.14804406, -0.24339275, 0.17330259, 0.027834702, 0.058299657, -0.031298336, 0.31788856, 0.07080272, 0.24237408)); + target2 += mul(c2, float4x4(0.16990338, 0.3701443, 0.12791218, 0.14076602, 0.20176111, 0.0302564, 0.24510148, -0.13427663, -0.38024938, 0.12371078, -0.01582557, -0.3158842, 0.20104642, 0.07178823, -0.1876278, 0.084532306)); + target2 += mul(d2, float4x4(0.14377905, -0.058295894, 0.18250984, -0.09202952, 0.049288724, 0.06361697, 0.015274134, -0.009651323, -0.042051505, -0.012071234, 0.1326135, 0.019923072, -0.15128869, 0.25043762, -0.13259046, 0.00053170364)); + target2 += mul(e2, float4x4(0.094158195, 0.12379144, 0.19022636, 0.18195347, 0.013914745, 0.061979804, 0.02451591, -0.11115476, -0.17788209, 0.13222231, -0.13186376, -0.1616039, -0.24425243, 0.1886775, 0.0112440875, -0.06601394)); + target2 += mul(f2, float4x4(-0.030136446, 0.2917132, -0.27445439, 0.17572524, 0.041303374, 0.023066396, 0.15800332, -0.2759435, -0.13819514, 0.15358543, -0.20889634, -0.015854366, -0.046221938, -0.029213084, -0.20027846, -0.096412785)); + target2 += mul(g2, float4x4(0.0125947185, 0.0055787223, -0.09309416, 0.076822944, -0.093398675, 0.2956369, 0.06577939, -0.23052916, -0.07925194, -0.072308525, 0.024827626, -0.060508657, -0.12151571, 0.026541036, -0.12048794, -0.07427358)); + target2 += mul(h2, float4x4(-0.10964251, -0.17297563, 0.13372806, 0.049176272, -0.05832845, 0.017144928, -0.048461188, -0.15870371, 0.11398971, -0.107922345, 0.13167588, -0.14817321, -0.10338058, -0.31081274, 0.08330581, -0.29687402)); + target2 += mul(i2, 
float4x4(0.16665904, -0.2640339, -0.29233927, 0.038875308, -0.05411785, 0.16937009, 0.12490365, -0.124583, -0.07552158, 0.11799862, -0.28171206, -0.00040758983, -0.19385974, -0.06890529, 0.14208162, -0.1088734)); + target2 += mul(a3, float4x4(0.06168567, 0.08464485, 0.051727522, 0.0080752885, -0.024248002, -0.10022553, 0.16323335, 0.023631554, -0.05933269, -0.062205136, -0.18094447, 0.059799075, 0.21466024, 0.008523474, 0.26693302, 0.23969485)); + target2 += mul(b3, float4x4(-0.15529208, -0.011878417, -0.18483245, 0.14569621, 0.063189425, -0.19457999, -0.030479494, -0.06388341, 0.059255358, 0.021795692, -0.18915053, 0.10549042, -0.14347872, 0.035095137, 0.5123671, -0.36842114)); + target2 += mul(c3, float4x4(-0.3129531, 0.18427932, 0.08967258, 0.030795548, -0.062971294, 0.13863337, 0.1719862, -0.12454022, -0.13502273, 0.09999501, -0.08539335, -0.009761404, 0.12899344, 0.13241018, 0.07476177, 0.088581234)); + target2 += mul(d3, float4x4(0.060355544, -0.20497295, -0.056201037, 0.17441384, -0.07366008, 0.0031770081, 0.10340366, -0.065828614, -0.0135689005, 0.0018236408, -0.061976664, 0.2355626, 0.10771512, 0.077624, 0.13811535, -0.07868492)); + target2 += mul(e3, float4x4(-0.17156444, -0.026765984, -0.10527619, 0.03830846, 0.09402895, -0.004862654, 0.076368734, -0.14964046, 0.043011688, -0.23503943, -0.0006939608, 0.14159496, -0.044676844, 0.173952, 0.110504664, 0.0019379692)); + target2 += mul(f3, float4x4(-0.17247017, 0.08168303, 0.17221324, -0.06592961, 0.0044269604, 0.15659723, -0.055933986, -0.042620275, 0.06073025, 0.2532331, 0.10132909, -0.117701456, 0.12096025, 0.10205398, -0.18403697, 0.18307333)); + target2 += mul(g3, float4x4(0.09575911, -0.05598526, -0.00019075947, -0.09576007, 0.20932649, -0.20390967, 0.039013285, -0.0673076, 0.10174375, -0.029520035, 0.08187042, 0.0113893915, 0.2773657, -0.14660437, -0.052826468, -0.066547535)); + target2 += mul(h3, float4x4(0.073659964, 0.11016725, 0.03967363, -0.14039496, 0.14510235, -0.023440665, -0.14824589, 
0.040890865, -0.17982483, -0.06410239, 0.1368475, 0.06049977, -0.04931566, 0.16838568, 0.032267325, -0.14558685)); + target2 += mul(i3, float4x4(-0.09795584, 0.042064235, -0.031120127, -0.14744717, 0.027100604, -0.24968515, -0.21389422, 0.04229415, -0.09014897, 0.12878452, 0.25642878, -0.08038266, 0.19971558, 0.11135897, -0.36821046, 0.1422662)); + target2 += mul(na1, float4x4(0.1094647, -0.016677434, -0.028883765, 0.3192714, 0.09875388, 0.063245736, 0.14410317, 0.032648303, -0.06333742, 0.27168024, 0.022700999, -0.24260196, 0.2008466, 0.0035053317, 0.033708334, 0.08848844)); + target2 += mul(nb1, float4x4(0.14528061, -0.15028432, -0.12186915, 0.2541439, 0.10196279, -0.08628881, 0.013626965, 0.0865205, -0.06720443, -0.012042523, 0.2745774, -0.15612917, 0.052762404, -0.048645414, 0.2373206, 0.15480334)); + target2 += mul(nc1, float4x4(0.30316323, 0.13258561, 0.064958744, -0.006462185, -0.18336357, -0.042762443, 0.14428605, 0.0022340214, 0.126048, 0.080833666, 0.009115843, 0.03493862, 0.10809081, -0.16448757, 0.3997175, -0.110012166)); + target2 += mul(nd1, float4x4(0.02458684, -0.057449866, 0.030437991, 0.12050426, 0.09614844, -0.014490843, 0.028539594, 0.04805738, -0.09334032, -0.025414651, -0.08732445, -0.23192073, -0.17476203, -0.09348745, -0.08307593, -0.23019521)); + target2 += mul(ne1, float4x4(0.35522544, -0.079090506, 0.008817837, 0.2532623, 0.34887648, -0.06478506, -0.08268971, -0.01187354, -0.01297639, -0.1617383, -0.08950093, -0.27147245, -0.18539499, -0.025695372, 0.014795757, 0.070290186)); + target2 += mul(nf1, float4x4(0.10833107, -0.04752071, 0.0257186, 0.045938533, -0.17696926, -0.044409238, 0.013435127, -0.026669621, -0.039547954, -0.24273679, -0.11717763, 0.03446355, 0.20519058, 0.14973645, -0.06620626, 0.27608195)); + target2 += mul(ng1, float4x4(-0.05178539, -0.052307468, -0.031603504, 0.087410286, -0.02714207, 0.19870313, -0.07222196, 0.016593033, 0.1256676, -0.0017593893, -0.09573438, 0.06781198, -0.21133266, 0.17265096, -0.18769167, 
-0.44435498)); + target2 += mul(nh1, float4x4(0.06497008, -0.036607113, -0.044402726, 0.2149976, 0.13416344, 0.042011082, -0.101590805, -0.020510921, -0.06912339, -0.054973233, -0.044747703, 0.14244531, -0.28504518, 0.3040643, -0.09546776, 0.31751406)); + target2 += mul(ni1, float4x4(-0.084402256, 0.09284107, 0.035581376, -0.0062208944, -0.09883153, 0.10322051, 0.1348337, -0.31998435, -0.012351705, -0.1971895, 0.22683385, -0.12512599, -0.07051629, 0.2452453, 0.083472766, -0.20878734)); + target2 += mul(na2, float4x4(-0.20292963, 0.044648554, 0.15208347, -0.08012225, -0.12525047, 0.015525035, 0.09556482, -0.11069662, -0.085732915, 0.011575785, -0.025669998, -0.14913903, -0.04931291, 0.012865525, -0.12986338, -0.01954532)); + target2 += mul(nb2, float4x4(-0.008896974, -0.039155565, 0.027794836, -0.117017545, -0.06935417, -0.026629506, 0.007301185, -0.46567324, 0.037060194, 0.09720974, 0.2845551, -0.3020958, -0.025294555, -0.30916882, 0.18453851, -0.18012975)); + target2 += mul(nc2, float4x4(0.030631881, -0.008507908, -0.09436097, 0.0311627, -0.20561115, 0.11587156, 0.09280758, -0.085967906, 0.3602613, -0.044544138, 0.1323068, -0.009463272, -0.0025823591, -0.15646757, -0.046626896, 0.16452411)); + target2 += mul(nd2, float4x4(-0.0077203126, -0.100717455, -0.2011105, -0.14975028, -0.20319125, 0.10198259, -0.04371703, -0.27115488, 0.027433528, -0.09739682, -0.13802922, -0.26861516, -0.048793945, 0.06584455, 0.06585165, -0.008628782)); + target2 += mul(ne2, float4x4(-0.10281875, 0.040024713, -0.2812408, -0.020755077, 0.013610964, -0.032100085, -0.019541265, 0.08268734, -0.03297649, -0.037923373, -0.18825053, 0.07058112, 0.08730599, 0.03063617, 0.02987196, -0.0043262425)); + target2 += mul(nf2, float4x4(-0.040238652, -0.13039924, 0.14888343, 7.490741e-05, -0.2158812, 0.24641772, 0.006157586, -0.04499295, 0.144089, 0.07224167, 0.17486697, -0.035505384, 0.1524877, 0.14747557, 0.17406234, 0.11407642)); + target2 += mul(ng2, float4x4(0.016506152, -0.010222893, 0.13286552, 
-0.21776699, -0.09772777, 0.1287599, -0.03898535, -0.16048339, 0.16613074, 0.07386897, 0.010006783, -0.109998874, -0.44924134, -0.10780198, 0.20899624, 0.0225183)); + target2 += mul(nh2, float4x4(-0.009322647, 0.037628874, -0.07781525, 0.096469015, -0.13213164, 0.112819366, -0.009472233, -0.2799395, -0.13030471, 0.15054065, -0.06948136, -0.15108407, 0.15611546, -0.033660483, -0.015103015, -0.11582756)); + target2 += mul(ni2, float4x4(-0.1565792, -0.020967469, 0.18913873, -0.16583163, -0.1238118, 0.09852521, -0.22204556, -0.03933885, -0.0059996913, 0.26517454, 0.029015608, -0.0067967405, 0.12023722, 0.020479612, -0.11405568, 0.09855018)); + target2 += mul(na3, float4x4(-0.100906074, 0.1372623, -0.06694728, 0.24972913, -0.050774068, -0.040847532, -0.2658499, -0.055020068, 0.017677482, -0.10252552, 0.093889, -0.066453, -0.11749236, 0.117650375, -0.009431862, -0.13268448)); + target2 += mul(nb3, float4x4(0.0062916246, 0.11412136, -0.04665643, -0.05716979, -0.3630308, 0.056478713, 0.13907139, -0.46697688, -0.17572168, -0.032978512, -0.25377706, 0.2386579, 0.08279535, -0.078310356, 0.14829971, -0.22042938)); + target2 += mul(nc3, float4x4(0.032816015, -0.30565384, -0.16489638, -0.16715215, 0.19837156, 0.2794504, -0.056615926, -0.15358809, -0.040108953, -0.30223787, 0.23217356, 0.0056255152, -0.018384434, 0.151488, 0.1853468, 0.08032189)); + target2 += mul(nd3, float4x4(0.0664597, -0.20910838, 0.26195124, -0.07578308, 0.13466386, -0.040509395, -0.005630214, -0.10919593, 0.09764661, -0.099661686, 0.105231985, 0.18113208, -0.13830248, -0.16406676, -0.36873665, -0.110502236)); + target2 += mul(ne3, float4x4(-0.009745877, 0.050425317, 0.041368794, 0.34543577, 0.017489558, -0.1383922, 0.02555688, 0.08608152, 0.2675467, -0.14163154, -0.009072096, -0.04938327, 0.02321701, -0.23915094, -0.20346476, 0.02754088)); + target2 += mul(nf3, float4x4(-0.0764608, -0.18401545, 0.18727265, -0.107619025, 0.02815041, 0.14077562, -0.05316665, 0.3057819, 0.033161953, -0.15832557, -0.13877237, 
0.1657462, 0.01894343, 0.23329574, -0.14319004, 0.031079128)); + target2 += mul(ng3, float4x4(-0.3142226, 0.09312817, 0.08794322, 0.2222839, -0.06945857, 0.14425695, -0.014134404, 0.005755717, 0.010266066, -0.26988292, 0.04765992, 0.24445806, -0.11784465, 0.028391482, -0.09065907, 0.13896856)); + target2 += mul(nh3, float4x4(-0.17636561, -0.056445003, 0.06597882, 0.020473091, -0.13026594, 0.12097649, -0.060047906, 0.30939278, 0.20875697, 0.074364014, -0.06563088, -0.052628025, -0.07981685, -0.054282684, 0.006551467, 0.08257015)); + target2 += mul(ni3, float4x4(0.1486522, 0.27273872, -0.16233566, 0.08857763, 0.034426562, 0.31791484, -0.11444188, 0.20239855, -0.17699686, 0.40953103, -0.19843663, 0.32758692, -0.017546277, 0.040539514, -0.13233976, 0.054549627)); + target2 += float4(0.0570952, -0.011593155, 0.033286963, 0.00014048154); + + float4 target3 = mul(a1, float4x4(-0.028246857, 0.09429872, 0.034600366, 0.022117741, -0.034094583, -0.1416488, 0.114190586, -0.19039942, -0.03329484, 0.054765828, 0.0518203, -0.20784369, -0.11068853, -0.03985197, -0.040889204, -0.15233918)); + target3 += mul(b1, float4x4(0.0034295225, -0.0047144215, -0.13811362, 0.1063775, -0.042283904, -0.11053704, 0.031115215, -0.19094694, -0.07958675, 0.25251713, 0.27887833, 0.032974306, -0.007945948, 0.005038382, -0.018204618, -0.033514593)); + target3 += mul(c1, float4x4(-0.021439308, 0.09934385, 0.06221231, 0.20019929, 0.031433582, 0.10136135, 0.03170799, 0.22528099, -0.13307518, 0.0042947256, 0.12888439, 0.057041943, -0.093636274, -0.098759346, -0.0013004189, -0.11623657)); + target3 += mul(d1, float4x4(-0.12425962, 0.06631687, 0.03538785, 0.12683366, 0.036875088, -0.388709, 0.021293538, -0.06568616, -0.022915881, -0.17667641, -0.21997124, -0.15674002, 0.12193349, 0.05480543, -0.028813047, -0.092471436)); + target3 += mul(e1, float4x4(-0.23961155, -0.10273245, -0.08654801, 0.20536228, 0.15906096, -0.28645602, -0.20196053, -0.24955072, 0.030706927, 0.0390173, -0.18619792, 0.042841963, 
-0.021935288, 0.18055134, 0.056804277, 0.06829802)); + target3 += mul(f1, float4x4(-0.17750104, 0.060207605, -0.16278192, 0.10637904, 0.09263751, -0.15864064, -0.1921883, 0.15418245, -0.21325666, -0.060680047, -0.17831814, 0.08721947, 0.028428067, 0.110841654, -0.0018111315, -0.14204408)); + target3 += mul(g1, float4x4(-0.05341328, 0.022792514, 0.12271092, 0.10998399, -0.05194629, -0.0019651174, 0.096098036, 0.05388034, -0.09140511, -0.09375859, -0.033423815, -0.051705707, 0.40354738, -0.09664782, -0.16623749, -0.063937105)); + target3 += mul(h1, float4x4(-0.036799524, -0.0768793, -0.13867554, 0.0018584719, -0.1217911, -0.24234816, 0.09708973, -0.011562908, -0.04658245, -0.0382149, -0.06386236, -0.18728544, -0.07053968, 0.022178814, -0.011753032, 0.09338199)); + target3 += mul(i1, float4x4(-0.040192164, -0.042503025, -0.10662553, 0.04789613, -0.14751524, -0.10168207, 0.09263359, -0.042696435, -0.32350782, 0.12660037, -0.004465994, -0.006698753, 0.11897201, -0.046830907, -0.13950327, 0.06639755)); + target3 += mul(a2, float4x4(-0.35137546, 0.16106302, -0.03942045, 0.20408326, -0.21793413, -0.19028474, 0.03843431, 0.16594443, 0.06715659, -0.12361966, 0.09516593, -0.07226092, -0.0021764247, 0.09041338, -0.042596035, 0.17071731)); + target3 += mul(b2, float4x4(-0.1597755, -0.0058896556, -0.14055388, -0.1015749, 0.03897486, -0.14616072, 0.14914623, 0.04983836, 0.19837128, 0.031061351, -0.012111387, -0.14318599, 0.015185477, 0.015783781, 0.0806122, -0.029704068)); + target3 += mul(c2, float4x4(-0.039973997, -0.039424386, -0.00023192639, 0.08071814, 0.096021704, -0.20885538, -0.12213241, -0.023790348, 0.09664941, -0.10268222, 0.13096042, -0.05173415, -0.37291482, 0.07015618, -0.33403385, -0.083771)); + target3 += mul(d2, float4x4(0.03271248, 0.30518225, -0.07270691, 0.028075088, -0.05705947, -0.15325841, 0.100330696, -0.025110118, -0.076902226, 0.14327222, 0.06624428, 0.13375239, 0.37281695, 0.07052823, -0.14584045, -0.21908635)); + target3 += mul(e2, 
float4x4(0.120670766, 0.31895483, 0.025020262, -0.07187204, 0.12886079, -0.044927042, -0.016122498, -0.042634714, 0.13163976, -0.042178337, 0.1995516, 0.0356841, 0.15696648, 0.08892613, 0.21146311, -0.119200125)); + target3 += mul(f2, float4x4(0.07862659, -0.04457566, 0.026738126, -0.21411496, 0.10438254, -0.18654525, -0.01533368, 0.13947518, 0.10588101, -0.028714191, 0.15771964, 0.121909015, -0.10983157, 0.2185668, -0.068225995, -0.12562555)); + target3 += mul(g2, float4x4(-0.12062531, 0.0967178, 0.09571875, 0.23502766, 0.09096207, -0.21987092, 0.024857553, -0.048271395, 0.14787363, -0.033102654, 0.13895266, -0.04427544, 0.04914057, 0.048905186, -0.057733577, -0.26991108)); + target3 += mul(h2, float4x4(-0.06448222, 0.0056067007, 0.06258581, 0.16081811, 0.11269595, -0.120004445, -0.013984294, -0.13933693, -0.07139989, -0.052229576, 0.14940026, 0.023361623, -0.09279362, -0.18860416, 0.08875797, -0.007527515)); + target3 += mul(i2, float4x4(-0.074545845, 0.030673563, 0.15330285, 0.13776723, 0.10154421, -0.092071116, 0.04683676, -0.06964785, 0.10431926, 0.08699972, 0.23528512, -0.033892516, -0.14641368, 0.117580056, -0.004050138, -0.02582363)); + target3 += mul(a3, float4x4(0.14190136, 0.077225044, 0.09930474, 0.007267315, 0.092006706, 0.037188467, -0.027249279, -0.054990012, -0.03665177, 0.12651706, -0.100975744, -0.09072935, 0.24675299, 0.06761549, -0.05267532, 0.10347854)); + target3 += mul(b3, float4x4(0.10791531, -0.1370413, -0.08286376, 0.03607253, -0.0308955, 0.07522176, 0.018555947, -0.12568206, 0.112782314, 0.28888306, -0.003996075, 0.028732201, 0.25184667, -0.2680978, 0.02647103, -0.046891168)); + target3 += mul(c3, float4x4(-0.016372435, 0.010370288, 0.048521012, 0.17552224, 0.12718126, -0.07016058, 0.07195029, -0.020361308, 0.12597205, 0.08013731, -3.848295e-05, 0.0050118286, -0.009566892, -0.20061424, -0.03470485, -0.006634675)); + target3 += mul(d3, float4x4(-0.014340514, -0.061068784, 0.073101744, -0.026097663, -0.060043298, 0.03856278, -0.06831028, 
0.01917565, 0.0030782523, -0.27292702, 0.009022088, -0.0835327, 0.15536709, 0.19875537, -0.04220971, 0.12280315)); + target3 += mul(e3, float4x4(-0.05038896, -0.0450083, 0.11035315, 0.017889546, -0.04486168, 0.02630088, 0.076166764, 0.040405206, 0.101371124, 0.013579925, -0.14421356, 0.10385705, -0.040398728, 0.16730694, 0.21123065, 0.08927596)); + target3 += mul(f3, float4x4(0.14247608, -0.020986153, 0.23048729, 0.016399987, 0.08749712, -0.042591766, 0.10078401, -0.235661, 0.16211063, 0.06193226, -0.074332505, -0.016298788, 0.045263976, 0.15765212, 0.07818007, -0.04620609)); + target3 += mul(g3, float4x4(0.021306554, -0.09750117, 0.08551645, -0.04607957, 0.023408834, -0.023608467, -0.20876807, -0.059991024, 0.073818475, -0.011034656, 0.021592963, 0.2020669, 0.0658326, -0.037186112, -0.12142336, 0.024981985)); + target3 += mul(h3, float4x4(0.14970483, -0.034374855, 0.059193425, -0.053641498, -0.012546929, 0.12899692, -0.14678986, 0.010604312, 0.06670342, -0.16510558, 0.008418653, -0.07479036, 0.18447658, -0.048377503, -0.09458383, 0.0069656954)); + target3 += mul(i3, float4x4(0.058000036, -0.16915704, -0.019119963, -0.045525633, -0.037617203, 0.25589603, -0.25075126, 0.06523698, 0.17653236, -0.061193496, 0.06445885, 0.012287812, 0.102899276, 0.110979825, -0.22975717, 0.1812179)); + target3 += mul(na1, float4x4(0.06707089, -0.20528378, 0.046027422, 0.09201046, -0.026794929, -0.14959913, -0.1530082, -0.11166134, -0.1543093, -0.018212209, 0.1530343, 0.16413027, -0.041838966, 0.10568013, 0.027219504, -0.045931514)); + target3 += mul(nb1, float4x4(0.0007681395, 0.027546167, -0.055535425, -0.16842778, 0.031941716, 0.10155229, -0.15778649, 0.20752658, -0.040377192, -0.30390355, -0.023281433, -0.030623253, -0.09503612, -0.17188235, 0.09639771, 0.006249103)); + target3 += mul(nc1, float4x4(0.06934318, -0.0011609821, -0.1791592, 0.03465803, -0.24253, 0.05893978, 0.13887544, -0.07227747, 0.01218867, 0.029141122, -0.05214466, -0.12778749, -0.1760804, -0.06785066, -0.007493355, 
0.14466043)); + target3 += mul(nd1, float4x4(0.018881964, -0.05313997, 0.026167642, -0.11774113, 0.106899664, -0.04816693, -0.032971296, -0.2197493, -0.30351043, 0.41334164, 0.09371295, 0.117004104, -0.32039383, 0.21075623, 0.059145812, 0.22701162)); + target3 += mul(ne1, float4x4(0.15627995, -0.068059504, -0.025623176, -0.099454194, 0.053013522, -0.1204116, -0.019655226, 0.07376517, -0.25296777, -0.08185056, -0.055070046, -0.0901355, -0.11905481, -0.05469155, -0.017616548, -0.081166655)); + target3 += mul(nf1, float4x4(0.13076767, -0.05530982, -0.050112855, -0.12159198, -0.13501246, -0.003588778, -0.13545947, 0.11865785, -0.05613547, -0.068032116, -0.08055732, 0.21331398, 0.004210958, 0.0020068642, 0.028101314, -0.09094483)); + target3 += mul(ng1, float4x4(-0.06359586, 0.13318597, -0.013024477, 0.108700395, 0.11144461, -0.20727357, -0.024350716, -0.22389533, -0.09566586, -0.0131226955, -0.11817035, 0.09054735, -0.27647895, 0.07672232, -0.047891885, 0.071800984)); + target3 += mul(nh1, float4x4(-0.030071015, 0.1333995, 0.031153332, -0.086189225, -0.0019152679, -0.01622374, 0.040289503, -0.15809211, -0.12741992, 0.10740146, -0.051979292, -0.116695315, 0.320744, 0.0039460426, -0.0836046, -0.09634563)); + target3 += mul(ni1, float4x4(-0.09536935, -0.052188914, 0.047246125, 0.015771315, 0.044488825, -0.08132813, -0.27927315, -0.13175185, 0.024771225, -0.24907906, -0.023289192, -0.04971131, 0.05681843, 0.07283831, 0.064641275, -0.26641592)); + target3 += mul(na2, float4x4(-0.027925663, -0.1507286, 0.1326965, 0.016842714, 0.008826637, -0.16630088, 0.057058703, -0.18538098, -0.023735443, 0.032016642, 0.12527052, 0.16732964, 0.086843535, 0.035672616, 0.2063971, 0.09174031)); + target3 += mul(nb2, float4x4(-0.1374101, 0.0033208288, 0.10667102, 0.010594156, 0.046161152, -0.0973723, 0.038522966, 0.021097187, 0.016156282, -0.19751011, 0.28385642, 0.05756371, -0.05513193, -0.2048188, -0.21631682, 0.07647592)); + target3 += mul(nc2, float4x4(0.17377815, 0.15260585, 0.053718828, 
0.05137225, -0.022358606, -0.1206224, 0.18654475, -0.36442846, 0.037749466, -0.1104878, -0.11404351, -0.06023782, 0.20938018, 0.07982189, 0.07250349, -0.07269494)); + target3 += mul(nd2, float4x4(-0.21727799, 0.060607027, 0.020804053, 0.18055809, 0.065868735, 0.027194923, 0.07823965, -0.0036479903, -0.00017318636, 0.08600115, -0.025587326, 0.07114245, -0.019529548, -0.13423847, 0.13471194, 0.09455981)); + target3 += mul(ne2, float4x4(-0.0054947184, 0.08912019, -0.0287804, 0.06010462, 0.01399159, 0.06061662, -0.11517458, -0.097311266, 0.050931722, 0.22020856, 0.1323814, -0.04628687, -0.11665284, -0.28899986, -0.24807844, -0.26831678)); + target3 += mul(nf2, float4x4(-0.030188283, -0.03878683, -0.017246237, 0.06085806, -0.018588748, 0.022792742, 0.25868282, -0.07614454, 0.13609566, 0.048479818, 0.1144347, -0.11878534, -0.0087716095, -0.10999109, -0.052827284, 0.05120022)); + target3 += mul(ng2, float4x4(0.13541034, 0.01645716, -0.058492333, -0.038296085, 0.100599736, -0.116733365, 0.04200369, -0.025886245, 0.10077625, -0.16246797, -0.17139618, 0.1154542, 0.048264973, 0.28143618, 0.21083501, 0.1901906)); + target3 += mul(nh2, float4x4(0.17519377, 0.11165914, 0.06639653, 0.07394748, -0.007674659, 0.16630298, 0.19389485, -0.095608205, 0.08834474, -0.014449134, -0.1498579, 0.10741625, -0.15439212, 0.067960866, -0.037635356, -0.15552957)); + target3 += mul(ni2, float4x4(-0.06438933, 0.014048397, 0.10090704, -0.113563396, 0.16256817, 0.05490672, 0.07492557, -0.117161274, 0.21595421, -0.043381806, -0.051558085, 0.1740199, 0.2152678, 0.2786416, 0.16830157, 0.2127052)); + target3 += mul(na3, float4x4(-0.15677509, -0.43225375, 0.060302902, -0.25911507, 0.33240193, -0.042785197, 0.12322616, 0.060724694, 0.19070825, 0.06739152, -0.11829862, -0.29873747, 0.044883754, -0.02737334, 0.35752672, 0.027660733)); + target3 += mul(nb3, float4x4(-0.031477857, -0.061355617, 0.14307205, -0.27185053, 0.0042110113, -0.17895593, 0.18448347, 0.1663187, -0.027779656, -0.038476624, -0.20109327, 
0.0049036117, -0.33461937, -0.11617029, 0.16388293, 0.08732086)); + target3 += mul(nc3, float4x4(-0.14116575, -0.2656471, 0.11648339, -0.0032394545, 0.1182878, -0.3112847, 0.022472465, 0.01861419, -0.17598355, 0.09062213, -0.078444645, 0.08435301, -0.076718464, -0.27557522, 0.2719488, -0.2709603)); + target3 += mul(nd3, float4x4(0.27406302, -0.038197294, 0.08674393, -0.1581159, 0.13235791, -0.2564229, 0.1109576, -0.0176378, 0.15548801, -0.0590908, -0.017661547, -0.2397164, -0.13061532, 0.23031203, 0.13042833, -0.1644423)); + target3 += mul(ne3, float4x4(-0.07506608, 0.038386136, -0.079568535, -0.14536263, -0.14519933, 0.049832735, -0.0716522, 0.08434604, -0.12847446, 0.0008543391, -0.14790097, 0.021308336, -0.28987315, 0.2929442, -0.057600517, 0.0779305)); + target3 += mul(nf3, float4x4(-0.026810233, 0.11869411, -0.11281911, -0.14480188, -0.22689806, 0.28260702, 0.08524954, -0.016079135, -0.139977, 0.1590218, 0.24256052, 0.11876038, 0.1039834, 0.10720082, 0.15955658, -0.08241476)); + target3 += mul(ng3, float4x4(-0.0018456473, -0.044888236, 0.2312576, -0.2259125, 0.1552541, -0.10646746, 0.25436193, -0.0140782725, -0.11281806, -0.045578834, 0.089749135, -0.14050213, 0.09813328, -0.5474639, 0.084324725, -0.13670866)); + target3 += mul(nh3, float4x4(-0.18577714, 0.0991832, 0.02898408, 0.04317898, 0.25488335, -0.30257443, 0.0083487155, 0.00078779995, -0.0014885734, -0.116033524, -0.12751958, 0.20800439, -0.13863127, -0.14012383, -0.082795866, 0.07694529)); + target3 += mul(ni3, float4x4(0.124679685, 0.012901697, 0.15855546, -0.031145798, 0.044944238, -0.1519666, -0.015208867, 0.029840399, 0.07195047, 0.17145973, 0.06601934, -0.03358433, 0.16031715, 0.16808309, -0.007914282, -0.19619752)); + target3 += float4(-0.109316595, 0.025873583, 0.05582306, 0.10272255); + + float3 target4 = tex8.SampleLevel(sam, pos, 0).rgb; + target4 += mul(e1, float4x3(0.037913825, -0.0099191405, -0.018130798, -0.0065440857, 0.004536478, -0.0019739012, -0.014918686, -0.00011652434, 
0.0007071924, -0.0033633227, -0.018028691, -0.014883887)); + target4 += mul(e2, float4x3(-0.021300001, -0.039009467, -0.043097164, -0.008222791, 0.057612088, 0.063239105, 0.023676023, -0.0119777955, -0.020785704, 0.03422571, -0.009187399, -0.016286165)); + target4 += mul(e3, float4x3(0.031610258, -0.022373654, -0.04004249, 0.015456217, -0.014708875, -0.017118618, -0.0235428, 0.0103508085, 0.020143243, 0.0044788374, -0.017377898, -0.023227183)); + target4 += mul(ne1, float4x3(-0.036366682, 0.007874863, 0.016618004, 0.0022973057, -0.010600425, -0.012978575, 0.0070587453, 0.005480104, 0.0052379463, -0.02330911, -0.002091681, -0.0004570695)); + target4 += mul(ne2, float4x3(0.0011265673, 0.017461559, 0.01678395, 0.019458788, -0.032603145, -0.042017594, -0.026735391, 0.007520235, 0.01661426, -0.023014631, 0.027602635, 0.040214695)); + target4 += mul(ne3, float4x3(-0.05236764, 0.007274719, 0.023289332, -0.033428065, 0.0054935357, 0.014490033, 0.016193395, -0.012767524, -0.022695007, -0.01161452, 0.015592775, 0.017280621)); + + tex1[gxy] = target1; + tex2[gxy] = target2; + tex3[gxy] = target3; + tex7[gxy] = float4(target4, 1); +} + +//!PASS 8 +//!DESC Conv-4x3x3x24, Conv-3x1x1x120 +//!IN INPUT, tex1, tex2, tex3, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass8(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e1 = tex1.SampleLevel(sam, pos, 0); + float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); 
+ float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na1 = max(-a1, 0); + float4 nb1 = max(-b1, 0); + float4 nc1 = max(-c1, 0); + float4 nd1 = max(-d1, 0); + float4 ne1 = max(-e1, 0); + float4 nf1 = max(-f1, 0); + float4 ng1 = max(-g1, 0); + float4 nh1 = max(-h1, 0); + float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + float4 e2 = tex2.SampleLevel(sam, pos, 0); + float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na2 = max(-a2, 0); + float4 nb2 = max(-b2, 0); + float4 nc2 = max(-c2, 0); + float4 nd2 = max(-d2, 0); + float4 ne2 = max(-e2, 0); + float4 nf2 = max(-f2, 0); + float4 ng2 = max(-g2, 0); + float4 nh2 = max(-h2, 0); + float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + float4 a3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0); + float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); 
+ float4 e3 = tex3.SampleLevel(sam, pos, 0); + float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + float4 i3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); + + float4 na3 = max(-a3, 0); + float4 nb3 = max(-b3, 0); + float4 nc3 = max(-c3, 0); + float4 nd3 = max(-d3, 0); + float4 ne3 = max(-e3, 0); + float4 nf3 = max(-f3, 0); + float4 ng3 = max(-g3, 0); + float4 nh3 = max(-h3, 0); + float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + float4 target1 = mul(a1, float4x4(0.03482331, -0.14944118, 0.046244163, -0.05941585, -0.07728179, 0.06265427, -0.045520462, 0.0871402, 0.0897178, -0.16006349, 0.008391846, -0.16923702, 0.25602654, 0.051176835, 0.011442495, -0.24914353)); + target1 += mul(b1, float4x4(-0.114224955, -0.048990358, 0.0317376, 0.19175068, -0.112552375, 0.037553445, -0.095972225, 0.123118624, 0.12175324, 0.030322522, 0.054718968, -0.39031324, 0.28009677, 0.07727779, 0.16123495, -0.2772586)); + target1 += mul(c1, float4x4(-0.06794576, 0.2141763, 0.1750928, 0.12166446, -0.13643269, 0.24814922, 0.037389282, 0.0035949312, -0.06241508, 0.041635923, -0.08047354, 0.010511207, 0.11825532, -0.28878912, 0.17174155, -0.25881785)); + target1 += mul(d1, float4x4(-0.0143542895, -0.010602584, -0.04226417, -0.04447678, -0.24656619, -0.053967457, -0.16034846, 0.04648599, 0.18855657, -0.20268312, 0.03610814, 0.022015022, -0.056165848, 0.17901546, -0.044555657, -0.089903764)); + target1 += mul(e1, float4x4(-0.05440948, 0.12527943, -0.08222082, -0.035428505, 0.2267783, 0.08257505, 0.056446668, -0.016560426, 0.17754072, -0.12249645, 0.15439054, -0.03524935, -0.481085, -0.0961953, -0.3649979, 0.17484458)); + target1 += mul(f1, float4x4(0.04679537, 0.15213947, 
-0.018560365, -0.027304955, 0.012417035, 0.033497352, -0.09031395, -0.28588498, 0.15779394, -0.014294813, 0.13411845, 0.07399604, 0.05855495, -0.15351114, -0.06195114, -0.033846762)); + target1 += mul(g1, float4x4(0.023053877, 0.09145102, -0.056014817, -0.103127845, -0.19463558, 0.009014216, 0.045743883, 0.105235375, 0.148088, -0.071407385, 0.1755759, 0.012725914, 0.04554227, -0.10347383, 0.23475589, -0.039336383)); + target1 += mul(h1, float4x4(0.015826384, -0.042269874, 0.056471203, 0.009655403, 0.020275326, 0.33224702, 0.009298279, 0.17336445, -0.018828178, 0.10215806, 0.049400896, 0.17038062, 0.057019416, 0.07406004, 0.03215971, 0.12004367)); + target1 += mul(i1, float4x4(-0.04070164, 0.027889524, 0.02177609, -0.16229889, -0.062548086, -0.027596086, -0.12423675, 0.09836905, 0.059131406, -0.047028925, -0.057379283, -0.104133494, 0.14117907, 0.065780245, -0.023410192, 0.061447598)); + target1 += mul(a2, float4x4(-0.0021021653, 0.077328384, -0.06821109, -0.19499542, -0.20052336, 0.12387703, 0.055179324, 0.19800851, -0.120995775, 0.42741755, 0.091175236, 0.020587375, 0.0042481394, 0.12762432, -0.06114739, 0.32906154)); + target1 += mul(b2, float4x4(-0.019685917, -0.040947627, 0.18565354, -0.46952146, -0.05437026, -0.026286738, -0.07812705, -0.006736804, 0.008634472, 0.23204291, -0.11855498, -0.12303054, 0.38381273, 0.52490336, -0.3265505, 0.21160527)); + target1 += mul(c2, float4x4(-0.18054116, 0.0051548174, 0.4753756, 0.17605813, -0.073726274, 0.15002227, -0.1850507, 0.0990851, 0.00921903, 0.13224806, 0.2253796, -0.20556282, -0.109973975, 0.046794172, 0.16226935, 0.08110087)); + target1 += mul(d2, float4x4(0.010205323, -0.09720397, 0.029996833, -0.10599145, -0.052096535, -0.053859178, -0.07132246, -0.040684257, -0.0064441697, 0.20659602, 0.26825082, 0.05841878, -0.102910444, -0.19080183, 0.0009101689, 0.31210572)); + target1 += mul(e2, float4x4(-0.10222517, -0.2537438, 0.17752838, -0.08470953, 0.06963046, -0.010764146, -0.033626176, 0.15240349, -0.20436993, 
-0.100720614, 0.0444932, 0.20770444, 0.031174636, -0.010206393, 0.09037244, -0.55185884)); + target1 += mul(f2, float4x4(-0.26993337, -0.020421378, 0.18469644, -0.21327373, 0.06911363, 0.014826783, 0.056256857, -0.06809406, -0.083685525, -0.0984942, -0.0171533, -0.22855683, -0.08748469, -0.1396983, -0.11391806, -0.072031595)); + target1 += mul(g2, float4x4(0.058208484, -0.091674164, 0.12105436, 0.10939658, -0.031674437, -0.05118359, -0.22271338, 0.028467823, -0.17376278, -0.123112075, -0.071464434, 0.17473213, -0.3117644, -0.18276823, 0.07496323, 0.1509144)); + target1 += mul(h2, float4x4(-0.05188268, 0.15533312, 0.22820903, 0.17042106, -0.089846164, -0.005064528, 0.04796515, 0.026351674, 0.04572985, 0.09318132, -0.038517136, -0.074062705, -0.036520045, 0.10455916, 0.14278695, 0.14136232)); + target1 += mul(i2, float4x4(-0.14247061, 0.08110525, -0.075231634, 0.31358016, -0.18515967, 0.06256364, -0.0484006, -0.017976558, -0.02657821, -0.028635541, 0.012627999, 0.054765414, -0.0019829564, 0.15433973, -0.14973663, 0.12542003)); + target1 += mul(a3, float4x4(-0.17475623, 0.073300175, -0.18943344, 0.13311169, -0.026332445, 0.14347847, 0.20637734, 0.19913399, 0.24245638, -0.01550613, -0.09732818, -0.3588367, -0.11411046, -0.15500076, -0.09746209, -0.14517665)); + target1 += mul(b3, float4x4(0.17039534, -0.20694748, 0.07940825, -0.29572237, -0.26519805, 0.126274, -0.22870643, 0.064273715, -0.22092016, -0.03348832, -0.08794688, -0.006346166, -0.14190583, -0.16601795, 0.15920593, 0.097251594)); + target1 += mul(c3, float4x4(-0.08191819, -0.010720725, -0.10248115, -0.066204295, 0.13338344, 0.1886245, -0.1326061, -0.107134834, -0.06729155, -0.1295641, -0.09283412, -0.1643324, 0.06636283, 0.35525218, 0.0003396009, 0.04252375)); + target1 += mul(d3, float4x4(0.018834922, 0.09374041, -0.04844811, -0.086488485, 0.36477897, -0.035175197, 0.10250587, 0.009436049, 0.09109528, 0.25697815, 0.12989257, -0.10460797, 0.13357025, -0.15341914, -0.14009036, -0.27027166)); + target1 += 
mul(e3, float4x4(-0.046186987, -0.04721098, -0.10386561, 0.042765476, 0.10490874, -0.14259604, 0.03565186, 0.11228278, -0.1333764, 0.111047596, -0.20885478, 0.19843856, -0.07459371, -0.054204836, 0.0895249, 0.053722855)); + target1 += mul(f3, float4x4(0.057206515, -0.016081734, 0.04002097, 0.09536414, 0.27507696, 0.009611371, 0.2858957, 0.016278412, 0.091774575, -0.020857088, -0.1354684, -0.046553783, -0.10013868, 0.059088446, 0.1768699, 0.02272152)); + target1 += mul(g3, float4x4(0.028798534, 0.21127033, 0.01716753, 0.020965017, -0.08091736, -0.15006042, -0.29822782, 0.019595081, -0.029534074, -0.0653482, 0.11786061, -0.047803946, 0.011680036, 0.010721205, -0.2639438, 0.15042429)); + target1 += mul(h3, float4x4(-0.098251216, 0.050176363, -0.0426328, -0.037756715, -0.20687164, -0.3096553, -0.2210454, -0.03763596, -0.022159807, 0.044400796, 0.09344259, -0.05465652, -0.039273985, -0.096617654, -0.19118373, 0.1643556)); + target1 += mul(i3, float4x4(-0.11874077, 0.021691876, 0.15513967, -0.012177898, -0.1298149, -0.08811524, 0.017105984, -0.047422726, -0.033107523, 0.0058112773, -0.08017183, -0.020971343, -0.41264817, 0.075800754, 0.1080831, -0.082354255)); + target1 += mul(na1, float4x4(0.0032239188, -0.28178176, -0.19482347, 0.054150533, 0.40856144, -0.23284851, 0.020973913, -0.09307241, 0.4258893, -0.034946837, -0.043585345, 0.16226469, 0.045328375, 0.03566808, 0.0712809, 0.12283043)); + target1 += mul(nb1, float4x4(-0.15139721, -0.2489635, 0.2122619, -0.08517609, 0.23784684, -0.070994906, 0.3132446, -0.36519074, -0.048850738, -0.36088645, 0.2145936, 0.19312155, -0.2579365, -0.12489612, -0.075510584, 0.16864875)); + target1 += mul(nc1, float4x4(0.01884723, -0.2775977, 0.0007072475, 0.30131263, 0.01366198, -0.18196137, 0.38918743, -0.03999786, -0.075060904, -0.12210868, 0.14701048, 0.18474291, -0.023507686, 0.13071437, -0.036284998, 0.26304045)); + target1 += mul(nd1, float4x4(-0.08185283, -0.09152341, -0.13410091, -0.13518219, 0.10747411, 0.007974842, 0.11000113, 
0.19898382, -0.18449086, 0.058887243, -0.02379909, -0.038734827, 0.041931048, 0.081884705, 0.015872778, 0.08416657)); + target1 += mul(ne1, float4x4(0.05272478, -0.06669923, 0.007233672, 0.039665744, 0.021820793, -0.14690521, -0.26392132, 0.007352069, -0.04682333, -0.028595299, -0.34463075, -0.14347489, 0.00084401644, -0.030389901, 0.022279145, 0.14215061)); + target1 += mul(nf1, float4x4(0.17942588, 0.27815622, 0.39199513, 0.17727011, -0.14894293, -0.1705316, 0.038263746, 0.025509953, -0.12031536, 0.15371376, -0.30855826, 0.2394013, -0.20185183, 0.121072985, 0.070580006, -0.12321835)); + target1 += mul(ng1, float4x4(0.043464154, -0.4329999, 0.12176987, 0.1863519, -0.14952634, -0.03741596, 0.3588594, 0.015720207, 0.07319453, 0.04202827, 0.19699398, -0.18537244, -0.040319767, 0.081377335, 0.045191478, -0.070804425)); + target1 += mul(nh1, float4x4(0.14033453, -0.13302796, -0.058896482, 0.14912021, 0.25856513, -0.10442178, 0.3958381, -0.08528721, 0.3291926, -0.0024321752, 0.017541584, -0.31020027, 0.13845283, -0.24636552, -0.07630463, -0.32314765)); + target1 += mul(ni1, float4x4(0.005189076, 0.20132092, 0.069775395, 0.086517565, 0.2727916, -0.079313666, 0.14164488, -0.16358389, -0.103817366, -0.11717267, 0.019068012, 0.016953465, 0.2551057, 0.14430785, 0.00088051375, -0.23318093)); + target1 += mul(na2, float4x4(-0.008894086, 0.03201216, -0.13398862, 0.06335705, 0.13424714, -0.06514535, -0.19045971, -0.23764557, 0.05714849, -0.30345356, 0.0092409095, 0.16878125, -0.07465451, -0.015541787, 0.033304304, -0.113849334)); + target1 += mul(nb2, float4x4(0.12612185, -0.0715257, 0.16217476, -0.024476554, 0.10614049, 0.03700835, 0.08482953, -0.08358318, 0.098786205, -0.009351742, -0.15457323, 0.113223985, -0.011500662, -0.13529003, -0.058090385, 0.11290306)); + target1 += mul(nc2, float4x4(0.050260257, -0.056368183, 0.21489042, 0.14299081, -0.113755986, -0.22816344, 0.27275258, -0.0015117057, 0.14195545, -0.16299947, 0.049762867, 0.22725838, 0.06814647, -0.049368583, 
-0.08577855, -0.097503126)); + target1 += mul(nd2, float4x4(-0.0083364155, -0.052837223, -0.0846245, 0.053218696, 0.28152695, 0.19495425, -0.19180301, -0.26389152, -0.12953846, -0.102649055, -0.19722337, -0.15851225, 0.1725756, 0.056898903, 0.01023057, -0.033678)); + target1 += mul(ne2, float4x4(-0.044510186, 0.033060472, 0.26517934, -0.25734264, 0.11998833, -0.05369093, 0.19721112, -0.15774135, 0.061851945, -0.03981009, -0.034191426, -0.23678938, -0.013630672, -0.114661574, 0.096060224, 0.17892191)); + target1 += mul(nf2, float4x4(-0.14728574, -0.031724717, 0.13967156, 0.03676961, -0.09500629, -0.09584641, -0.3221665, 0.14028065, -0.09116274, -0.08160823, -0.03841335, 0.21315134, -0.025303967, -0.081841856, 0.024239374, 0.004911813)); + target1 += mul(ng2, float4x4(-0.16211908, -0.07225985, -0.06955749, 0.025049562, 0.016382609, 0.20329225, 0.23490642, 0.04267578, -0.008350769, 0.0037089891, 0.09515623, -0.06105943, 0.13584909, 0.09705268, -0.062350716, -0.074614085)); + target1 += mul(nh2, float4x4(0.025970146, -0.14939465, -0.08123037, -0.008690572, 0.16139375, 0.052395687, -0.03863909, 0.0953437, -0.103880964, -0.04672169, -0.078161545, 0.04628746, -0.019205566, -0.006394265, -0.009116098, 0.024979865)); + target1 += mul(ni2, float4x4(0.15779239, 0.009630995, -0.06269132, -0.11111548, 0.11478004, -0.0780718, -0.24617292, 0.05763241, 0.02476824, 0.0631411, -0.2777113, -0.010855008, 0.10766442, 0.020561088, -0.029775767, -0.060535327)); + target1 += mul(na3, float4x4(0.6058991, -0.29998928, -0.09883167, -0.36967963, 0.104703955, -0.1886391, 0.07915164, -0.02375336, 0.041111898, 0.09216705, 0.046296816, 0.24895348, -0.015484279, 0.06852782, 0.04170421, -0.008594877)); + target1 += mul(nb3, float4x4(-0.29542375, -0.11578118, -0.047219444, -0.10781526, 0.13507344, 0.09601799, 0.08975014, 0.09067836, 0.1565405, 0.082328156, 0.09181261, 0.04524675, -0.08546339, 0.107942745, 0.057727177, 0.15223116)); + target1 += mul(nc3, float4x4(-0.013349778, 0.15176241, 
-0.08432594, 0.10960892, 0.081638165, -0.13559791, -0.06557744, 0.01141079, 0.10179259, 0.35195625, 0.23831062, 0.13698545, -0.0073695974, -0.020154724, -0.2515228, 0.030157704)); + target1 += mul(nd3, float4x4(0.20604958, -0.09164565, 0.049274493, -0.111016676, -0.046125744, -0.22138667, -0.10698992, 0.07003299, 0.09432274, 0.13457412, 0.08988733, 0.16862586, -0.16797546, -0.0130331, -0.009054985, -0.01443074)); + target1 += mul(ne3, float4x4(-0.17840317, -0.079730295, 0.11214133, -0.015679857, 0.07462782, 0.1700189, -0.03588104, -0.055766776, 0.2527381, -0.040385213, 0.18867272, 0.15786001, -0.03973228, -0.053887095, -0.001591716, -0.050709404)); + target1 += mul(nf3, float4x4(0.24581482, 0.09119475, 0.049080588, 0.25806418, -0.005062941, 0.10484669, 0.05778071, 0.23681131, -0.09603774, 0.009163983, 0.19752978, 0.104258336, 0.13455175, -0.0034275826, -0.080408186, 0.10462319)); + target1 += mul(ng3, float4x4(0.07782564, -0.2789083, -0.13887207, -0.019308591, 0.25710207, -0.21921843, 0.0015911289, 0.080053106, -0.014144128, 0.074144535, 0.043883692, 0.2513407, 0.10068346, -0.17853074, 0.20460746, 0.04092755)); + target1 += mul(nh3, float4x4(-0.048100162, 0.042697787, -0.04842476, 0.18837112, 0.051532917, 0.088649124, -0.014739274, -0.023566334, 0.44025096, -0.10545216, -0.19667506, 0.097041525, 0.0008772463, -0.05555525, 0.069248185, 0.1176431)); + target1 += mul(ni3, float4x4(-0.01590801, 0.016883895, -0.09720278, 0.14969985, -0.099172674, -0.04525934, 0.13815412, 0.024430253, 0.0247448, 0.015865842, -0.10956577, 0.22523156, 0.22455531, -0.100728914, -0.053454183, 0.13590883)); + target1 += float4(-0.06673833, 0.01457202, -0.036676105, -0.06303146); + + float4 target2 = mul(a1, float4x4(0.23879923, 0.040317934, 0.22145784, -0.08336839, -0.16966912, 0.08528278, 0.2684323, 0.17057978, 0.1467542, -0.041414198, 0.03689633, 0.10483362, 0.04390369, 0.2617799, 0.13374175, 0.21909657)); + target2 += mul(b1, float4x4(0.013090143, 0.010181773, -0.022144144, -0.038787983, 
0.17343685, 0.06579225, 0.036516637, -0.18973681, 0.11963511, 0.111920275, 0.13276073, 0.04570385, -0.009538788, -0.028358553, 0.06043411, 0.14202546)); + target2 += mul(c1, float4x4(0.2273523, 0.086418256, 0.058296323, -0.023292154, -0.016248869, 0.08703014, -0.14549017, 0.15725356, 0.26235282, 0.13655783, 0.06703612, -0.0746187, 0.18931058, -0.009649255, 0.27345505, 8.478176e-05)); + target2 += mul(d1, float4x4(-0.033401724, -0.064518325, -0.15034138, 0.05246805, 0.058772895, -0.176813, 0.078342214, -0.0020414025, 0.06217457, -0.20738979, -0.16368344, 0.03266785, 0.04921403, 0.112299785, -0.123247504, 0.0994201)); + target2 += mul(e1, float4x4(-0.2553642, 0.14918567, -0.14866059, -0.03617286, 0.032998353, -0.15592867, 0.087743975, -0.00049046543, -0.32823107, -0.107454315, 0.002674161, -0.01887908, 0.0833454, -0.03806806, -0.14595793, -0.20520253)); + target2 += mul(f1, float4x4(0.02986423, 0.028604368, -0.011768948, 0.10195398, -0.102379754, 0.1362889, -0.041802816, -0.084387876, -0.008137814, 0.09726054, 0.10758101, 0.09259081, -0.07889878, -0.07312139, 0.17478421, -0.033481717)); + target2 += mul(g1, float4x4(0.058965955, 0.024142284, 0.22129168, 0.04082889, 0.15887728, 0.103434056, -0.21192761, 0.06533756, 4.1846484e-05, -0.24297993, 0.17849778, -0.115734324, -0.11500629, 0.15694802, 0.04261307, 0.17415777)); + target2 += mul(h1, float4x4(0.01345909, 0.017319864, -0.0520044, 0.06891368, 0.078165226, -0.07047419, -0.013746107, -0.058885146, -0.10569072, -0.032166608, -0.02835551, -0.09911323, -0.062442902, 0.13147296, 0.1815978, -0.0042537497)); + target2 += mul(i1, float4x4(0.1606494, 0.05220283, 0.13166267, 0.10574164, -0.19102532, 0.03446111, -0.055919666, 0.057688963, 0.26081654, 0.03648174, 0.03616491, 0.046591155, 0.21643688, 0.052122388, 0.050889883, 0.29552755)); + target2 += mul(a2, float4x4(-0.024097791, -0.080628626, 0.12568358, 0.12252691, -0.16359662, 0.0051886803, -0.01954068, 0.02195983, -0.18788633, -0.030897139, -0.09377947, 0.15688346, 
-0.14129396, -0.11748491, -0.3835284, -0.022647042)); + target2 += mul(b2, float4x4(0.11930519, 0.24957322, 0.015541883, -0.11232224, -0.058490105, -0.049757216, 0.075522415, 0.09442181, 0.076607205, 0.037432365, -0.08629132, 0.008422209, -0.013450555, 0.10305229, -0.04537291, -0.08230579)); + target2 += mul(c2, float4x4(-0.050578903, -0.20879799, -0.04393353, 0.0015126837, -0.23416555, 0.01141535, -0.009691543, 0.06217469, -0.10707423, 0.20022671, 0.15437399, -0.04760398, -0.14287886, 0.2682982, -0.2561911, 0.033707578)); + target2 += mul(d2, float4x4(0.11812356, -0.29858422, 0.09146616, 0.052722417, -0.023986591, 0.0933364, 0.14801602, -0.10148, -0.15320316, -0.0028770058, -0.103183694, -0.006425709, 0.021735031, -0.47796893, -0.18304059, 0.084628224)); + target2 += mul(e2, float4x4(-0.09104168, 0.03286581, 0.04459324, -0.22438659, 0.12870388, -0.1360097, -0.15926069, 0.071017005, 0.074596204, -0.09715285, -0.07479851, -0.20799732, -0.29060403, -0.107118085, 0.25210482, 0.16397184)); + target2 += mul(f2, float4x4(-0.12460523, 0.16706169, 0.30230346, 0.054767944, -0.116781175, 0.19446343, -0.21735692, -0.026413433, 0.052394047, 0.020679068, -0.15584053, 0.061340448, 0.04663544, 0.27504724, 0.20286065, 0.3490867)); + target2 += mul(g2, float4x4(0.21607491, -0.21738917, -0.009051781, -0.07276944, 0.3103053, 0.15334722, 0.28409463, -0.17096485, 0.031179685, 0.2009012, -0.26543948, -0.19882691, 0.032035686, -0.35383067, -0.17236927, -0.113232605)); + target2 += mul(h2, float4x4(-0.11165131, -0.2941282, -0.029304054, 0.106581636, 0.21548472, -0.21285897, -0.043579012, -0.047211695, 0.027249131, 0.28340155, 0.082085736, -0.04485162, -0.24723412, -0.0007002699, 0.19643609, 0.2518287)); + target2 += mul(i2, float4x4(-0.1854792, -0.008842361, -0.08581101, 0.16760491, -0.10669554, 0.21352866, 0.1252966, -0.04194005, -0.07666296, 0.07259658, 0.10786684, -0.03364238, 0.1547786, -0.018965635, -0.13252488, 0.23715465)); + target2 += mul(a3, float4x4(0.1451508, 0.10011578, 
0.07156718, 0.04740723, -0.19702536, 0.06286184, -0.29180148, -0.30204237, -0.07179627, 0.056043524, 0.27749023, -0.07051612, 0.1010544, -0.008737285, -0.13163415, -0.066848055)); + target2 += mul(b3, float4x4(0.07561846, -0.14928432, 0.027951663, -0.07524044, 0.10025779, -0.21305043, 0.008214884, 0.16192347, 0.04002263, -0.10425787, 0.018522112, -0.08742078, 0.039168026, 0.010691633, 0.0025965972, -0.016103525)); + target2 += mul(c3, float4x4(-0.045149434, 0.033272427, 0.06018518, -0.068993434, 0.017645787, 0.27027842, -0.25670657, 0.04577214, 0.002479582, -0.051434338, 0.25425145, -0.093131274, 0.09688695, 0.14416668, -0.1216349, 0.0229849)); + target2 += mul(d3, float4x4(0.030369451, 0.020748299, 0.034542933, 0.09359397, -0.37202555, 0.2808392, -0.2659807, -0.01941035, -0.22399698, 0.08132304, -0.0014507625, -0.017793491, 0.037623137, -0.029477628, -0.0720025, -0.15816812)); + target2 += mul(e3, float4x4(0.33115733, -0.013734702, 0.0101467, -0.12268663, 0.43017596, -0.32643738, -0.3273918, 0.1109477, 0.10758731, 0.070155494, -0.24037434, -0.0016639809, -0.06652544, 0.13758285, -0.072496586, -0.10106904)); + target2 += mul(f3, float4x4(0.19126198, -0.14967397, -0.18345782, -0.08460439, 0.13229868, -0.21144699, -0.058821946, -0.5039749, 0.24892776, 0.20228972, -0.06919527, -0.15942183, 0.12435562, -0.012193792, -0.2627704, 0.13625085)); + target2 += mul(g3, float4x4(-0.10896958, 0.044015855, -0.0181369, 0.10650041, -0.24092299, 0.18979153, -0.26630878, 0.06806665, -0.17771733, -0.2699458, -0.1144395, 0.014184961, -0.288627, -0.19622655, 0.39838296, -0.11162213)); + target2 += mul(h3, float4x4(-0.084831044, -0.02721028, 0.109261006, 0.087307416, -0.33783588, 0.08306577, -0.027817784, -0.10534335, -0.15593721, -0.013186225, -0.011052375, 0.10786937, -0.00060474424, 0.00431786, 0.38164118, 0.14728197)); + target2 += mul(i3, float4x4(-0.26669395, -0.09910907, 0.03960142, -0.21382816, -0.5042419, -0.12542717, 0.07396011, -0.24485987, -0.1770452, -0.00011720843, 
0.11425563, 0.07332528, -0.06640686, -0.11683248, 0.003071298, 0.05543171)); + target2 += mul(na1, float4x4(-0.16784829, -0.031949766, -0.043842897, -0.09577157, 0.16381639, -0.33382246, -0.10782627, 0.07903589, 0.04620696, -0.04180326, -0.09783348, 0.3095548, 0.06762379, 0.021955997, -0.14974354, -0.143973)); + target2 += mul(nb1, float4x4(-0.14442697, -0.044818707, 0.025801856, 0.08461569, -0.20247138, 0.060513508, -0.1674155, 0.13058512, -0.08026784, -0.3141148, -0.04791329, -0.14586422, 0.16113773, -0.035697844, 0.21863447, -0.099939525)); + target2 += mul(nc1, float4x4(-0.298011, -0.053686857, -0.31031471, 0.11162896, 0.22341007, -0.052881762, 0.13043529, 0.15810435, -0.37888956, -0.31480342, 0.33116004, 0.06646278, -0.05665705, -0.03861846, 0.083101824, 0.003781792)); + target2 += mul(nd1, float4x4(-0.08649798, -0.1088245, 0.32511878, -0.16572024, 0.050254185, -0.252013, -0.040132295, 0.17312634, -0.016653338, -0.43009317, 0.5093538, 0.06922151, 0.08760091, -0.14250961, 0.4053319, -0.10107622)); + target2 += mul(ne1, float4x4(0.083406106, -0.16932109, 0.06787343, -0.05178522, -0.20603026, -0.09058593, 0.16128129, -0.22712888, 0.05429396, -0.15098302, 0.3041655, -0.07668127, -0.15419695, 0.4462755, 0.1874267, 0.17312653)); + target2 += mul(nf1, float4x4(0.19148338, 0.052311547, -0.13830717, 0.2996034, 0.05850986, 0.05484371, 0.0361025, 0.20699011, 0.0057291416, -0.12026241, 0.02678267, 0.12696257, -0.019684052, -0.09031823, 0.15297134, 0.13705085)); + target2 += mul(ng1, float4x4(-0.20881316, 0.14526081, -0.41917932, -0.16191165, 0.1262819, -0.23026188, -0.2561112, 0.049415354, -0.1497713, -0.009612483, -0.070241526, -0.039475128, 0.093497746, -0.1318667, -0.105637155, -0.21147394)); + target2 += mul(nh1, float4x4(0.042843655, -0.11218648, 0.013391185, 0.06646476, -0.24418473, -0.037722886, 0.08446243, -0.0018849184, 0.030670485, 0.27686, -0.15015033, 0.21402857, 0.10094001, 0.3145764, -0.17310384, -0.10199286)); + target2 += mul(ni1, float4x4(-0.14084649, 
0.0033693435, -0.34370998, 0.1079324, 0.28795156, 0.14933614, 0.10669996, 0.12305359, -0.040551323, -0.07330404, -0.15179317, 0.069975436, 0.2920918, 0.020814283, -0.13944869, 0.09579582)); + target2 += mul(na2, float4x4(0.10180969, -0.021659529, -0.13541374, 0.0908069, 0.11346961, -0.0011830843, -0.19612141, -0.11018402, 0.12915576, -0.095653616, -0.13800405, -0.18932076, 0.12277476, 0.09764832, 0.114954636, -0.1578187)); + target2 += mul(nb2, float4x4(-0.07191152, -0.053082727, -0.067936264, 0.045203943, 0.13166252, 0.23256709, -0.288239, -0.08163785, -0.020897634, 0.15756424, -0.17083916, -0.13654962, -0.021136044, -0.14208466, -0.0040715886, 0.03707775)); + target2 += mul(nc2, float4x4(0.15754776, -0.042640373, -0.033360127, -0.06743833, -0.06533689, -0.16307046, -0.018182967, -0.060084824, -0.087093055, 0.036133945, -0.23553473, -0.40821072, -0.053628575, 0.026669571, 0.19045922, -0.035846557)); + target2 += mul(nd2, float4x4(0.07448724, 0.067469016, -0.066770956, 0.0030078532, -0.1173964, -0.012352791, -0.19451907, -0.021427047, 0.19994271, -0.0029543424, -0.034913633, 0.13859013, 0.048614684, 0.193721, -0.09548589, -0.026358109)); + target2 += mul(ne2, float4x4(-0.3411652, 0.23141026, 0.10978134, 0.07787867, -0.18412182, 0.15478246, 0.25846902, -0.13144507, -0.28535685, 0.086631864, 0.12785226, 0.0033878016, 0.03504869, -0.034950025, -0.17758164, 0.024054492)); + target2 += mul(nf2, float4x4(-0.019755604, -0.21744813, 0.14325249, 0.21274537, -0.04985571, -0.24407099, -0.02035735, 0.21803972, -0.16886176, -0.05224696, 0.20342873, -0.18543948, 0.0096319495, -0.1624773, 0.14216544, -0.081235185)); + target2 += mul(ng2, float4x4(0.20382723, -0.16942358, -0.15685835, 0.024889609, -0.3226424, -0.10469345, -0.46887016, 0.016228858, -0.1362387, -0.13054538, -0.0783913, -0.06385014, -0.08139782, 0.12035177, 0.21293128, -0.045476373)); + target2 += mul(nh2, float4x4(0.13462923, 0.1384135, -0.055161975, -0.099216595, -0.16864173, -0.15129986, -0.2535725, 0.22653887, 
-0.11102492, -0.09068262, -0.0044067153, -0.0603752, -0.095367156, -0.056415606, 0.0075126593, -0.009610249)); + target2 += mul(ni2, float4x4(0.1393697, 0.13611916, 0.090671785, 0.08593501, 0.07983876, -0.0068050954, -0.28696343, -0.17570612, -0.075322844, 0.06774856, -0.086022906, 0.09080408, 0.022836372, 0.018536389, 0.042727504, -0.043635663)); + target2 += mul(na3, float4x4(-0.0050578844, -0.04774735, 0.004759578, 0.09087925, 0.16171533, 0.01599633, 0.08316812, -0.09584462, 0.119889505, 0.003919012, -0.21555036, 0.2426096, -0.12047291, 0.10978759, -0.33754483, 0.15740488)); + target2 += mul(nb3, float4x4(-0.11716536, 0.08724526, -0.023726968, -0.12922543, -0.05567669, -0.021379862, -0.2031672, -0.023840401, -0.024058433, -0.081542544, -0.19171208, 0.051525865, -0.008789576, -0.16808029, -0.049115162, 0.052190997)); + target2 += mul(nc3, float4x4(0.13842055, -0.13871577, 0.0954928, 0.19763501, -0.049218517, -0.21299022, -0.14797242, -0.0996971, 0.004526675, -0.107513115, -0.31193256, -0.13720018, 0.01550265, 0.017279146, -0.03583415, 0.053429827)); + target2 += mul(nd3, float4x4(-0.0723815, 0.034265626, 0.20389315, -0.14053439, 0.18389022, 0.033574764, -0.039723963, -0.14978175, -0.084361784, -0.15831995, 0.49169922, -0.09837507, 0.0017199022, -0.09433373, 0.13506836, -0.06360633)); + target2 += mul(ne3, float4x4(-0.1265364, 0.24196059, 0.21346883, -0.035202276, -0.16924065, -0.039915517, 0.15855956, -0.00046526943, -0.30319792, 0.47292793, 0.19538064, -0.046434846, 0.0041063935, 0.026737224, 0.14377008, -0.086429365)); + target2 += mul(nf3, float4x4(-0.052318633, 0.01695744, 0.073576115, 0.2596724, -0.062066127, -0.051519766, -0.051504273, 0.05866547, -0.08328452, -0.28105405, 0.078826845, 0.18008032, 0.18682955, 0.0076535186, -0.05532054, -0.20601955)); + target2 += mul(ng3, float4x4(0.11029233, 0.16958456, 0.06657061, -0.019656291, 0.11484087, -0.044068743, 0.24364337, -0.0065622316, 0.28941217, 0.18499708, -0.19709894, -0.19475468, 0.03503256, -0.05113357, 
0.10653205, 0.01789133)); + target2 += mul(nh3, float4x4(0.23000862, 0.21053173, -0.18862817, 0.17935936, -0.15975583, -0.05371, -0.012876548, 0.16915809, 0.048503194, 0.16087084, 0.013947819, -0.2625692, -0.07422495, 0.12091095, -0.07861796, -0.10306009)); + target2 += mul(ni3, float4x4(0.22752777, 0.25302207, -0.12559423, 0.32303494, 0.048354533, -0.09707823, -0.08385863, 0.14718369, 0.08453127, -0.12578502, 0.2255726, 0.28436616, 0.11673125, -0.109367356, -0.024817433, -0.061155386)); + target2 += float4(0.09436162, 0.053628888, -0.037304673, 0.07278107); + + float4 target3 = mul(a1, float4x4(-0.06848254, 0.17351831, 0.08460523, -0.04292461, 0.16476814, 0.12880002, -0.2188432, -0.14287443, -0.03620956, 0.03190214, -0.048488446, 0.13175257, -0.03531708, 0.25060365, -0.06213195, 0.12620556)); + target3 += mul(b1, float4x4(-0.002136314, 0.14399742, 0.033703934, 0.04852668, 0.044694893, 0.044961825, -0.049827278, -0.043917865, 0.13977914, -0.08126432, -0.14917606, 0.04644499, -0.14825742, 0.14075856, 0.03092348, -0.093371935)); + target3 += mul(c1, float4x4(-0.10156521, 0.17292573, 0.12147806, 0.058286913, 0.036107652, 0.11812006, -0.052188348, -0.018111996, -0.033433035, 0.13158733, 0.11174768, 0.3135695, -0.031843673, 0.14830989, 0.094200954, 0.046325628)); + target3 += mul(d1, float4x4(-0.020032655, -0.07413829, 0.08400475, -0.096378304, 0.018955225, 0.022839474, 0.0059678215, -0.1027026, -0.028222635, -0.14191163, 0.1683382, 0.12842403, -0.0019999016, -0.10452298, -0.00084425067, 0.21517049)); + target3 += mul(e1, float4x4(0.01772144, -0.055037472, -0.26999003, 0.08729775, -0.36895162, 0.011868349, 0.09449699, -0.098540016, -0.12167021, -0.14711088, 0.12771331, -0.23740645, 0.15759817, -0.19454266, 0.16208373, 0.24910314)); + target3 += mul(f1, float4x4(-0.01581086, 0.055212107, 0.09454114, 0.04507513, 0.06458917, 0.07870699, 0.043557264, -0.057501283, 0.20402664, 0.22241214, 0.04460486, 0.08704935, 0.16451277, -0.13080528, 0.039666496, -0.026260905)); + target3 
+= mul(g1, float4x4(0.052181657, 0.027077725, 0.06572071, 0.031183861, 0.10252249, -0.08605668, 0.041842632, -0.103617065, -0.10870241, 0.04929309, -0.036834683, 0.035595864, 0.05496096, -0.067191675, -0.021810448, 0.040137228)); + target3 += mul(h1, float4x4(0.12943552, 0.027362846, -0.04002257, 0.06176385, 0.03362332, -0.10467882, 0.33771384, -0.002079538, -0.14528175, 0.14312474, 0.02974133, -0.06945553, -0.33208638, -0.1682957, 0.08194348, -0.072072215)); + target3 += mul(i1, float4x4(-0.10689992, 0.0904542, 0.13820268, 0.13239543, -0.15937562, -0.123537876, -0.33618236, -0.081022464, 0.024027856, 0.26380306, -0.09225592, 0.040485747, -0.01705172, -0.049895052, -0.07952754, 0.030036716)); + target3 += mul(a2, float4x4(-0.1259129, 0.018831972, -0.1832129, 0.01803401, 0.033666562, -0.17717862, 0.087922215, -0.10147714, 0.045267824, -0.25754488, -0.08662288, 0.10354607, 0.10469745, 0.19675997, -0.20195517, 0.24481302)); + target3 += mul(b2, float4x4(-0.094946206, 0.015489291, -0.1777193, 0.037065975, 0.024963535, -0.3277457, -0.08534422, -0.08319194, -0.18495774, -0.09883332, -0.053772286, 0.08554662, -0.1215341, 0.15887743, -0.2965043, -0.11656119)); + target3 += mul(c2, float4x4(-0.34576485, -0.14033535, 0.07531725, -0.14229001, 0.08308607, -0.31519765, -0.15306507, -0.072686926, -0.12345635, -0.08589443, 0.015977165, -0.0041419766, -0.49153492, 0.3021553, 0.16130814, -0.17035122)); + target3 += mul(d2, float4x4(-0.08059237, -0.18008304, 0.23508278, -0.08894493, 0.11107956, 0.23715645, 0.091440715, -0.033679005, 0.23545177, 0.011845169, 0.0054449392, -0.30073527, 0.2796674, -0.1411897, -0.014096338, 0.115184374)); + target3 += mul(e2, float4x4(0.19655375, 0.027063202, -0.3324798, 0.29343468, -0.10879405, 0.16780332, -0.019309124, 0.04614956, 0.15054315, 0.19951852, 0.14648122, 0.28885373, 0.037958838, -0.34874088, -0.025065463, -0.19422896)); + target3 += mul(f2, float4x4(-0.18047136, 0.060818356, -0.13610844, -0.018481744, -0.09979387, 0.0477093, 0.032326147, 
-0.10137375, -0.059743475, 0.05039489, 0.17306165, -0.005998121, -0.009583858, -0.14829919, 0.24446519, -0.22378124)); + target3 += mul(g2, float4x4(0.45342392, 0.19783214, -0.042264447, 0.11951815, 0.017209506, 0.119354434, -0.089858785, 0.03950267, -0.19266395, -0.07500372, -0.02151692, -0.008635288, -0.14962971, -0.00780355, 0.18662006, -0.0046807216)); + target3 += mul(h2, float4x4(-0.13184623, -0.04977233, -0.08034406, -0.08663693, -0.06438305, -0.06699197, 0.15878884, 0.014209137, -0.018352475, -0.12698355, -0.18104841, -0.03212089, -0.31992742, 0.13199449, -0.039823674, -0.18864588)); + target3 += mul(i2, float4x4(-0.22096959, -0.06594324, -0.093964286, -0.069787376, -0.05717438, 0.18509367, -0.19014412, -0.11233723, -0.043684576, -0.04049064, -0.015180749, 0.04026833, -0.09723803, -0.014410513, -0.14038773, -0.20472965)); + target3 += mul(a3, float4x4(-0.020113828, 0.06306164, 0.1133604, -0.03264297, -0.019580074, -0.28136805, 0.046105113, -0.104369484, 0.047211405, -0.11510891, -0.2610411, -0.24363835, -0.15579234, 0.13080037, -0.2414289, -0.21552382)); + target3 += mul(b3, float4x4(-0.030723298, 0.10005462, -0.046389453, -0.42023477, -0.0900144, -0.3300974, 0.2023873, 0.47113106, -0.10733436, 0.13536386, 0.11873528, 0.075008325, -0.092727005, 0.16694772, -0.12538053, -0.019201787)); + target3 += mul(c3, float4x4(-0.020229753, 0.0050342986, -0.09015966, -0.23845413, 0.14204682, -0.24106354, 0.007471734, 0.21428482, -0.059586413, -0.07984075, 0.1474898, -0.12583902, -0.34393194, 0.08484377, -0.40459237, 0.32322514)); + target3 += mul(d3, float4x4(-0.11741491, -0.083517544, 0.04531866, -0.048355322, 0.15782192, 0.07919051, -0.34528416, -0.17551522, -0.20325756, -0.13701133, -0.09564707, -0.03711687, 0.030484512, -0.107849605, -0.09412398, -0.28914952)); + target3 += mul(e3, float4x4(-0.013266804, -0.035421904, 0.081956826, 0.15579522, 0.12775496, 0.1479336, 0.46652517, 0.21593826, -0.23207328, -0.13872643, 0.09056148, 0.1257084, 0.40673763, 0.14669922, 
0.14093073, -0.31729355)); + target3 += mul(f3, float4x4(-0.03632805, 0.06513459, -0.13029967, 0.24914533, 0.08398421, -0.12399063, 0.15374567, 0.003005163, -0.03301567, 0.010896424, -0.10409926, -0.031162843, -0.080630526, 0.313793, -0.04112272, 0.06908576)); + target3 += mul(g3, float4x4(0.056705862, 0.04045318, -0.13523346, -0.12563162, 0.030291703, -0.22721136, -0.19567032, -0.22538094, -0.078549854, 0.16844983, -0.09419901, 0.1000363, -0.052691363, -0.14642943, -0.17214452, -0.23522456)); + target3 += mul(h3, float4x4(0.09823313, -0.16931288, 0.2667816, 0.019992903, 0.09905936, -0.14416765, 0.022824166, -0.02994203, 0.05482313, 0.0073759295, -0.087138794, -0.10250613, 0.22704037, -0.33540174, 0.059272785, -0.08828277)); + target3 += mul(i3, float4x4(0.05405852, 0.0015277737, 0.15057512, 0.008105634, 0.26466554, 0.021303358, 0.21576874, -0.055405084, 0.20417419, -0.1829464, 0.19177821, -0.10549947, -0.10019333, -0.04373452, 0.3086124, -0.030007664)); + target3 += mul(na1, float4x4(0.18547705, 0.015533089, -0.17023557, -0.14218459, -0.109183766, -0.21892494, -0.08033779, 0.1279889, 0.21425895, 0.31563443, 0.055812337, 0.035239376, 0.04874699, -0.03926052, 0.25620237, 0.05620038)); + target3 += mul(nb1, float4x4(0.17809738, -0.090085454, 0.086938836, 0.21705364, 0.057283174, 0.022287775, -0.21651776, -0.0027429194, 0.04257827, 0.17341158, 0.32710707, -0.029889492, 0.23903793, -0.038499728, 0.208562, 0.18147011)); + target3 += mul(nc1, float4x4(-0.02671488, -0.2577291, -0.101831675, -0.043231912, -0.08192727, -0.09351345, 0.10333126, 0.42192927, 0.11358276, 0.17070638, 0.11954223, -0.31113386, 0.21822956, 0.040758308, 0.18557602, -0.04927389)); + target3 += mul(nd1, float4x4(0.016825153, -0.16034372, 0.13393559, 0.0031862713, -0.07210358, 0.12088922, 0.18472868, 0.19526374, -0.098638535, -0.26882744, 0.01246303, -0.023679085, -0.07282684, 0.10335254, 0.11371582, -0.11949346)); + target3 += mul(ne1, float4x4(-0.0077989995, -0.06316807, -0.037497815, 0.010178734, 
-0.028329156, -0.109135084, -0.18357074, 0.40579423, -0.05144428, -0.28490487, -0.11653807, 0.22959495, -0.109780535, 0.22878933, -0.29027545, 0.17875119)); + target3 += mul(nf1, float4x4(-0.15628323, -0.07819484, -0.22514449, 0.065008484, -0.0055398177, -0.07419974, 0.09902451, 0.35817552, -0.0862891, -0.2973468, -0.10211232, 0.09778022, -0.08562242, -0.08868644, 0.30707374, 0.16413328)); + target3 += mul(ng1, float4x4(0.004233512, 0.02434783, -0.12356794, 0.13752618, 0.21815947, 0.16979212, 0.3382205, 0.15363333, -0.14368188, 0.10208307, 0.16594398, -0.002474651, -0.25072917, 0.19654895, 0.15537341, -0.011402132)); + target3 += mul(nh1, float4x4(0.1492285, -0.102569796, -0.15423858, 0.03359016, 0.008948687, 0.11137203, -0.0753569, -0.15314926, -0.22925344, 0.1943656, -0.4934053, 0.42356676, -0.10820874, 0.23832525, -0.4461194, 0.19386442)); + target3 += mul(ni1, float4x4(0.30649734, 0.061961878, -0.17697462, -0.29313368, 0.19318691, 0.14972912, -0.04568052, 0.123596475, -0.018475438, 0.33577895, -0.17800568, 0.12502621, 0.032249834, 0.013487416, -0.019249933, 0.004653166)); + target3 += mul(na2, float4x4(0.11560085, -0.030997908, 0.009219462, 0.05633901, -0.11158907, 0.09791856, -0.111877, -0.020388048, -0.25937706, -0.000673325, 0.106495194, 0.15643579, 0.022090284, -0.11573403, 0.123260945, -0.033783972)); + target3 += mul(nb2, float4x4(-0.061418246, 0.13925532, 0.070662834, -0.10297572, -0.08535479, 0.31824788, 0.08315885, 0.012375857, 0.04241964, 0.21071856, -0.18567438, -0.26859924, 0.09607365, -0.19106552, 0.1222843, 0.20521446)); + target3 += mul(nc2, float4x4(-0.1985242, 0.40886146, -0.03295415, 0.25985515, 0.00024564067, 0.22053646, 0.4425157, 0.030073104, 0.15870823, 0.3720021, -0.19778733, -0.11957699, 0.23951907, -0.022089735, 0.026504006, -0.1143626)); + target3 += mul(nd2, float4x4(0.07811988, 0.06360271, -0.18825488, 0.05489923, -0.316614, -0.2020329, -0.17215219, -0.1163882, 0.028907632, 0.13332835, 0.07710604, 0.15564129, -0.08207378, 0.2586524, 
-0.15368843, -0.026250634)); + target3 += mul(ne2, float4x4(0.1154507, 0.05374841, -0.35887244, -0.38684267, 0.024906285, -0.051356003, 0.06727699, -0.13258685, -0.04512674, -0.0630682, -0.016046045, -0.3630216, -0.10115332, 0.06723903, 0.10273197, 0.01658071)); + target3 += mul(nf2, float4x4(0.035411883, -0.10390069, 0.28300494, -0.030523226, 0.260309, -0.2897127, 0.17530721, 0.06502907, 0.10852879, 0.0101283565, 0.04377248, -0.14661616, 0.07372457, 0.029455552, -0.024029268, 0.019606834)); + target3 += mul(ng2, float4x4(0.06462741, -0.017584527, 0.05204551, 0.023974337, -0.09858389, -0.12002433, 0.051191356, -0.15688013, 0.1415572, -0.121506944, 0.4219788, -0.14832322, 0.09247079, -0.10846258, -0.030261837, -0.14657071)); + target3 += mul(nh2, float4x4(0.037952326, 0.05012869, 0.022779293, 0.0797289, 0.024931714, -0.050262492, -0.15463822, -0.023215678, 0.045349725, -0.0040035774, 0.22049266, -0.08079404, -0.0113567095, -0.00675084, 0.17475724, 0.025340058)); + target3 += mul(ni2, float4x4(-0.13610172, 0.14658909, 0.067050435, 0.12354151, 0.22096893, -0.06765668, -0.024593432, -0.03552899, 0.06936571, 0.10394856, 0.0048312224, -0.21034646, 0.037834894, -0.06692894, 0.009020093, -0.04065748)); + target3 += mul(na3, float4x4(-0.08967367, -0.14398253, -0.19402455, -0.14434609, -0.027259551, 0.1226331, 0.012233069, 0.13677149, -0.1507801, 0.14510965, 0.24108039, 0.04916487, 0.042398036, 0.09403761, -0.03958092, 0.17673557)); + target3 += mul(nb3, float4x4(-0.071569644, -0.19743139, -0.09648773, 0.038397867, 0.12506093, 0.24415006, 0.13810574, -0.23042768, 0.20971183, -0.14231962, 0.0963819, -0.07323753, -0.014360243, -0.099411555, 0.07815387, 0.09009336)); + target3 += mul(nc3, float4x4(0.14625058, -0.15307125, 0.45122483, 0.10113701, -0.12264418, 0.09390506, -0.25706926, -0.082095854, 0.11812526, 0.14046957, -0.09704567, 0.21640895, 0.20999698, -0.19149756, 0.16977966, 0.034616202)); + target3 += mul(nd3, float4x4(0.05720225, 0.0428485, -0.057531573, -0.111578174, 
0.03538242, 0.033332366, -0.05961152, 0.13383748, -0.05669531, -0.047779217, 0.2760684, -0.23934118, 0.03728129, -0.15390043, 0.09151239, 0.016904302)); + target3 += mul(ne3, float4x4(0.05711261, -0.009796642, 0.1827549, -0.23561665, 0.15747361, -0.15555665, -0.03771464, -0.15358609, 0.124769196, -0.00302323, -0.1930878, -0.3193505, -0.036671866, -0.21477285, -0.0015818535, -0.054916248)); + target3 += mul(nf3, float4x4(-0.04039116, 0.022148842, 0.2527601, -0.08849551, -0.017892385, -0.01728494, -0.12817079, 0.112442665, 0.004877744, 0.08325303, 0.13601741, -0.12387854, -0.033808686, -0.07762037, -0.036944337, -0.016846744)); + target3 += mul(ng3, float4x4(0.025319673, 0.12447582, 0.06369372, 0.20814203, -0.062117852, 0.10390202, -0.030939216, 0.15888922, -0.0873872, 0.04641361, 0.13612288, -0.22024561, 0.15445144, -0.03273631, 0.18931653, 0.03979226)); + target3 += mul(nh3, float4x4(0.01642648, 0.10919636, 0.118298925, -0.052648794, 0.046562076, 0.042576727, -0.119064495, -0.10575594, -0.023527319, 0.27507904, -0.24070077, 0.037794556, 0.026340371, 0.08496194, -0.2165465, -0.10772629)); + target3 += mul(ni3, float4x4(-0.110290796, 0.23385854, 0.16042788, 0.041294437, -0.04052982, -0.030170577, 0.16566783, 0.18245162, -0.125454, 0.15547217, -0.02763223, -0.10694603, 0.12049954, -0.07608294, -0.06768503, 0.022071697)); + target3 += float4(-0.19256988, 0.07561771, 0.007950438, -0.050078563); + + float3 result = tex7.SampleLevel(sam, pos, 0).rgb; + result += mul(e1, float4x3(0.0075503755, 0.014264192, 0.014350495, 0.013990636, -0.0011566521, -0.005510977, -0.021975616, -0.013216436, -0.012400287, 0.018202957, 0.010433842, 0.007529786)); + result += mul(e2, float4x3(0.012649671, 0.016378459, 0.009756208, 0.0023225206, -0.0038671023, -0.005242471, 0.023699954, 0.015248626, 0.011651197, 0.014677953, 0.014319745, 0.012088228)); + result += mul(e3, float4x3(-0.0030005479, 0.0052323043, 0.007744717, -0.0077438625, -0.00072459516, -0.001971826, -0.01263717, -0.009226968, 
-0.005661945, 0.0046659256, 0.0014185858, 0.0038442858)); + result += mul(ne1, float4x3(-0.0053241113, -0.010728358, -0.013345879, -0.000893072, 0.015531841, 0.015812417, 0.021348871, 0.015751695, 0.016067913, 0.014817982, 0.03233685, 0.031598262)); + result += mul(ne2, float4x3(0.0038391522, 0.0027406036, 0.0063517806, 0.0021543978, 0.0065204683, 0.009420363, -0.022383714, -0.012619449, -0.008763167, -0.009436604, -0.012201518, -0.0103548)); + result += mul(ne3, float4x3(-0.005432008, -0.013701671, -0.021388102, -0.001045599, -0.0032160715, -0.0036216215, 0.031028647, 0.022415614, 0.01880324, -0.004328173, -0.004780637, -0.005459752)); + result += mul(max(target1, 0), float4x3(-0.007300146, -0.0076159053, -0.0080059795, 0.005996225, 0.0057377047, 0.0059788194, -0.021563234, -0.020394823, -0.020401813, -0.030919729, -0.03150251, -0.029059272)); + result += mul(max(target2, 0), float4x3(-0.002826552, -0.0042917025, -0.0025527687, -0.0074001094, -0.006878869, -0.0062073106, 0.010867636, 0.010852139, 0.008577537, -0.01606024, -0.0143771265, -0.013291837)); + result += mul(max(target3, 0), float4x3(0.012113326, 0.014259359, 0.011284172, -3.851684e-05, -0.003696042, -0.0020337042, 0.003427011, 0.006911378, 0.008471347, 0.0063997298, 0.004651406, 0.0075980425)); + result += mul(max(-target1, 0), float4x3(-0.026621016, -0.027831081, -0.025364956, 0.022336917, 0.023742557, 0.023516335, -0.01619396, -0.01820708, -0.015288538, 0.0045815264, 0.0022230193, 0.0017512285)); + result += mul(max(-target2, 0), float4x3(0.043799683, 0.046862658, 0.041910093, -0.027854608, -0.02948632, -0.02927831, -0.051899213, -0.04971418, -0.04712937, -0.017539004, -0.0245854, -0.023040624)); + result += mul(max(-target3, 0), float4x3(0.022317344, 0.021462968, 0.02187171, 0.0530127, 0.054741293, 0.052202478, 0.029963326, 0.0298772, 0.025601966, 0.027699472, 0.031187871, 0.02950236)); + result += float3(-0.0071146404, 0.005606682, 0.010180816); + + result += INPUT.SampleLevel(sam, pos, 0).rgb; + 
WriteToOutput(gxy, result.rgb); +} diff --git a/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl index f7892f953..4109710a9 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl @@ -446,14 +446,14 @@ void Pass3(uint2 blockStart, uint3 threadId) { target2 += mul(ni2 , float4x4(-0.16865666, -0.00015881563, -0.054488145, -0.06222717, -0.032101758, 0.06485387, -0.0028512608, 0.046645947, 0.017593225, -0.19447896, -0.024742266, 0.03970127, 0.29845607, -0.16168733, 0.035172883, 0.07924657)); target2 += float4(0.103826486, 0.045373913, 0.11565896, -0.06568643); - float4 target3 = mul(e1, float4x4(0.09689336, 0.06046458, 0.072598994, 0.0, 0.11994565, 0.104477674, 0.09302802, 0.0, -0.05718302, 0.050438102, 0.08814741, 0.0, 0.0308889, 0.0033925986, -0.01715605, 0.0)); - target3 += mul(e2, float4x4(-0.028314235, 0.06597744, 0.0966897, 0.0, 0.035656154, 0.07770106, 0.075551905, 0.0, 0.0001793458, -0.000479495, -0.00297406, 0.0, -0.053916585, -0.016807461, -0.0057141334, 0.0)); - target3 += mul(ne1, float4x4(-0.047189303, -0.0207, -0.020910334, 0.0, -0.07933196, -0.06961211, -0.086069845, 0.0, 0.0943727, 0.008463375, 0.010755166, 0.0, 0.062410597, 0.022625161, 0.04068433, 0.0)); - target3 += mul(ne2, float4x4(0.10270994, -0.019080428, 0.0050091282, 0.0, -0.004672948, -0.013966742, -0.0063746064, 0.0, -2.5856789e-05, 0.03151499, -0.0023983798, 0.0, 0.113539025, 0.12381699, 0.100360274, 0.0)); + float3 target3 = mul(e1, float4x3(0.09689336, 0.06046458, 0.072598994, 0.11994565, 0.104477674, 0.09302802, -0.05718302, 0.050438102, 0.08814741, 0.0308889, 0.0033925986, -0.01715605)); + target3 += mul(e2, float4x3(-0.028314235, 0.06597744, 0.0966897, 0.035656154, 0.07770106, 0.075551905, 0.0001793458, -0.000479495, -0.00297406, -0.053916585, -0.016807461, -0.0057141334)); + target3 += mul(ne1, float4x3(-0.047189303, -0.0207, -0.020910334, -0.07933196, -0.06961211, -0.086069845, 0.0943727, 
0.008463375, 0.010755166, 0.062410597, 0.022625161, 0.04068433)); + target3 += mul(ne2, float4x3(0.10270994, -0.019080428, 0.0050091282, -0.004672948, -0.013966742, -0.0063746064, -2.5856789e-05, 0.03151499, -0.0023983798, 0.113539025, 0.12381699, 0.100360274)); tex1[gxy] = target1; tex2[gxy] = target2; - tex5[gxy] = target3; + tex5[gxy] = float4(target3, 1); } @@ -613,15 +613,15 @@ void Pass4(uint2 blockStart, uint3 threadId) { target2 += mul(ni2, float4x4(0.030153519, -0.092469186, -0.022912916, 0.10200855, -0.04237032, -0.05917764, 0.10479645, -0.05619482, -0.18949397, -0.019547248, 0.013868889, -0.1524476, 0.14048979, -0.032521486, 0.1322921, 0.070972025)); target2 += float4(0.012053958, -4.6962363e-05, 0.0020099226, -0.033494607); - float4 target3 = tex5.SampleLevel(sam, pos, 0); - target3 += mul(e1, float4x4(0.07868885, -0.030913834, -0.009213676, 0.0, 0.04870991, 0.021467991, 0.038739506, 0.0, -0.042969644, -0.07122453, -0.08798675, 0.0, -0.09784122, 0.021434791, 0.02510374, 0.0)); - target3 += mul(e2, float4x4(0.050420716, 0.0729716, 0.076532185, 0.0, -0.019112485, -0.01037939, -0.026948035, 0.0, -0.02591423, 0.008927897, -0.00042541025, 0.0, 0.1043701, -0.0071186824, -0.041817162, 0.0)); - target3 += mul(ne1, float4x4(-0.16143242, -0.0009298223, -0.01228508, 0.0, 0.07744052, -0.018313263, -0.0488145, 0.0, 0.09241393, 0.07128674, 0.055164956, 0.0, 0.054884013, -0.04834418, -0.06281626, 0.0)); - target3 += mul(ne2, float4x4(-0.049036566, -0.05979936, -0.05594288, 0.0, -0.014564307, 0.031926468, 0.037857566, 0.0, 0.015474487, -0.11385003, -0.11527764, 0.0, -0.07076006, 0.057038613, 0.095983796, 0.0)); + float3 target3 = tex5.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.07868885, -0.030913834, -0.009213676, 0.04870991, 0.021467991, 0.038739506, -0.042969644, -0.07122453, -0.08798675, -0.09784122, 0.021434791, 0.02510374)); + target3 += mul(e2, float4x3(0.050420716, 0.0729716, 0.076532185, -0.019112485, -0.01037939, -0.026948035, -0.02591423, 
0.008927897, -0.00042541025, 0.1043701, -0.0071186824, -0.041817162)); + target3 += mul(ne1, float4x3(-0.16143242, -0.0009298223, -0.01228508, 0.07744052, -0.018313263, -0.0488145, 0.09241393, 0.07128674, 0.055164956, 0.054884013, -0.04834418, -0.06281626)); + target3 += mul(ne2, float4x3(-0.049036566, -0.05979936, -0.05594288, -0.014564307, 0.031926468, 0.037857566, 0.015474487, -0.11385003, -0.11527764, -0.07076006, 0.057038613, 0.095983796)); tex3[gxy] = target1; tex4[gxy] = target2; - tex6[gxy] = target3; + tex6[gxy] = float4(target3, 1); } @@ -781,15 +781,15 @@ void Pass5(uint2 blockStart, uint3 threadId) { target2 += mul(ni2, float4x4(-0.035496738, 0.010802548, -0.028718363, 0.19263634, 0.16900502, -0.16661702, -0.027631328, 0.18309957, -0.015860107, -0.03309961, -0.091390446, 0.14000848, -0.0036591904, 0.47659522, -0.09373507, -0.29020965)); target2 += float4(0.08895955, -0.027667087, 0.20500831, 0.00037762933); - float4 target3 = tex6.SampleLevel(sam, pos, 0); - target3 += mul(e1, float4x4(0.03094887, -0.008734403, 0.00042712069, 0.0, 0.053891554, 0.05837673, 0.06200635, 0.0, 0.09071558, -0.04202184, -0.046172567, 0.0, -0.0425916, 0.04905093, 0.020835675, 0.0)); - target3 += mul(e2, float4x4(0.096628904, -0.037792254, -0.043241944, 0.0, -0.011923947, -0.025950424, -0.031381752, 0.0, -0.060941868, -0.07859433, -0.07535451, 0.0, -0.026777223, 0.08604982, 0.07829908, 0.0)); - target3 += mul(ne1, float4x4(-0.06435972, 0.0036599538, 0.00786578, 0.0, -0.061972067, -0.05681472, -0.06667608, 0.0, -0.106890626, 0.007406496, 0.029977169, 0.0, -0.20519382, -0.044860814, 0.0021225857, 0.0)); - target3 += mul(ne2, float4x4(-0.16876474, 0.012789643, 0.026692612, 0.0, 0.017817136, 0.026935097, 0.02227043, 0.0, 0.01690181, 0.07716103, 0.086527, 0.0, 0.07923805, -0.10443151, -0.10859543, 0.0)); + float3 target3 = tex6.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.03094887, -0.008734403, 0.00042712069, 0.053891554, 0.05837673, 0.06200635, 0.09071558, 
-0.04202184, -0.046172567, -0.0425916, 0.04905093, 0.020835675)); + target3 += mul(e2, float4x3(0.096628904, -0.037792254, -0.043241944, -0.011923947, -0.025950424, -0.031381752, -0.060941868, -0.07859433, -0.07535451, -0.026777223, 0.08604982, 0.07829908)); + target3 += mul(ne1, float4x3(-0.06435972, 0.0036599538, 0.00786578, -0.061972067, -0.05681472, -0.06667608, -0.106890626, 0.007406496, 0.029977169, -0.20519382, -0.044860814, 0.0021225857)); + target3 += mul(ne2, float4x3(-0.16876474, 0.012789643, 0.026692612, 0.017817136, 0.026935097, 0.02227043, 0.01690181, 0.07716103, 0.086527, 0.07923805, -0.10443151, -0.10859543)); tex1[gxy] = target1; tex2[gxy] = target2; - tex5[gxy] = target3; + tex5[gxy] = float4(target3, 1); } @@ -949,15 +949,15 @@ void Pass6(uint2 blockStart, uint3 threadId) { target2 += mul(ni2, float4x4(0.18187882, 0.017893985, 0.17856054, 0.005413129, 0.014147176, 0.15102178, 0.12436294, -0.02176765, -0.16727823, -0.0364111, 0.17074408, 0.12899421, 0.31984514, -0.0072070034, 0.031895883, -0.1991405)); target2 += float4(-0.011865144, 0.11717201, -0.13823777, -0.059450272); - float4 target3 = tex5.SampleLevel(sam, pos, 0); - target3 += mul(e1, float4x4(0.003730466, -0.024648283, -0.022169832, 0.0, -0.0062762927, 0.022062732, 0.032966793, 0.0, 0.016349113, 0.017197203, 0.020952817, 0.0, -0.1763789, 0.035497356, 0.053835396, 0.0)); - target3 += mul(e2, float4x4(0.020886675, -0.07054202, -0.079142675, 0.0, 0.06664387, 0.044960167, 0.042230908, 0.0, -0.095019594, 0.012421141, 0.0142890485, 0.0, 0.056814816, -0.012751135, -0.014684506, 0.0)); - target3 += mul(ne1, float4x4(0.011765893, 0.0008920681, -0.0018258415, 0.0, -0.010473814, -0.023085753, -0.028783914, 0.0, -0.023034256, -0.0024786016, -0.0052162083, 0.0, 0.1643386, -0.06132718, -0.09289065, 0.0)); - target3 += mul(ne2, float4x4(0.016597198, 0.09389637, 0.10833379, 0.0, -0.043163072, -0.04714812, -0.035274632, 0.0, 0.09634976, -0.009292612, -0.022424143, 0.0, -0.08765172, 0.0051558353, 
0.010900356, 0.0)); + float3 target3 = tex5.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.003730466, -0.024648283, -0.022169832, -0.0062762927, 0.022062732, 0.032966793, 0.016349113, 0.017197203, 0.020952817, -0.1763789, 0.035497356, 0.053835396)); + target3 += mul(e2, float4x3(0.020886675, -0.07054202, -0.079142675, 0.06664387, 0.044960167, 0.042230908, -0.095019594, 0.012421141, 0.0142890485, 0.056814816, -0.012751135, -0.014684506)); + target3 += mul(ne1, float4x3(0.011765893, 0.0008920681, -0.0018258415, -0.010473814, -0.023085753, -0.028783914, -0.023034256, -0.0024786016, -0.0052162083, 0.1643386, -0.06132718, -0.09289065)); + target3 += mul(ne2, float4x3(0.016597198, 0.09389637, 0.10833379, -0.043163072, -0.04714812, -0.035274632, 0.09634976, -0.009292612, -0.022424143, -0.08765172, 0.0051558353, 0.010900356)); tex3[gxy] = target1; tex4[gxy] = target2; - tex6[gxy] = target3; + tex6[gxy] = float4(target3, 1); } @@ -1117,15 +1117,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { target2 += mul(ni2, float4x4(-0.034574904, 0.06755256, 0.09508443, -0.17162292, 0.046379335, 0.2178781, 0.08699012, -0.055380464, -0.2237568, -0.07427848, -0.028395249, -0.3225617, -0.084454566, -0.24776657, 0.254169, 0.13229847)); target2 += float4(0.18765923, -0.07697714, 0.028134674, -0.060966115); - float4 target3 = tex6.SampleLevel(sam, pos, 0); - target3 += mul(e1, float4x4(0.030815786, 0.021069322, 0.01812191, 0.0, 0.084839165, -0.0080813095, -0.029270556, 0.0, -0.10456346, 0.062386703, 0.0665605, 0.0, 0.11926609, -0.1104228, -0.13291118, 0.0)); - target3 += mul(e2, float4x4(-0.07159541, -0.007267032, -0.010134558, 0.0, 0.008234213, 0.045609634, 0.040295456, 0.0, 0.018416971, 0.01308482, 0.014649557, 0.0, 0.035107512, -0.02140815, -0.030279048, 0.0)); - target3 += mul(ne1, float4x4(0.01918586, 0.03875863, 0.03229402, 0.0, -0.07917104, 0.041135103, 0.057182517, 0.0, 0.08609541, 0.0079662455, 0.004327576, 0.0, -0.14332893, 0.03120354, 0.056732506, 0.0)); - 
target3 += mul(ne2, float4x4(0.03200192, -0.0035752193, -0.0031064528, 0.0, -0.010902813, 0.014607456, 0.019431474, 0.0, -0.016461229, -0.004938204, -0.004655488, 0.0, -0.033470232, 0.0026075812, 0.005896968, 0.0)); + float3 target3 = tex6.SampleLevel(sam, pos, 0).rgb; + target3 += mul(e1, float4x3(0.030815786, 0.021069322, 0.01812191, 0.084839165, -0.0080813095, -0.029270556, -0.10456346, 0.062386703, 0.0665605, 0.11926609, -0.1104228, -0.13291118)); + target3 += mul(e2, float4x3(-0.07159541, -0.007267032, -0.010134558, 0.008234213, 0.045609634, 0.040295456, 0.018416971, 0.01308482, 0.014649557, 0.035107512, -0.02140815, -0.030279048)); + target3 += mul(ne1, float4x3(0.01918586, 0.03875863, 0.03229402, -0.07917104, 0.041135103, 0.057182517, 0.08609541, 0.0079662455, 0.004327576, -0.14332893, 0.03120354, 0.056732506)); + target3 += mul(ne2, float4x3(0.03200192, -0.0035752193, -0.0031064528, -0.010902813, 0.014607456, 0.019431474, -0.016461229, -0.004938204, -0.004655488, -0.033470232, 0.0026075812, 0.005896968)); tex1[gxy] = target1; tex2[gxy] = target2; - tex5[gxy] = target3; + tex5[gxy] = float4(target3, 1); } @@ -1284,17 +1284,17 @@ void Pass8(uint2 blockStart, uint3 threadId) { target2 += mul(ni2, float4x4(0.20014295, -0.027282396, -0.06317007, 0.04452042, 0.064600386, 0.072222926, -0.33409226, 0.08063831, -0.022607977, 0.1308856, -0.39691743, -0.094889864, -0.1810531, 0.011367248, -0.2531222, -0.22468317)); target2 += float4(0.26886886, 0.05874665, 0.10268232, 0.05833081); - float4 result = tex5.SampleLevel(sam, pos, 0); - result += mul(e1, float4x4(0.037410006, 0.048742272, 0.04348088, 0.0, 0.037719514, 0.030768529, 0.03127472, 0.0, 0.056426726, 0.03066893, 0.016440205, 0.0, -0.010599352, 0.022832409, 0.023211194, 0.0)); - result += mul(e2, float4x4(-0.005733291, 0.06365659, 0.06663611, 0.0, -0.041917093, -0.016493445, -0.020438088, 0.0, -0.0014357592, -0.0022506563, -0.0045095007, 0.0, 0.029893145, -0.009129354, -0.015173116, 0.0)); - result += mul(ne1, 
float4x4(0.013052085, 0.005108175, 0.0025906067, 0.0, -0.021950055, -0.036447693, -0.036141638, 0.0, -0.036296472, 0.0068928464, 0.013102313, 0.0, 0.0060471976, -0.024798103, -0.023548538, 0.0)); - result += mul(ne2, float4x4(0.0067743887, -0.06191211, -0.062355213, 0.0, 0.0016080744, -0.020445071, -0.016840393, 0.0, 0.028264903, 0.01852915, 0.015891539, 0.0, -0.023877412, -0.013271666, -0.008158679, 0.0)); - result += mul(max(target1, 0), float4x4(-0.04317466, -0.018953001, -0.020452993, 0.0, -0.009322576, -0.03022352, -0.030970376, 0.0, 0.05653658, 0.05430553, 0.046692245, 0.0, 0.05615359, 0.059338935, 0.056018773, 0.0)); - result += mul(max(target2, 0), float4x4(0.022878079, 0.03392234, 0.033057988, 0.0, -0.017554542, -0.0141542535, -0.014122613, 0.0, -0.048634093, -0.05316463, -0.047988772, 0.0, -0.058002178, -0.040221967, -0.034025013, 0.0)); - result += mul(max(-target1, 0), float4x4(-0.018253656, -0.04197674, -0.040467236, 0.0, -0.04358929, -0.028309818, -0.025425073, 0.0, -0.008488672, -0.001727991, 0.00035808363, 0.0, -0.0011709273, 0.0052514165, 0.0059479307, 0.0)); - result += mul(max(-target2, 0), float4x4(-0.08333935, -0.09818201, -0.09476284, 0.0, -0.033692095, -0.046259012, -0.045797516, 0.0, -0.007577072, 0.0022402718, 0.0016200038, 0.0, 0.0029786075, -0.020728534, -0.018938033, 0.0)); - result += float4(0.047567394, -0.02504617, -0.028163986, 0.0); - result += INPUT.SampleLevel(sam, pos, 0); - - WriteToOutput(gxy, result.rgb); + float3 result = tex5.SampleLevel(sam, pos, 0).rgb; + result += mul(e1, float4x3(0.037410006, 0.048742272, 0.04348088, 0.037719514, 0.030768529, 0.03127472, 0.056426726, 0.03066893, 0.016440205, -0.010599352, 0.022832409, 0.023211194)); + result += mul(e2, float4x3(-0.005733291, 0.06365659, 0.06663611, -0.041917093, -0.016493445, -0.020438088, -0.0014357592, -0.0022506563, -0.0045095007, 0.029893145, -0.009129354, -0.015173116)); + result += mul(ne1, float4x3(0.013052085, 0.005108175, 0.0025906067, -0.021950055, 
-0.036447693, -0.036141638, -0.036296472, 0.0068928464, 0.013102313, 0.0060471976, -0.024798103, -0.023548538)); + result += mul(ne2, float4x3(0.0067743887, -0.06191211, -0.062355213, 0.0016080744, -0.020445071, -0.016840393, 0.028264903, 0.01852915, 0.015891539, -0.023877412, -0.013271666, -0.008158679)); + result += mul(max(target1, 0), float4x3(-0.04317466, -0.018953001, -0.020452993, -0.009322576, -0.03022352, -0.030970376, 0.05653658, 0.05430553, 0.046692245, 0.05615359, 0.059338935, 0.056018773)); + result += mul(max(target2, 0), float4x3(0.022878079, 0.03392234, 0.033057988, -0.017554542, -0.0141542535, -0.014122613, -0.048634093, -0.05316463, -0.047988772, -0.058002178, -0.040221967, -0.034025013)); + result += mul(max(-target1, 0), float4x3(-0.018253656, -0.04197674, -0.040467236, -0.04358929, -0.028309818, -0.025425073, -0.008488672, -0.001727991, 0.00035808363, -0.0011709273, 0.0052514165, 0.0059479307)); + result += mul(max(-target2, 0), float4x3(-0.08333935, -0.09818201, -0.09476284, -0.033692095, -0.046259012, -0.045797516, -0.007577072, 0.0022402718, 0.0016200038, 0.0029786075, -0.020728534, -0.018938033)); + result += float3(0.047567394, -0.02504617, -0.028163986); + + result += INPUT.SampleLevel(sam, pos, 0).rgb; + WriteToOutput(gxy, result); } diff --git a/src/Effects/Effects.vcxproj b/src/Effects/Effects.vcxproj index 5589c0077..fa2613e3e 100644 --- a/src/Effects/Effects.vcxproj +++ b/src/Effects/Effects.vcxproj @@ -123,6 +123,12 @@ Document + + Document + + + Document + Document @@ -132,6 +138,9 @@ Document + + Document + Document diff --git a/src/Effects/Effects.vcxproj.filters b/src/Effects/Effects.vcxproj.filters index d139e1f28..003398507 100644 --- a/src/Effects/Effects.vcxproj.filters +++ b/src/Effects/Effects.vcxproj.filters @@ -57,6 +57,12 @@ Anime4K + + Anime4K + + + Anime4K + Anime4K @@ -66,6 +72,9 @@ Anime4K + + Anime4K + Anime4K diff --git a/src/Magpie.App/AboutViewModel.cpp b/src/Magpie.App/AboutViewModel.cpp index 
8eab56ef1..4eca520ab 100644 --- a/src/Magpie.App/AboutViewModel.cpp +++ b/src/Magpie.App/AboutViewModel.cpp @@ -10,7 +10,6 @@ #include "IconHelper.h" using namespace winrt; -using namespace Windows::ApplicationModel::Resources; using namespace Windows::UI::Xaml::Media::Imaging; namespace winrt::Magpie::App::implementation { diff --git a/src/Magpie.App/App.idl b/src/Magpie.App/App.idl index 62acbe9a0..ba8e45912 100644 --- a/src/Magpie.App/App.idl +++ b/src/Magpie.App/App.idl @@ -27,6 +27,8 @@ #include "ScalingConfigurationPage.idl" #include "ProfilePage.idl" #include "SettingsPage.idl" +#include "CaptionButtonsControl.idl" +#include "TitleBarControl.idl" namespace Magpie.App { enum ShortcutAction { diff --git a/src/Magpie.App/App.xaml b/src/Magpie.App/App.xaml index 3138f8bc0..b8c040b81 100644 --- a/src/Magpie.App/App.xaml +++ b/src/Magpie.App/App.xaml @@ -17,6 +17,628 @@ + + 12 + + + + + + + diff --git a/src/Magpie.App/FileDialogHelper.cpp b/src/Magpie.App/FileDialogHelper.cpp new file mode 100644 index 000000000..ce5962352 --- /dev/null +++ b/src/Magpie.App/FileDialogHelper.cpp @@ -0,0 +1,38 @@ +#include "pch.h" +#include "FileDialogHelper.h" +#include "Logger.h" +#include "App.h" + +namespace winrt::Magpie::App { + +// 出错返回空,取消返回空字符串 +std::optional FileDialogHelper::OpenFileDialog(IFileDialog* fileDialog, FILEOPENDIALOGOPTIONS options) noexcept { + FILEOPENDIALOGOPTIONS options1{}; + fileDialog->GetOptions(&options1); + fileDialog->SetOptions(options1 | options | FOS_FORCEFILESYSTEM); + + if (fileDialog->Show((HWND)Application::Current().as().HwndMain()) != S_OK) { + // 被用户取消 + return std::wstring(); + } + + com_ptr file; + HRESULT hr = fileDialog->GetResult(file.put()); + if (FAILED(hr)) { + Logger::Get().ComError("IFileSaveDialog::GetResult 失败", hr); + return std::nullopt; + } + + wchar_t* fileName = nullptr; + hr = file->GetDisplayName(SIGDN_DESKTOPABSOLUTEPARSING, &fileName); + if (FAILED(hr)) { + Logger::Get().ComError("IShellItem::GetDisplayName 失败", hr); + 
return std::nullopt; + } + + std::wstring result(fileName); + CoTaskMemFree(fileName); + return std::move(result); +} + +} diff --git a/src/Magpie.App/FileDialogHelper.h b/src/Magpie.App/FileDialogHelper.h new file mode 100644 index 000000000..d96a3ec9f --- /dev/null +++ b/src/Magpie.App/FileDialogHelper.h @@ -0,0 +1,12 @@ +#pragma once + +namespace winrt::Magpie::App { + +struct FileDialogHelper { + static std::optional OpenFileDialog( + IFileDialog* fileDialog, + FILEOPENDIALOGOPTIONS options = 0 + ) noexcept; +}; + +} diff --git a/src/Magpie.App/HomeViewModel.cpp b/src/Magpie.App/HomeViewModel.cpp index aeca07619..69d18596c 100644 --- a/src/Magpie.App/HomeViewModel.cpp +++ b/src/Magpie.App/HomeViewModel.cpp @@ -9,9 +9,6 @@ #include "StrUtils.h" #include "UpdateService.h" -using namespace winrt; -using namespace Windows::ApplicationModel::Resources; - namespace winrt::Magpie::App::implementation { HomeViewModel::HomeViewModel() { diff --git a/src/Magpie.App/IconHelper.cpp b/src/Magpie.App/IconHelper.cpp index 7700d782b..6292f91db 100644 --- a/src/Magpie.App/IconHelper.cpp +++ b/src/Magpie.App/IconHelper.cpp @@ -185,7 +185,6 @@ SoftwareBitmap IconHelper::ExtractIconFormWnd(HWND hWnd, uint32_t preferredSize, } SoftwareBitmap IconHelper::ExtractIconFromExe(const wchar_t* fileName, uint32_t preferredSize, uint32_t dpi) { - preferredSize = (preferredSize + 15) / 16 * 16; preferredSize = (uint32_t)std::lround(preferredSize * dpi / double(USER_DEFAULT_SCREEN_DPI)); { diff --git a/src/Magpie.App/LocalizationService.cpp b/src/Magpie.App/LocalizationService.cpp index c2f7dad0d..f07a528c1 100644 --- a/src/Magpie.App/LocalizationService.cpp +++ b/src/Magpie.App/LocalizationService.cpp @@ -7,19 +7,18 @@ #pragma comment(lib, "bcp47mrm.lib") -using namespace winrt; -using namespace Windows::ApplicationModel::Resources; -using namespace Windows::ApplicationModel::Resources::Core; - namespace winrt::Magpie::App { // 标签必须为小写 -static std::array SUPPORTED_LANGUAGES{ +static 
std::array SUPPORTED_LANGUAGES{ L"en-us", L"es", L"ja", + L"ko", + L"pt-br", L"ru", L"tr", + L"uk", L"zh-hans", L"zh-hant" }; @@ -54,7 +53,7 @@ void LocalizationService::EarlyInitialize() { } } - ResourceContext::SetGlobalQualifierValue(L"Language", bestLanguage); + _Language(bestLanguage); } void LocalizationService::Initialize() { @@ -62,7 +61,7 @@ void LocalizationService::Initialize() { int language = settings.Language(); if (language >= 0) { - ResourceContext::SetGlobalQualifierValue(L"Language", SUPPORTED_LANGUAGES[language]); + _Language(SUPPORTED_LANGUAGES[language]); } } @@ -70,4 +69,9 @@ std::span LocalizationService::SupportedLanguages() noexcept { return SUPPORTED_LANGUAGES; } +void LocalizationService::_Language(const wchar_t* tag) { + _language = tag; + ResourceContext::SetGlobalQualifierValue(L"Language", tag); +} + } diff --git a/src/Magpie.App/LocalizationService.h b/src/Magpie.App/LocalizationService.h index 85a66a9ca..bcd70cf6c 100644 --- a/src/Magpie.App/LocalizationService.h +++ b/src/Magpie.App/LocalizationService.h @@ -21,8 +21,16 @@ class LocalizationService { // 支持的所有语言的标签,均为小写 static std::span SupportedLanguages() noexcept; + const wchar_t* Language() const noexcept { + return _language; + } + private: LocalizationService() = default; + + void _Language(const wchar_t* tag); + + const wchar_t* _language = nullptr; }; } diff --git a/src/Magpie.App/MagService.cpp b/src/Magpie.App/MagService.cpp index d3c4c9b0c..dea8c411d 100644 --- a/src/Magpie.App/MagService.cpp +++ b/src/Magpie.App/MagService.cpp @@ -321,8 +321,10 @@ bool MagService::_StartScale(HWND hWnd, const Profile& profile) { options.downscalingEffect = settings.DownscalingEffect(); options.IsDebugMode(settings.IsDebugMode()); options.IsDisableEffectCache(settings.IsDisableEffectCache()); + options.IsDisableFontCache(settings.IsDisableFontCache()); options.IsSaveEffectSources(settings.IsSaveEffectSources()); options.IsWarningsAreErrors(settings.IsWarningsAreErrors()); + 
options.IsAllowScalingMaximized(settings.IsAllowScalingMaximized()); options.IsSimulateExclusiveFullscreen(settings.IsSimulateExclusiveFullscreen()); _isAutoScaling = profile.isAutoScale; @@ -341,7 +343,16 @@ void MagService::_ScaleForegroundWindow() { } bool MagService::_CheckSrcWnd(HWND hWnd) noexcept { - return hWnd && IsWindow(hWnd) && Win32Utils::GetWindowShowCmd(hWnd) == SW_NORMAL; + if (!hWnd || !IsWindow(hWnd)) { + return false; + } + + UINT showCmd = Win32Utils::GetWindowShowCmd(hWnd); + if (showCmd == SW_NORMAL) { + return true; + } + + return showCmd == SW_MAXIMIZE && AppSettings::Get().IsAllowScalingMaximized(); } } diff --git a/src/Magpie.App/Magpie.App.rc b/src/Magpie.App/Magpie.App.rc index 5bc88db96..f125e3ac9 100644 --- a/src/Magpie.App/Magpie.App.rc +++ b/src/Magpie.App/Magpie.App.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 0,10,2,0 - PRODUCTVERSION 0,10,2,0 + FILEVERSION 0,10,3,0 + PRODUCTVERSION 0,10,3,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -41,12 +41,12 @@ BEGIN BEGIN BLOCK "000004b0" BEGIN - VALUE "FileVersion", "0.10.2.0" + VALUE "FileVersion", "0.10.3.0" VALUE "InternalName", "Magpie.App.dll" VALUE "LegalCopyright", "Copyright (C) 2023 Liu Xu" VALUE "OriginalFilename", "Magpie.App.dll" VALUE "ProductName", "Magpie" - VALUE "ProductVersion", "0.10.2.0" + VALUE "ProductVersion", "0.10.3.0" END END BLOCK "VarFileInfo" diff --git a/src/Magpie.App/Magpie.App.vcxproj b/src/Magpie.App/Magpie.App.vcxproj index ddcf97618..232a1a4cc 100644 --- a/src/Magpie.App/Magpie.App.vcxproj +++ b/src/Magpie.App/Magpie.App.vcxproj @@ -128,6 +128,10 @@ CandidateWindowItem.idl Code + + CaptionButtonsControl.xaml + Code + @@ -136,6 +140,7 @@ Code + HomeViewModel.idl Code @@ -240,6 +245,10 @@ TextBlockHelper.idl Code + + TitleBarControl.xaml + Code + WrapPanel.idl @@ -280,12 +289,17 @@ CandidateWindowItem.idl Code + + CaptionButtonsControl.xaml + Code + EffectParametersViewModel.idl Code + 
HomeViewModel.idl Code @@ -391,6 +405,10 @@ TextBlockHelper.idl Code + + TitleBarControl.xaml + Code + WrapPanel.idl @@ -401,6 +419,14 @@ Designer + + CaptionButtonsControl.xaml + Code + + + TitleBarControl.xaml + Code + Designer @@ -513,6 +539,9 @@ Designer + + Designer + Designer @@ -546,7 +575,7 @@ Designer - + Designer @@ -560,17 +589,24 @@ - + + + + + + false + + - + @@ -580,6 +616,6 @@ - + \ No newline at end of file diff --git a/src/Magpie.App/Magpie.App.vcxproj.filters b/src/Magpie.App/Magpie.App.vcxproj.filters index 02f627957..ec82effbf 100644 --- a/src/Magpie.App/Magpie.App.vcxproj.filters +++ b/src/Magpie.App/Magpie.App.vcxproj.filters @@ -55,6 +55,9 @@ Services + + Helpers + Models @@ -117,6 +120,9 @@ Services + + Helpers + Models @@ -233,9 +239,6 @@ Controls - - Styles - Controls @@ -254,6 +257,12 @@ Styles + + Controls + + + Controls + @@ -285,8 +294,17 @@ Strings - + + Strings + + Strings + + Strings + + + + \ No newline at end of file diff --git a/src/Magpie.App/MainPage.cpp b/src/Magpie.App/MainPage.cpp index 74599ba49..034969886 100644 --- a/src/Magpie.App/MainPage.cpp +++ b/src/Magpie.App/MainPage.cpp @@ -15,6 +15,7 @@ #include "ComboBoxHelper.h" #include "CommonSharedConstants.h" #include "ContentDialogHelper.h" +#include "LocalizationService.h" using namespace winrt; using namespace Windows::Graphics::Display; @@ -48,6 +49,9 @@ MainPage::MainPage() : _newApplicationViewModel(-1) { auto_revoke, { this, &MainPage::_ProfileService_ProfileRemoved }); _profileMovedRevoker = profileService.ProfileMoved( auto_revoke, { this, &MainPage::_ProfileService_ProfileReordered }); + + // 设置 Language 属性帮助 XAML 选择合适的字体,比如繁体中文使用 Microsoft JhengHei UI,日语使用 Yu Gothic UI + Language(LocalizationService::Get().Language()); } MainPage::~MainPage() { @@ -72,7 +76,7 @@ void MainPage::InitializeComponent() { MUXC::BackdropMaterial::SetApplyToRootOrPageBackground(*this, true); } - IVector navMenuItems = __super::RootNavigationView().MenuItems(); + IVector navMenuItems = 
RootNavigationView().MenuItems(); for (const Profile& profile : AppSettings::Get().Profiles()) { MUXC::NavigationViewItem item; item.Content(box_value(profile.name)); @@ -82,23 +86,9 @@ void MainPage::InitializeComponent() { navMenuItems.InsertAt(navMenuItems.Size() - 1, item); } - - // Win10 里启动时有一个 ToggleSwitch 的动画 bug,这里展示页面切换动画掩盖 - if (!osVersion.IsWin11()) { - ContentFrame().Navigate(winrt::xaml_typename()); - } } void MainPage::Loaded(IInspectable const&, RoutedEventArgs const&) { - MUXC::NavigationView nv = __super::RootNavigationView(); - - if (nv.DisplayMode() == MUXC::NavigationViewDisplayMode::Minimal) { - nv.IsPaneOpen(true); - } - - // 修复 WinUI 的汉堡菜单的尺寸 bug - nv.PaneDisplayMode(MUXC::NavigationViewPaneDisplayMode::Auto); - // 消除焦点框 IsTabStop(true); Focus(FocusState::Programmatic); @@ -115,7 +105,7 @@ void MainPage::NavigationView_SelectionChanged( auto contentFrame = ContentFrame(); if (args.IsSettingsSelected()) { - contentFrame.Navigate(winrt::xaml_typename()); + contentFrame.Navigate(xaml_typename()); } else { IInspectable selectedItem = args.SelectedItem(); if (!selectedItem) { @@ -128,22 +118,22 @@ void MainPage::NavigationView_SelectionChanged( hstring tagStr = unbox_value(tag); Interop::TypeName typeName; if (tagStr == L"Home") { - typeName = winrt::xaml_typename(); + typeName = xaml_typename(); } else if (tagStr == L"ScalingConfiguration") { - typeName = winrt::xaml_typename(); + typeName = xaml_typename(); } else if (tagStr == L"About") { - typeName = winrt::xaml_typename(); + typeName = xaml_typename(); } else { - typeName = winrt::xaml_typename(); + typeName = xaml_typename(); } contentFrame.Navigate(typeName); } else { // 缩放配置页面 - MUXC::NavigationView nv = __super::RootNavigationView(); + MUXC::NavigationView nv = RootNavigationView(); uint32_t index; if (nv.MenuItems().IndexOf(nv.SelectedItem(), index)) { - contentFrame.Navigate(winrt::xaml_typename(), box_value((int)index - 4)); + contentFrame.Navigate(xaml_typename(), 
box_value((int)index - 4)); } } } @@ -159,7 +149,7 @@ void MainPage::NavigationView_PaneOpening(MUXC::NavigationView const&, IInspecta // UpdateThemeOfTooltips 中使用的 hack 会使 NavigationViewItem 在展开时不会自动删除 Tooltip // 因此这里手动删除 - const MUXC::NavigationView& nv = __super::RootNavigationView(); + const MUXC::NavigationView& nv = RootNavigationView(); for (const IInspectable& item : nv.MenuItems()) { ToolTipService::SetToolTip(item.as(), nullptr); } @@ -172,7 +162,18 @@ void MainPage::NavigationView_PaneClosing(MUXC::NavigationView const&, MUXC::Nav XamlUtils::UpdateThemeOfTooltips(*this, ActualTheme()); } -void MainPage::NavigationView_DisplayModeChanged(MUXC::NavigationView const&, MUXC::NavigationViewDisplayModeChangedEventArgs const&) { +void MainPage::NavigationView_DisplayModeChanged(MUXC::NavigationView const& nv, MUXC::NavigationViewDisplayModeChangedEventArgs const&) { + bool isExpanded = nv.DisplayMode() == MUXC::NavigationViewDisplayMode::Expanded; + nv.IsPaneToggleButtonVisible(!isExpanded); + if (isExpanded) { + nv.IsPaneOpen(true); + } + + // HACK! + // 使导航栏的可滚动区域不会覆盖标题栏 + FrameworkElement menuItemsScrollViewer = nv.GetTemplateChild(L"MenuItemsScrollViewer").as(); + menuItemsScrollViewer.Margin({ 0,isExpanded ? 
TitleBar().ActualHeight() : 0.0,0,0}); + XamlUtils::UpdateThemeOfTooltips(*this, ActualTheme()); } @@ -184,8 +185,6 @@ fire_and_forget MainPage::NavigationView_ItemInvoked(MUXC::NavigationView const& // 同步调用 ShowAt 有时会失败 co_await Dispatcher().TryRunAsync(CoreDispatcherPriority::Normal, [this]() { - // 仅限 Win10:导航栏处于 Minimal 状态时会导致 Flyout 不在正确位置弹出 - // 有一个修复方法,但会导致性能损失 NewProfileFlyout().ShowAt(NewProfileNavigationViewItem()); }); } @@ -358,7 +357,7 @@ void MainPage::_ProfileService_ProfileAdded(Profile& profile) { item.Icon(FontIcon()); _LoadIcon(item, profile, false); - IVector navMenuItems = __super::RootNavigationView().MenuItems(); + IVector navMenuItems = RootNavigationView().MenuItems(); navMenuItems.InsertAt(navMenuItems.Size() - 1, item); RootNavigationView().SelectedItem(item); } diff --git a/src/Magpie.App/MainPage.h b/src/Magpie.App/MainPage.h index f69cf3780..a4bd437d2 100644 --- a/src/Magpie.App/MainPage.h +++ b/src/Magpie.App/MainPage.h @@ -22,7 +22,7 @@ struct MainPage : MainPageT { void NavigationView_PaneClosing(MUXC::NavigationView const&, MUXC::NavigationViewPaneClosingEventArgs const&); - void NavigationView_DisplayModeChanged(MUXC::NavigationView const&, MUXC::NavigationViewDisplayModeChangedEventArgs const&); + void NavigationView_DisplayModeChanged(MUXC::NavigationView const& nv, MUXC::NavigationViewDisplayModeChangedEventArgs const&); fire_and_forget NavigationView_ItemInvoked(MUXC::NavigationView const&, MUXC::NavigationViewItemInvokedEventArgs const& args); diff --git a/src/Magpie.App/MainPage.idl b/src/Magpie.App/MainPage.idl index 12edd3b02..924e66e99 100644 --- a/src/Magpie.App/MainPage.idl +++ b/src/Magpie.App/MainPage.idl @@ -3,6 +3,8 @@ namespace Magpie.App { MainPage(); Microsoft.UI.Xaml.Controls.NavigationView RootNavigationView { get; }; + TitleBarControl TitleBar { get; }; + NewApplicationViewModel NewApplicationViewModel { get; }; void NavigateToAboutPage(); diff --git a/src/Magpie.App/MainPage.xaml 
b/src/Magpie.App/MainPage.xaml index 54ccb707a..4d5af1a91 100644 --- a/src/Magpie.App/MainPage.xaml +++ b/src/Magpie.App/MainPage.xaml @@ -7,152 +7,158 @@ xmlns:muxc="using:Microsoft.UI.Xaml.Controls" Loaded="Loaded" mc:Ignorable="d"> - - - - - + + + + + 0 + 1,0,0,0 + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + diff --git a/src/Magpie.App/ShortcutDialog.cpp b/src/Magpie.App/ShortcutDialog.cpp index d699ad683..c893d9cd7 100644 --- a/src/Magpie.App/ShortcutDialog.cpp +++ b/src/Magpie.App/ShortcutDialog.cpp @@ -4,9 +4,6 @@ #include "ShortcutDialog.g.cpp" #endif -using namespace winrt; -using namespace Windows::ApplicationModel::Resources; - namespace winrt::Magpie::App::implementation { void ShortcutDialog::Error(ShortcutError value) { diff --git a/src/Magpie.App/ShortcutDialog.idl b/src/Magpie.App/ShortcutDialog.idl index bd94b6468..a4f25c1fa 100644 --- a/src/Magpie.App/ShortcutDialog.idl +++ b/src/Magpie.App/ShortcutDialog.idl @@ -1,5 +1,5 @@ namespace Magpie.App { - runtimeclass ShortcutDialog : Windows.UI.Xaml.Controls.UserControl { + runtimeclass ShortcutDialog : Windows.UI.Xaml.Controls.Grid { ShortcutDialog(); ShortcutError Error; diff --git a/src/Magpie.App/ShortcutDialog.xaml b/src/Magpie.App/ShortcutDialog.xaml index 9321b490b..be25efd8b 100644 --- a/src/Magpie.App/ShortcutDialog.xaml +++ b/src/Magpie.App/ShortcutDialog.xaml @@ -1,82 +1,82 @@ - - - - - - - + - + + + + + - - - - - - - - - - - - + - - - - - - - - + + + + + + + + + + + + - + + + + + + + + - - - - - - - - + + + + + + + + + diff --git a/src/Magpie.App/TitlebarControl.cpp b/src/Magpie.App/TitlebarControl.cpp new file mode 100644 index 000000000..fa2bcfccc --- /dev/null +++ b/src/Magpie.App/TitlebarControl.cpp @@ -0,0 +1,46 @@ +#include "pch.h" +#include "TitleBarControl.h" +#if __has_include("TitleBarControl.g.cpp") +#include "TitleBarControl.g.cpp" +#endif 
+#include "IconHelper.h" + +using namespace winrt; +using namespace Windows::UI::Xaml::Media::Imaging; + +namespace winrt::Magpie::App::implementation { + +TitleBarControl::TitleBarControl() { + // 异步加载 Logo + [](TitleBarControl* that)->fire_and_forget { + wchar_t exePath[MAX_PATH]; + GetModuleFileName(NULL, exePath, MAX_PATH); + + auto weakThis = that->get_weak(); + + SoftwareBitmapSource bitmap; + co_await bitmap.SetBitmapAsync(IconHelper::ExtractIconFromExe(exePath, 40, USER_DEFAULT_SCREEN_DPI)); + + if (!weakThis.get()) { + co_return; + } + + that->_logo = std::move(bitmap); + that->_propertyChangedEvent(*that, PropertyChangedEventArgs(L"Logo")); + }(this); +} + +void TitleBarControl::Loading(FrameworkElement const&, IInspectable const&) { + MUXC::NavigationView rootNavigationView = Application::Current().as().MainPage().RootNavigationView(); + rootNavigationView.DisplayModeChanged([this](const auto&, const auto& args) { + bool expanded = args.DisplayMode() == MUXC::NavigationViewDisplayMode::Expanded; + VisualStateManager::GoToState(*this, expanded ? L"Expanded" : L"Compact", true); + }); +} + +void TitleBarControl::IsWindowActive(bool value) { + VisualStateManager::GoToState(*this, value ? 
L"Active" : L"NotActive", false); + CaptionButtons().IsWindowActive(value); +} + +} diff --git a/src/Magpie.App/TitlebarControl.h b/src/Magpie.App/TitlebarControl.h new file mode 100644 index 000000000..fe7e63a93 --- /dev/null +++ b/src/Magpie.App/TitlebarControl.h @@ -0,0 +1,35 @@ +#pragma once +#include "TitleBarControl.g.h" + +namespace winrt::Magpie::App::implementation { +struct TitleBarControl : TitleBarControlT { + TitleBarControl(); + + void Loading(FrameworkElement const&, IInspectable const&); + + Imaging::SoftwareBitmapSource Logo() const noexcept { + return _logo; + } + + event_token PropertyChanged(PropertyChangedEventHandler const& value) { + return _propertyChangedEvent.add(value); + } + + void PropertyChanged(event_token const& token) { + _propertyChangedEvent.remove(token); + } + + void IsWindowActive(bool value); + +private: + Imaging::SoftwareBitmapSource _logo{ nullptr }; + event _propertyChangedEvent; +}; +} + +namespace winrt::Magpie::App::factory_implementation { + +struct TitleBarControl : TitleBarControlT { +}; + +} diff --git a/src/Magpie.App/TitlebarControl.idl b/src/Magpie.App/TitlebarControl.idl new file mode 100644 index 000000000..fd80b6445 --- /dev/null +++ b/src/Magpie.App/TitlebarControl.idl @@ -0,0 +1,10 @@ +namespace Magpie.App { + runtimeclass TitleBarControl : Windows.UI.Xaml.Controls.UserControl, Windows.UI.Xaml.Data.INotifyPropertyChanged { + TitleBarControl(); + + Windows.UI.Xaml.Media.Imaging.SoftwareBitmapSource Logo { get; }; + CaptionButtonsControl CaptionButtons { get; }; + + void IsWindowActive(Boolean value); + } +} diff --git a/src/Magpie.App/TitlebarControl.xaml b/src/Magpie.App/TitlebarControl.xaml new file mode 100644 index 000000000..88da5eed2 --- /dev/null +++ b/src/Magpie.App/TitlebarControl.xaml @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Magpie.App/ToggleSwitch.xaml b/src/Magpie.App/ToggleSwitch.xaml deleted file 
mode 100644 index 6d765bbc9..000000000 --- a/src/Magpie.App/ToggleSwitch.xaml +++ /dev/null @@ -1,654 +0,0 @@ - - - - - - - diff --git a/src/Magpie.App/conanfile.txt b/src/Magpie.App/conanfile.txt new file mode 100644 index 000000000..2c14d3a01 --- /dev/null +++ b/src/Magpie.App/conanfile.txt @@ -0,0 +1,14 @@ +[requires] +fmt/9.1.0 +spdlog/1.11.0 +parallel-hashmap/1.37 +rapidjson/cci.20220822 +kuba-zip/0.2.6 + +[generators] +visual_studio + +[options] +fmt:header_only=True +spdlog:header_only=True +spdlog:no_exceptions=True diff --git a/src/Magpie.App/packages.config b/src/Magpie.App/packages.config index fb6919caa..b95621d37 100644 --- a/src/Magpie.App/packages.config +++ b/src/Magpie.App/packages.config @@ -1,6 +1,6 @@  - + \ No newline at end of file diff --git a/src/Magpie.Core/EffectCacheManager.cpp b/src/Magpie.Core/EffectCacheManager.cpp index dab84e476..79d35a821 100644 --- a/src/Magpie.Core/EffectCacheManager.cpp +++ b/src/Magpie.Core/EffectCacheManager.cpp @@ -6,47 +6,10 @@ #include "CommonSharedConstants.h" #include #include "Utils.h" - -// YAS 暂不支持 ARM64 -// https://github.com/niXman/yas/pull/121 -#ifdef _M_ARM64 -#define _LITTLE_ENDIAN -#endif -#pragma warning(push) -// C4458:“size”的声明隐藏了类成员 -// C4127:条件表达式是常量 -#pragma warning(disable: 4458 4127) -#include -#include -#include -#include -#include -#include -#include -#pragma warning(pop) - +#include "YasHelper.h" namespace yas::detail { -// SmallVector -template -struct serializer< - type_prop::not_a_fundamental, - ser_case::use_internal_serializer, - F, - SmallVector -> { - template - static Archive& save(Archive& ar, const SmallVector& vector) { - return concepts::array::save(ar, vector); - } - - template - static Archive& load(Archive& ar, SmallVector& vector) { - return concepts::array::load(ar, vector); - } -}; - // winrt::com_ptr template struct serializer< @@ -60,10 +23,7 @@ struct serializer< uint32_t size = (uint32_t)blob->GetBufferSize(); ar& size; - BYTE* buf = 
(BYTE*)blob->GetBufferPointer(); - for (uint32_t i = 0; i < size; ++i) { - ar& (*buf++); - } + ar.write(blob->GetBufferPointer(), size); return ar; } @@ -78,10 +38,7 @@ struct serializer< throw new std::exception(); } - BYTE* buf = (BYTE*)blob->GetBufferPointer(); - for (uint32_t i = 0; i < size; ++i) { - ar& (*buf++); - } + ar.read(blob->GetBufferPointer(), size); return ar; } @@ -91,11 +48,6 @@ struct serializer< namespace Magpie::Core { -template -void serialize(Archive& ar, EffectConstant& o) { - ar& o.defaultValue& o.minValue& o.maxValue& o.step; -} - template void serialize(Archive& ar, EffectParameterDesc& o) { ar& o.name& o.label& o.constant; @@ -121,15 +73,11 @@ void serialize(Archive& ar, EffectDesc& o) { ar& o.name& o.outSizeExpr& o.params& o.textures& o.samplers& o.passes& o.flags; } - static constexpr const uint32_t MAX_CACHE_COUNT = 127; // 缓存版本 // 当缓存文件结构有更改时更新它,使旧缓存失效 -static constexpr const uint32_t CACHE_VERSION = 10; - -// 缓存的压缩等级 -static constexpr const int CACHE_COMPRESSION_LEVEL = 1; +static constexpr const uint32_t EFFECT_CACHE_VERSION = 12; static std::wstring GetLinearEffectName(std::wstring_view effectName) { @@ -203,16 +151,8 @@ bool EffectCacheManager::Load(std::wstring_view effectName, std::wstring_view ha } std::vector buf; - { - std::vector compressedBuf; - if (!Win32Utils::ReadFile(cacheFileName.c_str(), compressedBuf) || compressedBuf.empty()) { - return false; - } - - if (!Utils::ZstdDecompress(compressedBuf, buf)) { - Logger::Get().Error("解压缓存失败"); - return false; - } + if (!Win32Utils::ReadFile(cacheFileName.c_str(), buf) || buf.empty()) { + return false; } try { @@ -235,26 +175,17 @@ bool EffectCacheManager::Load(std::wstring_view effectName, std::wstring_view ha void EffectCacheManager::Save(std::wstring_view effectName, std::wstring_view hash, const EffectDesc& desc) { std::wstring linearEffectName = GetLinearEffectName(effectName); - std::vector compressedBuf; - { - std::vector buf; - buf.reserve(4096); - - try { - 
yas::vector_ostream os(buf); - yas::binary_oarchive, yas::binary> oa(os); - - oa& desc; - } catch (...) { - Logger::Get().Error("序列化失败"); - return; - } - + std::vector buf; + buf.reserve(4096); + + try { + yas::vector_ostream os(buf); + yas::binary_oarchive, yas::binary> oa(os); - if (!Utils::ZstdCompress(buf, compressedBuf, CACHE_COMPRESSION_LEVEL)) { - Logger::Get().Error("压缩缓存失败"); - return; - } + oa& desc; + } catch (...) { + Logger::Get().Error("序列化 EffectDesc 失败"); + return; } if (!Win32Utils::DirExists(CommonSharedConstants::CACHE_DIR)) { @@ -297,7 +228,7 @@ void EffectCacheManager::Save(std::wstring_view effectName, std::wstring_view ha } std::wstring cacheFileName = GetCacheFileName(linearEffectName, hash, desc.flags); - if (!Win32Utils::WriteFile(cacheFileName.c_str(), compressedBuf.data(), compressedBuf.size())) { + if (!Win32Utils::WriteFile(cacheFileName.c_str(), buf.data(), buf.size())) { Logger::Get().Error("保存缓存失败"); } @@ -335,7 +266,7 @@ std::wstring EffectCacheManager::GetHash( str.reserve(source.size() + 256); str = source; - str.append(fmt::format("CACHE_VERSION:{}\n", CACHE_VERSION)); + str.append(fmt::format("VERSION:{}\n", EFFECT_CACHE_VERSION)); if (inlineParams) { for (const auto& pair : *inlineParams) { str.append(fmt::format("{}:{}\n", StrUtils::UTF16ToUTF8(pair.first), std::lroundf(pair.second * 10000))); @@ -350,7 +281,7 @@ std::wstring EffectCacheManager::GetHash(std::string& source, const phmap::flat_ source.reserve(originSize + 256); - source.append(fmt::format("CACHE_VERSION:{}\n", CACHE_VERSION)); + source.append(fmt::format("VERSION:{}\n", EFFECT_CACHE_VERSION)); if (inlineParams) { for (const auto& pair : *inlineParams) { source.append(fmt::format("{}:{}\n", StrUtils::UTF16ToUTF8(pair.first), std::lroundf(pair.second * 10000))); diff --git a/src/Magpie.Core/EffectCacheManager.h b/src/Magpie.Core/EffectCacheManager.h index d6e5b1857..1bc6a747d 100644 --- a/src/Magpie.Core/EffectCacheManager.h +++ 
b/src/Magpie.Core/EffectCacheManager.h @@ -12,6 +12,9 @@ class EffectCacheManager { return instance; } + EffectCacheManager(const EffectCacheManager&) = delete; + EffectCacheManager(EffectCacheManager&&) = delete; + bool Load(std::wstring_view effectName, std::wstring_view hash, EffectDesc& desc); void Save(std::wstring_view effectName, std::wstring_view hash, const EffectDesc& desc); @@ -28,6 +31,8 @@ class EffectCacheManager { ); private: + EffectCacheManager() = default; + void _AddToMemCache(const std::wstring& cacheFileName, const EffectDesc& desc); bool _LoadFromMemCache(const std::wstring& cacheFileName, EffectDesc& desc); diff --git a/src/Magpie.Core/ImGuiBackend.cpp b/src/Magpie.Core/ImGuiBackend.cpp new file mode 100644 index 000000000..0b26773ae --- /dev/null +++ b/src/Magpie.Core/ImGuiBackend.cpp @@ -0,0 +1,416 @@ +// 原始文件: https://github.com/ocornut/imgui/blob/e489e40a853426767de9ce0637bc0c9ceb431c1e/backends/imgui_impl_dx11.cpp + +#include "pch.h" +#include "ImGuiBackend.h" +#include +#include +#include "MagApp.h" +#include "DeviceResources.h" +#include "StrUtils.h" +#include "Logger.h" + +namespace Magpie::Core { + +static constexpr const char* VERTEX_SHADER = R"( +cbuffer vertexBuffer : register(b0) { + float4x4 ProjectionMatrix; +}; + +struct VS_INPUT { + float2 pos : POSITION; + float4 col : COLOR0; + float2 uv : TEXCOORD0; +}; + +struct PS_INPUT { + float4 pos : SV_POSITION; + float4 col : COLOR0; + float2 uv : TEXCOORD0; +}; + +PS_INPUT main(VS_INPUT input) { + PS_INPUT output; + output.pos = mul( ProjectionMatrix, float4(input.pos.xy, 0.f, 1.f)); + output.col = input.col; + output.uv = input.uv; + return output; +})"; + +static constexpr const char* PIXEL_SHADER = R"( +struct PS_INPUT { + float4 pos : SV_POSITION; + float4 col : COLOR0; + float2 uv : TEXCOORD0; +}; + +sampler sampler0; +Texture2D texture0; + +float4 main(PS_INPUT input) : SV_Target { + return input.col * float4(1, 1, 1, texture0.Sample(sampler0, input.uv).r); +})"; + +struct 
VERTEX_CONSTANT_BUFFER_DX11 { + float mvp[4][4]; +}; + +void ImGuiBackend::_SetupRenderState(ImDrawData* drawData, ID3D11DeviceContext* ctx) noexcept { + D3D11_VIEWPORT vp{}; + vp.Width = drawData->DisplaySize.x; + vp.Height = drawData->DisplaySize.y; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + ctx->RSSetViewports(1, &vp); + + ctx->IASetInputLayout(_inputLayout.get()); + { + unsigned int stride = sizeof(ImDrawVert); + unsigned int offset = 0; + + ID3D11Buffer* t = _vertexBuffer.get(); + ctx->IASetVertexBuffers(0, 1, &t, &stride, &offset); + } + + ctx->IASetIndexBuffer(_indexBuffer.get(), sizeof(ImDrawIdx) == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT, 0); + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ctx->VSSetShader(_vertexShader.get(), nullptr, 0); + { + ID3D11Buffer* t = _vertexConstantBuffer.get(); + ctx->VSSetConstantBuffers(0, 1, &t); + } + ctx->PSSetShader(_pixelShader.get(), nullptr, 0); + { + ID3D11SamplerState* t = _fontSampler.get(); + ctx->PSSetSamplers(0, 1, &t); + } + + const float blend_factor[4]{}; + ctx->OMSetBlendState(_blendState.get(), blend_factor, 0xffffffff); + ctx->RSSetState(_rasterizerState.get()); +} + +void ImGuiBackend::RenderDrawData(ImDrawData* drawData) noexcept { + // Avoid rendering when minimized + if (drawData->DisplaySize.x <= 0.0f || drawData->DisplaySize.y <= 0.0f) { + return; + } + + DeviceResources& dr = MagApp::Get().GetDeviceResources(); + ID3D11DeviceContext4* ctx = dr.GetD3DDC(); + ID3D11Device5* d3dDevice = dr.GetD3DDevice(); + + HRESULT hr; + + // Create and grow vertex/index buffers if needed + if (!_vertexBuffer || _vertexBufferSize < drawData->TotalVtxCount) { + _vertexBufferSize = drawData->TotalVtxCount + 5000; + D3D11_BUFFER_DESC desc{}; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.ByteWidth = _vertexBufferSize * sizeof(ImDrawVert); + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + hr = d3dDevice->CreateBuffer(&desc, nullptr, 
_vertexBuffer.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateBuffer 失败", hr); + return; + } + } + if (!_indexBuffer || _indexBufferSize < drawData->TotalIdxCount) { + _indexBufferSize = drawData->TotalIdxCount + 10000; + D3D11_BUFFER_DESC desc{}; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.ByteWidth = _indexBufferSize * sizeof(ImDrawIdx); + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + hr = d3dDevice->CreateBuffer(&desc, nullptr, _indexBuffer.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateBuffer 失败", hr); + return; + } + } + + // Upload vertex/index data into a single contiguous GPU buffer + D3D11_MAPPED_SUBRESOURCE vtxResource, idxResource; + hr = ctx->Map(_vertexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &vtxResource); + if (FAILED(hr)) { + Logger::Get().ComError("Map 失败", hr); + return; + } + + hr = ctx->Map(_indexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &idxResource); + if (FAILED(hr)) { + Logger::Get().ComError("Map 失败", hr); + return; + } + + ImDrawVert* vtxDst = (ImDrawVert*)vtxResource.pData; + ImDrawIdx* idxDst = (ImDrawIdx*)idxResource.pData; + for (int n = 0; n < drawData->CmdListsCount; ++n) { + const ImDrawList* cmdList = drawData->CmdLists[n]; + std::memcpy(vtxDst, cmdList->VtxBuffer.Data, cmdList->VtxBuffer.Size * sizeof(ImDrawVert)); + std::memcpy(idxDst, cmdList->IdxBuffer.Data, cmdList->IdxBuffer.Size * sizeof(ImDrawIdx)); + vtxDst += cmdList->VtxBuffer.Size; + idxDst += cmdList->IdxBuffer.Size; + } + ctx->Unmap(_vertexBuffer.get(), 0); + ctx->Unmap(_indexBuffer.get(), 0); + + // Setup orthographic projection matrix into our constant buffer + // Our visible imgui space lies from drawData->DisplayPos (top left) to drawData->DisplayPos+data_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps. 
+ { + D3D11_MAPPED_SUBRESOURCE mappedResource; + hr = ctx->Map(_vertexConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); + if (FAILED(hr)) { + Logger::Get().ComError("Map 失败", hr); + return; + } + + VERTEX_CONSTANT_BUFFER_DX11* constant_buffer = (VERTEX_CONSTANT_BUFFER_DX11*)mappedResource.pData; + float left = drawData->DisplayPos.x; + float right = drawData->DisplayPos.x + drawData->DisplaySize.x; + float top = drawData->DisplayPos.y; + float bottom = drawData->DisplayPos.y + drawData->DisplaySize.y; + float mvp[4][4] = { + { 2.0f / (right - left), 0.0f, 0.0f, 0.0f }, + { 0.0f, 2.0f / (top - bottom), 0.0f, 0.0f }, + { 0.0f, 0.0f, 0.5f, 0.0f }, + { (right + left) / (left - right), (top + bottom) / (bottom - top), 0.5f, 1.0f }, + }; + std::memcpy(&constant_buffer->mvp, mvp, sizeof(mvp)); + ctx->Unmap(_vertexConstantBuffer.get(), 0); + } + + // Setup desired DX state + _SetupRenderState(drawData, ctx); + + // Render command lists + // (Because we merged all buffers into a single one, we maintain our own offset into them) + int globalIdxOffset = 0; + int globalVtxOffset = 0; + ImVec2 clip_off = drawData->DisplayPos; + for (int n = 0; n < drawData->CmdListsCount; n++) { + const ImDrawList* cmdList = drawData->CmdLists[n]; + for (int cmd_i = 0; cmd_i < cmdList->CmdBuffer.Size; cmd_i++) { + const ImDrawCmd* pcmd = &cmdList->CmdBuffer[cmd_i]; + if (pcmd->UserCallback != nullptr) { + // User callback, registered via ImDrawList::AddCallback() + // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) 
+ if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) + _SetupRenderState(drawData, ctx); + else + pcmd->UserCallback(cmdList, pcmd); + } else { + // Project scissor/clipping rectangles into framebuffer space + ImVec2 clipMin(pcmd->ClipRect.x - clip_off.x, pcmd->ClipRect.y - clip_off.y); + ImVec2 clipMax(pcmd->ClipRect.z - clip_off.x, pcmd->ClipRect.w - clip_off.y); + if (clipMax.x <= clipMin.x || clipMax.y <= clipMin.y) + continue; + + // Apply scissor/clipping rectangle + const D3D11_RECT r = { (LONG)clipMin.x, (LONG)clipMin.y, (LONG)clipMax.x, (LONG)clipMax.y }; + ctx->RSSetScissorRects(1, &r); + + // Bind texture, Draw + ID3D11ShaderResourceView* textureSrv = (ID3D11ShaderResourceView*)pcmd->GetTexID(); + ctx->PSSetShaderResources(0, 1, &textureSrv); + ctx->DrawIndexed(pcmd->ElemCount, pcmd->IdxOffset + globalIdxOffset, pcmd->VtxOffset + globalVtxOffset); + } + } + globalIdxOffset += cmdList->IdxBuffer.Size; + globalVtxOffset += cmdList->VtxBuffer.Size; + } +} + +bool ImGuiBackend::_CreateFontsTexture() noexcept { + ImGuiIO& io = ImGui::GetIO(); + ID3D11Device5* d3dDevice = MagApp::Get().GetDeviceResources().GetD3DDevice(); + + HRESULT hr; + + // 字体纹理使用 R8_UNORM 格式 + unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsAlpha8(&pixels, &width, &height); + + // Upload texture to graphics system + { + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = width; + desc.Height = height; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_R8_UNORM; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + winrt::com_ptr texture = nullptr; + D3D11_SUBRESOURCE_DATA subResource{}; + subResource.pSysMem = pixels; + subResource.SysMemPitch = width; + hr = d3dDevice->CreateTexture2D(&desc, &subResource, texture.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateTexture2D 失败", hr); + return false; + } + + // Create texture view + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{}; + 
srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = desc.MipLevels; + hr = d3dDevice->CreateShaderResourceView(texture.get(), &srvDesc, _fontTextureView.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateShaderResourceView 失败", hr); + return false; + } + } + + // Store our identifier + io.Fonts->SetTexID((ImTextureID)_fontTextureView.get()); + + // Create texture sampler + // (Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling) + { + D3D11_SAMPLER_DESC desc{}; + desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; + desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; + desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; + desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; + hr = d3dDevice->CreateSamplerState(&desc, _fontSampler.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateSamplerState 失败", hr); + return false; + } + } + + // 清理不再需要的数据降低内存占用 + io.Fonts->ClearTexData(); + + return true; +} + +bool ImGuiBackend::_CreateDeviceObjects() noexcept { + ID3D11Device5* d3dDevice = MagApp::Get().GetDeviceResources().GetD3DDevice(); + + HRESULT hr; + + static winrt::com_ptr vertexShaderBlob; + if (!vertexShaderBlob) { + hr = D3DCompile(VERTEX_SHADER, StrUtils::StrLen(VERTEX_SHADER), + nullptr, nullptr, nullptr, "main", "vs_5_0", 0, 0, vertexShaderBlob.put(), nullptr); + if (FAILED(hr)) { + Logger::Get().ComError("编译顶点着色器失败", hr); + return false; + } + } + + hr = d3dDevice->CreateVertexShader( + vertexShaderBlob->GetBufferPointer(), + vertexShaderBlob->GetBufferSize(), + nullptr, + _vertexShader.put() + ); + if (FAILED(hr)) { + Logger::Get().ComError("CreateVertexShader 失败", hr); + return false; + } + + static constexpr D3D11_INPUT_ELEMENT_DESC LOCAL_LAYOUT[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), 
D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, uv), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)IM_OFFSETOF(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; + hr = d3dDevice->CreateInputLayout(LOCAL_LAYOUT, 3, + vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize(), _inputLayout.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateInputLayout 失败", hr); + return false; + } + + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(VERTEX_CONSTANT_BUFFER_DX11); + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + d3dDevice->CreateBuffer(&desc, nullptr, _vertexConstantBuffer.put()); + } + + static winrt::com_ptr pixelShaderBlob; + if (!pixelShaderBlob) { + hr = D3DCompile(PIXEL_SHADER, StrUtils::StrLen(PIXEL_SHADER), + nullptr, nullptr, nullptr, "main", "ps_5_0", 0, 0, pixelShaderBlob.put(), nullptr); + if (FAILED(hr)) { + Logger::Get().ComError("编译像素着色器失败", hr); + return false; + } + } + + hr = d3dDevice->CreatePixelShader( + pixelShaderBlob->GetBufferPointer(), + pixelShaderBlob->GetBufferSize(), + nullptr, + _pixelShader.put() + ); + if (FAILED(hr)) { + Logger::Get().ComError("CreatePixelShader 失败", hr); + return false; + } + + { + D3D11_BLEND_DESC desc{}; + desc.AlphaToCoverageEnable = false; + desc.RenderTarget[0].BlendEnable = true; + desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; + desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; + desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + hr = d3dDevice->CreateBlendState(&desc, _blendState.put()); + if (FAILED(hr)) { + 
Logger::Get().ComError("CreateBlendState 失败", hr); + return false; + } + } + + // Create the rasterizer state + { + D3D11_RASTERIZER_DESC desc{}; + desc.FillMode = D3D11_FILL_SOLID; + desc.CullMode = D3D11_CULL_NONE; + desc.ScissorEnable = true; + hr = d3dDevice->CreateRasterizerState(&desc, _rasterizerState.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateRasterizerState 失败", hr); + return false; + } + } + + if (!_CreateFontsTexture()) { + Logger::Get().Error("_CreateFontsTexture 失败"); + return false; + } + + return true; +} + +void ImGuiBackend::NewFrame() noexcept { + if (!_fontSampler) { + _CreateDeviceObjects(); + } +} + +bool ImGuiBackend::Initialize() noexcept { + // Setup backend capabilities flags + ImGuiIO& io = ImGui::GetIO(); + io.BackendRendererName = "Magpie"; + io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. + + return true; +} + +} diff --git a/src/Magpie.Core/ImGuiBackend.h b/src/Magpie.Core/ImGuiBackend.h new file mode 100644 index 000000000..f9a722591 --- /dev/null +++ b/src/Magpie.Core/ImGuiBackend.h @@ -0,0 +1,40 @@ +#pragma once + +struct ImDrawData; + +namespace Magpie::Core { + +class ImGuiBackend { +public: + ImGuiBackend() = default; + ImGuiBackend(const ImGuiBackend&) = delete; + ImGuiBackend(ImGuiBackend&&) = delete; + + bool Initialize() noexcept; + + void NewFrame() noexcept; + void RenderDrawData(ImDrawData* drawData) noexcept; + +private: + bool _CreateDeviceObjects() noexcept; + + void _SetupRenderState(ImDrawData* drawData, ID3D11DeviceContext* ctx) noexcept; + bool _CreateFontsTexture() noexcept; + + winrt::com_ptr _vertexBuffer; + int _vertexBufferSize = 5000; + + winrt::com_ptr _indexBuffer; + int _indexBufferSize = 10000; + + winrt::com_ptr _vertexShader; + winrt::com_ptr _inputLayout; + winrt::com_ptr _vertexConstantBuffer; + winrt::com_ptr _pixelShader; + winrt::com_ptr _fontSampler; + winrt::com_ptr _fontTextureView; + 
winrt::com_ptr _blendState; + winrt::com_ptr _rasterizerState; +}; + +} diff --git a/src/Magpie.Core/ImGuiFontsCacheManager.cpp b/src/Magpie.Core/ImGuiFontsCacheManager.cpp new file mode 100644 index 000000000..e2f754d95 --- /dev/null +++ b/src/Magpie.Core/ImGuiFontsCacheManager.cpp @@ -0,0 +1,208 @@ +#include "pch.h" +#include "ImGuiFontsCacheManager.h" +#include +#include "YasHelper.h" +#include "Logger.h" +#include "Win32Utils.h" +#include "CommonSharedConstants.h" +#include "StrUtils.h" + +namespace yas::detail { + +// ImVector +template +struct serializer< + type_prop::not_a_fundamental, + ser_case::use_internal_serializer, + F, + ImVector +> { + template + static Archive& save(Archive& ar, const ImVector& vector) noexcept { + uint32_t size = (uint32_t)vector.size(); + ar& size; + + if constexpr (std::integral_constant::value>::value) { + ar.write(vector.Data, sizeof(T) * vector.Size()); + } else { + for (const T& e : vector) { + ar& e; + } + } + + return ar; + } + + template + static Archive& load(Archive& ar, ImVector& vector) noexcept { + uint32_t size = 0; + ar& size; + vector.resize(size); + + if constexpr (std::integral_constant::value>::value) { + ar.read(vector.Data, sizeof(T) * vector.Size()); + } else { + for (T& e : vector) { + ar& e; + } + } + + return ar; + } +}; + +// 对 ImFontAtlas 的序列化与反序列化来自 https://github.com/ocornut/imgui/issues/6169 +template +struct serializer< + type_prop::not_a_fundamental, + ser_case::use_internal_serializer, + F, + ImFontAtlas +> { + template + static Archive& save(Archive& ar, const ImFontAtlas& fontAltas) noexcept { + ar& fontAltas.Flags & fontAltas.TexUvWhitePixel& fontAltas.TexUvLines; + + // 为了方便反序列化,ImFont 两次分别序列化不同部分 + ar& fontAltas.Fonts.size(); + for (ImFont* font : fontAltas.Fonts) { + std::string_view name; + if (font->ConfigData && font->ConfigData->Name[0]) { + name = font->ConfigData->Name; + } + ar& name; + + ar& font->FontSize; + } + + ar& fontAltas.TexWidth& fontAltas.TexHeight; + 
ar.write(fontAltas.TexPixelsAlpha8, fontAltas.TexWidth * fontAltas.TexHeight); + + for (ImFont* font : fontAltas.Fonts) { + ar& font->Glyphs; + } + + return ar; + } + + template + static Archive& load(Archive& ar, ImFontAtlas& fontAltas) noexcept { + fontAltas.ClearTexData(); + ar& fontAltas.Flags& fontAltas.TexUvWhitePixel& fontAltas.TexUvLines; + + int size = 0; + ar& size; + for (int i = 0; i < size; ++i) { + ImFontConfig dummyConfig; + dummyConfig.FontData = IM_ALLOC(1); + dummyConfig.FontDataSize = 1; + dummyConfig.SizePixels = 1.0f; + + std::string name; + ar& name; + std::char_traits::copy(dummyConfig.Name, name.data(), name.size() + 1); + + ImFont* font = fontAltas.AddFont(&dummyConfig); + font->ConfigData = &fontAltas.ConfigData.back(); + font->ConfigDataCount = 1; + font->ContainerAtlas = &fontAltas; + + ar& font->FontSize; + } + + // TexPixelsAlpha8 应在 AddFont 后,AddGlyph前 + ar& fontAltas.TexWidth& fontAltas.TexHeight; + int totalPixels = fontAltas.TexWidth * fontAltas.TexHeight; + fontAltas.TexPixelsAlpha8 = (unsigned char*)IM_ALLOC(totalPixels); + ar.read(fontAltas.TexPixelsAlpha8, totalPixels); + + for (ImFont* font : fontAltas.Fonts) { + ImVector glyphs; + ar& glyphs; + + for (ImFontGlyph& glyph : glyphs) { + font->AddGlyph(font->ConfigData, glyph.Codepoint, glyph.X0, glyph.Y0, glyph.X1, glyph.Y1, + glyph.U0, glyph.V0, glyph.U1, glyph.V1, glyph.AdvanceX); + font->SetGlyphVisible(glyph.Codepoint, glyph.Visible); + } + + font->BuildLookupTable(); + } + + fontAltas.TexReady = true; + + return ar; + } +}; + +} + +namespace Magpie::Core { + +// 缓存版本 +// 当缓存文件结构有更改时更新它,使旧缓存失效 +static constexpr const uint32_t FONTS_CACHE_VERSION = 1; + +static std::wstring GetCacheFileName(const std::wstring_view& language) noexcept { + return StrUtils::ConcatW(CommonSharedConstants::CACHE_DIR, L"fonts_", language); +} + +void ImGuiFontsCacheManager::Save(std::wstring_view language, const ImFontAtlas& fontAltas) noexcept { + _buffer.reserve(131072); + + try { + 
yas::vector_ostream os(_buffer); + yas::binary_oarchive, yas::binary> oa(os); + + oa& FONTS_CACHE_VERSION& fontAltas; + } catch (...) { + Logger::Get().Error("序列化 ImFontAtlas 失败"); + return; + } + + if (!Win32Utils::DirExists(CommonSharedConstants::CACHE_DIR)) { + if (!CreateDirectory(CommonSharedConstants::CACHE_DIR, nullptr)) { + Logger::Get().Win32Error("创建 cache 文件夹失败"); + return; + } + } + + std::wstring cacheFileName = GetCacheFileName(language); + if (!Win32Utils::WriteFile(cacheFileName.c_str(), _buffer.data(), _buffer.size())) { + Logger::Get().Error("保存字体缓存失败"); + } +} + +bool ImGuiFontsCacheManager::Load(std::wstring_view language, ImFontAtlas& fontAltas) noexcept { + if (_buffer.empty()) { + std::wstring cacheFileName = GetCacheFileName(language); + if (!Win32Utils::FileExists(cacheFileName.c_str())) { + return false; + } + + if (!Win32Utils::ReadFile(cacheFileName.c_str(), _buffer) || _buffer.empty()) { + return false; + } + } + + try { + yas::mem_istream mi(_buffer.data(), _buffer.size()); + yas::binary_iarchive ia(mi); + + uint32_t cacheVersion; + ia& cacheVersion; + if (cacheVersion != FONTS_CACHE_VERSION) { + Logger::Get().Info("字体缓存版本不匹配"); + return false; + } + + ia& fontAltas; + } catch (...) 
{ + Logger::Get().Error("反序列化失败"); + return false; + } + + return true; +} + +} diff --git a/src/Magpie.Core/ImGuiFontsCacheManager.h b/src/Magpie.Core/ImGuiFontsCacheManager.h new file mode 100644 index 000000000..0e8616e47 --- /dev/null +++ b/src/Magpie.Core/ImGuiFontsCacheManager.h @@ -0,0 +1,28 @@ +#pragma once + +struct ImFontAtlas; + +namespace Magpie::Core { + +class ImGuiFontsCacheManager { +public: + static ImGuiFontsCacheManager& Get() noexcept { + static ImGuiFontsCacheManager instance; + return instance; + } + + ImGuiFontsCacheManager(const ImGuiFontsCacheManager&) = delete; + ImGuiFontsCacheManager(ImGuiFontsCacheManager&&) = delete; + + bool Load(std::wstring_view language, ImFontAtlas& fontAltas) noexcept; + + void Save(std::wstring_view language, const ImFontAtlas& fontAltas) noexcept; + +private: + ImGuiFontsCacheManager() = default; + + // 不支持在运行时更改语言,因此我们可以缓存字体数据 + std::vector _buffer; +}; + +} diff --git a/src/Magpie.Core/ImGuiHelper.cpp b/src/Magpie.Core/ImGuiHelper.cpp new file mode 100644 index 000000000..1b971eaa9 --- /dev/null +++ b/src/Magpie.Core/ImGuiHelper.cpp @@ -0,0 +1,190 @@ +#include "pch.h" +#include "ImGuiHelper.h" + +static void UnpackAccumulativeOffsetsIntoRanges( + int base_codepoint, + const short* accumulative_offsets, + int accumulative_offsets_count, + ImWchar* out_ranges +) noexcept { + for (int n = 0; n < accumulative_offsets_count; n++, out_ranges += 2) { + out_ranges[0] = out_ranges[1] = (ImWchar)(base_codepoint + accumulative_offsets[n]); + base_codepoint += accumulative_offsets[n]; + } + out_ranges[0] = 0; +} + +const ImWchar* Magpie::Core::ImGuiHelper::GetGlyphRangesChineseSimplifiedOfficial() noexcept { + // 存储了通用规范汉字表中的一级字表(3500字)以及其他一些常用字。 + // 来自 https://zh.wiktionary.org/wiki/Appendix:%E9%80%9A%E7%94%A8%E8%A7%84%E8%8C%83%E6%B1%89%E5%AD%97%E8%A1%A8 + // 由 CJKCharacterSetForImGui 生成,它位于 tools 文件夹中。 + // (Stored as accumulative offsets from the initial unicode codepoint 0x4E00. 
This encoding is designed to helps us compact the source code size.) + static const short accumulative_offsets_from_0x4E00[] = + { + 0,1,2,4,1,1,1,1,2,1,2,1,2,1,2,2,1,1,1,1,1,5,2,1,2,3,1,2,3,2,2,4,1,1,1,2,1,5,2,3,1,2,1,1,1,1,1,2,1,1,2,2,1,4,1,1,1,1,5,10,1,2,11,8,2,1,2,1,2, + 1,2,1,2,1,5,1,6,1,2,1,1,1,2,2,1,1,1,4,8,5,1,1,4,1,1,3,1,2,1,3,2,1,2,1,1,1,10,1,1,5,2,4,2,4,1,4,2,2,2,9,3,2,1,1,6,1,1,1,4,1,1,4,2,4,5,1,4,2,2, + 2,2,7,3,7,1,1,1,1,2,2,4,2,1,4,3,6,10,12,5,4,3,2,14,2,3,3,2,1,1,1,6,1,6,10,4,1,6,5,1,7,1,5,4,8,4,1,1,2,9,19,5,2,4,1,1,5,2,5,20,2,2,9,7,1,11,2, + 9,17,1,8,6,8,27,4,6,9,20,11,13,14,6,23,45,2,2,1,1,1,2,1,2,2,4,3,6,2,6,3,3,3,1,1,3,1,2,1,1,1,1,1,1,2,1,1,3,5,3,1,3,1,5,5,2,2,1,4,4,1,7,3,1,2, + 1,2,1,1,4,5,4,2,3,3,3,2,10,2,3,1,3,9,2,1,3,3,2,1,1,1,2,2,1,1,2,3,1,3,7,1,5,1,1,1,1,2,3,4,4,1,2,3,2,6,1,1,1,1,1,7,1,7,3,4,3,2,15,2,2,1,5,3,13, + 9,19,2,1,1,1,1,2,5,1,1,1,6,1,1,12,4,4,2,2,7,6,7,5,22,4,1,1,5,1,2,13,1,1,2,7,3,7,15,1,1,3,1,2,2,4,1,1,1,4,1,2,1,1,2,1,1,3,2,4,1,1,2,2,1,4,4,1, + 1,2,1,1,2,1,7,3,3,1,3,2,1,9,3,2,5,3,4,2,19,4,2,1,6,1,1,1,1,1,4,3,2,1,1,1,2,5,3,1,1,1,2,1,1,1,1,1,1,1,1,2,1,3,1,1,1,3,1,4,2,1,2,2,1,1,2,1,1,1, + 1,1,2,2,2,4,2,1,1,1,6,1,1,1,2,1,1,1,1,2,3,1,3,1,2,1,2,2,6,2,2,6,5,3,3,1,6,6,11,2,6,1,1,7,7,1,3,1,2,3,1,3,14,1,2,2,5,2,5,5,3,1,3,2,1,1,1,4,1, + 3,6,8,6,1,2,1,1,3,1,4,8,2,5,5,1,2,7,16,4,3,5,2,1,2,13,5,1,2,4,23,3,1,1,4,6,8,4,6,2,3,2,1,14,4,1,10,12,4,4,10,14,9,5,5,23,3,9,18,22,1,2,2,3,2, + 4,41,1,1,36,21,20,5,14,16,1,3,2,2,2,9,3,1,3,6,3,1,5,3,2,23,4,5,8,10,4,2,7,3,4,1,1,1,6,3,1,2,2,1,1,3,2,4,5,8,11,1,1,7,7,9,7,4,5,3,20,1,8,3,17, + 1,25,1,8,4,15,12,3,6,6,5,23,5,3,4,6,13,24,2,14,6,5,1,9,1,24,35,7,1,2,2,3,3,3,11,3,6,2,6,1,4,2,3,8,2,1,1,2,1,1,2,3,3,1,1,1,13,1,1,2,4,2,3,2,1, + 9,2,3,14,1,2,2,1,4,5,1,1,2,1,1,10,1,3,3,12,3,6,11,2,11,4,1,5,1,2,1,6,2,9,3,19,4,2,2,1,3,17,4,3,6,12,5,16,3,17,16,10,2,9,19,8,25,14,1,7,3,29, + 103,4,1,2,1,1,4,2,4,1,2,3,12,8,4,2,2,2,1,1,2,1,3,8,1,1,1,1,1,1,1,2,1,1,1,1,2,4,1,5,3,1,1,1,3,4,1,1,3,2,1,1,1,5,6,1,10,1,1,2,4,3,16,1,1,1,1,3, 
+ 2,3,2,3,1,5,2,3,2,2,2,3,7,3,5,5,3,4,2,2,1,1,1,1,1,1,3,3,1,1,1,3,1,2,4,9,2,1,4,10,2,8,6,2,1,18,2,1,4,14,4,6,5,41,5,2,5,3,11,12,7,6,2,5,14,4, + 31,121,8,16,1,3,1,3,1,1,1,1,2,3,3,1,1,1,3,7,3,1,1,2,1,2,4,4,5,1,2,2,2,1,9,1,6,1,9,1,5,8,7,9,13,16,1,1,2,2,3,1,1,2,5,2,1,3,5,1,3,1,1,2,2,1,2, + 2,1,7,1,6,8,1,1,1,17,1,9,35,1,3,6,2,1,1,6,5,4,2,2,1,3,4,1,5,1,1,8,2,8,1,24,1,2,13,2,5,1,2,1,3,1,8,2,1,4,1,3,1,3,2,1,5,2,5,1,1,8,9,4,9,6,6,2, + 1,6,1,10,1,1,7,7,4,6,4,8,2,1,1,13,4,2,1,1,6,1,3,1,4,2,1,2,5,12,8,8,2,3,2,3,13,2,4,1,3,1,2,1,3,3,6,8,5,4,7,2,9,1,3,3,2,4,3,3,2,8,9,5,1,6,4,7, + 3,1,6,1,1,1,2,2,2,1,3,3,3,8,7,1,6,6,5,5,4,1,3,24,9,4,2,7,13,5,1,8,7,20,3,6,20,22,4,6,2,8,20,34,2,2,2,1,1,1,1,4,2,2,16,9,1,3,8,1,1,6,4,2,1,3, + 1,1,1,4,3,8,4,2,2,1,1,1,1,1,3,3,3,3,2,1,1,4,6,7,1,1,2,1,1,1,2,1,5,1,1,2,1,6,1,5,4,4,3,1,5,2,1,1,1,2,3,1,1,2,2,1,1,2,1,1,1,2,1,3,3,1,2,1,1,1, + 1,3,1,2,2,2,1,3,5,1,1,1,2,1,5,2,5,3,5,4,5,1,1,2,1,1,3,2,1,4,11,3,5,3,1,3,3,1,1,1,1,5,9,1,2,1,1,4,7,8,1,3,1,5,2,6,1,3,3,1,2,6,8,2,3,2,1,1,1,6, + 7,3,15,4,2,1,2,15,2,6,1,3,7,9,3,1,1,3,10,4,1,8,2,14,1,13,10,2,1,3,10,4,15,2,15,1,14,10,1,3,9,6,5,3,1,1,2,5,7,6,3,8,1,4,20,26,47,7,3,2,3,1,6, + 3,4,3,2,8,2,3,4,1,3,6,4,2,2,3,16,4,1,5,6,2,3,3,5,1,2,2,4,2,1,9,4,4,4,6,4,8,9,2,3,1,1,1,1,3,1,4,5,1,3,8,4,6,2,1,4,1,11,1,5,2,1,5,2,13,2,5,8,1, + 6,1,2,5,12,2,6,1,1,4,2,4,4,4,5,10,5,1,23,6,37,4,19,2,2,5,3,2,1,1,8,1,2,2,10,4,2,2,7,2,2,1,1,3,2,3,1,5,3,3,2,1,3,3,5,1,1,1,5,6,3,1,1,4,3,5,2, + 1,14,1,2,3,5,7,5,2,3,2,1,5,1,7,1,4,7,14,11,1,1,1,1,1,8,4,5,7,5,2,1,11,6,2,1,3,4,2,2,3,1,10,9,13,1,1,3,1,5,1,3,2,4,4,1,3,15,2,1,2,1,13,11,4,1, + 17,11,4,1,1,5,2,1,3,13,9,2,2,5,3,3,2,6,14,3,4,5,11,8,1,4,27,3,15,27,29,5,6,2,2,14,1,7,12,12,28,11,34,34,2,9,7,19,20,1,8,16,15,16,28,116,1,1, + 1,4,11,8,4,9,2,3,22,1,1,1,1,1,3,15,2,1,7,6,1,1,11,30,1,2,8,2,4,8,2,3,2,1,4,2,6,10,4,32,2,2,1,7,7,5,1,6,1,5,4,9,1,5,2,14,4,2,1,1,1,1,3,6,6,9, + 
4,4,2,5,1,7,9,2,4,2,4,1,1,3,1,3,5,5,1,2,1,1,1,1,5,5,1,2,9,6,3,3,1,1,2,3,2,6,3,2,6,1,1,4,10,7,5,4,3,7,5,8,9,1,1,1,3,4,1,1,3,1,3,1,2,2,6,13,3, + 1,4,6,3,1,10,6,1,3,9,6,2,4,2,1,2,1,1,1,5,1,3,3,11,6,5,1,5,7,9,3,7,3,3,2,4,2,2,10,5,6,4,3,6,3,1,2,1,5,6,3,2,4,2,9,19,2,38,1,4,2,4,7,12,6,8,5, + 7,4,13,4,6,3,6,4,3,3,1,3,1,11,14,4,9,4,1,12,11,6,13,9,17,4,17,1,22,4,6,14,5,18,13,18,63,59,31,2,2,1,5,1,2,4,2,1,10,1,4,4,3,22,1,1,1,10,1,3,5, + 1,6,16,1,2,4,5,2,1,4,2,12,16,1,11,5,12,10,6,22,2,16,6,3,7,15,7,6,5,5,5,6,13,23,18,16,33,36,2,5,4,1,1,1,1,4,10,1,4,13,2,6,1,5,2,9,3,4,1,6,1, + 43,3,7,3,9,6,8,7,7,2,1,11,1,1,2,1,7,4,18,8,5,1,13,1,1,1,2,6,10,1,69,3,2,2,11,5,14,2,4,1,2,5,4,15,3,19,13,22,2,1,3,7,18,17,1,8,29,5,1,17,19, + 36,53,6,1,1,2,16,1,33,2,2,3,6,3,1,2,5,1,1,1,2,2,1,3,10,7,3,5,5,3,9,5,10,4,14,9,2,6,2,1,5,5,7,3,1,3,7,3,2,7,2,3,8,3,3,3,7,8,6,4,5,38,5,2,3,1, + 1,13,6,14,23,21,3,2,1,4,2,2,1,12,3,24,3,14,7,2,2,5,1,1,1,2,2,1,1,3,4,15,1,3,2,4,1,3,2,3,8,2,20,1,8,7,7,1,5,4,1,15,11,6,2,3,5,1,13,11,1,9,4, + 21,2,1,2,1,6,8,28,4,7,1,2,4,2,2,1,5,11,1,2,1,10,1,7,2,4,22,4,4,6,2,5,16,8,14,1,2,30,1,1,3,6,1,7,8,9,1,2,11,3,4,5,11,19,15,10,57,2,2,2,8,14,7, + 1,1,1,5,3,5,10,1,8,1,3,1,10,42,2,2,1,2,1,2,3,3,2,2,4,1,6,7,5,2,1,2,6,1,8,3,2,3,11,2,1,12,6,19,8,1,1,2,7,17,29,2,1,3,5,2,2,1,9,4,1,4,1,1,4,1, + 2,6,26,12,11,3,5,1,1,3,2,8,2,10,6,12,6,3,5,2,11,2,4,16,13,2,4,1,1,1,2,2,5,2,26,2,5,2,21,2,10,8,2,2,4,22,12,14,13,3,6,16,32,17,7,14,38,8,2,12, + 9,5,1,7,5,1,5,4,3,8,5,12,11,1,3,6,1,15,12,15,22,2,5,4,4,63,211,95,2,2,2,1,3,1,1,3,2,1,1,2,2,1,1,1,3,2,4,1,1,1,1,1,2,3,1,1,2,1,1,2,3,1,1,2,1, + 1,1,3,1,4,2,1,3,3,3,1,1,2,1,4,1,2,1,5,1,3,5,5,1,1,1,2,2,3,3,4,5,4,3,4,4,2,22,1,4,2,3,8,7,1,4,4,24,4,6,6,4,3,3,21,4,4,4,15,4,8,9,7,11,1,4,1,2, + 2,7,1,3,5,2,1,1,26,5,3,2,2,3,4,4,1,1,8,4,2,16,25,3,1,2,2,1,10,2,2,1,2,3,1,1,2,1,4,1,4,1,3,2,6,4,1,1,1,2,3,6,2,8,4,2,2,3,6,9,3,3,2,5,5,4,3,1, + 
5,1,1,2,3,4,21,2,7,6,12,1,5,4,1,16,9,2,9,1,1,3,1,1,10,5,12,1,1,11,24,2,8,25,7,3,6,1,8,4,5,1,6,1,5,2,10,1,11,2,4,1,4,1,3,14,17,23,1,2,1,7,4,4, + 9,7,7,3,1,8,1,6,1,2,2,2,6,4,10,6,2,2,3,3,4,3,1,6,1,11,8,8,1,1,1,1,4,5,25,4,1,9,1,2,14,3,7,2,2,13,2,1,3,4,4,8,18,34,6,1,5,2,1,3,10,3,2,16,4,9, + 8,1,18,8,1,1,15,7,1,2,1,21,26,4,6,2,8,1,5,4,13,9,14,3,22,6,12,5,20,15,37,2,4,3,7,11,16,1,12,1,42,10,6,3,20,15,5,26,6,1,5,15,23,22,61,1,1,1,9, + 17,14,4,1,2,1,1,8,2,7,2,14,1,6,5,17,7,4,14,2,9,10,16,2,2,6,10,1,2,2,1,4,5,26,12,2,3,2,9,2,7,20,2,13,10,45,6,6,5,46,28,13,30,5,7,1,7,3,2,8,2, + 2,3,3,1,4,7,10,3,7,2,9,6,15,2,4,16,1,3,16,4,11,15,5,1,9,14,2,19,5,53,32,2,5,59,1,2,1,1,2,1,9,17,3,26,137,1,9,211,6,53,1,2,1,3,1,4,1,1,1,2,1, + 3,2,1,1,2,1,1,1,1,1,3,2,2,1,1,3,4,4,2,3,3,1,3,1,3,1,5,1,1,2,2,1,2,1,2,1,2,1,2,1,3,2,2,1,2,2,1,2,1,2,3,7,2,6,1,1,2,2,4,1,4,3,3,10,5,6,14,7,9, + 1,14,1,18,145,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,2,1,1,1,1,1,2,1,2,1,1,1,7,1,2,1,1,4,2,2,2,1,3,2,1,1,1,1,2,5,5,4,1,1,1,10,4,5,1,20,4, + 16,1,10,5,8,1,5,10,2,2,5,4,2,5,4,2,2,3,16,5,19,7,1,7,17,2,4,3,27,6,1,5,15,14,28,1,3,3,8,172,1,1,1,3,2,1,1,4,7,2,2,4,2,1,2,1,7,1,2,3,1,2,2,1, + 2,1,4,5,1,2,5,1,8,4,1,3,1,2,1,2,1,6,2,1,3,4,1,2,1,1,1,1,3,8,1,2,3,7,2,4,3,1,1,1,3,3,1,5,1,2,2,3,3,1,2,1,1,1,2,12,10,3,1,2,4,5,6,2,4,4,12,2,8, + 1,7,10,1,17,2,16,3,4,4,13,6,9,4,3,23,5,16,5,23,48,3,1,5,5,6,6,2,5,2,5,4,2,1,7,8,2,2,7,53,3,2,1,1,1,2,163,532,1,10,6,4,1,1,1,2,3,1,1,2,5,3,2, + 8,3,3,2,2,11,11,1,1,16,5,4,2,2,4,2,1,3,1,3,3,1,4,9,1,7,2,1,1,1,8,1,1,9,2,5,7,9,1,11,20,6,9,105,2,3,1,1,1,2,2,3,1,1,1,1,2,3,1,4,9,2,4,11,19,1, + 1,1,1,5,4,5,1,1,2,3,1,1,3,5,12,1,2,4,1,1,11,1,1,14,1,5,4,1,4,4,1,3,11,15,8,2,1,3,1,1,1,5,3,6,19,2,9,3,1,2,5,2,4,3,2,4,1,14,2,20,6,1,25,4,3,5, + 2,1,1,7,11,2,21,2,19,13,58,2,4,6,2,128,1,1,2,1,1,2,1,1,2,1,2,1,1,1,1,2,7,2,3,1,1,4,1,3,4,42,10,6,1,49,85,8,1,2,1,1,3,1,4,2,3,2,4,1,2,3,2,2,3, + 
4,3,15,196,2,1,1,1,2,1,2,3,2,1,2,4,2,2,1,5,3,2,6,3,7,3,4,43,5,59,41,5,1,2,1,10,5,296,5,12,15,8,4,3,13,12,9,9,8,321,2,2,2,1,7,2,4,2,8,2,4,2,4, + 1,5,21,2,10,15,39,21,9,10,3,3,4,23,31,5,13,27,21,47,5,21,6 + }; + static ImWchar base_ranges[] = // not zero-terminated + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0x2000, 0x206F, // General Punctuation + 0x3000, 0x30FF, // CJK Symbols and Punctuations, Hiragana, Katakana + 0x31F0, 0x31FF, // Katakana Phonetic Extensions + 0xFF00, 0xFFEF // Half-width characters + }; + static ImWchar full_ranges[IM_ARRAYSIZE(base_ranges) + IM_ARRAYSIZE(accumulative_offsets_from_0x4E00) * 2 + 1] = { 0 }; + if (!full_ranges[0]) { + memcpy(full_ranges, base_ranges, sizeof(base_ranges)); + UnpackAccumulativeOffsetsIntoRanges(0x4E00, accumulative_offsets_from_0x4E00, IM_ARRAYSIZE(accumulative_offsets_from_0x4E00), full_ranges + IM_ARRAYSIZE(base_ranges)); + } + return &full_ranges[0]; +} + +// 来自 https://github.com/flyinghead/flycast/blob/541544292a3d051839672ffa7bd4524a3e1c1c51/core/rend/gui_util.cpp#L523 +const ImWchar* Magpie::Core::ImGuiHelper::GetGlyphRangesChineseTraditionalOfficial() noexcept { + // Store all official characters for Traditional Chinese. + // Sourced from https://en.wikipedia.org/wiki/List_of_Graphemes_of_Commonly-Used_Chinese_Characters + // (Stored as accumulative offsets from the initial unicode codepoint 0x4E00. This encoding is designed to help us compact the source code size.) 
+ static const short accumulative_offsets_from_0x4E00[] = + { + 0,1,2,5,1,1,1,2,3,1,3,1,1,2,1,5,1,3,4,5,2,5,6,1,2,8,2,6,2,1,1,3,1,3,2,1,4,1,1,10,10,11,4,4,2,3,1,2,3,1,2,1,4,2,3,1,2,3,1,1,2,3,1,1,1,12,6,1, + 2,1,2,1,3,1,2,7,1,1,1,1,1,5,1,4,1,1,11,2,1,3,5,1,1,2,2,8,1,3,2,1,1,4,4,22,1,4,2,2,2,2,1,6,3,1,1,5,1,1,1,1,3,1,2,2,2,1,1,1,2,3,6,3,3,1,3,2,6, + 4,4,3,2,3,2,2,7,5,2,17,1,6,1,9,3,1,1,6,4,1,1,1,6,1,2,3,1,1,1,1,13,1,4,5,2,4,2,6,3,2,1,1,2,2,1,2,2,2,1,1,5,2,2,2,1,2,1,1,1,2,9,6,7,4,2,3,2,1, + 6,5,2,9,15,1,1,1,6,3,5,8,4,5,2,1,9,10,1,5,1,2,2,4,3,7,10,2,4,1,4,9,2,2,7,3,4,4,2,5,2,2,4,1,7,2,2,4,7,2,9,8,5,5,4,1,2,1,1,1,1,1,1,2,1,1,4,1,2, + 1,2,5,6,3,2,1,1,2,1,1,1,3,4,1,1,1,4,4,9,1,3,4,1,3,11,2,2,1,7,4,6,1,6,9,5,1,1,15,1,3,1,15,1,2,3,2,1,1,3,3,1,2,3,1,1,2,4,3,1,5,6,7,4,1,1,1,3,2, + 6,1,1,2,1,6,2,4,3,1,1,1,1,6,2,5,1,1,12,1,5,3,2,1,2,6,2,4,4,1,1,4,4,10,4,1,3,6,1,1,1,18,3,2,4,2,9,3,2,1,1,2,2,1,1,3,2,15,2,3,3,3,1,1,5,1,2,5, + 2,1,5,1,1,2,4,3,1,2,7,5,2,8,5,1,1,1,2,2,2,1,1,1,7,1,1,1,3,3,2,2,2,1,5,1,5,2,2,1,2,2,2,1,1,2,4,5,22,5,6,8,5,9,8,5,1,1,1,2,7,2,1,2,2,4,3,1,1,1, + 1,2,1,1,1,1,1,1,1,1,1,1,1,2,3,4,5,2,1,2,2,1,2,1,1,1,1,1,1,1,9,2,1,1,1,6,1,2,2,2,1,3,2,2,3,1,2,1,2,2,2,1,3,2,2,4,2,18,6,9,2,2,1,1,1,3,1,1,3,4, + 2,5,1,2,2,2,3,1,4,12,1,1,1,1,1,1,3,2,2,5,3,2,3,1,1,2,3,1,1,5,19,4,1,2,1,1,3,1,4,8,2,5,5,1,2,7,4,19,5,2,1,2,2,4,1,6,3,2,1,2,4,5,6,1,6,2,1,2,2, + 1,1,1,4,17,1,2,2,2,1,1,3,2,1,1,9,4,2,2,1,2,8,3,2,2,3,1,1,8,10,1,1,2,5,3,2,7,2,3,2,2,1,1,3,7,8,6,1,2,6,2,1,4,7,2,1,4,4,10,5,1,1,2,2,2,3,2,4, + 15,5,4,2,9,2,1,3,1,1,1,1,3,2,3,4,1,7,5,2,7,1,6,16,3,6,6,2,1,5,4,1,1,6,1,1,2,5,9,1,3,2,4,6,6,1,9,9,5,3,2,5,1,3,2,7,9,4,1,2,1,3,11,2,7,3,3,1, + 1,1,1,16,3,2,3,1,13,5,6,1,8,21,2,1,11,21,8,3,6,11,1,23,2,1,8,3,1,27,3,6,5,1,1,3,1,21,2,5,3,3,1,2,4,4,9,10,4,5,2,3,2,5,9,9,2,1,9,6,5,11,3,4, + 2,10,2,2,3,1,5,1,5,5,2,1,3,10,1,3,18,6,1,3,1,2,3,3,2,1,2,2,1,1,2,1,3,6,1,6,6,3,1,1,5,1,1,1,3,1,2,1,2,6,2,5,2,1,4,5,1,2,2,1,4,5,1,1,6,3,5,1,3, + 
3,4,1,5,1,1,3,6,1,4,6,2,3,8,4,1,2,3,1,1,1,1,4,2,2,7,2,1,2,2,2,15,4,2,2,9,5,2,5,3,1,3,1,3,2,4,13,6,4,1,5,3,1,16,8,4,4,13,8,19,8,1,15,8,1,10,1, + 3,1,7,3,10,1,1,9,2,3,3,12,5,1,14,2,1,17,7,6,6,4,4,8,16,1,2,1,1,2,1,2,2,1,2,4,1,5,2,5,1,2,2,3,2,3,2,3,1,2,2,1,1,2,1,3,8,1,1,1,1,1,6,1,1,1,1,8, + 2,3,1,1,1,3,6,3,2,1,1,1,5,4,2,1,11,1,2,1,2,1,1,1,1,1,2,1,2,7,1,2,2,7,3,1,2,1,1,1,1,2,1,1,2,5,4,10,8,5,6,1,1,1,2,2,1,1,1,4,1,2,3,2,1,1,1,1,4, + 7,4,2,2,1,7,3,2,8,19,4,1,3,13,8,2,6,2,4,1,33,15,1,1,3,2,1,6,5,2,9,1,7,3,3,2,1,2,2,7,7,35,4,25,27,3,22,29,2,1,16,5,2,2,7,1,3,1,3,1,1,1,3,3,3, + 1,1,1,3,6,1,4,1,3,6,5,4,1,2,2,1,2,8,6,2,6,3,1,1,5,3,5,7,8,1,2,4,4,3,1,8,7,1,1,2,2,1,1,1,1,1,1,9,8,6,1,1,3,2,4,6,1,4,2,8,1,1,1,6,3,1,6,1,1,9, + 3,4,3,2,1,2,1,9,7,3,1,3,5,2,3,4,2,5,2,2,1,1,2,1,3,4,7,1,2,4,4,4,2,5,10,2,4,2,9,2,4,5,2,1,4,1,1,1,1,3,1,6,2,3,3,1,1,4,2,1,2,1,1,4,1,1,5,1,1,5, + 3,7,1,1,2,2,7,2,2,4,6,2,7,1,9,1,1,1,4,3,11,6,4,7,1,2,15,1,3,2,1,1,7,3,1,4,2,1,2,5,2,4,14,3,7,3,5,4,9,1,1,1,3,1,1,1,1,1,2,1,15,5,1,4,3,1,7, + 2,4,5,1,3,3,2,10,2,1,1,2,3,1,1,8,1,5,6,4,3,2,2,3,1,1,5,10,1,2,1,2,3,1,1,5,1,7,1,5,1,6,5,1,1,3,1,3,4,13,10,1,5,3,1,3,1,1,1,7,3,2,3,2,3,1,4,1, + 3,3,2,3,3,1,5,2,4,8,4,2,1,3,6,10,3,2,2,1,6,4,8,4,5,1,1,1,3,22,3,12,3,1,1,1,2,2,3,1,8,2,2,1,1,2,1,1,3,1,4,1,4,2,1,8,4,2,2,1,1,3,7,1,1,1,6,1,1, + 2,2,1,4,1,1,4,3,8,10,1,1,4,3,3,3,2,1,1,4,5,1,7,1,1,2,1,1,1,16,3,1,5,4,4,3,1,2,3,2,2,1,1,1,2,1,1,3,2,1,1,2,1,1,1,1,1,1,16,1,1,1,2,2,1,3,5,1,1, + 1,2,6,1,1,8,5,18,2,1,4,11,3,1,4,4,3,5,1,1,5,18,1,1,2,6,1,4,1,4,5,3,1,4,1,3,3,3,4,2,1,2,5,1,1,1,2,2,1,1,1,2,20,6,3,4,2,1,2,4,4,1,6,2,6,1,3,1, + 6,25,2,4,1,2,1,5,2,12,2,1,9,4,3,1,19,1,2,4,7,8,2,6,9,1,1,13,1,8,2,1,2,5,4,6,1,4,2,1,1,1,2,2,9,1,3,2,1,2,1,2,3,1,2,1,4,2,4,2,6,1,5,1,5,4,1,1, + 2,6,1,3,2,6,9,5,4,1,3,1,6,1,6,1,1,3,7,3,2,3,1,6,3,5,2,2,5,1,1,1,4,1,4,1,3,6,6,2,1,2,1,10,1,4,9,1,6,2,3,3,2,3,1,2,4,1,3,1,7,2,3,1,4,4,6,1,2,1, + 
7,1,11,3,1,2,1,3,1,4,9,8,4,1,5,2,1,4,1,11,1,5,2,1,5,2,19,1,6,3,3,11,3,7,2,2,6,1,1,4,2,4,13,1,1,8,5,1,11,6,3,3,6,5,13,3,13,3,4,3,6,6,4,2,1,1, + 3,1,1,3,2,1,1,3,5,1,2,2,7,1,2,4,2,2,7,1,1,2,1,1,1,1,3,3,1,8,9,3,5,1,1,1,5,6,2,1,14,2,1,1,2,2,2,7,1,8,2,6,1,5,2,5,1,18,2,5,2,12,11,1,1,2,1,6, + 2,9,4,3,3,2,2,1,1,5,5,20,4,10,8,1,14,1,3,1,3,2,1,2,1,2,4,4,1,2,1,34,9,2,2,2,1,13,4,6,2,3,5,1,5,2,1,3,2,15,5,2,2,5,3,2,1,2,5,1,3,4,7,4,3,4,1, + 11,8,1,4,30,26,9,1,6,4,2,8,3,2,8,2,2,9,4,14,1,6,1,10,2,5,1,6,3,4,6,10,1,1,3,3,21,11,10,4,1,8,7,2,6,5,1,2,2,18,3,1,1,3,7,3,4,13,1,6,2,1,9,21, + 4,16,8,6,10,3,4,5,3,7,1,5,11,7,1,2,8,22,9,6,12,10,1,2,15,8,3,1,9,2,3,2,2,15,2,1,1,1,1,1,1,3,8,5,1,1,2,4,4,3,3,1,12,2,12,10,1,3,3,2,3,2,3,1,5, + 1,5,2,2,3,1,1,3,4,16,4,14,13,5,1,1,2,3,5,4,4,1,2,1,1,3,3,1,3,2,4,6,2,1,1,14,5,2,6,1,1,1,1,7,2,6,2,8,3,1,3,2,2,3,4,4,2,4,3,2,16,3,3,2,5,1,2, + 1,1,1,1,2,3,1,4,1,2,7,2,2,4,6,1,1,2,3,8,1,2,24,12,4,3,7,4,1,7,1,4,5,2,1,3,24,1,12,3,1,2,2,6,3,1,15,1,1,3,2,5,2,25,1,3,3,3,4,8,1,1,1,4,1,5,1, + 6,1,1,4,3,3,4,2,1,3,3,2,2,2,2,1,1,10,20,1,1,2,3,1,2,1,1,1,3,3,1,2,3,2,4,2,4,5,7,3,7,2,2,3,12,7,1,26,7,6,2,4,1,5,3,5,5,7,4,2,6,2,1,2,4,1,5,2, + 1,24,3,3,2,10,1,2,1,4,9,4,1,1,11,2,1,6,1,1,1,5,1,1,5,6,1,18,3,4,3,9,2,7,1,2,8,2,2,4,4,2,2,6,9,10,3,3,10,2,6,7,1,1,1,21,3,4,1,1,3,7,1,3,2,9,4, + 8,3,2,4,2,4,5,1,2,2,9,8,14,14,5,7,11,8,5,6,2,4,1,13,4,4,3,4,18,1,1,1,1,4,5,2,14,2,5,9,1,6,5,21,4,12,1,15,1,7,5,10,6,19,3,2,11,3,2,6,1,1,1, + 1,1,3,2,15,7,6,10,5,6,9,4,5,8,5,5,1,4,1,5,2,2,3,4,3,3,6,1,1,5,1,1,6,7,11,7,3,11,13,2,2,1,3,2,3,1,2,1,1,1,2,1,2,6,1,1,4,6,4,2,1,2,2,2,1,1,1,5, + 2,6,3,2,5,4,3,1,3,14,4,5,7,5,5,3,17,2,2,10,2,7,2,7,1,8,14,1,1,3,1,25,3,2,1,9,4,11,2,1,7,1,5,1,9,2,7,17,8,2,3,2,1,2,1,5,4,3,2,2,11,9,10,2,7,1, + 4,4,5,10,3,19,13,1,16,5,2,1,3,1,24,3,1,2,2,9,1,2,4,5,2,20,4,1,1,1,2,1,4,1,5,1,5,1,19,8,17,7,3,1,9,13,13,5,13,4,2,1,3,16,1,13,8,9,3,2,2,3,3,3, + 
1,2,1,2,1,1,3,1,1,1,4,1,20,3,5,5,1,2,1,5,3,1,1,3,1,5,6,2,14,1,3,1,3,1,2,8,1,3,7,1,6,7,1,2,4,3,1,1,7,2,3,10,1,3,1,2,3,4,1,13,1,2,1,6,5,1,1,8, + 2,2,3,3,12,1,1,3,3,2,11,4,10,4,6,6,4,2,9,1,3,4,3,2,3,1,6,3,1,1,1,4,2,2,1,1,6,5,3,4,20,2,4,6,5,3,1,3,2,2,3,1,4,2,7,1,2,1,2,2,1,1,2,3,4,1,3,2, + 4,1,3,2,3,8,2,20,1,8,1,13,1,1,2,2,2,2,15,12,1,2,3,2,2,1,2,3,1,13,4,1,1,5,3,5,2,3,13,1,1,5,2,3,2,1,3,3,8,2,2,8,12,4,3,1,1,6,1,2,4,4,1,1,4,10, + 5,10,1,7,9,8,2,6,3,2,2,4,11,26,14,1,3,13,13,3,2,3,6,1,7,8,2,7,3,7,2,2,3,4,5,1,5,5,5,14,4,3,5,3,3,7,13,11,13,4,1,1,14,4,2,1,5,1,1,1,7,5,1,1,3, + 3,1,1,1,5,3,5,13,5,2,12,1,1,23,1,3,4,3,9,3,1,1,1,1,8,2,6,1,13,4,1,2,1,2,18,5,5,3,3,2,1,6,6,2,3,1,14,3,6,4,1,1,2,1,7,5,1,1,9,10,1,7,2,9,2,3, + 1,5,2,3,4,10,3,1,1,1,7,1,9,1,4,1,11,10,1,2,1,2,1,14,6,1,3,2,8,2,7,3,1,3,2,7,11,8,2,3,2,6,2,4,26,3,2,2,2,1,2,2,2,10,21,2,20,4,5,1,2,6,4,12,4, + 3,12,1,3,2,1,2,16,8,3,5,14,7,9,6,17,3,2,4,3,1,12,1,5,1,1,4,9,1,18,1,4,8,2,4,1,22,9,3,2,8,6,12,2,2,5,3,1,11,1,11,1,3,3,2,1,1,2,5,8,12,3,2,2, + 2,1,2,2,1,1,2,3,4,1,2,2,1,1,1,4,1,1,9,3,1,1,3,6,3,4,2,1,1,2,10,5,9,3,1,4,2,6,3,1,7,8,8,6,2,2,9,2,2,2,9,2,3,1,1,2,1,3,2,1,2,1,1,8,3,8,6,2,3,1, + 2,3,5,1,3,3,5,7,13,9,4,6,8,8,3,3,1,5,6,1,3,2,2,1,14,1,5,4,3,8,1,1,2,6,6,3,1,5,10,3,4,3,6,154,2,2,3,7,4,8,4,1,10,10,1,4,2,2,3,2,2,12,3,2,2,2, + 6,6,4,5,1,4,1,6,3,4,2,1,4,2,2,4,7,2,4,2,1,10,1,1,8,7,1,2,6,1,1,1,4,1,2,2,1,6,1,3,2,3,2,1,1,3,23,3,7,2,7,4,12,2,2,4,17,1,1,1,1,3,1,6,1,1,5,1, + 1,1,2,2,1,7,3,2,2,1,2,4,1,3,4,1,1,4,2,1,2,6,9,9,2,8,4,1,3,3,3,1,8,3,1,2,1,4,5,5,3,1,2,2,12,13,6,2,4,2,8,5,8,5,3,2,1,3,1,16,1,5,3,2,1,2,5,1,1, + 5,1,8,2,5,11,1,1,1,3,8,1,10,7,3,1,1,1,2,1,3,3,4,2,9,2,5,4,2,2,1,2,3,6,1,6,1,1,2,2,2,3,2,1,1,1,2,1,3,2,2,7,1,2,1,3,6,2,1,1,9,1,1,2,14,17,1, + 13,8,1,2,1,4,8,13,2,5,7,4,2,6,7,1,4,2,6,2,2,20,1,1,1,3,4,3,1,4,2,1,1,10,16,1,1,1,1,4,14,20,6,1,2,1,1,2,1,7,3,6,1,5,1,2,2,35,1,3,1,13,1,4,4, + 1,3,2,6,2,2,9,18,4,4,5,2,16,4,9,6,1,1,1,2,4,5,6,1,6,1,1,1,1,30,5,4,3,4,1,12,14,4,6,2,3,3,2,1,5,4,2,11,14,9,3,2,20,6,4,3,1,4,2,3,2,6,2,25,2, + 
35,2,1,3,3,7,2,9,1,16,6,6,1,15,15,1,1,3,17,6,3,10,3,8,10,3,1,5,1,6,19,4,2,6,8,7,1,4,1,15,1,1,11,1,3,1,6,9,19,11,7,15,2,4,1,6,1,2,1,8,4,4,2,8, + 17,2,7,16,1,5,1,5,2,5,10,6,1,4,9,5,2,4,5,9,12,2,3,2,2,1,4,1,11,2,3,2,2,6,3,15,7,4,13,9,2,2,7,6,2,12,2,2,10,1,10,17,1,3,9,8,7,1,5,6,2,3,6,1, + 26,3,3,9,10,8,2,5,1,10,2,1,1,14,5,3,2,5,12,1,6,4,2,2,2,4,1,1,5,3,5,7,10,1,6,3,2,5,4,8,13,6,16,1,8,5,5,1,3,1,3,1,2,2,7,11,1,4,1,4,5,5,9,2,1,7, + 5,6,5,1,7,6,3,8,1,18,9,1,4,6,5,3,13,2,2,3,5,4,4,3,16,4,2,6,8,3,3,18,17,17,4,8,2,1,5,2,2,2,1,4,2,1,1,1,3,2,2,4,2,5,3,4,3,6,1,6,5,10,4,1,6,3, + 2,2,1,3,3,2,1,1,1,2,1,1,1,6,3,9,2,5,1,4,2,2,3,6,2,1,2,1,1,1,2,2,2,4,2,10,3,3,2,3,2,2,2,5,4,6,10,1,4,2,1,3,6,1,2,4,2,1,1,2,6,4,9,2,2,2,3,8,4, + 13,8,8,3,2,1,9,2,2,10,6,3,1,4,3,6,3,10,8,1,3,4,157,8,2,5,2,1,3,2,2,4,1,5,7,1,8,1,1,13,1,8,7,3,1,6,10,1,2,1,1,5,1,1,1,1,1,3,3,1,1,2,1,1,3,1, + 1,2,1,1,1,1,1,1,2,1,2,7,1,1,9,2,2,2,1,1,2,4,2,1,7,6,2,1,8,3,2,1,2,2,4,3,5,73,2,1,4,2,3,3,1,3,10,4,5,11,4,6,4,5,11,1,10,8,5,1,2,3,9,1,2,2,2, + 7,2,5,4,7,2,19,1,13,2,3,7,1,1,6,3,1,4,9,2,4,1,1,1,14,6,4,1,2,6,6,4,4,2,5,1,3,2,2,1,3,4,16,9,1,1,1,5,8,6,10,1,1,1,5,2,7,4,25,3,1,2,5,6,1,8,1, + 1,1,6,1,1,2,7,3,1,3,2,9,3,3,1,3,2,1,4,4,1,6,11,2,58,1,3,4,3,2,5,1,1,1,1,1,16,2,1,9,3,3,14,2,1,1,4,1,2,3,4,3,1,1,1,3,3,1,2,2,1,7,2,1,4,1,1,1, + 2,1,1,2,1,1,1,2,1,11,3,1,3,3,4,2,3,1,5,3,1,2,1,1,1,1,2,1,1,3,1,3,2,2,3,5,1,4,1,4,2,1,2,1,2,2,1,2,1,1,1,2,3,3,5,1,1,4,13,1,3,2,2,7,4,3,9,9,4, + 19,7,5,8,8,5,7,9,7,14,6,3,1,24,1,1,1,1,5,5,12,2,4,1,2,9,2,1,11,4,2,2,7,10,2,5,8,1,14,6,1,6,2,2,1,1,1,1,1,1,6,1,4,1,6,3,1,14,20,4,1,2,4,1,9, + 5,17,3,1,6,2,11,6,6,4,7,2,28,5,14,3,2,4,16,7,4,2,2,1,5,4,13,18,6,3,11,4,8,12,8,2,6,2,4,2,3,4,4,3,20,1,2,14,10,7,9,9,4,8,10,2,12,12,5,16,5,9, + 5,1,1,4,3,2,1,27,6,21,22,4,1,2,3,2,10,13,1,14,3,13,2,10,1,1,1,248,9,2,1,6,2,4,2,1,1,1,4,9,2,1,1,3,1,4,4,1,9,11,2,2,1,1,4,4,2,6,5,1,58,5,9,4, + 3,1,9,4,1,4,7,1,1,3,11,1,5,1,1,1,6,6,2,1,2,1,1,1,5,8,1,4,1,2,1,6,1,3,1,2,1,11,1,2,7,3,4,3,5,1,3,1,1,1,2,2,1,1,8,1,3,2,1,2,4,1,5,2,5,3,4,1,2, + 
2,5,4,2,1,2,4,1,1,2,2,3,6,2,9,3,8,6,1,4,1,4,2,4,10,4,5,1,2,2,1,1,4,2,1,4,7,2,6,9,6,2,2,9,8,3,3,7,20,2,3,5,1,7,9,17,6,2,1,5,4,2,1,1,2,1,2,4, + 4,1,1,1,4,1,9,3,9,3,7,1,1,2,11,6,1,1,1,9,3,3,9,4,4,1,1,55,7,2,2,3,4,2,8,23,4,3,5,2,1,3,2,3,2,8,1,1,5,2,4,1,2,4,2,1,5,3,3,3,7,13,8,1,1,6,12, + 1,10,2,56,3,12,3,4,1,1,3,2,1,13,15,1,1,3,4,2,2,2,3,11,4,14,2,13,8,3,18,5,7,7,2,3,2,16,2,3,1,4,3,3,5,62,7,1,7,1,4,19,3,1,1,4,14,7,1,1,12,8,3, + 7,13,8,3,2,1,9,11,5,1,2,1,10,5,4,2,21,8,26,26,3,27,1,30,21,16,6,18,8,4,10,3,22,2,1,32,1,109,4,10,1,2,16,3,12,6,8,3,2,19,4,18,12,3,1,9,2,6, + 23,38,5,14,17,4,14,20,1,32,4,87,4,3,1,2,12,7,1,4,6,2,6,4,1,10,7,1,1,1,5,10,1,1,2,3,4,3,1,1,1,2,8,7,5,3,16,1,6,5,2,4,7,12,8,7,3,12,1,7,13,2, + 2,3,4,2,6,5,22,3,4,8, + }; + static ImWchar base_ranges[] = // not zero-terminated + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0x2000, 0x206F, // General Punctuation + 0x3000, 0x30FF, // CJK Symbols and Punctuations, Hiragana, Katakana + 0x31F0, 0x31FF, // Katakana Phonetic Extensions + 0xFF00, 0xFFEF // Half-width characters + }; + static ImWchar full_ranges[IM_ARRAYSIZE(base_ranges) + IM_ARRAYSIZE(accumulative_offsets_from_0x4E00) * 2 + 1] = { 0 }; + if (!full_ranges[0]) { + memcpy(full_ranges, base_ranges, sizeof(base_ranges)); + UnpackAccumulativeOffsetsIntoRanges(0x4E00, accumulative_offsets_from_0x4E00, IM_ARRAYSIZE(accumulative_offsets_from_0x4E00), full_ranges + IM_ARRAYSIZE(base_ranges)); + } + return &full_ranges[0]; +} diff --git a/src/Magpie.Core/ImGuiHelper.h b/src/Magpie.Core/ImGuiHelper.h new file mode 100644 index 000000000..e560d5235 --- /dev/null +++ b/src/Magpie.Core/ImGuiHelper.h @@ -0,0 +1,19 @@ +#pragma once +#include + +namespace Magpie::Core { + +struct ImGuiHelper { + static const ImWchar* GetGlyphRangesChineseSimplifiedOfficial() noexcept; + static const ImWchar* GetGlyphRangesChineseTraditionalOfficial() noexcept; + + static constexpr ImWchar NUMBER_RANGES[] = { L'0', L'9', 0 }; + static constexpr ImWchar NOT_NUMBER_RANGES[] = { 
0x20, L'0' - 1, L'9' + 1, 0x7E, 0 }; + // Basic Latin + static constexpr ImWchar ENGLISH_RANGES[] = { 0x20, 0x7E, 0 }; + // Basic Latin + Latin-1 Supplement + Latin Extended-A + // 参见 https://en.wikipedia.org/wiki/Latin_Extended-A + static constexpr ImWchar TURKISH_RANGES[] = { 0x20, 0x17F, 0 }; +}; + +} diff --git a/src/Magpie.Core/ImGuiImpl.cpp b/src/Magpie.Core/ImGuiImpl.cpp index fbd9d9a04..cf9c36095 100644 --- a/src/Magpie.Core/ImGuiImpl.cpp +++ b/src/Magpie.Core/ImGuiImpl.cpp @@ -2,7 +2,7 @@ #include "ImGuiImpl.h" #include #include -#include "imgui_impl_dx11.h" +#include "ImGuiBackend.h" #include "MagApp.h" #include "CursorManager.h" #include "DeviceResources.h" @@ -10,14 +10,11 @@ #include "Logger.h" #include "Win32Utils.h" - namespace Magpie::Core { -ImGuiImpl::~ImGuiImpl() { - ImGuiIO& io = ImGui::GetIO(); - io.BackendPlatformName = nullptr; - io.BackendPlatformUserData = nullptr; +ImGuiImpl::ImGuiImpl() {} +ImGuiImpl::~ImGuiImpl() { MagApp::Get().UnregisterWndProcHandler(_handlerId); if (_hHookThread) { @@ -25,7 +22,7 @@ ImGuiImpl::~ImGuiImpl() { WaitForSingleObject(_hHookThread, 1000); } - ImGui_ImplDX11_Shutdown(); + _backend.reset(); ImGui::DestroyContext(); } @@ -159,9 +156,10 @@ bool ImGuiImpl::Initialize() { io.ImeWindowHandle = MagApp::Get().GetHwndHost(); io.ConfigFlags |= ImGuiConfigFlags_NavNoCaptureKeyboard | ImGuiConfigFlags_NoMouseCursorChange; - auto& dr = MagApp::Get().GetDeviceResources(); - ImGui_ImplDX11_Init(dr.GetD3DDevice(), dr.GetD3DDC()); + _backend = std::make_unique(); + _backend->Initialize(); + auto& dr = MagApp::Get().GetDeviceResources(); if (!dr.GetRenderTargetView(dr.GetBackBuffer(), &_rtv)) { Logger::Get().Error("GetRenderTargetView 失败"); return false; @@ -230,7 +228,7 @@ void ImGuiImpl::NewFrame() { bool originWantCaptureMouse = io.WantCaptureMouse; - ImGui_ImplDX11_NewFrame(); + _backend->NewFrame(); ImGui::NewFrame(); // 将所有 ImGUI 窗口限制在视口内 @@ -277,7 +275,7 @@ void ImGuiImpl::EndFrame() { auto d3dDC = 
MagApp::Get().GetDeviceResources().GetD3DDC(); d3dDC->OMSetRenderTargets(1, &_rtv, NULL); - ImGui_ImplDX11_RenderDrawData(ImGui::GetDrawData()); + _backend->RenderDrawData(ImGui::GetDrawData()); } void ImGuiImpl::Tooltip(const char* content, float maxWidth) { diff --git a/src/Magpie.Core/ImGuiImpl.h b/src/Magpie.Core/ImGuiImpl.h index 2fdcbef45..213dd3e69 100644 --- a/src/Magpie.Core/ImGuiImpl.h +++ b/src/Magpie.Core/ImGuiImpl.h @@ -2,9 +2,11 @@ namespace Magpie::Core { +class ImGuiBackend; + class ImGuiImpl { public: - ImGuiImpl() = default; + ImGuiImpl(); ImGuiImpl(const ImGuiImpl&) = delete; ImGuiImpl(ImGuiImpl&&) = delete; @@ -21,6 +23,8 @@ class ImGuiImpl { // 将提示窗口限制在屏幕内 static void Tooltip(const char* content, float maxWidth = -1.0f); private: + std::unique_ptr _backend; + ID3D11RenderTargetView* _rtv = nullptr; uint32_t _handlerId = 0; diff --git a/src/Magpie.Core/MagApp.cpp b/src/Magpie.Core/MagApp.cpp index 696d6457b..7bd25030f 100644 --- a/src/Magpie.Core/MagApp.cpp +++ b/src/Magpie.Core/MagApp.cpp @@ -72,16 +72,19 @@ MagApp::MagApp() : MagApp::~MagApp() {} -static bool CheckSrcWindow(HWND hwndSrc) { +static bool CheckSrcWindow(HWND hwndSrc, bool isAllowScalingMaximized) { if (!WindowHelper::IsValidSrcWindow(hwndSrc)) { Logger::Get().Info("禁止缩放系统窗口"); return false; } // 不缩放最大化和最小化的窗口 - if (Win32Utils::GetWindowShowCmd(hwndSrc) != SW_NORMAL) { - Logger::Get().Info("源窗口已最大化或最小化"); - return false; + if (UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); showCmd != SW_NORMAL) { + if (showCmd != SW_SHOWMAXIMIZED || !isAllowScalingMaximized) { + Logger::Get().Info(StrUtils::Concat("源窗口已", + showCmd == SW_SHOWMAXIMIZED ? 
"最大化" : "最小化")); + return false; + } } // 不缩放过小的窗口 @@ -106,7 +109,7 @@ bool MagApp::Start(HWND hwndSrc, MagOptions&& options) { return false; } - if (!CheckSrcWindow(hwndSrc)) { + if (!CheckSrcWindow(hwndSrc, options.IsAllowScalingMaximized())) { return false; } @@ -183,11 +186,12 @@ bool MagApp::Start(HWND hwndSrc, MagOptions&& options) { winrt::fire_and_forget MagApp::_WaitForSrcMovingOrSizing() { HWND hwndSrc = _hwndSrc; while (true) { - if (!IsWindow(hwndSrc) - || GetForegroundWindow() != hwndSrc - || Win32Utils::GetWindowShowCmd(hwndSrc) != SW_NORMAL - ) { + if (!IsWindow(hwndSrc) || GetForegroundWindow() != hwndSrc) { break; + } else if (UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); showCmd != SW_NORMAL) { + if (showCmd != SW_SHOWMAXIMIZED || !MagApp::Get().GetOptions().IsAllowScalingMaximized()) { + break; + } } // 检查源窗口是否正在调整大小或移动 @@ -412,7 +416,7 @@ bool MagApp::_CreateHostWnd() { return false; } - { + if (!_options.IsAllowScalingMaximized()) { // 源窗口和缩放窗口重合则不缩放,此时源窗口可能是无边框全屏窗口 RECT srcRect; if (!Win32Utils::GetWindowFrameRect(_hwndSrc, srcRect)) { @@ -425,8 +429,10 @@ bool MagApp::_CreateHostWnd() { } } + // WS_EX_NOREDIRECTIONBITMAP 可以避免 WS_EX_LAYERED 导致的额外内存开销 _hwndHost = CreateWindowEx( - (_options.IsDebugMode() ? 0 : WS_EX_TOPMOST) | WS_EX_NOACTIVATE | WS_EX_LAYERED | WS_EX_TRANSPARENT | WS_EX_TOOLWINDOW, + (_options.IsDebugMode() ? 
0 : WS_EX_TOPMOST) | WS_EX_NOACTIVATE + | WS_EX_LAYERED | WS_EX_NOREDIRECTIONBITMAP | WS_EX_TRANSPARENT | WS_EX_TOOLWINDOW, HOST_WINDOW_CLASS_NAME, NULL, // 标题为空,否则会被添加新配置页面列为候选窗口 WS_POPUP, diff --git a/src/Magpie.Core/MagOptions.h b/src/Magpie.Core/MagOptions.h index 83ac9fefc..70aa79f9b 100644 --- a/src/Magpie.Core/MagOptions.h +++ b/src/Magpie.Core/MagOptions.h @@ -43,6 +43,8 @@ struct MagFlags { static constexpr const uint32_t AdjustCursorSpeed = 0x800; static constexpr const uint32_t DrawCursor = 0x1000; static constexpr const uint32_t DisableDirectFlip = 0x2000; + static constexpr const uint32_t DisableFontCache = 0x4000; + static constexpr const uint32_t AllowScalingMaximized = 0x8000; }; struct DownscalingEffect { @@ -79,8 +81,10 @@ struct MagOptions { DEFINE_FLAG_ACCESSOR(IsDisableWindowResizing, MagFlags::DisableWindowResizing, flags) DEFINE_FLAG_ACCESSOR(IsDebugMode, MagFlags::BreakpointMode, flags) DEFINE_FLAG_ACCESSOR(IsDisableEffectCache, MagFlags::DisableEffectCache, flags) + DEFINE_FLAG_ACCESSOR(IsDisableFontCache, MagFlags::DisableFontCache, flags) DEFINE_FLAG_ACCESSOR(IsSaveEffectSources, MagFlags::SaveEffectSources, flags) DEFINE_FLAG_ACCESSOR(IsWarningsAreErrors, MagFlags::WarningsAreErrors, flags) + DEFINE_FLAG_ACCESSOR(IsAllowScalingMaximized, MagFlags::AllowScalingMaximized, flags) DEFINE_FLAG_ACCESSOR(IsSimulateExclusiveFullscreen, MagFlags::SimulateExclusiveFullscreen, flags) DEFINE_FLAG_ACCESSOR(Is3DGameMode, MagFlags::Is3DGameMode, flags) DEFINE_FLAG_ACCESSOR(IsShowFPS, MagFlags::ShowFPS, flags) diff --git a/src/Magpie.Core/Magpie.Core.rc b/src/Magpie.Core/Magpie.Core.rc index afccb946c..899be79c9 100644 --- a/src/Magpie.Core/Magpie.Core.rc +++ b/src/Magpie.Core/Magpie.Core.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 0,10,2,0 - PRODUCTVERSION 0,10,2,0 + FILEVERSION 0,10,3,0 + PRODUCTVERSION 0,10,3,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -41,12 +41,12 @@ BEGIN BEGIN 
BLOCK "000004b0" BEGIN - VALUE "FileVersion", "0.10.2.0" + VALUE "FileVersion", "0.10.3.0" VALUE "InternalName", "Magpie.Core.dll" VALUE "LegalCopyright", "Copyright (C) 2023 Liu Xu" VALUE "OriginalFilename", "Magpie.Core.dll" VALUE "ProductName", "Magpie" - VALUE "ProductVersion", "0.10.2.0" + VALUE "ProductVersion", "0.10.3.0" END END BLOCK "VarFileInfo" diff --git a/src/Magpie.Core/Magpie.Core.vcxproj b/src/Magpie.Core/Magpie.Core.vcxproj index 2e3f420d3..c1e203618 100644 --- a/src/Magpie.Core/Magpie.Core.vcxproj +++ b/src/Magpie.Core/Magpie.Core.vcxproj @@ -96,8 +96,10 @@ + + - + @@ -108,6 +110,7 @@ + @@ -124,10 +127,10 @@ + + - - NotUsing - + @@ -139,18 +142,17 @@ - - - Document - $(OutDir)assets - - + + + false + + diff --git a/src/Magpie.Core/Magpie.Core.vcxproj.filters b/src/Magpie.Core/Magpie.Core.vcxproj.filters index 795421e16..cbd551017 100644 --- a/src/Magpie.Core/Magpie.Core.vcxproj.filters +++ b/src/Magpie.Core/Magpie.Core.vcxproj.filters @@ -56,7 +56,7 @@ Overlay - + Overlay @@ -86,6 +86,15 @@ Helpers + + Helpers + + + Overlay + + + Helpers + @@ -118,7 +127,7 @@ Overlay - + Overlay @@ -136,11 +145,12 @@ Helpers - - - - 资源文件 - + + Helpers + + + Overlay + @@ -150,4 +160,7 @@ + + + \ No newline at end of file diff --git a/src/Magpie.Core/NotoSansSC-Regular.otf b/src/Magpie.Core/NotoSansSC-Regular.otf deleted file mode 100644 index d350ffa79..000000000 Binary files a/src/Magpie.Core/NotoSansSC-Regular.otf and /dev/null differ diff --git a/src/Magpie.Core/OverlayDrawer.cpp b/src/Magpie.Core/OverlayDrawer.cpp index 968c1e40d..67fdff363 100644 --- a/src/Magpie.Core/OverlayDrawer.cpp +++ b/src/Magpie.Core/OverlayDrawer.cpp @@ -2,7 +2,6 @@ #include "OverlayDrawer.h" #include "MagApp.h" #include "DeviceResources.h" -#include #include "ImGuiImpl.h" #include "Renderer.h" #include "GPUTimer.h" @@ -13,15 +12,32 @@ #include "CommonSharedConstants.h" #include "EffectDesc.h" #include // std::bit_ceil -#include #include +#include "ImGuiHelper.h" +#include 
"ImGuiFontsCacheManager.h" -#pragma comment(lib, "wbemuuid.lib") +namespace Magpie::Core { +static const std::wstring& GetSystemFontsFolder() noexcept { + static std::wstring result; -namespace Magpie::Core { + if (result.empty()) { + wchar_t* fontsFolder = nullptr; + HRESULT hr = SHGetKnownFolderPath(FOLDERID_Fonts, 0, NULL, &fontsFolder); + if (FAILED(hr)) { + CoTaskMemFree(fontsFolder); + Logger::Get().ComError("SHGetKnownFolderPath 失败", hr); + return result; + } + + result = fontsFolder; + CoTaskMemFree(fontsFolder); + } + + return result; +} -OverlayDrawer::OverlayDrawer() { +OverlayDrawer::OverlayDrawer() noexcept { HWND hwndSrc = MagApp::Get().GetHwndSrc(); _isSrcMainWnd = Win32Utils::GetWndClassName(hwndSrc) == CommonSharedConstants::MAIN_WINDOW_CLASS_NAME; } @@ -170,63 +186,38 @@ static SmallVector GenerateTimelineColors() { return result; } -bool OverlayDrawer::Initialize() { +bool OverlayDrawer::Initialize() noexcept { _imguiImpl.reset(new ImGuiImpl()); if (!_imguiImpl->Initialize()) { Logger::Get().Error("初始化 ImGuiImpl 失败"); return false; } - ImGuiIO& io = ImGui::GetIO(); - _dpiScale = GetDpiForWindow(MagApp::Get().GetHwndHost()) / 96.0f; ImGui::StyleColorsDark(); ImGuiStyle& style = ImGui::GetStyle(); - style.WindowRounding = 6; + style.PopupRounding = style.WindowRounding = 6; style.FrameBorderSize = 1; style.FrameRounding = 2; style.WindowMinSize = ImVec2(10, 10); style.ScaleAllSizes(_dpiScale); - static std::vector fontData; - if (fontData.empty()) { - if (!Win32Utils::ReadFile( - StrUtils::ConcatW(CommonSharedConstants::ASSETS_DIR, L"NotoSansSC-Regular.otf").c_str(), - fontData - )) { - Logger::Get().Error("读取字体文件失败"); - return false; - } + if (!_BuildFonts()) { + Logger::Get().Error("_BuildFonts 失败"); + return false; } - ImFontConfig config; - config.FontDataOwnedByAtlas = false; - - ImVector uiRanges; - ImFontGlyphRangesBuilder builder; - builder.AddRanges(io.Fonts->GetGlyphRangesDefault()); - builder.AddText("■"); - 
builder.BuildRanges(&uiRanges); - - _fontUI = io.Fonts->AddFontFromMemoryTTF(fontData.data(), (int)fontData.size(), std::floor(18 * _dpiScale), &config, uiRanges.Data); - - ImVector fpsRanges; - builder.Clear(); - builder.AddText("0123456789 FPS"); - builder.BuildRanges(&fpsRanges); - // FPS 的字体尺寸不跟随系统缩放 - _fontFPS = io.Fonts->AddFontFromMemoryTTF(fontData.data(), (int)fontData.size(), 32, &config, fpsRanges.Data); - - io.Fonts->Build(); - _RetrieveHardwareInfo(); _timelineColors = GenerateTimelineColors(); + // 将 _fontUI 设为默认字体 + ImGui::GetIO().FontDefault = _fontUI; + return true; } -void OverlayDrawer::Draw() { +void OverlayDrawer::Draw() noexcept { bool isShowFPS = MagApp::Get().GetOptions().IsShowFPS(); if (!_isUIVisiable && !isShowFPS) { @@ -234,7 +225,6 @@ void OverlayDrawer::Draw() { } _imguiImpl->NewFrame(); - ImGui::PushFont(_fontUI); if (isShowFPS) { _DrawFPS(); @@ -243,13 +233,12 @@ void OverlayDrawer::Draw() { if (_isUIVisiable) { _DrawUI(); } - - ImGui::PopFont(); + ImGui::Render(); _imguiImpl->EndFrame(); } -void OverlayDrawer::SetUIVisibility(bool value) { +void OverlayDrawer::SetUIVisibility(bool value) noexcept { if (_isUIVisiable == value) { return; } @@ -265,7 +254,7 @@ void OverlayDrawer::SetUIVisibility(bool value) { // 使源窗口无法接收用户输入 _EnableSrcWnd(false); - // 由 ImGUI 绘制光标 + // 由 ImGui 绘制光标 ImGui::GetIO().MouseDrawCursor = true; } @@ -294,196 +283,208 @@ void OverlayDrawer::SetUIVisibility(bool value) { } } -void OverlayDrawer::_DrawFPS() { - static float oldOpacity = 0.0f; - static float opacity = 0.0f; - static bool isLocked = false; - // 背景透明时绘制阴影 - const bool drawShadow = opacity < 1e-5f; +static const std::wstring& GetAppLanguage() noexcept { + static std::wstring language; + if (language.empty()) { + winrt::ResourceContext resourceContext = winrt::ResourceContext::GetForViewIndependentUse(); + language = resourceContext.QualifierValues().Lookup(L"Language"); + StrUtils::ToLowerCase(language); + } + return language; +} - static constexpr 
float PADDING_X = 5; - static constexpr float PADDING_Y = 1; +bool OverlayDrawer::_BuildFonts() noexcept { + const std::wstring& language = GetAppLanguage(); - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_FirstUseEver); - ImGui::SetNextWindowBgAlpha(opacity); + ImFontAtlas& fontAtlas = *ImGui::GetIO().Fonts; - ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0f); - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, drawShadow ? ImVec2() : ImVec2(PADDING_X, PADDING_Y)); - if (!ImGui::Begin("FPS", nullptr, ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoFocusOnAppearing | (isLocked ? ImGuiWindowFlags_NoMove : 0) | (drawShadow ? ImGuiWindowFlags_NoBackground : 0))) { - // Early out if the window is collapsed, as an optimization. - ImGui::End(); - return; + bool fontCacheDisabled = MagApp::Get().GetOptions().IsDisableFontCache(); + if (!fontCacheDisabled && ImGuiFontsCacheManager::Get().Load(language, fontAtlas)) { + _fontUI = fontAtlas.Fonts[0]; + _fontMonoNumbers = fontAtlas.Fonts[1]; + _fontFPS = fontAtlas.Fonts[2]; + return true; } - if (oldOpacity != opacity) { - // 透明时无边距,确保文字位置不变 - if (oldOpacity < 1e-5f) { - if (opacity >= 1e-5f) { - ImVec2 windowPos = ImGui::GetWindowPos(); - ImGui::SetWindowPos(ImVec2(windowPos.x - PADDING_X, windowPos.y - PADDING_Y)); - } - } else { - if (opacity < 1e-5f) { - ImVec2 windowPos = ImGui::GetWindowPos(); - ImGui::SetWindowPos(ImVec2(windowPos.x + PADDING_X, windowPos.y + PADDING_Y)); - } - } - oldOpacity = opacity; + fontAtlas.Flags |= ImFontAtlasFlags_NoPowerOfTwoHeight; + if (!MagApp::Get().GetOptions().Is3DGameMode()) { + // 非 3D 游戏模式无需 ImGui 绘制光标 + fontAtlas.Flags |= ImFontAtlasFlags_NoMouseCursors; } - ImGui::PushFont(_fontFPS); + std::wstring fontPath = GetSystemFontsFolder(); + if (Win32Utils::GetOSVersion().IsWin11()) { + fontPath += L"\\SegUIVar.ttf"; + } else { + fontPath += L"\\segoeui.ttf"; + } - ImVec2 cursorPos = ImGui::GetCursorPos(); - // 
不知为何文字无法竖直居中,因此这里调整位置 - cursorPos.y -= 3; - ImGui::SetCursorPosY(cursorPos.y); + std::vector fontData; + if (!Win32Utils::ReadFile(fontPath.c_str(), fontData)) { + Logger::Get().Error("读取字体文件失败"); + return false; + } - std::string fps = fmt::format("{} FPS", MagApp::Get().GetRenderer().GetGPUTimer().GetFramesPerSecond()); - if (drawShadow) { - ImGui::SetCursorPos(ImVec2(cursorPos.x + 1.0f, cursorPos.y + 1.0f)); - ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 0.0f, 0.0f, 0.8f)); - ImGui::TextUnformatted(fps.c_str()); - ImGui::PopStyleColor(); + // 构建字体前 uiRanges 不能析构,因为 ImGui 只保存了指针 + ImVector uiRanges; + _BuildFontUI(language, fontData, uiRanges); + _BuildFontFPS(fontData); - ImGui::SetCursorPos(cursorPos); - ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 0.0f, 0.0f, 0.6f)); - ImGui::TextUnformatted(fps.c_str()); - ImGui::PopStyleColor(); + if (!fontAtlas.Build()) { + Logger::Get().Error("构建字体失败"); + return false; + } - ImGui::SetCursorPos(cursorPos); + if (!fontCacheDisabled) { + ImGuiFontsCacheManager::Get().Save(language, fontAtlas); } - ImGui::TextUnformatted(fps.c_str()); + + return true; +} - ImGui::PopFont(); +void OverlayDrawer::_BuildFontUI(std::wstring_view language, const std::vector& fontData, ImVector& uiRanges) noexcept { + ImFontAtlas& fontAtlas = *ImGui::GetIO().Fonts; - ImGui::PopStyleVar(); + std::string extraFontPath; + const ImWchar* extraRanges = nullptr; + int extraFontNo = 0; - if (ImGui::BeginPopupContextWindow()) { - ImGui::PushItemWidth(200); - ImGui::SliderFloat("Opacity", &opacity, 0.0f, 1.0f); - ImGui::Separator(); - if (ImGui::MenuItem(isLocked ? 
"Unlock" : "Lock", nullptr, nullptr)) { - isLocked = !isLocked; + ImFontGlyphRangesBuilder builder; + + if (language == L"en-us") { + builder.AddRanges(ImGuiHelper::ENGLISH_RANGES); + } else if (language == L"es" || language == L"pt-br") { + // Basic Latin + Latin-1 Supplement + // 参见 https://en.wikipedia.org/wiki/Latin-1_Supplement + builder.AddRanges(fontAtlas.GetGlyphRangesDefault()); + } else if (language == L"ru" || language == L"uk") { + builder.AddRanges(fontAtlas.GetGlyphRangesCyrillic()); + } else if (language == L"tr") { + builder.AddRanges(ImGuiHelper::TURKISH_RANGES); + } else { + builder.AddRanges(fontAtlas.GetGlyphRangesDefault()); + + // 一些语言需要加载额外的字体: + // 简体中文 -> Microsoft YaHei UI + // 繁体中文 -> Microsoft JhengHei UI + // 日语 -> Yu Gothic UI + // 韩语/朝鲜语 -> Malgun Gothic + // 参见 https://learn.microsoft.com/en-us/windows/apps/design/style/typography#fonts-for-non-latin-languages + + extraFontPath = StrUtils::UTF16ToUTF8(GetSystemFontsFolder()); + extraFontPath.push_back(L'\\'); + if (language == L"zh-hans") { + // msyh.ttc: 0 是微软雅黑,1 是 Microsoft YaHei UI + extraFontPath += "msyh.ttc"; + extraFontNo = 1; + extraRanges = ImGuiHelper::GetGlyphRangesChineseSimplifiedOfficial(); + } else if (language == L"zh-hant") { + // msjh.ttc: 0 是 Microsoft JhengHei,1 是 Microsoft JhengHei UI + extraFontPath += "msjh.ttc"; + extraFontNo = 1; + extraRanges = ImGuiHelper::GetGlyphRangesChineseTraditionalOfficial(); + } else if (language == L"ja") { + // YuGothM.ttc: 0 是 Yu Gothic Medium,1 是 Yu Gothic UI + extraFontPath += "YuGothM.ttc"; + extraFontNo = 1; + extraRanges = fontAtlas.GetGlyphRangesJapanese(); + } else if (language == L"ko") { + extraFontPath += "malgun.ttf"; + extraRanges = fontAtlas.GetGlyphRangesKorean(); } - ImGui::PopItemWidth(); - ImGui::EndPopup(); } + builder.SetBit(L'■'); + builder.BuildRanges(&uiRanges); - ImGui::End(); - ImGui::PopStyleVar(); -} + ImFontConfig config; + config.FontDataOwnedByAtlas = false; -#ifdef _M_X64 -// 只在 x86 可用 -static 
std::string GetCPUNameViaCPUID() { - std::string cpuName(48, '\0'); + const float fontSize = 18 * _dpiScale; - std::array cpuInfo{}; + ////////////////////////////////////////////////////////// + // + // uiRanges (+ extraRanges) -> _fontUI + // + ////////////////////////////////////////////////////////// - __cpuid(cpuInfo.data(), 0); - // Calling __cpuid with 0x80000000 as the function_id argument - // gets the number of the highest valid extended ID. - __cpuid(cpuInfo.data(), 0x80000000); +#ifdef _DEBUG + std::char_traits::copy(config.Name, "_fontUI", std::size(config.Name)); +#endif - if (cpuInfo[0] < 0x80000004) { - return {}; + _fontUI = fontAtlas.AddFontFromMemoryTTF( + (void*)fontData.data(), (int)fontData.size(), fontSize, &config, uiRanges.Data); + + if (extraRanges) { + assert(Win32Utils::FileExists(StrUtils::UTF8ToUTF16(extraFontPath).c_str())); + + // 在 MergeMode 下已有字符会跳过而不是覆盖 + config.MergeMode = true; + config.FontNo = extraFontNo; + // 额外字体数据由 ImGui 管理,退出缩放时释放 + config.FontDataOwnedByAtlas = true; + fontAtlas.AddFontFromFileTTF(extraFontPath.c_str(), fontSize, &config, extraRanges); + config.FontDataOwnedByAtlas = false; + config.FontNo = 0; + config.MergeMode = false; } - __cpuidex(cpuInfo.data(), 0x80000002, 0); - memcpy(cpuName.data(), cpuInfo.data(), sizeof(cpuInfo)); - __cpuidex(cpuInfo.data(), 0x80000003, 0); - memcpy(cpuName.data() + 16, cpuInfo.data(), sizeof(cpuInfo)); - __cpuidex(cpuInfo.data(), 0x80000004, 0); - memcpy(cpuName.data() + 32, cpuInfo.data(), sizeof(cpuInfo)); + ////////////////////////////////////////////////////////// + // + // NUMBER_RANGES + NOT_NUMBER_RANGES -> _fontMonoNumbers + // + ////////////////////////////////////////////////////////// - cpuName.resize(StrUtils::StrLen(cpuName.c_str())); - StrUtils::Trim(cpuName); - return cpuName; -} +#ifdef _DEBUG + std::char_traits::copy(config.Name, "_fontMonoNumbers", std::size(config.Name)); #endif -// 非常慢,需要大约 18 ms -static std::string GetCPUNameViaWMI() { - winrt::com_ptr 
wbemLocator = winrt::try_create_instance(CLSID_WbemLocator); - if (!wbemLocator) { - Logger::Get().Error("创建 WbemLocator 失败"); - return ""; - } - - winrt::com_ptr wbemServices; - winrt::com_ptr enumWbemClassObject; - winrt::com_ptr wbemClassObject; - - HRESULT hr = wbemLocator->ConnectServer( - Win32Utils::BStr(L"ROOT\\CIMV2"), - nullptr, - nullptr, - nullptr, - 0, - nullptr, - nullptr, - wbemServices.put() - ); - if (hr != WBEM_S_NO_ERROR) { - return ""; - } - - hr = CoSetProxyBlanket( - wbemServices.get(), - RPC_C_AUTHN_WINNT, - RPC_C_AUTHZ_NONE, - nullptr, - RPC_C_AUTHN_LEVEL_CALL, - RPC_C_IMP_LEVEL_IMPERSONATE, - NULL, - EOAC_NONE - ); - if (FAILED(hr)) { - return ""; - } + // 等宽的数字字符 + config.GlyphMinAdvanceX = config.GlyphMaxAdvanceX = fontSize * 0.42f; + _fontMonoNumbers = fontAtlas.AddFontFromMemoryTTF( + (void*)fontData.data(), (int)fontData.size(), fontSize, &config, ImGuiHelper::NUMBER_RANGES); + + // 其他不等宽的字符 + config.MergeMode = true; + config.GlyphMinAdvanceX = 0; + config.GlyphMaxAdvanceX = std::numeric_limits::max(); + fontAtlas.AddFontFromMemoryTTF( + (void*)fontData.data(), (int)fontData.size(), fontSize, &config, ImGuiHelper::NOT_NUMBER_RANGES); +} - hr = wbemServices->ExecQuery( - Win32Utils::BStr(L"WQL"), - Win32Utils::BStr(L"SELECT NAME FROM Win32_Processor"), - WBEM_FLAG_FORWARD_ONLY | WBEM_FLAG_RETURN_IMMEDIATELY, - nullptr, - enumWbemClassObject.put() - ); - if (hr != WBEM_S_NO_ERROR) { - return ""; - } +void OverlayDrawer::_BuildFontFPS(const std::vector& fontData) noexcept { + ImFontAtlas& fontAtlas = *ImGui::GetIO().Fonts; - ULONG uReturn = 0; - hr = enumWbemClassObject->Next(WBEM_INFINITE, 1, wbemClassObject.put(), &uReturn); - if (hr != WBEM_S_NO_ERROR || uReturn <= 0) { - return ""; - } + ImFontConfig config; + config.FontDataOwnedByAtlas = false; - VARIANT value; - VariantInit(&value); - hr = wbemClassObject->Get(Win32Utils::BStr(L"Name"), 0, &value, 0, 0); - if (hr != WBEM_S_NO_ERROR || value.vt != VT_BSTR) { - return ""; - } + 
const float fpsSize = 24 * _dpiScale; - std::string result = Win32Utils::BStr(value.bstrVal).ToUTF8(); - StrUtils::Trim(result); - return result; -} + ////////////////////////////////////////////////////////// + // + // NUMBER_RANGES + " FPS" -> _fontFPS + // + ////////////////////////////////////////////////////////// -static std::string GetCPUName() { -#ifdef _M_X64 - std::string result = GetCPUNameViaCPUID(); - if (!result.empty()) { - return result; - } -#endif // _M_X64 +#ifdef _DEBUG + std::char_traits::copy(config.Name, "_fontFPS", std::size(config.Name)); +#endif - return GetCPUNameViaWMI(); + // 等宽的数字字符 + config.MergeMode = false; + config.GlyphMinAdvanceX = config.GlyphMaxAdvanceX = fpsSize * 0.42f; + _fontFPS = fontAtlas.AddFontFromMemoryTTF( + (void*)fontData.data(), (int)fontData.size(), fpsSize, &config, ImGuiHelper::NUMBER_RANGES); + + // 其他不等宽的字符 + config.MergeMode = true; + config.GlyphMinAdvanceX = 0; + config.GlyphMaxAdvanceX = std::numeric_limits::max(); + fontAtlas.AddFontFromMemoryTTF( + (void*)fontData.data(), (int)fontData.size(), fpsSize, &config, (const ImWchar*)L" FFPPSS"); } -static std::string_view GetEffectDisplayName(const EffectDesc* desc) { +static std::string_view GetEffectDisplayName(const EffectDesc* desc) noexcept { auto delimPos = desc->name.find_last_of('\\'); if (delimPos == std::string::npos) { return desc->name; @@ -492,14 +493,20 @@ static std::string_view GetEffectDisplayName(const EffectDesc* desc) { } } -struct EffectTimings { - const EffectDesc* desc = nullptr; - std::span passTimings; - float totalTime = 0.0f; -}; +static void DrawTextWithFont(const char* text, ImFont* font) noexcept { + ImGui::PushFont(font); + ImGui::TextUnformatted(text); + ImGui::PopFont(); +} // 返回鼠标悬停的项的序号,未悬停于任何项返回 -1 -static int DrawEffectTimings(const EffectTimings& et, bool showPasses, float maxWindowWidth, std::span colors, bool singleEffect) { +int OverlayDrawer::_DrawEffectTimings( + const _EffectTimings& et, + bool showPasses, + float 
maxWindowWidth, + std::span colors, + bool singleEffect +) noexcept { ImGui::TableNextRow(); ImGui::TableNextColumn(); @@ -534,7 +541,7 @@ static int DrawEffectTimings(const EffectTimings& et, bool showPasses, float max ImGui::Dummy(ImVec2(rightAlignSpace, 0)); ImGui::SameLine(0, 0); } - ImGui::TextUnformatted(fmt::format("{:.3f} ms", et.totalTime).c_str()); + DrawTextWithFont(fmt::format("{:.3f} ms", et.totalTime).c_str(), _fontMonoNumbers); if (showPasses) { ImGui::PopStyleColor(); @@ -592,7 +599,7 @@ static int DrawEffectTimings(const EffectTimings& et, bool showPasses, float max ImGui::Dummy(ImVec2(rightAlignSpace, 0)); ImGui::SameLine(0, 0); } - ImGui::TextUnformatted(time.c_str()); + DrawTextWithFont(time.c_str(), _fontMonoNumbers); } } } else { @@ -600,14 +607,13 @@ static int DrawEffectTimings(const EffectTimings& et, bool showPasses, float max ImGui::Dummy(ImVec2(rightAlignSpace, 0)); ImGui::SameLine(0, 0); } - ImGui::TextUnformatted(fmt::format("{:.3f} ms", et.totalTime).c_str()); + DrawTextWithFont(fmt::format("{:.3f} ms", et.totalTime).c_str(), _fontMonoNumbers); } return result; } -static void DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, - float time, float effectsTotalTime, bool selected = false) { +void OverlayDrawer::_DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, float time, float effectsTotalTime, bool selected) { ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg, color); ImGui::PushStyleColor(ImGuiCol_HeaderActive, color); ImGui::PushStyleColor(ImGuiCol_HeaderHovered, color); @@ -617,7 +623,9 @@ static void DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, if (ImGui::IsItemHovered() || ImGui::IsItemClicked()) { std::string content = fmt::format("{}\n{:.3f} ms\n{}%", name, time, std::lroundf(time / effectsTotalTime * 100)); + ImGui::PushFont(_fontMonoNumbers); ImGuiImpl::Tooltip(content.c_str(), 500 * dpiScale); + ImGui::PopFont(); } // 空间足够时显示文字 @@ -627,17 +635,102 @@ static void 
DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, } else { text.assign(name); } - + float textWidth = ImGui::CalcTextSize(text.c_str()).x; float itemWidth = ImGui::GetItemRectSize().x; float itemSpacing = ImGui::GetStyle().ItemSpacing.x; - if (itemWidth - (selected ? 0 : itemSpacing) > textWidth + 4) { + if (itemWidth - (selected ? 0 : itemSpacing) > textWidth + 4 * _dpiScale) { ImGui::SameLine(0, 0); ImGui::SetCursorPosX(ImGui::GetCursorPosX() + (itemWidth - textWidth - itemSpacing) / 2); ImGui::TextUnformatted(text.c_str()); } } +void OverlayDrawer::_DrawFPS() noexcept { + static float oldOpacity = 0.0f; + static float opacity = 0.0f; + static bool isLocked = false; + // 背景透明时绘制阴影 + const bool drawShadow = opacity < 1e-5f; + + static constexpr float PADDING_X = 5; + static constexpr float PADDING_Y = 1; + + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_FirstUseEver); + ImGui::SetNextWindowBgAlpha(opacity); + + ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, drawShadow ? ImVec2() : ImVec2(PADDING_X, PADDING_Y)); + if (!ImGui::Begin("FPS", nullptr, ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoFocusOnAppearing | (isLocked ? ImGuiWindowFlags_NoMove : 0) | (drawShadow ? ImGuiWindowFlags_NoBackground : 0))) { + // Early out if the window is collapsed, as an optimization. 
+ ImGui::End(); + return; + } + + if (oldOpacity != opacity) { + // 透明时无边距,确保文字位置不变 + if (oldOpacity < 1e-5f) { + if (opacity >= 1e-5f) { + ImVec2 windowPos = ImGui::GetWindowPos(); + ImGui::SetWindowPos(ImVec2(windowPos.x - PADDING_X, windowPos.y - PADDING_Y)); + } + } else { + if (opacity < 1e-5f) { + ImVec2 windowPos = ImGui::GetWindowPos(); + ImGui::SetWindowPos(ImVec2(windowPos.x + PADDING_X, windowPos.y + PADDING_Y)); + } + } + oldOpacity = opacity; + } + + ImGui::PushFont(_fontFPS); + + ImVec2 cursorPos = ImGui::GetCursorPos(); + // 不知为何文字无法竖直居中,因此这里调整位置 + cursorPos.y -= 3; + ImGui::SetCursorPosY(cursorPos.y); + + std::string fps = fmt::format("{} FPS", MagApp::Get().GetRenderer().GetGPUTimer().GetFramesPerSecond()); + if (drawShadow) { + ImGui::SetCursorPos(ImVec2(cursorPos.x + 1.0f, cursorPos.y + 1.0f)); + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 0.0f, 0.0f, 0.8f)); + ImGui::TextUnformatted(fps.c_str()); + ImGui::PopStyleColor(); + + ImGui::SetCursorPos(cursorPos); + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 0.0f, 0.0f, 0.6f)); + ImGui::TextUnformatted(fps.c_str()); + ImGui::PopStyleColor(); + + ImGui::SetCursorPos(cursorPos); + } + ImGui::TextUnformatted(fps.c_str()); + + ImGui::PopFont(); + + ImGui::PopStyleVar(); + + if (ImGui::BeginPopupContextWindow()) { + ImGui::PushItemWidth(150 * _dpiScale); + ImGui::PushFont(_fontMonoNumbers); + ImGui::SliderFloat("##FPS_Opacity", &opacity, 0.0f, 1.0f); + ImGui::PopFont(); + ImGui::SameLine(); + ImGui::TextUnformatted(_GetResourceString(L"Overlay_FPS_Opacity").c_str()); + ImGui::Separator(); + const std::string& lockStr = _GetResourceString(isLocked ? 
L"Overlay_FPS_Unlock" : L"Overlay_FPS_Lock"); + if (ImGui::MenuItem(lockStr.c_str(), nullptr, nullptr)) { + isLocked = !isLocked; + } + ImGui::PopItemWidth(); + + ImGui::EndPopup(); + } + + ImGui::End(); + ImGui::PopStyleVar(); +} + // 自定义提示 static void MyPlotLines(float(*values_getter)(void* data, int idx), void* data, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 graph_size) { // 通过改变光标位置避免绘制提示窗口 @@ -666,10 +759,10 @@ static void MyPlotLines(float(*values_getter)(void* data, int idx), void* data, int v_idx = (int)(t * values_count); float v0 = values_getter(data, (v_idx + values_offset) % values_count); - ImGuiImpl::Tooltip(fmt::format("{:.3f}", v0).c_str()); + ImGuiImpl::Tooltip(fmt::format("{:.1f}", v0).c_str()); } -void OverlayDrawer::_DrawUI() { +void OverlayDrawer::_DrawUI() noexcept { auto& settings = MagApp::Get().GetOptions(); auto& renderer = MagApp::Get().GetRenderer(); auto& gpuTimer = renderer.GetGPUTimer(); @@ -684,7 +777,8 @@ void OverlayDrawer::_DrawUI() { static float initPosX = Win32Utils::GetSizeOfRect(MagApp::Get().GetRenderer().GetOutputRect()).cx - maxWindowWidth; ImGui::SetNextWindowPos(ImVec2(initPosX, 20), ImGuiCond_FirstUseEver); - if (!ImGui::Begin("Profiler", nullptr, ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize)) { + std::string profilerStr = _GetResourceString(L"Overlay_Profiler"); + if (!ImGui::Begin(profilerStr.c_str(), nullptr, ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize)) { ImGui::End(); return; } @@ -692,9 +786,11 @@ void OverlayDrawer::_DrawUI() { // 始终为滚动条预留空间 ImGui::PushTextWrapPos(maxWindowWidth - ImGui::GetStyle().WindowPadding.x - ImGui::GetStyle().ScrollbarSize); ImGui::TextUnformatted(StrUtils::Concat("GPU: ", _hardwareInfo.gpuName).c_str()); - ImGui::TextUnformatted(StrUtils::Concat("CPU: ", _hardwareInfo.cpuName).c_str()); - ImGui::TextUnformatted(StrUtils::Concat("VSync: ", settings.IsVSync() ? 
"ON" : "OFF").c_str()); - ImGui::TextUnformatted(StrUtils::Concat("Capture Method: ", MagApp::Get().GetFrameSource().GetName()).c_str()); + const std::string& vSyncStr = _GetResourceString(L"Overlay_Profiler_VSync"); + const std::string& stateStr = _GetResourceString(settings.IsVSync() ? L"ToggleSwitch/OnContent" : L"ToggleSwitch/OffContent"); + ImGui::TextUnformatted(StrUtils::Concat(vSyncStr, ": ", stateStr).c_str()); + const std::string& captureMethodStr = _GetResourceString(L"Overlay_Profiler_CaptureMethod"); + ImGui::TextUnformatted(StrUtils::Concat(captureMethodStr.c_str(), ": ", MagApp::Get().GetFrameSource().GetName()).c_str()); ImGui::PopTextWrapPos(); ImGui::Spacing(); @@ -710,10 +806,22 @@ void OverlayDrawer::_DrawUI() { _validFrames = std::min(_validFrames + 1, nSamples); // 帧率统计,支持在渲染时间和 FPS 间切换 - if (ImGui::CollapsingHeader("Frame Statistics", ImGuiTreeNodeFlags_DefaultOpen)) { - static bool showFPS = true; + const std::string& frameStatisticsStr = _GetResourceString(L"Overlay_Profiler_FrameStatistics"); + if (ImGui::CollapsingHeader(frameStatisticsStr.c_str(), ImGuiTreeNodeFlags_DefaultOpen)) { + static bool showFrameRates = true; + + ImGui::Spacing(); + const std::string& buttonStr = _GetResourceString(showFrameRates + ? L"Overlay_Profiler_FrameStatistics_SwitchToFrameTimings" + : L"Overlay_Profiler_FrameStatistics_SwitchToFrameRates"); + if (ImGui::Button(buttonStr.c_str())) { + showFrameRates = !showFrameRates; + } + ImGui::Spacing(); + + ImGui::PushFont(_fontMonoNumbers); - if (showFPS) { + if (showFrameRates) { float totalTime = 0; float minTime = FLT_MAX; float minTime2 = FLT_MAX; @@ -740,7 +848,7 @@ void OverlayDrawer::_DrawUI() { MyPlotLines([](void* data, int idx) { float time = (*(std::deque*)data)[idx]; return time < 1e-6 ? 
0 : 1000 / time; - }, &_frameTimes, (int)_frameTimes.size(), 0, fmt::format("avg: {:.3f} FPS", _validFrames * 1000 / totalTime).c_str(), 0, maxFPS, ImVec2(250 * _dpiScale, 80 * _dpiScale)); + }, &_frameTimes, (int)_frameTimes.size(), 0, fmt::format("avg: {:.1f} FPS", _validFrames * 1000 / totalTime).c_str(), 0, maxFPS, ImVec2(250 * _dpiScale, 80 * _dpiScale)); } else { float totalTime = 0; float maxTime = 0; @@ -764,23 +872,20 @@ void OverlayDrawer::_DrawUI() { MyPlotLines([](void* data, int idx) { return (*(std::deque*)data)[idx]; }, &_frameTimes, (int)_frameTimes.size(), 0, - fmt::format("avg: {:.3f} ms", totalTime / _validFrames).c_str(), + fmt::format("avg: {:.1f} ms", totalTime / _validFrames).c_str(), 0, maxTime2 * 1.7f, ImVec2(250 * _dpiScale, 80 * _dpiScale)); } - ImGui::Spacing(); - - if (ImGui::Button(showFPS ? "Switch to timings" : "Switch to FPS")) { - showFPS = !showFPS; - } + ImGui::PopFont(); } ImGui::Spacing(); - if (ImGui::CollapsingHeader("Timings", ImGuiTreeNodeFlags_DefaultOpen)) { + const std::string& timingsStr = _GetResourceString(L"Overlay_Profiler_Timings"); + if (ImGui::CollapsingHeader(timingsStr.c_str(), ImGuiTreeNodeFlags_DefaultOpen)) { const auto& gpuTimings = gpuTimer.GetGPUTimings(); const UINT nEffect = renderer.GetEffectCount(); - SmallVector effectTimings(nEffect); + SmallVector<_EffectTimings, 4> effectTimings(nEffect); { UINT idx = 0; @@ -810,7 +915,11 @@ void OverlayDrawer::_DrawUI() { for (const auto& et : effectTimings) { // 某个效果有多个通道,显示切换按钮 if (et.passTimings.size() > 1) { - if (ImGui::Button(showPasses ? "Switch to effects" : "Switch to passes")) { + ImGui::Spacing(); + const std::string& buttonStr = _GetResourceString(showPasses + ? 
L"Overlay_Profiler_Timings_SwitchToEffects" + : L"Overlay_Profiler_Timings_SwitchToPasses"); + if (ImGui::Button(buttonStr.c_str())) { showPasses = !showPasses; } break; @@ -879,7 +988,7 @@ void OverlayDrawer::_DrawUI() { ImGui::TableNextRow(); UINT i = 0; - for (const EffectTimings& et : effectTimings) { + for (const _EffectTimings& et : effectTimings) { for (UINT j = 0, end = (UINT)et.passTimings.size(); j < end; ++j) { if (et.passTimings[j] < 1e-5f) { continue; @@ -896,7 +1005,7 @@ void OverlayDrawer::_DrawUI() { name = StrUtils::Concat(GetEffectDisplayName(et.desc), "/", et.desc->passes[j].desc); } - DrawTimelineItem(colors[i], _dpiScale, name, et.passTimings[j], effectsTotalTime, selectedIdx == (int)i); + _DrawTimelineItem(colors[i], _dpiScale, name, et.passTimings[j], effectsTotalTime, selectedIdx == (int)i); ++i; } @@ -927,7 +1036,7 @@ void OverlayDrawer::_DrawUI() { } ImGui::TableNextColumn(); - DrawTimelineItem(colors[i], _dpiScale, GetEffectDisplayName(et.desc), et.totalTime, effectsTotalTime, selectedIdx == (int)i); + _DrawTimelineItem(colors[i], _dpiScale, GetEffectDisplayName(et.desc), et.totalTime, effectsTotalTime, selectedIdx == (int)i); } ImGui::EndTable(); @@ -964,7 +1073,7 @@ void OverlayDrawer::_DrawUI() { if (nEffect == 1) { const auto& et = effectTimings[0]; - int hovered = DrawEffectTimings(et, true, maxWindowWidth, colors, true); + int hovered = _DrawEffectTimings(et, true, maxWindowWidth, colors, true); if (hovered >= 0) { selectedIdx = hovered; } @@ -982,7 +1091,7 @@ void OverlayDrawer::_DrawUI() { idx += et.passTimings.size(); } - int hovered = DrawEffectTimings(et, showPasses, maxWindowWidth, colorSpan, false); + int hovered = _DrawEffectTimings(et, showPasses, maxWindowWidth, colorSpan, false); if (hovered >= 0) { selectedIdx = idxBegin + hovered; } @@ -1001,9 +1110,10 @@ void OverlayDrawer::_DrawUI() { ImGui::TableNextRow(); ImGui::TableNextColumn(); - ImGui::TextUnformatted("Total"); + const std::string& totalStr = 
_GetResourceString(L"Overlay_Profiler_Timings_Total"); + ImGui::TextUnformatted(totalStr.c_str()); ImGui::TableNextColumn(); - ImGui::TextUnformatted(fmt::format("{:.3f} ms", effectsTotalTime).c_str()); + DrawTextWithFont(fmt::format("{:.3f} ms", effectsTotalTime).c_str(), _fontMonoNumbers); ImGui::EndTable(); } @@ -1014,16 +1124,13 @@ void OverlayDrawer::_DrawUI() { ImGui::End(); } -void OverlayDrawer::_RetrieveHardwareInfo() { +void OverlayDrawer::_RetrieveHardwareInfo() noexcept { DXGI_ADAPTER_DESC desc{}; HRESULT hr = MagApp::Get().GetDeviceResources().GetGraphicsAdapter()->GetDesc(&desc); _hardwareInfo.gpuName = SUCCEEDED(hr) ? StrUtils::UTF16ToUTF8(desc.Description) : "UNAVAILABLE"; - - std::string cpuName = GetCPUName(); - _hardwareInfo.cpuName = !cpuName.empty() ? std::move(cpuName) : "UNAVAILABLE"; } -void OverlayDrawer::_EnableSrcWnd(bool enable) { +void OverlayDrawer::_EnableSrcWnd(bool enable) noexcept { HWND hwndSrc = MagApp::Get().GetHwndSrc(); if (!_isSrcMainWnd) { // 如果源窗口是 Magpie 主窗口会卡死 @@ -1034,4 +1141,14 @@ void OverlayDrawer::_EnableSrcWnd(bool enable) { } } +const std::string& OverlayDrawer::_GetResourceString(const std::wstring_view& key) noexcept { + static phmap::flat_hash_map cache; + + if (auto it = cache.find(key); it != cache.end()) { + return it->second; + } + + return cache[key] = StrUtils::UTF16ToUTF8(_resourceLoader.GetString(key)); +} + } diff --git a/src/Magpie.Core/OverlayDrawer.h b/src/Magpie.Core/OverlayDrawer.h index e5adaafa3..fdfc9b859 100644 --- a/src/Magpie.Core/OverlayDrawer.h +++ b/src/Magpie.Core/OverlayDrawer.h @@ -1,44 +1,61 @@ #pragma once #include #include "SmallVector.h" - -struct ImFont; +#include namespace Magpie::Core { +struct EffectDesc; class ImGuiImpl; class OverlayDrawer { public: - OverlayDrawer(); + OverlayDrawer() noexcept; OverlayDrawer(const OverlayDrawer&) = delete; OverlayDrawer(OverlayDrawer&&) = delete; ~OverlayDrawer(); - bool Initialize(); + bool Initialize() noexcept; - void Draw(); + void Draw() 
noexcept; bool IsUIVisiable() const noexcept { return _isUIVisiable; } - void SetUIVisibility(bool value); + void SetUIVisibility(bool value) noexcept; private: - void _DrawFPS(); + bool _BuildFonts() noexcept; + void _BuildFontUI(std::wstring_view language, const std::vector& fontData, ImVector& uiRanges) noexcept; + void _BuildFontFPS(const std::vector& fontData) noexcept; + + struct _EffectTimings { + const EffectDesc* desc = nullptr; + std::span passTimings; + float totalTime = 0.0f; + }; + + int _DrawEffectTimings(const _EffectTimings& et, bool showPasses, float maxWindowWidth, std::span colors, bool singleEffect) noexcept; + + void _DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, float time, float effectsTotalTime, bool selected = false); - void _DrawUI(); + void _DrawFPS() noexcept; - void _RetrieveHardwareInfo(); + void _DrawUI() noexcept; - void _EnableSrcWnd(bool enable); + void _RetrieveHardwareInfo() noexcept; + + void _EnableSrcWnd(bool enable) noexcept; + + const std::string& _GetResourceString(const std::wstring_view& key) noexcept; float _dpiScale = 1.0f; - ImFont* _fontUI = nullptr; - ImFont* _fontFPS = nullptr; + ImFont* _fontUI = nullptr; // 普通 UI 文字 + ImFont* _fontMonoNumbers = nullptr; // 普通 UI 文字,但数字部分是等宽的,只支持 ASCII + ImFont* _fontFPS = nullptr; // FPS std::deque _frameTimes; UINT _validFrames = 0; @@ -47,11 +64,12 @@ class OverlayDrawer { struct { std::string gpuName; - std::string cpuName; } _hardwareInfo; std::unique_ptr _imguiImpl; + winrt::ResourceLoader _resourceLoader = winrt::ResourceLoader::GetForViewIndependentUse(); + bool _isUIVisiable = false; bool _isSrcMainWnd = false; }; diff --git a/src/Magpie.Core/Renderer.cpp b/src/Magpie.Core/Renderer.cpp index e65ae0950..98d7c4cea 100644 --- a/src/Magpie.Core/Renderer.cpp +++ b/src/Magpie.Core/Renderer.cpp @@ -36,8 +36,8 @@ bool Renderer::Initialize() { if (MagApp::Get().GetOptions().IsShowFPS()) { _overlayDrawer.reset(new OverlayDrawer()); if 
(!_overlayDrawer->Initialize()) { + _overlayDrawer.reset(); Logger::Get().Error("初始化 OverlayDrawer 失败"); - return false; } } @@ -167,6 +167,7 @@ void Renderer::SetUIVisibility(bool value) { if (!_overlayDrawer) { _overlayDrawer.reset(new OverlayDrawer()); if (!_overlayDrawer->Initialize()) { + _overlayDrawer.reset(); Logger::Get().Error("初始化 OverlayDrawer 失败"); return; } @@ -237,15 +238,16 @@ const EffectDesc& Renderer::GetEffectDesc(uint32_t idx) const noexcept { } // 0 -> 可继续缩放 -// 1 -> 前台窗口改变或源窗口最大化/最小化 -// 2 -> 源窗口大小或位置改变 +// 1 -> 前台窗口改变或源窗口最大化(如果不允许缩放最大化的窗口)/最小化 +// 2 -> 源窗口大小或位置改变或最大化(如果允许缩放最大化的窗口) int Renderer::_CheckSrcState() { HWND hwndSrc = MagApp::Get().GetHwndSrc(); + const MagOptions& options = MagApp::Get().GetOptions(); - if (!MagApp::Get().GetOptions().IsDebugMode()) { + if (!options.IsDebugMode()) { HWND hwndForeground = GetForegroundWindow(); // 在 3D 游戏模式下打开游戏内叠加层则全屏窗口可以接收焦点 - if (!MagApp::Get().GetOptions().Is3DGameMode() || !IsUIVisiable() || hwndForeground != MagApp::Get().GetHwndHost()) { + if (!options.Is3DGameMode() || !IsUIVisiable() || hwndForeground != MagApp::Get().GetHwndHost()) { if (hwndForeground && hwndForeground != hwndSrc && !CheckForeground(hwndForeground)) { Logger::Get().Info("前台窗口已改变"); return 1; @@ -253,7 +255,8 @@ int Renderer::_CheckSrcState() { } } - if (Win32Utils::GetWindowShowCmd(hwndSrc) != SW_NORMAL) { + UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); + if (showCmd != SW_NORMAL && (showCmd != SW_SHOWMAXIMIZED || !options.IsAllowScalingMaximized())) { Logger::Get().Info("源窗口显示状态改变"); return 1; } diff --git a/src/Magpie.Core/YasHelper.h b/src/Magpie.Core/YasHelper.h new file mode 100644 index 000000000..2eaa646d4 --- /dev/null +++ b/src/Magpie.Core/YasHelper.h @@ -0,0 +1,66 @@ +#pragma once +// YAS 暂不支持 ARM64 +// https://github.com/niXman/yas/pull/121 +#ifdef _M_ARM64 +#define _LITTLE_ENDIAN +#endif +#pragma warning(push) +// C4458:“size”的声明隐藏了类成员 +// C4127:条件表达式是常量 +#pragma warning(disable: 4458 4127) +#include 
+#include +#include +#include +#include +#include +#include +#include +#pragma warning(pop) + +#include "SmallVector.h" + +namespace yas::detail { + +// 可平凡复制类型 +// 注意不检查指针成员 +template +struct serializer< + type_prop::not_a_fundamental, + ser_case::use_internal_serializer, + F, + T +> { + template && !std::is_pointer_v, T>> + static Archive& save(Archive& ar, const T& o) noexcept { + ar.write(&o, sizeof(T)); + return ar; + } + + template && !std::is_pointer_v, T>> + static Archive& load(Archive& ar, T& o) noexcept { + ar.read(&o, sizeof(T)); + return ar; + } +}; + +// SmallVector +template +struct serializer< + type_prop::not_a_fundamental, + ser_case::use_internal_serializer, + F, + SmallVector +> { + template + static Archive& save(Archive& ar, const SmallVector& vector) noexcept { + return concepts::array::save(ar, vector); + } + + template + static Archive& load(Archive& ar, SmallVector& vector) noexcept { + return concepts::array::load(ar, vector); + } +}; + +} diff --git a/src/Magpie.Core/conanfile.txt b/src/Magpie.Core/conanfile.txt new file mode 100644 index 000000000..91d9a0bda --- /dev/null +++ b/src/Magpie.Core/conanfile.txt @@ -0,0 +1,16 @@ +[requires] +fmt/9.1.0 +spdlog/1.11.0 +parallel-hashmap/1.37 +muparser/2.3.4 +yas/7.1.0 +imgui/1.89.4 + +[generators] +visual_studio + +[options] +fmt:header_only=True +spdlog:header_only=True +spdlog:no_exceptions=True + diff --git a/src/Magpie/Magpie.rc b/src/Magpie/Magpie.rc index 27f7bddf5..eacc50112 100644 --- a/src/Magpie/Magpie.rc +++ b/src/Magpie/Magpie.rc @@ -62,8 +62,8 @@ IDI_APP ICON "Magpie.ico" // VS_VERSION_INFO VERSIONINFO - FILEVERSION 0,10,2,0 - PRODUCTVERSION 0,10,2,0 + FILEVERSION 0,10,3,0 + PRODUCTVERSION 0,10,3,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -78,12 +78,12 @@ BEGIN BEGIN BLOCK "000004b0" BEGIN - VALUE "FileVersion", "0.10.2.0" + VALUE "FileVersion", "0.10.3.0" VALUE "InternalName", "Magpie.exe" VALUE "LegalCopyright", "Copyright (C) 2023 Liu Xu" VALUE "OriginalFilename", 
"Magpie.exe" VALUE "ProductName", "Magpie" - VALUE "ProductVersion", "0.10.2.0" + VALUE "ProductVersion", "0.10.3.0" END END BLOCK "VarFileInfo" diff --git a/src/Magpie/Magpie.vcxproj b/src/Magpie/Magpie.vcxproj index 4e98f667e..c921aeadd 100644 --- a/src/Magpie/Magpie.vcxproj +++ b/src/Magpie/Magpie.vcxproj @@ -121,12 +121,17 @@ + + + false + + - + @@ -136,6 +141,6 @@ - + \ No newline at end of file diff --git a/src/Magpie/Magpie.vcxproj.filters b/src/Magpie/Magpie.vcxproj.filters index c8dc56aa0..cfe3deace 100644 --- a/src/Magpie/Magpie.vcxproj.filters +++ b/src/Magpie/Magpie.vcxproj.filters @@ -54,4 +54,7 @@ Resources + + + \ No newline at end of file diff --git a/src/Magpie/MainWindow.cpp b/src/Magpie/MainWindow.cpp index 851acc216..ceea9effc 100644 --- a/src/Magpie/MainWindow.cpp +++ b/src/Magpie/MainWindow.cpp @@ -8,15 +8,24 @@ namespace Magpie { bool MainWindow::Create(HINSTANCE hInstance, const RECT& windowRect, bool isMaximized) noexcept { - WNDCLASSEXW wcex{}; - wcex.cbSize = sizeof(wcex); - wcex.lpfnWndProc = _WndProc; - wcex.hInstance = hInstance; - wcex.hIcon = LoadIcon(hInstance, MAKEINTRESOURCE(CommonSharedConstants::IDI_APP)); - wcex.hCursor = LoadCursor(nullptr, IDC_ARROW); - wcex.lpszClassName = CommonSharedConstants::MAIN_WINDOW_CLASS_NAME; + static const int _ = [](HINSTANCE hInstance) { + WNDCLASSEXW wcex{}; + wcex.cbSize = sizeof(wcex); + wcex.lpfnWndProc = _WndProc; + wcex.hInstance = hInstance; + wcex.hIcon = LoadIcon(hInstance, MAKEINTRESOURCE(CommonSharedConstants::IDI_APP)); + wcex.hCursor = LoadCursor(nullptr, IDC_ARROW); + wcex.lpszClassName = CommonSharedConstants::MAIN_WINDOW_CLASS_NAME; + RegisterClassEx(&wcex); - RegisterClassEx(&wcex); + wcex.style = CS_DBLCLKS; + wcex.lpfnWndProc = _TitleBarWndProc; + wcex.hIcon = NULL; + wcex.lpszClassName = CommonSharedConstants::TITLE_BAR_WINDOW_CLASS_NAME; + RegisterClassEx(&wcex); + + return 0; + }(hInstance); // Win11 22H2 中为了使用 Mica 背景需指定 WS_EX_NOREDIRECTIONBITMAP CreateWindowEx( @@ -25,8 
+34,8 @@ bool MainWindow::Create(HINSTANCE hInstance, const RECT& windowRect, bool isMaxi L"Magpie", WS_OVERLAPPEDWINDOW, windowRect.left, windowRect.top, windowRect.right, windowRect.bottom, - nullptr, - nullptr, + NULL, + NULL, hInstance, this ); @@ -37,22 +46,87 @@ bool MainWindow::Create(HINSTANCE hInstance, const RECT& windowRect, bool isMaxi _SetContent(winrt::Magpie::App::MainPage()); - // Xaml 控件加载完成后显示主窗口 - _content.Loaded([this, isMaximized](winrt::IInspectable const&, winrt::RoutedEventArgs const&) -> winrt::IAsyncAction { - co_await _content.Dispatcher().RunAsync(winrt::CoreDispatcherPriority::Normal, [hWnd(_hWnd), isMaximized]() { - // 防止窗口显示时背景闪烁 - // https://stackoverflow.com/questions/69715610/how-to-initialize-the-background-color-of-win32-app-to-something-other-than-whit - SetWindowPos(hWnd, NULL, 0, 0, 0, 0, SWP_NOMOVE | SWP_NOSIZE); - ShowWindow(hWnd, isMaximized ? SW_SHOWMAXIMIZED : SW_SHOWNORMAL); - Win32Utils::SetForegroundWindow(hWnd); - }); - }); - _content.ActualThemeChanged([this](winrt::FrameworkElement const&, winrt::IInspectable const&) { _UpdateTheme(); }); _UpdateTheme(); + // 窗口尚未显示无法最大化,所以我们设置 _isMaximized 使 XamlWindow 估计 XAML Islands 窗口尺寸。 + // 否则在显示窗口时可能会看到 NavigationView 的导航栏的展开动画。 + _isMaximized = isMaximized; + + // 1. 设置初始 XAML Islands 窗口的尺寸 + // 2. 刷新窗口边框 + // 3. 
防止窗口显示时背景闪烁: https://stackoverflow.com/questions/69715610/how-to-initialize-the-background-color-of-win32-app-to-something-other-than-whit + SetWindowPos(_hWnd, NULL, 0, 0, 0, 0, SWP_NOMOVE | SWP_NOSIZE | SWP_FRAMECHANGED); + + // Xaml 控件加载完成后显示主窗口 + _content.Loaded([this, isMaximized](winrt::IInspectable const&, winrt::RoutedEventArgs const&) { + if (isMaximized) { + // ShowWindow(_hWnd, SW_SHOWMAXIMIZED) 会显示错误的动画。因此我们以窗口化显示, + // 但位置和大小都和最大化相同,显示完毕后将状态设为最大化。 + // + // 在此过程中,_isMaximized 始终是 true。 + + // 保存原始窗口化位置 + WINDOWPLACEMENT wp{}; + wp.length = sizeof(wp); + GetWindowPlacement(_hWnd, &wp); + + // 查询最大化窗口位置 + if (HMONITOR hMon = MonitorFromWindow(_hWnd, MONITOR_DEFAULTTONEAREST)) { + MONITORINFO mi{}; + mi.cbSize = sizeof(mi); + GetMonitorInfo(hMon, &mi); + + // 播放窗口显示动画 + SetWindowPos( + _hWnd, + NULL, + mi.rcWork.left, + mi.rcWork.top, + mi.rcMonitor.right - mi.rcMonitor.left, + mi.rcMonitor.bottom - mi.rcMonitor.top, + SWP_NOACTIVATE | SWP_NOZORDER | SWP_SHOWWINDOW + ); + } + + // 将状态设为最大化,也还原了原始的窗口化位置 + wp.showCmd = SW_SHOWMAXIMIZED; + SetWindowPlacement(_hWnd, &wp); + } else { + ShowWindow(_hWnd, SW_SHOWNORMAL); + } + + Win32Utils::SetForegroundWindow(_hWnd); + + _isWindowShown = true; + }); + + // 创建标题栏窗口,它是主窗口的子窗口。我们将它置于 XAML Islands 窗口之上以防止鼠标事件被吞掉 + // + // 出于未知的原因,必须添加 WS_EX_LAYERED 样式才能发挥作用,见 + // https://github.com/microsoft/terminal/blob/0ee2c74cd432eda153f3f3e77588164cde95044f/src/cascadia/WindowsTerminal/NonClientIslandWindow.cpp#L79 + // WS_EX_NOREDIRECTIONBITMAP 可以避免 WS_EX_LAYERED 导致的额外内存开销 + // + // WS_MINIMIZEBOX 和 WS_MAXIMIZEBOX 使得鼠标悬停时显示文字提示,Win11 的贴靠布局不依赖它们 + CreateWindowEx( + WS_EX_LAYERED | WS_EX_NOPARENTNOTIFY | WS_EX_NOREDIRECTIONBITMAP | WS_EX_NOACTIVATE, + CommonSharedConstants::TITLE_BAR_WINDOW_CLASS_NAME, + L"", + WS_CHILD | WS_MINIMIZEBOX | WS_MAXIMIZEBOX, + 0, 0, 0, 0, + _hWnd, + nullptr, + hInstance, + this + ); + SetLayeredWindowAttributes(_hwndTitleBar, 0, 255, LWA_ALPHA); + + 
_content.TitleBar().SizeChanged([this](winrt::IInspectable const&, winrt::SizeChangedEventArgs const&) { + _ResizeTitleBarWindow(); + }); + return true; } @@ -66,16 +140,64 @@ void MainWindow::Show() const noexcept { LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { switch (msg) { + case WM_SIZE: + { + LRESULT ret = base_type::_MessageHandler(WM_SIZE, wParam, lParam); + _ResizeTitleBarWindow(); + _content.TitleBar().CaptionButtons().IsWindowMaximized(_isMaximized); + return ret; + } case WM_GETMINMAXINFO: { // 设置窗口最小尺寸 MINMAXINFO* mmi = (MINMAXINFO*)lParam; - mmi->ptMinTrackSize = { 500,300 }; + mmi->ptMinTrackSize = { + std::lround(550 * _currentDpi / double(USER_DEFAULT_SCREEN_DPI)), + std::lround(300 * _currentDpi / double(USER_DEFAULT_SCREEN_DPI)) + }; return 0; } + case WM_NCRBUTTONUP: + { + // 我们自己处理标题栏右键,不知为何 DefWindowProc 没有作用 + if (wParam == HTCAPTION) { + HMENU systemMenu = GetSystemMenu(_hWnd, FALSE); + + // 根据窗口状态更新选项 + MENUITEMINFO mii{}; + mii.cbSize = sizeof(MENUITEMINFO); + mii.fMask = MIIM_STATE; + mii.fType = MFT_STRING; + auto setState = [&](UINT item, bool enabled) { + mii.fState = enabled ? 
MF_ENABLED : MF_DISABLED; + SetMenuItemInfo(systemMenu, item, FALSE, &mii); + }; + setState(SC_RESTORE, _isMaximized); + setState(SC_MOVE, !_isMaximized); + setState(SC_SIZE, !_isMaximized); + setState(SC_MINIMIZE, true); + setState(SC_MAXIMIZE, !_isMaximized); + setState(SC_CLOSE, true); + SetMenuDefaultItem(systemMenu, UINT_MAX, FALSE); + + BOOL cmd = TrackPopupMenu(systemMenu, TPM_RETURNCMD, + GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam), 0, _hWnd, nullptr); + if (cmd != 0) { + PostMessage(_hWnd, WM_SYSCOMMAND, cmd, 0); + } + } + break; + } + case WM_ACTIVATE: + { + _content.TitleBar().IsWindowActive(LOWORD(wParam) != WA_INACTIVE); + break; + } case WM_DESTROY: { XamlApp::Get().SaveSettings(); + _hwndTitleBar = NULL; + _trackingMouse = false; break; } case CommonSharedConstants::WM_QUIT_MAGPIE: @@ -93,27 +215,227 @@ LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noex } void MainWindow::_UpdateTheme() { - const bool isDarkTheme = _content.ActualTheme() == winrt::ElementTheme::Dark; - - if (Win32Utils::GetOSVersion().Is22H2OrNewer()) { - // 设置 Mica 背景 - DWM_SYSTEMBACKDROP_TYPE value = DWMSBT_MAINWINDOW; - DwmSetWindowAttribute(_hWnd, DWMWA_SYSTEMBACKDROP_TYPE, &value, sizeof(value)); - } else { - // 更改背景色以配合主题 - // 背景色在更改窗口大小时会短暂可见 - HBRUSH hbrOld = (HBRUSH)SetClassLongPtr( - _hWnd, - GCLP_HBRBACKGROUND, - (INT_PTR)CreateSolidBrush(isDarkTheme ? 
- CommonSharedConstants::DARK_TINT_COLOR : CommonSharedConstants::LIGHT_TINT_COLOR)); - if (hbrOld) { - DeleteObject(hbrOld); + XamlWindowT::_SetTheme(_content.ActualTheme() == winrt::ElementTheme::Dark); +} + +LRESULT MainWindow::_TitleBarWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + if (msg == WM_NCCREATE) { + MainWindow* that = (MainWindow*)(((CREATESTRUCT*)lParam)->lpCreateParams); + assert(that && !that->_hwndTitleBar); + that->_hwndTitleBar = hWnd; + SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)that); + } else if (MainWindow* that = (MainWindow*)GetWindowLongPtr(hWnd, GWLP_USERDATA)) { + return that->_TitleBarMessageHandler(msg, wParam, lParam); + } + + return DefWindowProc(hWnd, msg, wParam, lParam); +} + +LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + switch (msg) { + case WM_NCHITTEST: + { + POINT cursorPos{ GET_X_LPARAM(lParam),GET_Y_LPARAM(lParam) }; + ScreenToClient(_hwndTitleBar, &cursorPos); + + RECT titleBarClientRect; + GetClientRect(_hwndTitleBar, &titleBarClientRect); + if (!PtInRect(&titleBarClientRect, cursorPos)) { + // 先检查鼠标是否在窗口内。在标题栏按钮上按下鼠标时我们会捕获光标,从而收到 WM_MOUSEMOVE 和 WM_LBUTTONUP 消息。 + // 它们使用 WM_NCHITTEST 测试鼠标位于哪个区域 + return HTNOWHERE; + } + + if (!_isMaximized && cursorPos.y + (int)_GetTopBorderHeight() < _GetResizeHandleHeight()) { + // 鼠标位于上边框 + return HTTOP; + } + + static const winrt::Size buttonSizeInDips = [this]() { + return _content.TitleBar().CaptionButtons().CaptionButtonSize(); + }(); + + const float buttonWidthInPixels = buttonSizeInDips.Width * _currentDpi / USER_DEFAULT_SCREEN_DPI; + const float buttonHeightInPixels = buttonSizeInDips.Height * _currentDpi / USER_DEFAULT_SCREEN_DPI; + + if (cursorPos.y >= buttonHeightInPixels) { + // 鼠标位于标题按钮下方,如果标题栏很宽,这里也可以拖动 + return HTCAPTION; + } + + // 从右向左检查鼠标是否位于某个标题栏按钮上 + const LONG cursorToRight = titleBarClientRect.right - cursorPos.x; + if (cursorToRight < buttonWidthInPixels) { + return HTCLOSE; + } 
else if (cursorToRight < buttonWidthInPixels * 2) { + // 支持 Win11 的贴靠布局 + // FIXME: 最大化时贴靠布局的位置不对,目前没有找到解决方案。似乎只适配了系统原生框架和 UWP + return HTMAXBUTTON; + } else if (cursorToRight < buttonWidthInPixels * 3) { + return HTMINBUTTON; + } else { + // 不在任何标题栏按钮上则在可拖拽区域 + return HTCAPTION; + } + } + // 在捕获光标时会收到 + case WM_MOUSEMOVE: + { + POINT cursorPos{ GET_X_LPARAM(lParam),GET_Y_LPARAM(lParam) }; + ClientToScreen(_hwndTitleBar, &cursorPos); + wParam = SendMessage(_hwndTitleBar, WM_NCHITTEST, 0, MAKELPARAM(cursorPos.x, cursorPos.y)); + } + [[fallthrough]]; + case WM_NCMOUSEMOVE: + { + auto captionButtons = _content.TitleBar().CaptionButtons(); + + // 将 hover 状态通知 CaptionButtons。标题栏窗口拦截了 XAML Islands 中的标题栏 + // 控件的鼠标消息,标题栏按钮的状态由我们手动控制。 + switch (wParam) { + case HTTOP: + case HTCAPTION: + { + captionButtons.LeaveButtons(); + + // 将 HTTOP 传给主窗口才能通过上边框调整窗口高度 + return SendMessage(_hWnd, msg, wParam, lParam); + } + case HTMINBUTTON: + case HTMAXBUTTON: + case HTCLOSE: + captionButtons.HoverButton((winrt::Magpie::App::CaptionButton)wParam); + + // 追踪鼠标以确保鼠标离开标题栏时我们能收到 WM_NCMOUSELEAVE 消息,否则无法 + // 可靠的收到这个消息,尤其是在用户快速移动鼠标的时候。 + if (!_trackingMouse && msg == WM_NCMOUSEMOVE) { + TRACKMOUSEEVENT ev{}; + ev.cbSize = sizeof(TRACKMOUSEEVENT); + ev.dwFlags = TME_LEAVE | TME_NONCLIENT; + ev.hwndTrack = _hwndTitleBar; + ev.dwHoverTime = HOVER_DEFAULT; // 不关心 HOVER 消息 + TrackMouseEvent(&ev); + _trackingMouse = true; + } + + break; + default: + captionButtons.LeaveButtons(); + } + break; + } + case WM_NCMOUSELEAVE: + case WM_MOUSELEAVE: + { + // 我们需要检查鼠标是否**真的**离开了标题栏按钮,因为在某些情况下 OS 会错误汇报。 + // 比如:鼠标在关闭按钮上停留了一段时间,系统会显示文字提示,这时按下左键,便会收 + // 到 WM_NCMOUSELEAVE,但此时鼠标并没有离开标题栏按钮 + POINT cursorPos; + GetCursorPos(&cursorPos); + // 先检查鼠标是否在主窗口上,如果正在显示文字提示,会返回 _hwndTitleBar + HWND hwndUnderCursor = WindowFromPoint(cursorPos); + if (hwndUnderCursor != _hWnd && hwndUnderCursor != _hwndTitleBar) { + _content.TitleBar().CaptionButtons().LeaveButtons(); + } else { + // 然后检查鼠标在标题栏上的位置 + LRESULT hit = 
SendMessage(_hwndTitleBar, WM_NCHITTEST, 0, MAKELPARAM(cursorPos.x, cursorPos.y)); + if (hit != HTMINBUTTON && hit != HTMAXBUTTON && hit != HTCLOSE) { + _content.TitleBar().CaptionButtons().LeaveButtons(); + } + } + + _trackingMouse = false; + break; + } + case WM_NCLBUTTONDOWN: + case WM_NCLBUTTONDBLCLK: + { + // 手动处理标题栏上的点击。如果在标题栏按钮上,则通知 CaptionButtons,否则将消息传递 + // 给主窗口。 + switch (wParam) { + case HTTOP: + case HTCAPTION: + { + // 将 HTTOP 传给主窗口才能通过上边框调整窗口高度 + return SendMessage(_hWnd, msg, wParam, lParam); + } + case HTMINBUTTON: + case HTMAXBUTTON: + case HTCLOSE: + _content.TitleBar().CaptionButtons().PressButton((winrt::Magpie::App::CaptionButton)wParam); + // 在标题栏按钮上按下左键后我们便捕获光标,这样才能在释放时得到通知。注意捕获光标后 + // 便不会再收到 NC 族消息,这就是为什么我们要处理 WM_MOUSEMOVE 和 WM_LBUTTONUP + SetCapture(_hwndTitleBar); + break; } - InvalidateRect(_hWnd, nullptr, TRUE); + return 0; } + // 在捕获光标时会收到 + case WM_LBUTTONUP: + { + ReleaseCapture(); - ThemeHelper::SetWindowTheme(_hWnd, isDarkTheme); + POINT cursorPos{ GET_X_LPARAM(lParam),GET_Y_LPARAM(lParam) }; + ClientToScreen(_hwndTitleBar, &cursorPos); + wParam = SendMessage(_hwndTitleBar, WM_NCHITTEST, 0, MAKELPARAM(cursorPos.x, cursorPos.y)); + } + [[fallthrough]]; + case WM_NCLBUTTONUP: + { + // 处理鼠标在标题栏上释放。如果位于标题栏按钮上,则传递给 CaptionButtons,不在则将消息传递给主窗口 + switch (wParam) { + case HTTOP: + case HTCAPTION: + { + // 在可拖拽区域或上边框释放左键,将此消息传递给主窗口 + _content.TitleBar().CaptionButtons().ReleaseButtons(); + return SendMessage(_hWnd, msg, wParam, lParam); + } + case HTMINBUTTON: + case HTMAXBUTTON: + case HTCLOSE: + // 在标题栏按钮上释放左键 + _content.TitleBar().CaptionButtons().ReleaseButton((winrt::Magpie::App::CaptionButton)wParam); + break; + default: + _content.TitleBar().CaptionButtons().ReleaseButtons(); + } + + return 0; + } + case WM_NCRBUTTONDOWN: + case WM_NCRBUTTONDBLCLK: + case WM_NCRBUTTONUP: + // 不关心右键,将它们传递给主窗口 + return SendMessage(_hWnd, msg, wParam, lParam); + } + + return DefWindowProc(_hwndTitleBar, msg, wParam, lParam); +} + +void 
MainWindow::_ResizeTitleBarWindow() noexcept { + if (!_hwndTitleBar) { + return; + } + + auto titleBar = _content.TitleBar(); + + // 获取标题栏的边框矩形 + winrt::Rect rect{0.0f, 0.0f, (float)titleBar.ActualWidth(), (float)titleBar.ActualHeight()}; + rect = titleBar.TransformToVisual(_content).TransformBounds(rect); + + const float dpiScale = _currentDpi / float(USER_DEFAULT_SCREEN_DPI); + + // 将标题栏窗口置于 XAML Islands 窗口上方 + SetWindowPos( + _hwndTitleBar, + HWND_TOP, + (int)std::floorf(rect.X * dpiScale), + (int)std::floorf(rect.Y * dpiScale) + _GetTopBorderHeight(), + (int)std::ceilf(rect.Width * dpiScale), + (int)std::floorf(rect.Height * dpiScale + 1), // 不知为何,直接向上取整有时无法遮盖 TitleBarControl + SWP_SHOWWINDOW + ); } } diff --git a/src/Magpie/MainWindow.h b/src/Magpie/MainWindow.h index 81b96cd25..54dd10eba 100644 --- a/src/Magpie/MainWindow.h +++ b/src/Magpie/MainWindow.h @@ -17,7 +17,14 @@ class MainWindow : public XamlWindowT private: void _UpdateTheme(); - bool _isMainWndMaximized = false; + static LRESULT CALLBACK _TitleBarWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) noexcept; + + LRESULT _TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept; + + void _ResizeTitleBarWindow() noexcept; + + HWND _hwndTitleBar = NULL; + bool _trackingMouse = false; }; } diff --git a/src/Magpie/ThemeHelper.cpp b/src/Magpie/ThemeHelper.cpp index d09bd6818..0ba5eebc7 100644 --- a/src/Magpie/ThemeHelper.cpp +++ b/src/Magpie/ThemeHelper.cpp @@ -45,17 +45,17 @@ void ThemeHelper::Initialize() noexcept { RefreshImmersiveColorPolicyState(); } -void ThemeHelper::SetWindowTheme(HWND hWnd, bool isDark) noexcept { +void ThemeHelper::SetWindowTheme(HWND hWnd, bool darkBorder, bool darkMenu) noexcept { InitApis(); - SetPreferredAppMode(isDark ? PreferredAppMode::ForceDark : PreferredAppMode::ForceLight); - AllowDarkModeForWindow(hWnd, isDark); + SetPreferredAppMode(darkMenu ? 
PreferredAppMode::ForceDark : PreferredAppMode::ForceLight); + AllowDarkModeForWindow(hWnd, darkMenu); // 使标题栏适应黑暗模式 // build 18985 之前 DWMWA_USE_IMMERSIVE_DARK_MODE 的值不同 // https://github.com/MicrosoftDocs/sdk-api/pull/966/files constexpr const DWORD DWMWA_USE_IMMERSIVE_DARK_MODE_BEFORE_20H1 = 19; - BOOL value = isDark; + BOOL value = darkBorder; DwmSetWindowAttribute( hWnd, Win32Utils::GetOSVersion().Is20H1OrNewer() ? DWMWA_USE_IMMERSIVE_DARK_MODE : DWMWA_USE_IMMERSIVE_DARK_MODE_BEFORE_20H1, @@ -65,19 +65,6 @@ void ThemeHelper::SetWindowTheme(HWND hWnd, bool isDark) noexcept { RefreshImmersiveColorPolicyState(); FlushMenuThemes(); - - const Win32Utils::OSVersion& osVersion = Win32Utils::GetOSVersion(); - if (osVersion.Is22H2OrNewer()) { - return; - } - - LONG_PTR style = GetWindowLongPtr(hWnd, GWL_EXSTYLE); - if (!osVersion.IsWin11()) { - // 在 Win10 上需要更多 hack - SetWindowLongPtr(hWnd, GWL_EXSTYLE, style | WS_EX_LAYERED); - SetLayeredWindowAttributes(hWnd, 0, 254, LWA_ALPHA); - } - SetWindowLongPtr(hWnd, GWL_EXSTYLE, style); } } diff --git a/src/Magpie/ThemeHelper.h b/src/Magpie/ThemeHelper.h index 19ac75671..a31f5fc6e 100644 --- a/src/Magpie/ThemeHelper.h +++ b/src/Magpie/ThemeHelper.h @@ -5,7 +5,7 @@ namespace Magpie { struct ThemeHelper { // 应用程序启动时调用一次 static void Initialize() noexcept; - static void SetWindowTheme(HWND hWnd, bool isDark) noexcept; + static void SetWindowTheme(HWND hWnd, bool darkBorder, bool darkMenu) noexcept; }; } diff --git a/src/Magpie/XamlWindow.h b/src/Magpie/XamlWindow.h index d465a8fe4..da97f5bbb 100644 --- a/src/Magpie/XamlWindow.h +++ b/src/Magpie/XamlWindow.h @@ -2,6 +2,11 @@ #include #include #include "XamlUtils.h" +#include "Win32Utils.h" +#include "ThemeHelper.h" +#include "CommonSharedConstants.h" + +#pragma comment(lib, "uxtheme.lib") namespace Magpie { @@ -92,13 +97,212 @@ class XamlWindowT { sender.NavigateFocus(args.Request()); } }); + } + + void _SetTheme(bool isDarkTheme) noexcept { + _isDarkTheme = isDarkTheme; - // 
防止第一次收到 WM_SIZE 消息时 MainPage 尺寸为 0 - _OnResize(); + // Win10 中即使在亮色主题下我们也使用暗色边框,这也是 UWP 窗口的行为 + ThemeHelper::SetWindowTheme( + _hWnd, + Win32Utils::GetOSVersion().IsWin11() ? isDarkTheme : true, + isDarkTheme + ); + + if (Win32Utils::GetOSVersion().Is22H2OrNewer()) { + // 设置 Mica 背景 + DWM_SYSTEMBACKDROP_TYPE value = DWMSBT_MAINWINDOW; + DwmSetWindowAttribute(_hWnd, DWMWA_SYSTEMBACKDROP_TYPE, &value, sizeof(value)); + return; + } + + if (Win32Utils::GetOSVersion().IsWin11()) { + // Win11 21H1/21H2 对 Mica 的支持不完善,改为使用纯色背景。Win10 在 WM_PAINT 中 + // 绘制背景。背景色在更改窗口大小时会短暂可见。 + HBRUSH hbrOld = (HBRUSH)SetClassLongPtr( + _hWnd, + GCLP_HBRBACKGROUND, + (INT_PTR)CreateSolidBrush(isDarkTheme ? + CommonSharedConstants::DARK_TINT_COLOR : CommonSharedConstants::LIGHT_TINT_COLOR)); + if (hbrOld) { + DeleteObject(hbrOld); + } + } + + // 立即重新绘制 + InvalidateRect(_hWnd, nullptr, FALSE); + UpdateWindow(_hWnd); } LRESULT _MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { switch (msg) { + case WM_CREATE: + { + _currentDpi = GetDpiForWindow(_hWnd); + + _UpdateFrameMargins(); + + if (!Win32Utils::GetOSVersion().IsWin11()) { + // 初始化双缓冲绘图 + static const int _ = []() { + BufferedPaintInit(); + return 0; + }(); + } + + break; + } + case WM_NCCALCSIZE: + { + // 移除标题栏的逻辑基本来自 Windows Terminal + // https://github.com/microsoft/terminal/blob/0ee2c74cd432eda153f3f3e77588164cde95044f/src/cascadia/WindowsTerminal/NonClientIslandWindow.cpp + + if (!wParam) { + return 0; + } + + NCCALCSIZE_PARAMS* params = (NCCALCSIZE_PARAMS*)lParam; + RECT& clientRect = params->rgrc[0]; + + // 保存原始上边框位置 + const LONG originalTop = clientRect.top; + + // 应用默认边框 + LRESULT ret = DefWindowProc(_hWnd, WM_NCCALCSIZE, wParam, lParam); + if (ret != 0) { + return ret; + } + + // 重新应用原始上边框,因此我们完全移除了默认边框中的上边框和标题栏,但保留了其他方向的边框 + clientRect.top = originalTop; + + // WM_NCCALCSIZE 在 WM_SIZE 前 + _UpdateMaximizedState(); + + if (_isMaximized) { + // 最大化的窗口的实际尺寸比屏幕的工作区更大一点,这是为了将可调整窗口大小的区域隐藏在屏幕外面 + clientRect.top += 
_GetResizeHandleHeight(); + + // 如果有自动隐藏的任务栏,我们在它的方向稍微减小客户区,这样用户就可以用鼠标呼出任务栏 + if (HMONITOR hMon = MonitorFromWindow(_hWnd, MONITOR_DEFAULTTONEAREST)) { + MONITORINFO monInfo{}; + monInfo.cbSize = sizeof(MONITORINFO); + GetMonitorInfo(hMon, &monInfo); + + // 检查是否有自动隐藏的任务栏 + APPBARDATA appBarData{}; + appBarData.cbSize = sizeof(appBarData); + if (SHAppBarMessage(ABM_GETSTATE, &appBarData) & ABS_AUTOHIDE) { + // 检查显示器的一条边 + auto hasAutohideTaskbar = [&monInfo](UINT edge) -> bool { + APPBARDATA data{}; + data.cbSize = sizeof(data); + data.uEdge = edge; + data.rc = monInfo.rcMonitor; + HWND hTaskbar = (HWND)SHAppBarMessage(ABM_GETAUTOHIDEBAREX, &data); + return hTaskbar != nullptr; + }; + + static constexpr int AUTO_HIDE_TASKBAR_HEIGHT = 2; + + if (hasAutohideTaskbar(ABE_TOP)) { + clientRect.top += AUTO_HIDE_TASKBAR_HEIGHT; + } + if (hasAutohideTaskbar(ABE_BOTTOM)) { + clientRect.bottom -= AUTO_HIDE_TASKBAR_HEIGHT; + } + if (hasAutohideTaskbar(ABE_LEFT)) { + clientRect.left += AUTO_HIDE_TASKBAR_HEIGHT; + } + if (hasAutohideTaskbar(ABE_RIGHT)) { + clientRect.right -= AUTO_HIDE_TASKBAR_HEIGHT; + } + } + } + } + + return 0; + } + case WM_NCHITTEST: + { + // 让 OS 处理左右下三边,由于我们移除了标题栏,上边框会被视为客户区 + LRESULT originalRet = DefWindowProc(_hWnd, WM_NCHITTEST, 0, lParam); + if (originalRet != HTCLIENT) { + return originalRet; + } + + // XAML Islands 和它上面的标题栏窗口都会吞掉鼠标事件,因此能到达这里的唯一机会 + // 是上边框。保险起见做一些额外检查。 + + if (!_isMaximized) { + RECT rcWindow; + GetWindowRect(_hWnd, &rcWindow); + + if (GET_Y_LPARAM(lParam) < rcWindow.top + _GetResizeHandleHeight()) { + return HTTOP; + } + } + + return HTCAPTION; + } + case WM_PAINT: + { + if (Win32Utils::GetOSVersion().IsWin11()) { + break; + } + + PAINTSTRUCT ps{ 0 }; + HDC hdc = BeginPaint(_hWnd, &ps); + if (!hdc) { + return 0; + } + + const int topBorderHeight = (int)_GetTopBorderHeight(); + + // 在顶部绘制黑色实线以显示系统原始边框,见 _UpdateFrameMargins + if (ps.rcPaint.top < topBorderHeight) { + RECT rcTopBorder = ps.rcPaint; + rcTopBorder.bottom = 
topBorderHeight; + + static HBRUSH hBrush = GetStockBrush(BLACK_BRUSH); + FillRect(hdc, &rcTopBorder, hBrush); + } + + // 绘制客户区,它会在调整窗口尺寸时短暂可见 + if (ps.rcPaint.bottom > topBorderHeight) { + RECT rcRest = ps.rcPaint; + rcRest.top = topBorderHeight; + + static bool isDarkBrush = _isDarkTheme; + static HBRUSH backgroundBrush = CreateSolidBrush(isDarkBrush ? + CommonSharedConstants::DARK_TINT_COLOR : CommonSharedConstants::LIGHT_TINT_COLOR); + + if (isDarkBrush != _isDarkTheme) { + isDarkBrush = _isDarkTheme; + DeleteBrush(backgroundBrush); + backgroundBrush = CreateSolidBrush(isDarkBrush ? + CommonSharedConstants::DARK_TINT_COLOR : CommonSharedConstants::LIGHT_TINT_COLOR); + } + + if (isDarkBrush) { + // 这里我们想要黑色背景而不是原始边框 + // hack 来自 https://github.com/microsoft/terminal/blob/0ee2c74cd432eda153f3f3e77588164cde95044f/src/cascadia/WindowsTerminal/NonClientIslandWindow.cpp#L1030-L1047 + HDC opaqueDc; + BP_PAINTPARAMS params = { sizeof(params), BPPF_NOCLIP | BPPF_ERASE }; + HPAINTBUFFER buf = BeginBufferedPaint(hdc, &rcRest, BPBF_TOPDOWNDIB, ¶ms, &opaqueDc); + if (buf && opaqueDc) { + FillRect(opaqueDc, &rcRest, backgroundBrush); + BufferedPaintSetAlpha(buf, nullptr, 255); + EndBufferedPaint(buf, TRUE); + } + } else { + FillRect(hdc, &rcRest, backgroundBrush); + } + } + + EndPaint(_hWnd, &ps); + return 0; + } case WM_SHOWWINDOW: { if (wParam == TRUE) { @@ -123,6 +327,8 @@ class XamlWindowT { } case WM_DPICHANGED: { + _currentDpi = HIWORD(wParam); + RECT* newRect = (RECT*)lParam; SetWindowPos(_hWnd, NULL, @@ -172,8 +378,10 @@ class XamlWindowT { } case WM_SIZE: { + _UpdateMaximizedState(); + if (wParam != SIZE_MINIMIZED) { - _OnResize(); + _UpdateIslandPosition(LOWORD(lParam), HIWORD(lParam)); if (_hwndXamlIsland) { // 使 ContentDialog 跟随窗口尺寸调整 @@ -192,6 +400,8 @@ class XamlWindowT { } } + _UpdateFrameMargins(); + return 0; } case WM_DESTROY: @@ -205,6 +415,10 @@ class XamlWindowT { _xamlSource = nullptr; _hwndXamlIsland = NULL; + _isMaximized = false; + _isWindowShown = 
false; + _isDarkTheme = false; + _content = nullptr; _destroyedEvent(); @@ -216,14 +430,89 @@ class XamlWindowT { return DefWindowProc(_hWnd, msg, wParam, lParam); } + uint32_t _GetTopBorderHeight() const noexcept { + static constexpr uint32_t TOP_BORDER_HEIGHT = 1; + + // Win11 或最大化时没有上边框 + return (Win32Utils::GetOSVersion().IsWin11() || _isMaximized) ? 0 : TOP_BORDER_HEIGHT; + } + + int _GetResizeHandleHeight() noexcept { + // 没有 SM_CYPADDEDBORDER + return GetSystemMetricsForDpi(SM_CXPADDEDBORDER, _currentDpi) + + GetSystemMetricsForDpi(SM_CYSIZEFRAME, _currentDpi); + } + HWND _hWnd = NULL; C _content{ nullptr }; + uint32_t _currentDpi = USER_DEFAULT_SCREEN_DPI; + bool _isMaximized = false; + bool _isWindowShown = false; + bool _isDarkTheme = false; + private: - void _OnResize() noexcept { - RECT clientRect; - GetClientRect(_hWnd, &clientRect); - SetWindowPos(_hwndXamlIsland, NULL, 0, 0, clientRect.right - clientRect.left, clientRect.bottom - clientRect.top, SWP_SHOWWINDOW | SWP_NOACTIVATE); + void _UpdateIslandPosition(int width, int height) const noexcept { + if (!IsWindowVisible(_hWnd) && _isMaximized) { + // 初始化过程中此函数会被调用两次。如果窗口以最大化显示,则两次传入的尺寸不一致。第一次 + // 调用此函数时主窗口尚未显示,因此无法最大化,我们必须估算最大化窗口的尺寸。不执行这个 + // 操作可能导致窗口显示时展示 NavigationView 导航展开的动画。 + if (HMONITOR hMon = MonitorFromWindow(_hWnd, MONITOR_DEFAULTTONEAREST)) { + MONITORINFO monInfo{}; + monInfo.cbSize = sizeof(MONITORINFO); + GetMonitorInfo(hMon, &monInfo); + + // 最大化窗口的尺寸为当前屏幕工作区的尺寸 + width = monInfo.rcWork.right - monInfo.rcMonitor.left; + height = monInfo.rcWork.bottom - monInfo.rcMonitor.top; + } + } + + int topBorderHeight = _GetTopBorderHeight(); + + // SWP_NOZORDER 确保 XAML Islands 窗口始终在标题栏窗口下方,否则主窗口在调整大小时会闪烁 + SetWindowPos( + _hwndXamlIsland, + NULL, + 0, + topBorderHeight, + width, + height - topBorderHeight, + SWP_NOACTIVATE | SWP_NOZORDER | SWP_SHOWWINDOW + ); + } + + void _UpdateMaximizedState() noexcept { + // 如果窗口尚未显示,不碰 _isMaximized + if (_isWindowShown) { + _isMaximized = 
IsMaximized(_hWnd); + } + } + + void _UpdateFrameMargins() const noexcept { + if (Win32Utils::GetOSVersion().IsWin11()) { + return; + } + + MARGINS margins{}; + if (_GetTopBorderHeight() > 0) { + // 在 Win10 中,移除标题栏时上边框也被没了。我们的解决方案是:使用 DwmExtendFrameIntoClientArea + // 将边框扩展到客户区,然后在顶部绘制了一个黑色实线来显示系统原始边框(这种情况下操作系统将黑色视 + // 为透明)。因此我们有**完美**的上边框! + // 见 https://docs.microsoft.com/en-us/windows/win32/dwm/customframe#extending-the-client-frame + // + // 有的软件自己绘制了假的上边框,如 Chromium 系、WinUI 3 等,但窗口失去焦点时边框是半透明的,无法 + // 完美模拟。 + // + // 我们选择扩展到标题栏高度,这是最好的选择。一个自然的想法是,既然上边框只有一个像素高,我们扩展一 + // 个像素即可,可惜因为 DWM 的 bug,这会使窗口失去焦点时上边框变为透明。那么能否传一个负值,让边框 + // 扩展到整个客户区?这大部分情况下可以工作,有一个小 bug:不显示边框颜色的设置下深色模式的边框会变 + // 为纯黑而不是半透明。 + RECT frame{}; + AdjustWindowRectExForDpi(&frame, GetWindowStyle(_hWnd), FALSE, 0, _currentDpi); + margins.cyTopHeight = -frame.top; + } + DwmExtendFrameIntoClientArea(_hWnd, &margins); } winrt::event> _destroyedEvent; diff --git a/src/Magpie/conanfile.txt b/src/Magpie/conanfile.txt new file mode 100644 index 000000000..26fceae9d --- /dev/null +++ b/src/Magpie/conanfile.txt @@ -0,0 +1,12 @@ +[requires] +fmt/9.1.0 +spdlog/1.11.0 +parallel-hashmap/1.37 + +[generators] +visual_studio + +[options] +fmt:header_only=True +spdlog:header_only=True +spdlog:no_exceptions=True diff --git a/src/Magpie/packages.config b/src/Magpie/packages.config index fb6919caa..b95621d37 100644 --- a/src/Magpie/packages.config +++ b/src/Magpie/packages.config @@ -1,6 +1,6 @@  - + \ No newline at end of file diff --git a/src/Shared/CommonPch.h b/src/Shared/CommonPch.h index 5ebe459ed..ccfdf98f8 100644 --- a/src/Shared/CommonPch.h +++ b/src/Shared/CommonPch.h @@ -37,6 +37,8 @@ #include namespace winrt { +using namespace Windows::ApplicationModel::Resources; +using namespace Windows::ApplicationModel::Resources::Core; using namespace Windows::Foundation; using namespace Windows::Foundation::Collections; using namespace Windows::Foundation::Metadata; diff --git a/src/Shared/CommonSharedConstants.h 
b/src/Shared/CommonSharedConstants.h index 7b521f5ee..cc6696d10 100644 --- a/src/Shared/CommonSharedConstants.h +++ b/src/Shared/CommonSharedConstants.h @@ -2,6 +2,7 @@ struct CommonSharedConstants { static constexpr const wchar_t* MAIN_WINDOW_CLASS_NAME = L"Magpie_Main"; + static constexpr const wchar_t* TITLE_BAR_WINDOW_CLASS_NAME = L"Magpie_TitleBar"; static constexpr const wchar_t* NOTIFY_ICON_WINDOW_CLASS_NAME = L"Magpie_NotifyIcon"; static constexpr const wchar_t* HOTKEY_WINDOW_CLASS_NAME = L"Magpie_Hotkey"; diff --git a/src/Shared/Utils.cpp b/src/Shared/Utils.cpp index 57032c8a1..fa9833a31 100644 --- a/src/Shared/Utils.cpp +++ b/src/Shared/Utils.cpp @@ -2,40 +2,6 @@ #include "Utils.h" #include "Logger.h" #include "StrUtils.h" -#include - - -bool Utils::ZstdCompress(std::span src, std::vector& dest, int compressionLevel) { - dest.resize(ZSTD_compressBound(src.size())); - size_t size = ZSTD_compress(dest.data(), dest.size(), src.data(), src.size(), compressionLevel); - - if (ZSTD_isError(size)) { - Logger::Get().Error(StrUtils::Concat("压缩失败:", ZSTD_getErrorName(size))); - return false; - } - - dest.resize(size); - return true; -} - -bool Utils::ZstdDecompress(std::span src, std::vector& dest) { - auto size = ZSTD_getFrameContentSize(src.data(), src.size()); - if (size == ZSTD_CONTENTSIZE_UNKNOWN || size == ZSTD_CONTENTSIZE_ERROR) { - Logger::Get().Error("ZSTD_getFrameContentSize 失败"); - return false; - } - - dest.resize(size); - size = ZSTD_decompress(dest.data(), dest.size(), src.data(), src.size()); - if (ZSTD_isError(size)) { - Logger::Get().Error(StrUtils::Concat("解压失败:", ZSTD_getErrorName(size))); - return false; - } - - dest.resize(size); - - return true; -} //////////////////////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/src/Shared/Utils.h b/src/Shared/Utils.h index 837bd617f..95d7e0d99 100644 --- a/src/Shared/Utils.h +++ b/src/Shared/Utils.h @@ -26,8 +26,5 @@ struct Utils { T _exitScope; }; - 
static bool ZstdCompress(std::span src, std::vector& dest, int compressionLevel); - static bool ZstdDecompress(std::span src, std::vector& dest); - static uint64_t HashData(std::span data) noexcept; }; diff --git a/src/Shared/Version.h b/src/Shared/Version.h index 481579387..06750377c 100644 --- a/src/Shared/Version.h +++ b/src/Shared/Version.h @@ -22,6 +22,6 @@ struct Version { uint32_t patch = 0; }; -constexpr inline Version MAGPIE_VERSION(0, 10, 2); -constexpr inline const char* MAGPIE_TAG = "v0.10.2"; -constexpr inline const wchar_t* MAGPIE_TAG_W = L"v0.10.2"; +constexpr inline Version MAGPIE_VERSION(0, 10, 3); +constexpr inline const char* MAGPIE_TAG = "v0.10.3"; +constexpr inline const wchar_t* MAGPIE_TAG_W = L"v0.10.3"; diff --git a/src/Solution.props b/src/Solution.props index b8fb9cdff..d9a020318 100644 --- a/src/Solution.props +++ b/src/Solution.props @@ -18,22 +18,20 @@ Use pch.h $(IntDir)pch.pch - Level4 - 4251 - true - true - stdcpp20 - stdc17 - true - - false _WINDOWS;WIN32_LEAN_AND_MEAN;WINRT_LEAN_AND_MEAN;WINRT_NO_MODULE_LOCK;NOGDICAPMASKS;NOICONS;NOATOM;NOCLIPBOARD;NODRAWTEXT;NOMEMMGR;NOMETAFILE;NOMINMAX;NOOPENFILE;NOSCROLL;NOSERVICE;NOSOUND;NOTEXTMETRIC;NOCOMM;NOKANJI;NOHELP;NOPROFILER;NODEFERWINDOWPOS;NOMCX;%(PreprocessorDefinitions) - /bigobj /utf-8 /Zc:__cplusplus /volatile:iso /fp:contract %(AdditionalOptions) + /bigobj %(AdditionalOptions) + + + + + + + true - + diff --git a/src/Updater/PackageFiles.h b/src/Updater/PackageFiles.h index 665b423a4..f8dd035c1 100644 --- a/src/Updater/PackageFiles.h +++ b/src/Updater/PackageFiles.h @@ -102,16 +102,117 @@ static constexpr const wchar_t* V0_9_101_FOLDERS[] = { L"effects" }; +static constexpr const wchar_t* V0_10_3_FILES[] = { + L"effects\\Anime4K\\Anime4K_3D_AA_Upscale_US.hlsl", + L"effects\\Anime4K\\Anime4K_3D_Upscale_US.hlsl", + L"effects\\Anime4K\\Anime4K_Denoise_Bilateral_Mean.hlsl", + L"effects\\Anime4K\\Anime4K_Denoise_Bilateral_Median.hlsl", + 
L"effects\\Anime4K\\Anime4K_Denoise_Bilateral_Mode.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_L.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_M.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_Soft_L.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_Soft_M.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_Soft_UL.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_Soft_VL.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_UL.hlsl", + L"effects\\Anime4K\\Anime4K_Restore_VL.hlsl", + L"effects\\Anime4K\\Anime4K_Thin_HQ.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_Denoise_L.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_Denoise_S.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_Denoise_UL.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_Denoise_VL.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_GAN_x2_S.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_L.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_S.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_UL.hlsl", + L"effects\\Anime4K\\Anime4K_Upscale_VL.hlsl", + L"effects\\CAS\\CAS.hlsl", + L"effects\\CAS\\CAS_Scaling.hlsl", + L"effects\\CRT\\CRT_Easymode.hlsl", + L"effects\\CRT\\CRT_Geom.hlsl", + L"effects\\CRT\\CRT_Hyllian.hlsl", + L"effects\\CRT\\CRT_Lottes.hlsl", + L"effects\\CRT\\GTU_v050.hlsl", + L"effects\\FSR\\FSR_EASU.hlsl", + L"effects\\FSR\\FSR_RCAS.hlsl", + L"effects\\FSRCNNX\\FSRCNNX.hlsl", + L"effects\\FSRCNNX\\FSRCNNX_LineArt.hlsl", + L"effects\\FXAA\\FXAA.hlsli", + L"effects\\FXAA\\FXAA_High.hlsl", + L"effects\\FXAA\\FXAA_Medium.hlsl", + L"effects\\FXAA\\FXAA_Ultra.hlsl", + L"effects\\NIS\\Coef_Scale.dds", + L"effects\\NIS\\Coef_USM.dds", + L"effects\\NIS\\NIS.hlsl", + L"effects\\NIS\\NVSharpen.hlsl", + L"effects\\NNEDI3\\NNEDI3_nns16_win8x4.hlsl", + L"effects\\NNEDI3\\NNEDI3_nns64_win8x6.hlsl", + L"effects\\Pixel Art\\MMPX.hlsl", + L"effects\\Pixel Art\\Pixellate.hlsl", + L"effects\\Pixel Art\\SharpBilinear.hlsl", + L"effects\\RAVU\\RAVU_Lite_R3.hlsl", + L"effects\\RAVU\\RAVU_Lite_R3_Weights.dds", + L"effects\\RAVU\\RAVU_Zoom_R3.hlsl", + 
L"effects\\RAVU\\RAVU_Zoom_R3_Weights.dds", + L"effects\\Sharpen\\AdaptiveSharpen.hlsl", + L"effects\\Sharpen\\FineSharp.hlsl", + L"effects\\Sharpen\\LCAS.hlsl", + L"effects\\Sharpen\\LumaSharpen.hlsl", + L"effects\\SMAA\\AreaTex.dds", + L"effects\\SMAA\\SearchTex.dds", + L"effects\\SMAA\\SMAA.hlsli", + L"effects\\SMAA\\SMAA_High.hlsl", + L"effects\\SMAA\\SMAA_Low.hlsl", + L"effects\\SMAA\\SMAA_Medium.hlsl", + L"effects\\SMAA\\SMAA_Ultra.hlsl", + L"effects\\xBRZ\\xBRZ_2x.hlsl", + L"effects\\xBRZ\\xBRZ_3x.hlsl", + L"effects\\xBRZ\\xBRZ_4x.hlsl", + L"effects\\xBRZ\\xBRZ_5x.hlsl", + L"effects\\xBRZ\\xBRZ_6x.hlsl", + L"effects\\xBRZ\\xBRZ_Freescale.hlsl", + L"effects\\ACNet.hlsl", + L"effects\\Bicubic.hlsl", + L"effects\\Bilinear.hlsl", + L"effects\\Deband.hlsl", + L"effects\\ImageAdjustment.hlsl", + L"effects\\Jinc.hlsl", + L"effects\\Lanczos.hlsl", + L"effects\\Nearest.hlsl", + L"effects\\SSimDownscaler.hlsl", + L"Magpie.App.dll", + L"Magpie.Core.dll", + L"Microsoft.UI.Xaml.dll", + L"resources.pri" +}; + +static constexpr const wchar_t* V0_10_3_FOLDERS[] = { + L"effects\\Anime4K", + L"effects\\CAS", + L"effects\\CRT", + L"effects\\FSR", + L"effects\\FSRCNNX", + L"effects\\FXAA", + L"effects\\NIS", + L"effects\\NNEDI3", + L"effects\\Pixel Art", + L"effects\\RAVU", + L"effects\\Sharpen", + L"effects\\SMAA", + L"effects\\xBRZ", + L"effects" +}; + struct PackageFiles { const std::span files; const std::span folders; - static std::optional Get(const Version& /*version*/) { - //if (version <= Version(0, 10, 0)) { - return PackageFiles{ _ToSpan(V0_9_101_FILES), _ToSpan(V0_9_101_FOLDERS)}; - //} - - //return std::nullopt; + static std::optional Get(const Version& version) { + if (version < Version(0, 10, 3)) { + return PackageFiles{ _ToSpan(V0_9_101_FILES), _ToSpan(V0_9_101_FOLDERS)}; + } else { + return PackageFiles{ _ToSpan(V0_10_3_FILES), _ToSpan(V0_10_3_FOLDERS) }; + } } private: diff --git a/src/Updater/Updater.rc b/src/Updater/Updater.rc index 312e336f1..3c530a23d 
100644 --- a/src/Updater/Updater.rc +++ b/src/Updater/Updater.rc @@ -67,8 +67,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 0,10,2,0 - PRODUCTVERSION 0,10,2,0 + FILEVERSION 0,10,3,0 + PRODUCTVERSION 0,10,3,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -83,12 +83,12 @@ BEGIN BEGIN BLOCK "000004b0" BEGIN - VALUE "FileVersion", "0.10.2.0" + VALUE "FileVersion", "0.10.3.0" VALUE "InternalName", "Updater.exe" VALUE "LegalCopyright", "Copyright (C) 2023 Liu Xu" VALUE "OriginalFilename", "Updater.exe" VALUE "ProductName", "Magpie" - VALUE "ProductVersion", "0.10.2.0" + VALUE "ProductVersion", "0.10.3.0" END END BLOCK "VarFileInfo" diff --git a/src/conanfile.py b/src/conanfile.py deleted file mode 100644 index 822bddab6..000000000 --- a/src/conanfile.py +++ /dev/null @@ -1,29 +0,0 @@ -from conans import ConanFile - -class Magpie(ConanFile): - settings = { - "os": "Windows", - "compiler": {"Visual Studio": {"version": ["17"]}}, - "build_type": ["Debug", "Release"], - "arch": ["x86_64", "armv8"] - } - requires = [ - "fmt/9.1.0", - "spdlog/1.11.0", - "muparser/2.3.4", - "yas/7.1.0", - "rapidjson/cci.20220822", - "zstd/1.5.2", - "imgui/1.89.2", - "parallel-hashmap/1.37", - "kuba-zip/0.2.6" - ] - generators = "visual_studio" - default_options = { - "fmt:header_only": True, - "spdlog:header_only": True, - "spdlog:no_exceptions": True - } - - def imports(self): - self.copy("imgui_impl_dx11.*", dst="../../../src/Magpie.Core", src="./res/bindings") diff --git a/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.sln b/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.sln new file mode 100644 index 000000000..f7f98cb83 --- /dev/null +++ b/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.33627.172 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CJKCharacterSetForImGui", 
"CJKCharacterSetForImGui.vcxproj", "{E0E023BE-72A6-45BA-A3DE-07E4F7EB5EB7}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E0E023BE-72A6-45BA-A3DE-07E4F7EB5EB7}.Debug|x64.ActiveCfg = Debug|x64 + {E0E023BE-72A6-45BA-A3DE-07E4F7EB5EB7}.Debug|x64.Build.0 = Debug|x64 + {E0E023BE-72A6-45BA-A3DE-07E4F7EB5EB7}.Release|x64.ActiveCfg = Release|x64 + {E0E023BE-72A6-45BA-A3DE-07E4F7EB5EB7}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {428064E2-6EE8-4B12-A569-092F40898C06} + EndGlobalSection +EndGlobal diff --git a/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.vcxproj b/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.vcxproj new file mode 100644 index 000000000..63210ce1a --- /dev/null +++ b/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.vcxproj @@ -0,0 +1,89 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {e0e023be-72a6-45ba-a3de-07e4f7eb5eb7} + CJKCharacterSetForImGui + 10.0 + CJKCharacterSetForImGui + + + + Application + true + v143 + Unicode + + + Application + false + v143 + true + Unicode + + + + + + + + + + + + + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + false + /utf-8 /Zc:__cplusplus /volatile:iso %(AdditionalOptions) + Fast + + + Console + true + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + false + /utf-8 /Zc:__cplusplus /volatile:iso %(AdditionalOptions) + Fast + + + Console + true + true + true + + + + + + + + + \ No newline at end of file diff --git a/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.vcxproj.filters 
b/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.vcxproj.filters new file mode 100644 index 000000000..65fa6d5ae --- /dev/null +++ b/tools/CJKCharacterSetForImGui/CJKCharacterSetForImGui.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + 源文件 + + + \ No newline at end of file diff --git a/tools/CJKCharacterSetForImGui/README.md b/tools/CJKCharacterSetForImGui/README.md new file mode 100644 index 000000000..6feab5d85 --- /dev/null +++ b/tools/CJKCharacterSetForImGui/README.md @@ -0,0 +1,3 @@ +# CJKCharacterSetForImGui + +用于导出供 ImGui 使用的字符表,格式为 https://github.com/ocornut/imgui/blob/bcfc1ad8f63997751a7269788511157ed872da2c/imgui_draw.cpp#L2883 。 diff --git a/tools/CJKCharacterSetForImGui/input.txt b/tools/CJKCharacterSetForImGui/input.txt new file mode 100644 index 000000000..1673886c8 --- /dev/null +++ b/tools/CJKCharacterSetForImGui/input.txt @@ -0,0 +1,2 @@ 
+一乙二十丁厂七卜八人入儿匕几九刁了刀力乃又三干于亏工土士才下寸大丈与万上小口山巾千乞川亿个夕久么勺凡丸及广亡门丫义之尸己已巳弓子卫也女刃飞习叉马乡丰王开井天夫元无云专丐扎艺木五支厅不犬太区历歹友尤匹车巨牙屯戈比互切瓦止少曰日中贝冈内水见午牛手气毛壬升夭长仁什片仆化仇币仍仅斤爪反介父从仑今凶分乏公仓月氏勿欠风丹匀乌勾凤六文亢方火为斗忆计订户认冗讥心尺引丑巴孔队办以允予邓劝双书幻玉刊未末示击打巧正扑卉扒功扔去甘世艾古节本术可丙左厉石右布夯戊龙平灭轧东卡北占凸卢业旧帅归旦目且叶甲申叮电号田由只叭史央兄叽叼叫叩叨另叹冉皿凹囚四生矢失乍禾丘付仗代仙们仪白仔他斥瓜乎丛令用甩印尔乐句匆册卯犯外处冬鸟务包饥主市立冯玄闪兰半汁汇头汉宁穴它讨写让礼训议必讯记永司尼民弗弘出辽奶奴召加皮边孕发圣对台矛纠母幼丝邦式迂刑戎动扛寺吉扣考托老巩圾执扩扫地场扬耳芋共芒亚芝朽朴机权过臣吏再协西压厌戌在百有存而页匠夸夺灰达列死成夹夷轨邪尧划迈毕至此贞师尘尖劣光当早吁吐吓虫曲团吕同吊吃因吸吗吆屿屹岁帆回岂则刚网肉年朱先丢廷舌竹迁乔迄伟传乒乓休伍伏优臼伐延仲件任伤价伦份华仰仿伙伪自伊血向似后行舟全会杀合兆企众爷伞创肌肋朵杂危旬旨旭负匈名各多争色壮冲妆冰庄庆亦刘齐交衣次产决亥充妄闭问闯羊并关米灯州汗污江汛池汝汤忙兴宇守宅字安讲讳军讶许讹论讼农讽设访诀寻那迅尽导异弛孙阵阳收阶阴防奸如妇妃好她妈戏羽观欢买红驮纤驯约级纪驰纫巡寿弄麦玖玛形进戒吞远违韧运扶抚坛技坏抠扰扼拒找批址扯走抄贡汞坝攻赤折抓扳抡扮抢孝坎均抑抛投坟坑抗坊抖护壳志块扭声把报拟却抒劫芙芜苇芽花芹芥芬苍芳严芦芯劳克芭苏杆杠杜材村杖杏杉巫极李杨求甫匣更束吾豆两酉丽医辰励否还尬歼来连轩步卤坚肖旱盯呈时吴助县里呆吱吠呕园旷围呀吨足邮男困吵串员呐听吟吩呛吻吹呜吭吧邑吼囤别吮岖岗帐财针钉牡告我乱利秃秀私每兵估体何佐佑但伸佃作伯伶佣低你住位伴身皂伺佛囱近彻役返余希坐谷妥含邻岔肝肛肚肘肠龟甸免狂犹狈角删条彤卵灸岛刨迎饭饮系言冻状亩况床库庇疗吝应这冷庐序辛弃冶忘闰闲间闷判兑灶灿灼弟汪沐沛汰沥沙汽沃沦汹泛沧没沟沪沈沉沁怀忧忱快完宋宏牢究穷灾良证启评补初社祀识诈诉罕诊词译君灵即层屁尿尾迟局改张忌际陆阿陈阻附坠妓妙妖姊妨妒努忍劲矣鸡纬驱纯纱纲纳驳纵纷纸纹纺驴纽奉玩环武青责现玫表规抹卦坷坯拓拢拔坪拣坦担坤押抽拐拖者拍顶拆拎拥抵拘势抱拄垃拉拦幸拌拧拂拙招坡披拨择抬拇拗其取茉苦昔苛若茂苹苗英苟苑苞范直茁茄茎苔茅枉林枝杯枢柜枚析板松枪枫构杭杰述枕丧或画卧事刺枣雨卖郁矾矿码厕奈奔奇奋态欧殴垄妻轰顷转斩轮软到非叔歧肯齿些卓虎虏肾贤尚旺具味果昆国哎咕昌呵畅明易咙昂迪典固忠呻咒咋咐呼鸣咏呢咄咖岸岩帖罗帜帕岭凯败账贩贬购贮图钓制知迭氛垂牧物乖刮秆和季委秉佳侍岳供使例侠侥版侄侦侣侧凭侨佩货侈依卑的迫质欣征往爬彼径所舍金刹命肴斧爸采觅受乳贪念贫忿肤肺肢肿胀朋股肮肪肥服胁周昏鱼兔狐忽狗狞备饰饱饲变京享庞店夜庙府底疟疙疚剂卒郊庚废净盲放刻育氓闸闹郑券卷单炬炒炊炕炎炉沫浅法泄沽河沾泪沮油泊沿泡注泣泞泻泌泳泥沸沼波泼泽治怔怯怖性怕怜怪怡学宝宗定宠宜审宙官空帘宛实试郎诗肩房诚衬衫视祈话诞诡询该详建肃录隶帚屉居届刷屈弧弥弦承孟陋陌孤陕降函限妹姑姐姓妮始姆迢驾叁参艰线练组绅细驶织驹终驻绊驼绍绎经贯契贰奏春帮玷珍玲珊玻毒型拭挂封持拷拱项垮挎城挟挠政赴赵挡拽哉挺括垢拴拾挑垛指垫挣挤拼挖按挥挪拯某甚荆茸革茬荐巷带草茧茵茶荒茫荡荣荤荧故胡荫荔南药标栈柑枯柄栋相查柏栅柳柱柿栏柠树勃要柬咸威歪研砖厘厚砌砂泵砚砍面耐耍牵鸥残殃轴轻鸦皆韭背战点虐临览竖省削尝昧盹是盼眨哇哄哑显冒映星昨咧昭畏趴胃贵界虹虾蚁思蚂虽品咽骂勋哗咱响哈哆咬咳咪哪哟炭峡罚贱贴贻骨幽钙钝钞钟钢钠钥钦钧钩钮卸缸拜看矩毡氢怎牲选适秒香种秋科重复竿段便俩贷顺修俏保促俄俐侮俭俗俘信皇泉鬼侵禹侯追俊盾待徊衍律很须叙剑逃食盆胚胧胆胜胞胖脉胎勉狭狮独狰狡狱狠贸怨急饵饶蚀饺饼峦弯将奖哀亭亮度迹庭疮疯疫疤咨姿亲音帝施闺闻闽阀阁差养美姜叛送类迷籽娄前首逆兹总炼炸烁炮炫烂剃洼洁洪洒柒浇浊洞测洗活派洽染洛浏济洋洲浑浓津恃恒恢恍恬恤恰恼恨举觉宣宦室宫宪突穿窃客诫冠诬语扁袄祖神祝祠误诱诲说诵垦退既屋昼屏屎费陡逊眉孩陨除险院娃姥姨姻娇姚娜怒架贺盈勇怠癸蚤柔垒绑绒结绕骄绘给绚骆络绝绞骇统耕耘耗耙艳泰秦珠班素匿蚕顽盏匪捞栽捕埂捂振载赶起盐捎捍捏埋捉捆捐损袁捌都哲逝捡挫换挽挚热恐捣壶捅埃挨耻耿耽聂恭莽莱莲莫莉荷获晋恶莹莺真框梆桂桔栖档桐株桥桦栓桃格桩校核样根索哥速逗栗贾酌配翅辱唇夏砸砰砾础破原套逐烈殊殉顾轿较顿毙致柴桌虑监紧党逞晒眠晓哮唠鸭晃哺晌剔晕蚌畔蚣蚊蚪蚓哨哩圃哭哦恩鸯唤唁哼唧啊唉唆罢峭峨峰圆峻贼贿赂赃钱钳钻钾铁铃铅缺氧氨特牺造乘敌秤租积秧秩称秘透笔笑笋债借值倚俺倾倒倘俱倡候赁俯倍倦健臭射躬息倔徒徐殷舰舱般航途拿耸爹舀爱豺豹颁颂翁胰脆脂胸胳脏脐
胶脑脓逛狸狼卿逢鸵留鸳皱饿馁凌凄恋桨浆衰衷高郭席准座症病疾斋疹疼疲脊效离紊唐瓷资凉站剖竞部旁旅畜阅羞羔瓶拳粉料益兼烤烘烦烧烛烟烙递涛浙涝浦酒涉消涡浩海涂浴浮涣涤流润涧涕浪浸涨烫涩涌悖悟悄悍悔悯悦害宽家宵宴宾窍窄容宰案请朗诸诺读扇诽袜袖袍被祥课冥谁调冤谅谆谈谊剥恳展剧屑弱陵祟陶陷陪娱娟恕娥娘通能难预桑绢绣验继骏球琐理琉琅捧堵措描域捺掩捷排焉掉捶赦堆推埠掀授捻教掏掐掠掂培接掷控探据掘掺职基聆勘聊娶著菱勒黄菲萌萝菌萎菜萄菊菩萍菠萤营乾萧萨菇械彬梦婪梗梧梢梅检梳梯桶梭救曹副票酝酗厢戚硅硕奢盔爽聋袭盛匾雪辅辆颅虚彪雀堂常眶匙晨睁眯眼悬野啪啦曼晦晚啄啡距趾啃跃略蚯蛀蛇唬累鄂唱患啰唾唯啤啥啸崖崎崭逻崔帷崩崇崛婴圈铐铛铝铜铭铲银矫甜秸梨犁秽移笨笼笛笙符第敏做袋悠偿偶偎偷您售停偏躯兜假衅徘徙得衔盘舶船舵斜盒鸽敛悉欲彩领脚脖脯豚脸脱象够逸猜猪猎猫凰猖猛祭馅馆凑减毫烹庶麻庵痊痒痕廊康庸鹿盗章竟商族旋望率阎阐着羚盖眷粘粗粒断剪兽焊焕清添鸿淋涯淹渠渐淑淌混淮淆渊淫渔淘淳液淤淡淀深涮涵婆梁渗情惜惭悼惧惕惟惊惦悴惋惨惯寇寅寄寂宿窒窑密谋谍谎谐袱祷祸谓谚谜逮敢尉屠弹隋堕随蛋隅隆隐婚婶婉颇颈绩绪续骑绰绳维绵绷绸综绽绿缀巢琴琳琢琼斑替揍款堪塔搭堰揩越趁趋超揽堤提博揭喜彭揣插揪搜煮援搀裁搁搓搂搅壹握搔揉斯期欺联葫散惹葬募葛董葡敬葱蒋蒂落韩朝辜葵棒棱棋椰植森焚椅椒棵棍椎棉棚棕棺榔椭惠惑逼粟棘酣酥厨厦硬硝确硫雁殖裂雄颊雳暂雅翘辈悲紫凿辉敞棠赏掌晴睐暑最晰量鼎喷喳晶喇遇喊遏晾景畴践跋跌跑跛遗蛙蛛蜓蜒蛤喝鹃喂喘喉喻啼喧嵌幅帽赋赌赎赐赔黑铸铺链销锁锄锅锈锋锌锐甥掰短智氮毯氯鹅剩稍程稀税筐等筑策筛筒筏答筋筝傲傅牌堡集焦傍储皓皖粤奥街惩御循艇舒逾番释禽腊脾腋腔腕鲁猩猬猾猴惫然馈馋装蛮就敦斌痘痢痪痛童竣阔善翔羡普粪尊奠道遂曾焰港滞湖湘渣渤渺湿温渴溃溅滑湃渝湾渡游滋渲溉愤慌惰愕愣惶愧愉慨割寒富寓窜窝窖窗窘遍雇裕裤裙禅禄谢谣谤谦犀属屡强粥疏隔隙隘媒絮嫂媚婿登缅缆缉缎缓缔缕骗编骚缘瑟鹉瑞瑰瑙魂肆摄摸填搏塌鼓摆携搬摇搞塘摊聘斟蒜勤靴靶鹊蓝墓幕蓬蓄蒲蓉蒙蒸献椿禁楚楷榄想槐榆楼概赖酪酬感碍碘碑碎碰碗碌尴雷零雾雹辐辑输督频龄鉴睛睹睦瞄睫睡睬嗜鄙嗦愚暖盟歇暗暇照畸跨跷跳跺跪路跤跟遣蜈蜗蛾蜂蜕嗅嗡嗓署置罪罩蜀幌错锚锡锣锤锥锦键锯锰矮辞稚稠颓愁筹签简筷毁舅鼠催傻像躲魁衙微愈遥腻腰腥腮腹腺鹏腾腿鲍猿颖触解煞雏馍馏酱禀痹廓痴痰廉靖新韵意誊粮数煎塑慈煤煌满漠滇源滤滥滔溪溜漓滚溢溯滨溶溺粱滩慎誉塞寞窥窟寝谨褂裸福谬群殿辟障媳嫉嫌嫁叠缚缝缠缤剿静碧璃赘熬墙墟嘉摧赫截誓境摘摔撇聚慕暮摹蔓蔑蔡蔗蔽蔼熙蔚兢模槛榴榜榨榕歌遭酵酷酿酸碟碱碳磁愿需辖辗雌裳颗瞅墅嗽踊蜻蜡蝇蜘蝉嘛嘀赚锹锻镀舞舔稳熏箕算箩管箫舆僚僧鼻魄魅貌膜膊膀鲜疑孵馒裹敲豪膏遮腐瘩瘟瘦辣彰竭端旗精粹歉弊熄熔煽潇漆漱漂漫滴漾演漏慢慷寨赛寡察蜜寥谭肇褐褪谱隧嫩翠熊凳骡缩慧撵撕撒撩趣趟撑撮撬播擒墩撞撤增撰聪鞋鞍蕉蕊蔬蕴横槽樱橡樟橄敷豌飘醋醇醉磕磊磅碾震霄霉瞒题暴瞎嘻嘶嘲嘹影踢踏踩踪蝶蝴蝠蝎蝌蝗蝙嘿嘱幢墨镇镐镑靠稽稻黎稿稼箱篓箭篇僵躺僻德艘膝膛鲤鲫熟摩褒瘪瘤瘫凛颜毅糊遵憋潜澎潮潭鲨澳潘澈澜澄懂憔懊憎额翩褥谴鹤憨慰劈履豫缭撼擂操擅燕蕾薯薛薇擎薪薄颠翰噩橱橙橘整融瓢醒霍霎辙冀餐嘴踱蹄蹂蟆螃器噪鹦赠默黔镜赞穆篮篡篷篱儒邀衡膨雕鲸磨瘾瘸凝辨辩糙糖糕燃濒澡激懒憾懈窿壁避缰缴戴擦藉鞠藏藐檬檐檀礁磷霜霞瞭瞧瞬瞳瞩瞪曙蹋蹈螺蟋蟀嚎赡穗魏簧簇繁徽爵朦臊鳄癌辫赢糟糠燥懦豁臀臂翼骤藕鞭藤覆瞻蹦嚣镰翻鳍鹰瀑襟璧戳孽警蘑藻攀曝蹲蹭蹬巅簸簿蟹颤靡癣瓣羹鳖爆疆鬓壤馨耀躁蠕嚼嚷巍籍鳞魔糯灌譬蠢霸露霹躏黯髓赣囊镶瓤罐矗 +帧 \ No newline at end of file diff --git a/tools/CJKCharacterSetForImGui/main.cpp b/tools/CJKCharacterSetForImGui/main.cpp new file mode 100644 index 000000000..f80b30879 --- /dev/null +++ b/tools/CJKCharacterSetForImGui/main.cpp @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +struct HandleCloser { void operator()(HANDLE h) noexcept { assert(h != INVALID_HANDLE_VALUE); if (h) CloseHandle(h); } }; +using ScopedHandle = 
std::unique_ptr::type, HandleCloser>; + +static HANDLE SafeHandle(HANDLE h) noexcept { + return (h == INVALID_HANDLE_VALUE) ? nullptr : h; +} + +static std::vector ReadFile(const wchar_t* fileName) noexcept { + CREATEFILE2_EXTENDED_PARAMETERS extendedParams = {}; + extendedParams.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS); + extendedParams.dwFileAttributes = FILE_ATTRIBUTE_NORMAL; + extendedParams.dwFileFlags = FILE_FLAG_SEQUENTIAL_SCAN; + extendedParams.dwSecurityQosFlags = SECURITY_ANONYMOUS; + extendedParams.lpSecurityAttributes = nullptr; + extendedParams.hTemplateFile = nullptr; + + ScopedHandle hFile(SafeHandle(CreateFile2(fileName, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, &extendedParams))); + + if (!hFile) { + return {}; + } + + DWORD size = GetFileSize(hFile.get(), nullptr); + std::vector result(size, 0); + + DWORD readed; + if (!::ReadFile(hFile.get(), result.data(), size, &readed, nullptr)) { + return {}; + } + + return result; +} + +static std::wstring UTF8ToUTF16(std::string_view str) noexcept { + if (str.empty()) { + return {}; + } + + int convertResult = MultiByteToWideChar(CP_UTF8, 0, + str.data(), (int)str.size(), nullptr, 0); + if (convertResult <= 0) { + assert(false); + return {}; + } + + std::wstring result(convertResult + 10, L'\0'); + convertResult = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), + result.data(), (int)result.size()); + if (convertResult <= 0) { + assert(false); + return {}; + } + + result.resize(convertResult); + return result; +} + +// 输入: input.txt +// 将输出输入文件中所有汉字组成的字符表 +int main() { + std::vector input = ReadFile(L"input.txt"); + input.push_back(0); + + std::wstring utf16 = UTF8ToUTF16(std::string_view((const char*)input.data(), input.size() - 1)); + + static constexpr std::pair CJK_RANGE{ 0x4E00, 0x9FAF }; + + std::vector bitSet(CJK_RANGE.second - CJK_RANGE.first + 1); + for (wchar_t character : utf16) { + if (character < CJK_RANGE.first || character > CJK_RANGE.second) { + continue; + } + + 
bitSet[character - CJK_RANGE.first] = true; + } + + std::vector index; + int prevIdx = 0; + for (int i = 0; i < CJK_RANGE.second - CJK_RANGE.first + 1; ++i) { + if (bitSet[i]) { + index.push_back(uint16_t(i - prevIdx)); + prevIdx = i; + } + } + + if (index.empty()) { + return 0; + } + + std::string out = std::to_string(index[0]); + for (int i = 1; i < index.size(); ++i) { + out += ','; + out += std::to_string(index[i]); + } + + std::cout << out; +} diff --git a/version.json b/version.json index 9795db37b..6ebd57924 100644 --- a/version.json +++ b/version.json @@ -1,10 +1,10 @@ { - "version": "0.10.2", - "tag": "v0.10.2", + "version": "0.10.3", + "tag": "v0.10.3", "binary": { "x64": { - "url": "https://github.com/Blinue/Magpie/releases/download/v0.10.2/Magpie_v0.10.2.zip", - "hash": "006ba306c9e31a6c382351694e9527d2" + "url": "https://github.com/Blinue/Magpie/releases/download/v0.10.3/Magpie_v0.10.3.zip", + "hash": "1768d18e72c45a2165d47b4a43a64f8a" } } }