WebKit Bugzilla
Attachment 357696 Details for
Bug 192864
: Refresh libyuv to M72
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch
bug-192864-20181219110903.patch (text/plain), 891.42 KB, created by
youenn fablet
on 2018-12-19 11:09:06 PST
(
hide
)
Description:
Patch
Filename:
MIME Type:
Creator:
youenn fablet
Created:
2018-12-19 11:09:06 PST
Size:
891.42 KB
patch
obsolete
>Subversion Revision: 239282 >diff --git a/Source/ThirdParty/libwebrtc/ChangeLog b/Source/ThirdParty/libwebrtc/ChangeLog >index cc9deee9c607b733af24f0872d058bac84420cd0..308e447e9f82b26114ba8b2c488a9e39f6245011 100644 >--- a/Source/ThirdParty/libwebrtc/ChangeLog >+++ b/Source/ThirdParty/libwebrtc/ChangeLog >@@ -1,3 +1,12 @@ >+2018-12-19 Youenn Fablet <youenn@apple.com> >+ >+ Refresh libyuv to M72 >+ https://bugs.webkit.org/show_bug.cgi?id=192864 >+ >+ Reviewed by NOBODY (OOPS!). >+ >+ * Source/third_party/libyuv: Resynced. >+ > 2018-12-19 Youenn Fablet <youenn@apple.com> > > Refresh usrsctplib to M72 >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.gn b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.gn >index 26a4a62d1994e5abd6f534a9b6b20b71bacb07e9..fc66637f362a70b5c2321c86888fa824fd5bac0c 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.gn >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.gn >@@ -29,7 +29,7 @@ exec_script_whitelist = build_dotfile_settings.exec_script_whitelist + > [ "//build_overrides/build.gni" ] > > default_args = { >- mac_sdk_min = "10.11" >+ mac_sdk_min = "10.12" > > # LibYUV does not want to switch to C++14 yet. 
> use_cxx11 = true >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.vpython b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.vpython >index cf74427542e4b6a1dfb2243d4ad58b175acf9964..defaf5df722ace398fe39febdf7086f5f3ad79cd 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.vpython >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/.vpython >@@ -30,3 +30,16 @@ wheel: < > name: "infra/python/wheels/psutil/${platform}_${py_python}_${py_abi}" > version: "version:5.2.2" > > >+ >+# Used by: >+# third_party/catapult >+wheel: < >+ name: "infra/python/wheels/pypiwin32/${vpython_platform}" >+ version: "version:219" >+ match_tag: < >+ platform: "win32" >+ > >+ match_tag: < >+ platform: "win_amd64" >+ > >+> >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.bp b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.bp >index 7d95a7865abe182356dcb849efaa52c019988f6f..691686aad330fad7acd62f83a19f1ee7f2f363b0 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.bp >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.bp >@@ -11,6 +11,7 @@ cc_library { > "source/compare_gcc.cc", > "source/compare_neon.cc", > "source/compare_neon64.cc", >+ "source/compare_mmi.cc", > "source/compare_msa.cc", > "source/convert.cc", > "source/convert_argb.cc", >@@ -25,12 +26,14 @@ cc_library { > "source/rotate_argb.cc", > "source/rotate_common.cc", > "source/rotate_gcc.cc", >+ "source/rotate_mmi.cc", > "source/rotate_msa.cc", > "source/rotate_neon.cc", > "source/rotate_neon64.cc", > "source/row_any.cc", > "source/row_common.cc", > "source/row_gcc.cc", >+ "source/row_mmi.cc", > "source/row_msa.cc", > "source/row_neon.cc", > "source/row_neon64.cc", >@@ -39,11 +42,11 @@ cc_library { > "source/scale_argb.cc", > "source/scale_common.cc", > "source/scale_gcc.cc", >+ "source/scale_mmi.cc", > "source/scale_msa.cc", > "source/scale_neon.cc", > "source/scale_neon64.cc", > 
"source/video_common.cc", >- > "source/convert_jpeg.cc", > "source/mjpeg_decoder.cc", > "source/mjpeg_validate.cc", >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.mk b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.mk >index dbc6cad37ab3ca5b09ea342639935ef48f1dae50..0975d64a3860695947680e5e63db45efe6dbc0c0 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.mk >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/Android.mk >@@ -9,6 +9,7 @@ LOCAL_SRC_FILES := \ > source/compare.cc \ > source/compare_common.cc \ > source/compare_gcc.cc \ >+ source/compare_mmi.cc \ > source/compare_msa.cc \ > source/compare_neon.cc \ > source/compare_neon64.cc \ >@@ -25,12 +26,14 @@ LOCAL_SRC_FILES := \ > source/rotate_argb.cc \ > source/rotate_common.cc \ > source/rotate_gcc.cc \ >+ source/rotate_mmi.cc \ > source/rotate_msa.cc \ > source/rotate_neon.cc \ > source/rotate_neon64.cc \ > source/row_any.cc \ > source/row_common.cc \ > source/row_gcc.cc \ >+ source/row_mmi.cc \ > source/row_msa.cc \ > source/row_neon.cc \ > source/row_neon64.cc \ >@@ -39,6 +42,7 @@ LOCAL_SRC_FILES := \ > source/scale_argb.cc \ > source/scale_common.cc \ > source/scale_gcc.cc \ >+ source/scale_mmi.cc \ > source/scale_msa.cc \ > source/scale_neon.cc \ > source/scale_neon64.cc \ >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/BUILD.gn b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/BUILD.gn >index 03ce499e6d9db8d2decdbadd94f877b403a24d6d..8904fd6c69913235520ae97fac7cd10d9217dc07 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/BUILD.gn >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/BUILD.gn >@@ -69,6 +69,10 @@ group("libyuv") { > deps += [ ":libyuv_msa" ] > } > >+ if (libyuv_use_mmi) { >+ deps += [ ":libyuv_mmi" ] >+ } >+ > if (!is_ios) { > # Make sure that clients of libyuv link with libjpeg. 
This can't go in > # libyuv_internal because in Windows x64 builds that will generate a clang >@@ -229,6 +233,24 @@ if (libyuv_use_msa) { > } > } > >+if (libyuv_use_mmi) { >+ static_library("libyuv_mmi") { >+ sources = [ >+ # MMI Source Files >+ "source/compare_mmi.cc", >+ "source/rotate_mmi.cc", >+ "source/row_mmi.cc", >+ "source/scale_mmi.cc", >+ ] >+ >+ deps = [ >+ ":libyuv_internal", >+ ] >+ >+ public_configs = [ ":libyuv_config" ] >+ } >+} >+ > if (libyuv_include_tests) { > config("libyuv_unittest_warnings_config") { > if (!is_win) { >@@ -329,7 +351,6 @@ if (libyuv_include_tests) { > ] > deps = [ > ":libyuv", >- "//build/config:exe_and_shlib_deps", # for asan on llvm libc++ > ] > if (is_linux) { > cflags = [ "-fexceptions" ] >@@ -343,7 +364,6 @@ if (libyuv_include_tests) { > ] > deps = [ > ":libyuv", >- "//build/config:exe_and_shlib_deps", # for new[] on llvm libc++ > ] > if (is_linux) { > cflags = [ "-fexceptions" ] >@@ -359,7 +379,6 @@ if (libyuv_include_tests) { > ] > deps = [ > ":libyuv", >- "//build/config:exe_and_shlib_deps", # for new[] on llvm libc++ > ] > > if (!is_ios && !libyuv_disable_jpeg) { >@@ -374,7 +393,6 @@ if (libyuv_include_tests) { > ] > deps = [ > ":libyuv", >- "//build/config:exe_and_shlib_deps", # for asan on llvm libc++ > ] > } > } >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/CMakeLists.txt b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/CMakeLists.txt >index 6420371de98314b82ec39f541c37c8a82abcec48..ed4948f066e23c927cf02a87c63c8b1b99a0931c 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/CMakeLists.txt >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/CMakeLists.txt >@@ -59,7 +59,11 @@ if(TEST) > endif() > > add_executable(libyuv_unittest ${ly_unittest_sources}) >- target_link_libraries(libyuv_unittest ${ly_lib_name} ${GTEST_LIBRARY} pthread) >+ target_link_libraries(libyuv_unittest ${ly_lib_name} ${GTEST_LIBRARY}) >+ find_library(PTHREAD_LIBRARY pthread) >+ if(NOT 
PTHREAD_LIBRARY STREQUAL "PTHREAD_LIBRARY-NOTFOUND") >+ target_link_libraries(libyuv_unittest pthread) >+ endif() > if (JPEG_FOUND) > target_link_libraries(libyuv_unittest ${JPEG_LIBRARY}) > endif() >@@ -68,14 +72,18 @@ if(TEST) > target_link_libraries(libyuv_unittest glibc-compat) > endif() > >- target_link_libraries(libyuv_unittest gflags) >+ find_library(GFLAGS_LIBRARY gflags) >+ if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND") >+ target_link_libraries(libyuv_unittest gflags) >+ add_definitions(-DLIBYUV_USE_GFLAGS) >+ endif() > endif() > > > # install the conversion tool, .so, .a, and all the header files > INSTALL ( PROGRAMS ${CMAKE_BINARY_DIR}/yuvconvert DESTINATION bin ) > INSTALL ( TARGETS ${ly_lib_static} DESTINATION lib ) >-INSTALL ( TARGETS ${ly_lib_shared} LIBRARY DESTINATION lib ) >+INSTALL ( TARGETS ${ly_lib_shared} LIBRARY DESTINATION lib RUNTIME DESTINATION bin ) > INSTALL ( DIRECTORY ${PROJECT_SOURCE_DIR}/include/ DESTINATION include ) > > # create the .deb and .rpm packages using cpack >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/DEPS b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/DEPS >index 266327470b7c0fa2c3631a3d0b28439e18c7d253..ac20e06276c867c6003962cf68c544bb04f16112 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/DEPS >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/DEPS >@@ -1,7 +1,7 @@ > vars = { > 'chromium_git': 'https://chromium.googlesource.com', >- 'chromium_revision': '80f0c5570649c35a869429b2ab8c381a0a7246cb', >- 'swarming_revision': '88229872dd17e71658fe96763feaa77915d8cbd6', >+ 'chromium_revision': '35b72bf255d6519506b7e732f9c74205d2ab452d', >+ 'swarming_revision': '486c9b53c4d54dd4b95bb6ce0e31160e600dfc11', > # Three lines of non-changing comments so that > # the commit queue can handle CLs rolling lss > # and whatever else without interference from each other. 
>@@ -9,34 +9,36 @@ vars = { > # Three lines of non-changing comments so that > # the commit queue can handle CLs rolling catapult > # and whatever else without interference from each other. >- 'catapult_revision': 'e7298f36f7912f2caa122086cfbe71734d04b73f', >+ 'catapult_revision': '0d25dda9b148bcd2dad9e1080b1dc57eaf9d2c2a', > } > > deps = { > 'src/build': >- Var('chromium_git') + '/chromium/src/build' + '@' + '39738e75b27f39d4c0030a0b11d5d2ddd34715f7', >+ Var('chromium_git') + '/chromium/src/build' + '@' + 'f79db013c75bff172913707cd762eba847838fea', > 'src/buildtools': >- Var('chromium_git') + '/chromium/buildtools.git' + '@' + 'a09e064635a49f08e585e3b173d5fbc3dd3f485e', >+ Var('chromium_git') + '/chromium/buildtools.git' + '@' + '9a90d9aaadeb5e04327ed05775f45132e4b3523f', > 'src/testing': >- Var('chromium_git') + '/chromium/src/testing' + '@' + '5f7e36cad6434fd3d65674af96653a23ecc9f694', >+ Var('chromium_git') + '/chromium/src/testing' + '@' + 'd2fde4ae5b8d0a5021e6f79d2f4a62e83ba348bc', > 'src/third_party': >- Var('chromium_git') + '/chromium/src/third_party' + '@' + '2c0ced3ddbf84ce1c0759b277d9538da42f23650', >+ Var('chromium_git') + '/chromium/src/third_party' + '@' + 'f931bb4f2bdcb327d066052df1914cab4bd68c50', > 'src/third_party/catapult': > Var('chromium_git') + '/catapult.git' + '@' + Var('catapult_revision'), > 'src/third_party/colorama/src': > Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8', >+ 'src/third_party/freetype/src': >+ Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '578bcf103a12fb742cdb314565819011d1ac12a7', > 'src/third_party/googletest/src': >- Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + '0062e4869f07a3ef235703ddf63af604b712446c', >+ Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'd5266326752f0a1dadbd310932d8f4fd8c3c5e7d', >+ 'src/third_party/harfbuzz-ng/src': >+ Var('chromium_git') + 
'/external/github.com/harfbuzz/harfbuzz.git' + '@' + '2b76767bf572364d3d647cdd139f2044a7ad06b2', > 'src/third_party/libjpeg_turbo': > Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'a1750dbc79a8792dde3d3f7d7d8ac28ba01ac9dd', > 'src/third_party/yasm/source/patched-yasm': >- Var('chromium_git') + '/chromium/deps/yasm/patched-yasm.git' + '@' + 'b98114e18d8b9b84586b10d24353ab8616d4c5fc', >+ Var('chromium_git') + '/chromium/deps/yasm/patched-yasm.git' + '@' + '720b70524a4424b15fc57e82263568c8ba0496ad', > 'src/tools': >- Var('chromium_git') + '/chromium/src/tools' + '@' + '6202b67fc46a9984097caf237e12e3b8f7a9f7da', >- 'src/tools/gyp': >- Var('chromium_git') + '/external/gyp.git' + '@' + 'd61a9397e668fa9843c4aa7da9e79460fe590bfb', >- 'src/tools/swarming_client': >- Var('chromium_git') + '/infra/luci/client-py.git' + '@' + Var('swarming_revision'), >+ Var('chromium_git') + '/chromium/src/tools' + '@' + 'f2c6ed916b94176158763400de308c2afd56b259', >+ 'src/tools/swarming_client': >+ Var('chromium_git') + '/infra/luci/client-py.git' + '@' + Var('swarming_revision'), > > # libyuv-only dependencies (not present in Chromium). 
> 'src/third_party/gflags': >@@ -52,40 +54,136 @@ deps = { > }, > > # Android deps: >- 'src/third_party/auto/src': { >- 'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + '8a81a858ae7b78a1aef71ac3905fade0bbd64e82', >- 'condition': 'checkout_android', >+ 'src/third_party/accessibility_test_framework': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/accessibility-test-framework', >+ 'version': 'version:2.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', > }, > 'src/third_party/auto/src': { > 'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + '8a81a858ae7b78a1aef71ac3905fade0bbd64e82', > 'condition': 'checkout_android', > }, > 'src/base': { >- 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'ac3d2b81181b085a9952cb83dba748420eefe691', >+ 'url': Var('chromium_git') + '/chromium/src/base' + '@' + '6c0497f398c5f6e6af0c66fbf4d77e875eb3f2b1', > 'condition': 'checkout_android', > }, >+ 'src/third_party/bazel': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/bazel', >+ 'version': 'version:0.10.0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ 'src/third_party/bouncycastle': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/bouncycastle', >+ 'version': 'version:1.46-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/android_ndk': { >- 'url': Var('chromium_git') + '/android_ndk.git' + '@' + 'e951c37287c7d8cd915bf8d4149fd4a06d808b55', >+ 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '5cd86312e794bdf542a3685c6f10cbb96072990b', > 'condition': 'checkout_android', > }, >+ 'src/third_party/android_support_test_runner': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_support_test_runner', >+ 'version': 'version:0.5-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/android_tools': { >- 'url': 
Var('chromium_git') + '/android_tools.git' + '@' + '9a70d48fcdd68cd0e7e968f342bd767ee6323bd1', >+ 'url': Var('chromium_git') + '/android_tools.git' + '@' + '130499e25286f4d56acafa252fee09f3cc595c49', > 'condition': 'checkout_android', > }, >+ 'src/third_party/byte_buddy': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/byte_buddy', >+ 'version': 'version:1.4.17-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/ced/src': { > 'url': Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + '94c367a1fe3a13207f4b22604fcfd1d9f9ddf6d9', > 'condition': 'checkout_android', > }, > 'src/third_party/errorprone/lib': { >- 'url': Var('chromium_git') + '/chromium/third_party/errorprone.git' + '@' + 'ecc57c2b00627667874744b9ad8efe10734d97a8', >+ 'url': Var('chromium_git') + '/chromium/third_party/errorprone.git' + '@' + '980d49e839aa4984015efed34b0134d4b2c9b6d7', > 'condition': 'checkout_android', > }, > 'src/third_party/findbugs': { > 'url': Var('chromium_git') + '/chromium/deps/findbugs.git' + '@' + '4275d9ac8610db6b1bc9a5e887f97e41b33fac67', > 'condition': 'checkout_android', > }, >+ 'src/third_party/gson': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/gson', >+ 'version': 'version:2.8.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ 'src/third_party/guava': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/guava', >+ 'version': 'version:23.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ 'src/third_party/hamcrest': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/hamcrest', >+ 'version': 'version:1.3-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/icu': { >- 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'd888fd2a1be890f4d35e43f68d6d79f42519a357', >+ 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 
'297a4dd02b9d36c92ab9b4f121e433c9c3bc14f8', >+ }, >+ 'src/third_party/icu4j': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/icu4j', >+ 'version': 'version:53.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ 'src/third_party/intellij': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/intellij', >+ 'version': 'version:12.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', > }, > 'src/third_party/jsr-305/src': { > 'url': Var('chromium_git') + '/external/jsr-305.git' + '@' + '642c508235471f7220af6d5df2d3210e3bfc0919', >@@ -96,25 +194,85 @@ deps = { > 'condition': 'checkout_android', > }, > 'src/third_party/mockito/src': { >- 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + 'de83ad4598ad4cf5ea53c69a8a8053780b04b850', >+ 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '04a2a289a4222f80ad20717c25144981210d2eac', > 'condition': 'checkout_android', > }, >+ 'src/third_party/objenesis': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/objenesis', >+ 'version': 'version:2.4-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ 'src/third_party/ow2_asm': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/ow2_asm', >+ 'version': 'version:5.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ 'src/third_party/r8': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/r8', >+ 'version': 'version:1.0.30', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/requests/src': { > 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'f172b30356d821d180fa4ecfa3e71c7274a32de4', > 'condition': 'checkout_android', > }, >+ 'src/third_party/robolectric': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/robolectric', >+ 'version': 'version:3.5.1', >+ }, >+ ], >+ 'condition': 
'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/robolectric/robolectric': { > 'url': Var('chromium_git') + '/external/robolectric.git' + '@' + '7e067f1112e1502caa742f7be72d37b5678d3403', > 'condition': 'checkout_android', > }, >+ 'src/third_party/sqlite4java': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/sqlite4java', >+ 'version': 'version:0.282-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > 'src/third_party/ub-uiautomator/lib': { > 'url': Var('chromium_git') + '/chromium/third_party/ub-uiautomator.git' + '@' + '00270549ce3161ae72ceb24712618ea28b4f9434', > 'condition': 'checkout_android', > }, >+ 'src/third_party/xstream': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/xstream', >+ 'version': 'version:1.4.8-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, > > # iOS deps: > 'src/ios': { >- 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '299ef76e844a74a1f2f4ce7f06d101861fb49aba', >+ 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '8e45eb00dffadde7e0669a881991e237b0b7a8eb', > 'condition': 'checkout_ios' > }, > >@@ -124,6 +282,472 @@ deps = { > 'url': Var('chromium_git') + '/chromium/deps/yasm/binaries.git' + '@' + '52f9b3f4b0aa06da24ef8b123058bb61ee468881', > 'condition': 'checkout_win', > }, >+ >+ # === ANDROID_DEPS Generated Code Start === >+ # Generated by //tools/android/roll/android_deps/fetch_all.sh >+ 'src/third_party/android_deps/libs/android_arch_core_common': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/android_arch_core_common', >+ 'version': 'version:1.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/android_arch_lifecycle_common': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common', >+ 'version': 'version:1.0.0-cr0', >+ }, >+ ], >+ 'condition': 
'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/android_arch_lifecycle_runtime': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_runtime', >+ 'version': 'version:1.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_animated_vector_drawable': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_animated_vector_drawable', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_appcompat_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_appcompat_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_cardview_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_cardview_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_design': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_design', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_gridlayout_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_gridlayout_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_leanback_v17': { >+ 'packages': [ >+ { >+ 'package': 
'chromium/third_party/android_deps/libs/com_android_support_leanback_v17', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_mediarouter_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_mediarouter_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_multidex': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_multidex', >+ 'version': 'version:1.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_palette_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_palette_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_preference_leanback_v17': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_preference_leanback_v17', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_preference_v14': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_preference_v14', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_preference_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_preference_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 
'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_recyclerview_v7': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_recyclerview_v7', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_annotations': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_annotations', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_compat': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_compat', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_core_ui': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_ui', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_core_utils': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_utils', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_fragment': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_fragment', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_media_compat': { >+ 'packages': [ >+ { >+ 
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_media_compat', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_v13': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_v13', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_v4': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_v4', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_support_vector_drawable': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_vector_drawable', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_android_support_transition': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_android_support_transition', >+ 'version': 'version:27.0.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone', >+ 'version': 
'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_base': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_base', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_basement': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_basement', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_fido': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_fido', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 
'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_gcm': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_gcm', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_iid': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_iid', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_location': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_location', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_tasks': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_tasks', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', 
>+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common', >+ 'version': 'version:12.0.1-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ 'src/third_party/android_deps/libs/com_google_android_play_core': { >+ 'packages': [ >+ { >+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core', >+ 'version': 'version:1.3.0-cr0', >+ }, >+ ], >+ 'condition': 'checkout_android', >+ 'dep_type': 'cipd', >+ }, >+ >+ # === ANDROID_DEPS Generated Code End === > } > > # Define rules for which include paths are allowed in our source. >@@ -197,6 +821,12 @@ hooks = [ > 'pattern': '.', > 'action': ['python', 'src/build/vs_toolchain.py', 'update'], > }, >+ { >+ # Update the Mac toolchain if necessary. >+ 'name': 'mac_toolchain', >+ 'pattern': '.', >+ 'action': ['python', 'src/build/mac_toolchain.py'], >+ }, > # Pull binutils for linux, enabled debug fission for faster linking / > # debugging when used with clang on Ubuntu Precise. > # https://code.google.com/p/chromium/issues/detail?id=352046 >@@ -213,7 +843,7 @@ hooks = [ > # Note: On Win, this should run after win_toolchain, as it may use it. > 'name': 'clang', > 'pattern': '.', >- 'action': ['python', 'src/tools/clang/scripts/update.py', '--if-needed'], >+ 'action': ['python', 'src/tools/clang/scripts/update.py'], > }, > { > # Update LASTCHANGE. >@@ -325,12 +955,15 @@ hooks = [ > ], > }, > { >- 'name': 'Android CIPD Ensure', >+ # We used to use src as a CIPD root. We moved it to a different directory >+ # in crrev.com/c/930178 but left the clobber here to ensure that that CL >+ # could be reverted safely. This can be safely removed once crbug.com/794764 >+ # is resolved. 
>+ 'name': 'Android Clobber Deprecated CIPD Root', > 'pattern': '.', > 'condition': 'checkout_android', >- 'action': ['src/build/cipd/cipd_wrapper.py', >- '--chromium-root', 'src', >- '--ensure-file', 'src/build/cipd/android/android.ensure', >+ 'action': ['src/build/cipd/clobber_cipd_root.py', >+ '--root', 'src', > ], > }, > # Android dependencies. Many are downloaded using Google Storage these days. >@@ -340,10 +973,11 @@ hooks = [ > # This downloads SDK extras and puts them in the > # third_party/android_tools/sdk/extras directory. > 'name': 'sdkextras', >+ 'condition': 'checkout_android', > 'pattern': '.', > # When adding a new sdk extras package to download, add the package > # directory and zip file to .gitignore in third_party/android_tools. >- 'action': ['python', >+ 'action': ['vpython', > 'src/build/android/play_services/update.py', > 'download' > ], >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/LICENSE_THIRD_PARTY b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/LICENSE_THIRD_PARTY >deleted file mode 100644 >index a71591e77104194f464733625331235ed838958e..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/LICENSE_THIRD_PARTY >+++ /dev/null >@@ -1,8 +0,0 @@ >-This source tree contains third party source code which is governed by third >-party licenses. This file contains references to files which are under other >-licenses than the one provided in the LICENSE file in the root of the source >-tree. 
>- >-Files governed by third party licenses: >-source/x86inc.asm >- >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.chromium b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.chromium >index c25373e23ce4214f22882c6d9904bea39681820c..4a239b3cbf0eb68479b746d14c495f857690387f 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.chromium >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.chromium >@@ -1,6 +1,6 @@ > Name: libyuv > URL: http://code.google.com/p/libyuv/ >-Version: 1703 >+Version: 1722 > License: BSD > License File: LICENSE > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.md b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.md >index 7b6619220b8dcd183948083221de49df6fb4b2a7..db70b7f08d3ead3bfcdaa206faf392e5f56d1418 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.md >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/README.md >@@ -10,9 +10,9 @@ > > ### Development > >-See [Getting started] [1] for instructions on how to get started developing. >+See [Getting started][1] for instructions on how to get started developing. > >-You can also browse the [docs directory] [2] for more documentation. >+You can also browse the [docs directory][2] for more documentation. > >-[1]: https://chromium.googlesource.com/libyuv/libyuv/+/master/docs/getting_started.md >-[2]: https://chromium.googlesource.com/libyuv/libyuv/+/master/docs/ >+[1]: ./docs/getting_started.md >+[2]: ./docs/ >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/all.gyp b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/all.gyp >deleted file mode 100644 >index 88a74842716a81cdfea319b034b825ac73847604..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/all.gyp >+++ /dev/null >@@ -1,21 +0,0 @@ >-# Copyright 2013 The LibYuv Project Authors. All rights reserved. 
>-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >-# all.gyp and All target are for benefit of android gyp build. >-{ >- 'targets': [ >- { >- 'target_name': 'All', >- 'type': 'none', >- 'dependencies': [ >- 'libyuv.gyp:*', >- 'libyuv_test.gyp:*', >- ], >- }, >- ], >-} >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/build_overrides/build.gni b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/build_overrides/build.gni >index 9bb73ab023acf7d062388e1e17d3771137329895..6d8319b965e318b563d1e5ca3c95f4f62a630d29 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/build_overrides/build.gni >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/build_overrides/build.gni >@@ -33,6 +33,14 @@ ubsan_vptr_blacklist_path = > # so we just ignore that assert. See https://crbug.com/648948 for more info. > ignore_elf32_limitations = true > >-# Use system Xcode installation instead of the Chromium bundled Mac toolchain, >-# since it contains only SDK 10.11, not 10.12 which WebRTC needs. >-use_system_xcode = true >+# Use bundled hermetic Xcode installation maintained by Chromium, >+# except for local iOS builds where it is unsupported. 
>+if (host_os == "mac") { >+ _result = exec_script("//build/mac/should_use_hermetic_xcode.py", >+ [ target_os ], >+ "value") >+ assert(_result != 2, >+ "Do not allow building targets with the default" + >+ "hermetic toolchain if the minimum OS version is not met.") >+ use_system_xcode = _result == 0 >+} >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/environment_variables.md b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/environment_variables.md >index c28d83e7dc14905e9506e4f7eee8ac35957ffb16..cd8159ad5a87776cf9d5eebf45be6ec1ad4e638d 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/environment_variables.md >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/environment_variables.md >@@ -6,7 +6,10 @@ For test purposes, environment variables can be set to control libyuv behavior. > > By default the cpu is detected and the most advanced form of SIMD is used. But you can disable instruction sets selectively, or completely, falling back on C code. Set the variable to 1 to disable the specified instruction set. > >+## All CPUs > LIBYUV_DISABLE_ASM >+ >+## Intel CPUs > LIBYUV_DISABLE_X86 > LIBYUV_DISABLE_SSE2 > LIBYUV_DISABLE_SSSE3 >@@ -14,12 +17,25 @@ By default the cpu is detected and the most advanced form of SIMD is used. But > LIBYUV_DISABLE_SSE42 > LIBYUV_DISABLE_AVX > LIBYUV_DISABLE_AVX2 >- LIBYUV_DISABLE_AVX512BW > LIBYUV_DISABLE_ERMS > LIBYUV_DISABLE_FMA3 >- LIBYUV_DISABLE_MSA >+ LIBYUV_DISABLE_F16C >+ LIBYUV_DISABLE_AVX512BW >+ LIBYUV_DISABLE_AVX512VL >+ LIBYUV_DISABLE_AVX512VBMI >+ LIBYUV_DISABLE_AVX512VBMI2 >+ LIBYUV_DISABLE_AVX512VBITALG >+ LIBYUV_DISABLE_AVX512VPOPCNTDQ >+ LIBYUV_DISABLE_GFNI >+ >+## ARM CPUs >+ > LIBYUV_DISABLE_NEON > >+## MIPS CPUs >+ LIBYUV_DISABLE_MSA >+ LIBYUV_DISABLE_MMI >+ > # Test Width/Height/Repeat > > The unittests default to a small image (128x72) to run fast. This can be set by environment variable to test a specific resolutions. 
>diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/formats.md b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/formats.md >index f78f57bb4c41a3e150842a0c4fb5c043e33656f0..97e8ce05f482ba97c0617fa74c7fa737d72e7878 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/formats.md >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/formats.md >@@ -66,7 +66,7 @@ The following is extracted from video_common.h as a complete list of formats sup > // 1 Primary Compressed YUV format. > FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), > >- // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. >+ // 8 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. > FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), > FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), > FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), >@@ -74,6 +74,7 @@ The following is extracted from video_common.h as a complete list of formats sup > FOURCC_J420 = FOURCC('J', '4', '2', '0'), > FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc > FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc >+ FOURCC_H422 = FOURCC('H', '4', '2', '2'), // unofficial fourcc > > // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. > FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. 
>diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/getting_started.md b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/getting_started.md >index 09297b66a5cc784de596690fb4bfe36ad4232b87..4426b606e07561c606c997618c125c75dc1131a4 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/getting_started.md >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/docs/getting_started.md >@@ -27,7 +27,7 @@ Then you'll get a .gclient file like: > }, > ]; > >-For iOS add `;target_os=['ios'];` to your OSX .gclient and run `GYP_DEFINES="OS=ios" gclient sync.` >+For iOS add `;target_os=['ios'];` to your OSX .gclient and run `gclient sync.` > > Browse the Git reprository: https://chromium.googlesource.com/libyuv/libyuv/+/master > >@@ -48,11 +48,8 @@ For Android add `;target_os=['android'];` to your Linux .gclient > > Then run: > >- export GYP_DEFINES="OS=android" > gclient sync > >-The sync will generate native build files for your environment using gyp (Windows: Visual Studio, OSX: XCode, Linux: make). 
This generation can also be forced manually: `gclient runhooks` >- > To get just the source (not buildable): > > git clone https://chromium.googlesource.com/libyuv/libyuv >@@ -98,11 +95,15 @@ arm64 > > ios simulator > >- gn gen out/Release "--args=is_debug=false target_os=\"ios\" ios_enable_code_signing=false target_cpu=\"x86\"" >- gn gen out/Debug "--args=is_debug=true target_os=\"ios\" ios_enable_code_signing=false target_cpu=\"x86\"" >+ gn gen out/Release "--args=is_debug=false target_os=\"ios\" ios_enable_code_signing=false use_xcode_clang=true target_cpu=\"x86\"" >+ gn gen out/Debug "--args=is_debug=true target_os=\"ios\" ios_enable_code_signing=false use_xcode_clang=true target_cpu=\"x86\"" > ninja -v -C out/Debug libyuv_unittest > ninja -v -C out/Release libyuv_unittest > >+ios disassembly >+ >+ otool -tV ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt >+ > ### Android > https://code.google.com/p/chromium/wiki/AndroidBuildInstructions > >@@ -131,8 +132,8 @@ ia32 > > mips > >- gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true" >- gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true" >+ gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true" >+ gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true" > ninja -v -C out/Debug libyuv_unittest > ninja -v -C out/Release libyuv_unittest > >@@ -144,17 +145,19 @@ arm disassembly: > > third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt > >+ Caveat: Disassembly may 
require optimize_max be disabled in BUILD.gn >+ > Running tests: > >- build/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* >+ out/Release/bin/run_libyuv_unittest -vv --gtest_filter=* > > Running test as benchmark: > >- build/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1 --libyuv_cpu_info=-1" >+ out/Release/bin/run_libyuv_unittest -vv --gtest_filter=* --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1 --libyuv_cpu_info=-1 > > Running test with C code: > >- build/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=1 --libyuv_cpu_info=1" >+ out/Release/bin/run_libyuv_unittest -vv --gtest_filter=* --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=1 --libyuv_cpu_info=1 > > ### Build targets > >@@ -172,6 +175,15 @@ Running test with C code: > ninja -v -C out/Debug libyuv_unittest > ninja -v -C out/Release libyuv_unittest > >+### MIPS Linux >+ >+mips >+ >+ gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false use_sysroot=false use_gold=false" >+ gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false use_sysroot=false use_gold=false" >+ ninja -v -C out/Debug libyuv_unittest >+ ninja -v -C out/Release libyuv_unittest >+ > ## Building the Library with make > > ### Linux >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/gyp_libyuv b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/gyp_libyuv >deleted file mode 100755 >index 445b924f16b0a814f626eccebb1a2b02e04922ff..0000000000000000000000000000000000000000 
>--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/gyp_libyuv >+++ /dev/null >@@ -1,101 +0,0 @@ >-#!/usr/bin/env python >-# >-# Copyright 2014 The LibYuv Project Authors. All rights reserved. >-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >-# This script is used to run GYP for libyuv. It contains selected parts of the >-# main function from the src/build/gyp_chromium file. >- >-import glob >-import os >-import shlex >-import sys >- >-checkout_root = os.path.dirname(os.path.realpath(__file__)) >- >-sys.path.insert(0, os.path.join(checkout_root, 'build')) >-import gyp_chromium >-import gyp_helper >-import vs_toolchain >- >-sys.path.insert(0, os.path.join(checkout_root, 'tools', 'gyp', 'pylib')) >-import gyp >- >-def GetSupplementalFiles(): >- """Returns a list of the supplemental files that are included in all GYP >- sources.""" >- # Can't use the one in gyp_chromium since the directory location of the root >- # is different. >- return glob.glob(os.path.join(checkout_root, '*', 'supplement.gypi')) >- >- >-if __name__ == '__main__': >- args = sys.argv[1:] >- >- if int(os.environ.get('GYP_CHROMIUM_NO_ACTION', 0)): >- print 'Skipping gyp_libyuv due to GYP_CHROMIUM_NO_ACTION env var.' >- sys.exit(0) >- >- # This could give false positives since it doesn't actually do real option >- # parsing. Oh well. >- gyp_file_specified = False >- for arg in args: >- if arg.endswith('.gyp'): >- gyp_file_specified = True >- break >- >- # If we didn't get a file, assume 'all.gyp' in the root of the checkout. >- if not gyp_file_specified: >- # Because of a bug in gyp, simply adding the abspath to all.gyp doesn't >- # work, but chdir'ing and adding the relative path does. 
Spooky :/ >- os.chdir(checkout_root) >- args.append('all.gyp') >- >- # There shouldn't be a circular dependency relationship between .gyp files, >- args.append('--no-circular-check') >- >- # Default to ninja unless GYP_GENERATORS is set. >- if not os.environ.get('GYP_GENERATORS'): >- os.environ['GYP_GENERATORS'] = 'ninja' >- >- vs2013_runtime_dll_dirs = None >- if int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', '1')): >- vs2013_runtime_dll_dirs = vs_toolchain.SetEnvironmentAndGetRuntimeDllDirs() >- >- # Enforce gyp syntax checking. This adds about 20% execution time. >- args.append('--check') >- >- supplemental_includes = gyp_chromium.GetSupplementalFiles() >- gyp_vars_dict = gyp_chromium.GetGypVars(supplemental_includes) >- >- # Automatically turn on crosscompile support for platforms that need it. >- if all(('ninja' in os.environ.get('GYP_GENERATORS', ''), >- gyp_vars_dict.get('OS') in ['android', 'ios'], >- 'GYP_CROSSCOMPILE' not in os.environ)): >- os.environ['GYP_CROSSCOMPILE'] = '1' >- >- args.extend(['-I' + i for i in >- gyp_chromium.additional_include_files(supplemental_includes, >- args)]) >- >- # Set the gyp depth variable to the root of the checkout. >- args.append('--depth=' + os.path.relpath(checkout_root)) >- >- print 'Updating projects from gyp files...' >- sys.stdout.flush() >- >- # Off we go... 
>- gyp_rc = gyp.main(args) >- >- if vs2013_runtime_dll_dirs: >- x64_runtime, x86_runtime = vs2013_runtime_dll_dirs >- vs_toolchain.CopyVsRuntimeDlls( >- os.path.join(checkout_root, gyp_chromium.GetOutputDirectory()), >- (x86_runtime, x64_runtime)) >- >- sys.exit(gyp_rc) >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/gyp_libyuv.py b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/gyp_libyuv.py >deleted file mode 100644 >index bb32ec39df3953cb98c25f1d80b3fd4a27f6acf5..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/gyp_libyuv.py >+++ /dev/null >@@ -1,28 +0,0 @@ >-#!/usr/bin/env python >-# >-# Copyright 2014 The LibYuv Project Authors. All rights reserved. >-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >- >-# This script is a modified copy of the src/build/gyp_chromium.py file. >-# It is needed for parallel processing. >- >-# This file is (possibly, depending on python version) imported by >-# gyp_libyuv when GYP_PARALLEL=1 and it creates sub-processes >-# through the multiprocessing library. >- >-# Importing in Python 2.6 (fixed in 2.7) on Windows doesn't search for >-# imports that don't end in .py (and aren't directories with an >-# __init__.py). This wrapper makes "import gyp_libyuv" work with >-# those old versions and makes it possible to execute gyp_libyuv.py >-# directly on Windows where the extension is useful. 
>- >-import os >- >-path = os.path.abspath(os.path.split(__file__)[0]) >-execfile(os.path.join(path, 'gyp_libyuv')) >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/basic_types.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/basic_types.h >index 01d9dfc773628adf90c4c223dd0926b7ce7ff2f0..1bea67f2f2547081684704fc61a0acfdcd5e5de2 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/basic_types.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/basic_types.h >@@ -29,6 +29,8 @@ typedef signed char int8_t; > #else > #include <stdint.h> // for uintptr_t and C99 types > #endif // defined(_MSC_VER) && (_MSC_VER < 1600) >+// Types are deprecated. Enable this macro for legacy types. >+#ifdef LIBYUV_LEGACY_TYPES > typedef uint64_t uint64; > typedef int64_t int64; > typedef uint32_t uint32; >@@ -37,6 +39,7 @@ typedef uint16_t uint16; > typedef int16_t int16; > typedef uint8_t uint8; > typedef int8_t int8; >+#endif // LIBYUV_LEGACY_TYPES > #endif // INT_TYPES_DEFINED > > #if !defined(LIBYUV_API) >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/compare_row.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/compare_row.h >index 72ee740600af1c643a2bf1473f735ce829a144f3..e95b9d93eb2c64a69712abe83a32ff4f9997351c 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/compare_row.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/compare_row.h >@@ -45,12 +45,6 @@ extern "C" { > #endif // clang >= 3.4 > #endif // __clang__ > >-// The following are available for Visual C: >-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ >- (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) >-#define HAS_HASHDJB2_AVX2 >-#endif >- > // The following are available for Visual C and GCC: > #if !defined(LIBYUV_DISABLE_X86) && \ > (defined(__x86_64__) || defined(__i386__) 
|| defined(_M_IX86)) >@@ -60,7 +54,7 @@ extern "C" { > #endif > > // The following are available for Visual C and clangcl 32 bit: >-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ >+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ > (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) > #define HAS_HASHDJB2_AVX2 > #define HAS_SUMSQUAREERROR_AVX2 >@@ -90,6 +84,11 @@ extern "C" { > #define HAS_SUMSQUAREERROR_MSA > #endif > >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+#define HAS_HAMMINGDISTANCE_MMI >+#define HAS_SUMSQUAREERROR_MMI >+#endif >+ > uint32_t HammingDistance_C(const uint8_t* src_a, > const uint8_t* src_b, > int count); >@@ -108,7 +107,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a, > uint32_t HammingDistance_MSA(const uint8_t* src_a, > const uint8_t* src_b, > int count); >- >+uint32_t HammingDistance_MMI(const uint8_t* src_a, >+ const uint8_t* src_b, >+ int count); > uint32_t SumSquareError_C(const uint8_t* src_a, > const uint8_t* src_b, > int count); >@@ -124,6 +125,9 @@ uint32_t SumSquareError_NEON(const uint8_t* src_a, > uint32_t SumSquareError_MSA(const uint8_t* src_a, > const uint8_t* src_b, > int count); >+uint32_t SumSquareError_MMI(const uint8_t* src_a, >+ const uint8_t* src_b, >+ int count); > > uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed); > uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed); >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert.h >index d12ef24f799792a5695bf51e67e1e972892b0125..d8b47a838487e06cb96d17e398b7379a9ec3bd7e 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert.h >@@ -42,6 +42,21 @@ int I444ToI420(const uint8_t* src_y, > int width, > int height); > >+// Convert I444 
to NV21. >+LIBYUV_API >+int I444ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height); >+ > // Convert I422 to I420. > LIBYUV_API > int I422ToI420(const uint8_t* src_y, >@@ -59,6 +74,21 @@ int I422ToI420(const uint8_t* src_y, > int width, > int height); > >+// Convert I422 to NV21. >+LIBYUV_API >+int I422ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height); >+ > // Copy I420 to I420. > #define I420ToI420 I420Copy > LIBYUV_API >@@ -127,6 +157,17 @@ int I400ToI420(const uint8_t* src_y, > int width, > int height); > >+// Convert I400 (grey) to NV21. >+LIBYUV_API >+int I400ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height); >+ > #define J400ToJ420 I400ToI420 > > // Convert NV12 to I420. >@@ -350,6 +391,19 @@ int MJPGToI420(const uint8_t* sample, > int dst_width, > int dst_height); > >+// JPEG to NV21 >+LIBYUV_API >+int MJPGToNV21(const uint8_t* sample, >+ size_t sample_size, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int src_width, >+ int src_height, >+ int dst_width, >+ int dst_height); >+ > // Query size of MJPG in pixels. 
> LIBYUV_API > int MJPGSize(const uint8_t* sample, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_argb.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_argb.h >index ab772b6c32334c8eac8600c281c9aa3f082a75b9..891c57de6808658a4ccce2847f8be11af98b0000 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_argb.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_argb.h >@@ -298,6 +298,28 @@ int NV21ToRGB24(const uint8_t* src_y, > int width, > int height); > >+// Convert NV12 to RAW. >+LIBYUV_API >+int NV12ToRAW(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_uv, >+ int src_stride_uv, >+ uint8_t* dst_raw, >+ int dst_stride_raw, >+ int width, >+ int height); >+ >+// Convert NV21 to RAW. >+LIBYUV_API >+int NV21ToRAW(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_vu, >+ int src_stride_vu, >+ uint8_t* dst_raw, >+ int dst_stride_raw, >+ int width, >+ int height); >+ > // Convert M420 to ARGB. 
> LIBYUV_API > int M420ToARGB(const uint8_t* src_m420, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_from.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_from.h >index 5cd8a4bfc04cc24603e70edf87bb5b3aa2dee556..861418d07b3cf967e5e89004353582b0f0f1a942 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_from.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/convert_from.h >@@ -239,6 +239,30 @@ int I420ToRGB565(const uint8_t* src_y, > int width, > int height); > >+LIBYUV_API >+int J420ToRGB565(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_rgb565, >+ int dst_stride_rgb565, >+ int width, >+ int height); >+ >+LIBYUV_API >+int H420ToRGB565(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_rgb565, >+ int dst_stride_rgb565, >+ int width, >+ int height); >+ > LIBYUV_API > int I422ToRGB565(const uint8_t* src_y, > int src_stride_y, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/cpu_id.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/cpu_id.h >index 91480c68b0133c90a0389b3d527566b595d386a8..b01cd25c574f1ed5646154c87148644b72c62c26 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/cpu_id.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/cpu_id.h >@@ -48,6 +48,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000; > // These flags are only valid on MIPS processors. > static const int kCpuHasMIPS = 0x200000; > static const int kCpuHasMSA = 0x400000; >+static const int kCpuHasMMI = 0x800000; > > // Optional init function. TestCpuFlag does an auto-init. > // Returns cpu_info flags. 
>@@ -80,6 +81,31 @@ int ArmCpuCaps(const char* cpuinfo_name); > LIBYUV_API > int MaskCpuFlags(int enable_flags); > >+// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags| >+// should be a valid combination of the kCpuHas constants above and include >+// kCpuInitialized. Use this method when running in a sandboxed process where >+// the detection code might fail (as it might access /proc/cpuinfo). In such >+// cases the cpu_info can be obtained from a non sandboxed process by calling >+// InitCpuFlags() and passed to the sandboxed process (via command line >+// parameters, IPC...) which can then call this method to initialize the CPU >+// flags. >+// Notes: >+// - when specifying 0 for |cpu_flags|, the auto initialization is enabled >+// again. >+// - enabling CPU features that are not supported by the CPU will result in >+// undefined behavior. >+// TODO(fbarchard): consider writing a helper function that translates from >+// other library CPU info to libyuv CPU info and add a .md doc that explains >+// CPU detection. >+static __inline void SetCpuFlags(int cpu_flags) { >+ LIBYUV_API extern int cpu_info_; >+#ifdef __ATOMIC_RELAXED >+ __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED); >+#else >+ cpu_info_ = cpu_flags; >+#endif >+} >+ > // Low level cpuid for X86. Returns zeros on other CPUs. > // eax is the info type that you want. > // ecx is typically the cpu number, and should normally be zero. 
>diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/macros_msa.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/macros_msa.h >index 921eb0714d66faf26cbcda7ec88762fff410d52f..29997ce11fd12ded65c8d6f8603f52ba4cd041dd 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/macros_msa.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/macros_msa.h >@@ -16,30 +16,30 @@ > #include <stdint.h> > > #if (__mips_isa_rev >= 6) >-#define LW(psrc) \ >- ({ \ >- uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \ >- uint32_t val_m; \ >- asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ >- : [val_m] "=r"(val_m) \ >- : [psrc_lw_m] "m"(*psrc_lw_m)); \ >- val_m; \ >+#define LW(psrc) \ >+ ({ \ >+ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ >+ uint32_t val_m; \ >+ asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ >+ : [val_m] "=r"(val_m) \ >+ : [psrc_lw_m] "m"(*psrc_lw_m)); \ >+ val_m; \ > }) > > #if (__mips == 64) >-#define LD(psrc) \ >- ({ \ >- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ >- uint64_t val_m = 0; \ >- asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ >- : [val_m] "=r"(val_m) \ >- : [psrc_ld_m] "m"(*psrc_ld_m)); \ >- val_m; \ >+#define LD(psrc) \ >+ ({ \ >+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ >+ uint64_t val_m = 0; \ >+ asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ >+ : [val_m] "=r"(val_m) \ >+ : [psrc_ld_m] "m"(*psrc_ld_m)); \ >+ val_m; \ > }) > #else // !(__mips == 64) > #define LD(psrc) \ > ({ \ >- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ >+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ > uint32_t val0_m, val1_m; \ > uint64_t val_m = 0; \ > val0_m = LW(psrc_ld_m); \ >@@ -81,30 +81,30 @@ > }) > #endif // !(__mips == 64) > #else // !(__mips_isa_rev >= 6) >-#define LW(psrc) \ >- ({ \ >- uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \ >- uint32_t val_m; \ >- asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \ >- : 
[val_m] "=r"(val_m) \ >- : [psrc_lw_m] "m"(*psrc_lw_m)); \ >- val_m; \ >+#define LW(psrc) \ >+ ({ \ >+ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ >+ uint32_t val_m; \ >+ asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \ >+ : [val_m] "=r"(val_m) \ >+ : [psrc_lw_m] "m"(*psrc_lw_m)); \ >+ val_m; \ > }) > > #if (__mips == 64) >-#define LD(psrc) \ >- ({ \ >- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ >- uint64_t val_m = 0; \ >- asm volatile("uld %[val_m], %[psrc_ld_m] \n" \ >- : [val_m] "=r"(val_m) \ >- : [psrc_ld_m] "m"(*psrc_ld_m)); \ >- val_m; \ >+#define LD(psrc) \ >+ ({ \ >+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ >+ uint64_t val_m = 0; \ >+ asm volatile("uld %[val_m], %[psrc_ld_m] \n" \ >+ : [val_m] "=r"(val_m) \ >+ : [psrc_ld_m] "m"(*psrc_ld_m)); \ >+ val_m; \ > }) > #else // !(__mips == 64) > #define LD(psrc) \ > ({ \ >- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ >+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ > uint32_t val0_m, val1_m; \ > uint64_t val_m = 0; \ > val0_m = LW(psrc_ld_m); \ >@@ -138,7 +138,7 @@ > > // TODO(fbarchard): Consider removing __VAR_ARGS versions. > #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ >-#define LD_UB(...) LD_B(v16u8, __VA_ARGS__) >+#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__) > > #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ > #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) >@@ -158,14 +158,14 @@ > out0 = LD_B(RTYPE, (psrc)); \ > out1 = LD_B(RTYPE, (psrc) + stride); \ > } >-#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) >+#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__) > > #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ > { \ > LD_B2(RTYPE, (psrc), stride, out0, out1); \ > LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ > } >-#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) >+#define LD_UB4(...) 
LD_B4(const v16u8, __VA_ARGS__) > > /* Description : Store two vectors with stride each having 16 'byte' sized > elements >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/mjpeg_decoder.h >index 6c12633387fad8234ddb8f0691564aaed029d7ee..275f8d4c18532003248bf4c51d0bce36b737c9a8 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/mjpeg_decoder.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/mjpeg_decoder.h >@@ -168,8 +168,8 @@ class LIBYUV_API MJpegDecoder { > int GetComponentScanlinePadding(int component); > > // A buffer holding the input data for a frame. >- Buffer buf_{}; >- BufferVector buf_vec_{}; >+ Buffer buf_; >+ BufferVector buf_vec_; > > jpeg_decompress_struct* decompress_struct_; > jpeg_source_mgr* source_mgr_; >@@ -181,12 +181,12 @@ class LIBYUV_API MJpegDecoder { > > // Temporaries used to point to scanline outputs. > int num_outbufs_; // Outermost size of all arrays below. >- uint8_t*** scanlines_{}; >- int* scanlines_sizes_{}; >+ uint8_t*** scanlines_; >+ int* scanlines_sizes_; > // Temporary buffer used for decoding when we can't decode directly to the > // output buffers. Large enough for just one iMCU row. 
>- uint8_t** databuf_{}; >- int* databuf_strides_{}; >+ uint8_t** databuf_; >+ int* databuf_strides_; > }; > > } // namespace libyuv >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/rotate_row.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/rotate_row.h >index 5edc0fcf13a1b632fb4a2ca127567e5de031cacb..022293eef2c9580c2ca46d94633db8e84b58f9ba 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/rotate_row.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/rotate_row.h >@@ -60,6 +60,11 @@ extern "C" { > #define HAS_TRANSPOSEUVWX16_MSA > #endif > >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+#define HAS_TRANSPOSEWX8_MMI >+#define HAS_TRANSPOSEUVWX8_MMI >+#endif >+ > void TransposeWxH_C(const uint8_t* src, > int src_stride, > uint8_t* dst, >@@ -87,6 +92,11 @@ void TransposeWx8_SSSE3(const uint8_t* src, > uint8_t* dst, > int dst_stride, > int width); >+void TransposeWx8_MMI(const uint8_t* src, >+ int src_stride, >+ uint8_t* dst, >+ int dst_stride, >+ int width); > void TransposeWx8_Fast_SSSE3(const uint8_t* src, > int src_stride, > uint8_t* dst, >@@ -108,6 +118,11 @@ void TransposeWx8_Any_SSSE3(const uint8_t* src, > uint8_t* dst, > int dst_stride, > int width); >+void TransposeWx8_Any_MMI(const uint8_t* src, >+ int src_stride, >+ uint8_t* dst, >+ int dst_stride, >+ int width); > void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, > int src_stride, > uint8_t* dst, >@@ -156,6 +171,13 @@ void TransposeUVWx8_NEON(const uint8_t* src, > uint8_t* dst_b, > int dst_stride_b, > int width); >+void TransposeUVWx8_MMI(const uint8_t* src, >+ int src_stride, >+ uint8_t* dst_a, >+ int dst_stride_a, >+ uint8_t* dst_b, >+ int dst_stride_b, >+ int width); > void TransposeUVWx16_MSA(const uint8_t* src, > int src_stride, > uint8_t* dst_a, >@@ -178,6 +200,13 @@ void TransposeUVWx8_Any_NEON(const uint8_t* src, > uint8_t* dst_b, > int dst_stride_b, > int 
width); >+void TransposeUVWx8_Any_MMI(const uint8_t* src, >+ int src_stride, >+ uint8_t* dst_a, >+ int dst_stride_a, >+ uint8_t* dst_b, >+ int dst_stride_b, >+ int width); > void TransposeUVWx16_Any_MSA(const uint8_t* src, > int src_stride, > uint8_t* dst_a, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/row.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/row.h >index b5a42d0e62cfcf5c43fe753e78c92e669442cec0..cc948878f668979c9836eea680830a92bb126ee6 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/row.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/row.h >@@ -55,6 +55,15 @@ extern "C" { > #endif // clang >= 3.4 > #endif // __clang__ > >+// clang >= 6.0.0 required for AVX512. >+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) >+// clang in xcode follows a different versioning scheme. >+// TODO(fbarchard): fix xcode 9 ios b/789. >+#if (__clang_major__ >= 7) && !defined(__APPLE__) >+#define CLANG_HAS_AVX512 1 >+#endif // clang >= 7 >+#endif // __clang__ >+ > // Visual C 2012 required for AVX2. 
> #if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ > _MSC_VER >= 1700 >@@ -105,8 +114,10 @@ extern "C" { > #define HAS_MIRRORROW_SSSE3 > #define HAS_MIRRORUVROW_SSSE3 > #define HAS_NV12TOARGBROW_SSSE3 >+#define HAS_NV12TORGB24ROW_SSSE3 > #define HAS_NV12TORGB565ROW_SSSE3 > #define HAS_NV21TOARGBROW_SSSE3 >+#define HAS_NV21TORGB24ROW_SSSE3 > #define HAS_RAWTOARGBROW_SSSE3 > #define HAS_RAWTORGB24ROW_SSSE3 > #define HAS_RAWTOYROW_SSSE3 >@@ -200,8 +211,10 @@ extern "C" { > #define HAS_MERGEUVROW_AVX2 > #define HAS_MIRRORROW_AVX2 > #define HAS_NV12TOARGBROW_AVX2 >+#define HAS_NV12TORGB24ROW_AVX2 > #define HAS_NV12TORGB565ROW_AVX2 > #define HAS_NV21TOARGBROW_AVX2 >+#define HAS_NV21TORGB24ROW_AVX2 > #define HAS_SPLITUVROW_AVX2 > #define HAS_UYVYTOARGBROW_AVX2 > #define HAS_UYVYTOUV422ROW_AVX2 >@@ -230,7 +243,7 @@ extern "C" { > > // The following are available for AVX2 Visual C and clangcl 32 bit: > // TODO(fbarchard): Port to gcc. >-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ >+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ > (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) > #define HAS_ARGB1555TOARGBROW_AVX2 > #define HAS_ARGB4444TOARGBROW_AVX2 >@@ -271,10 +284,13 @@ extern "C" { > (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) > #define HAS_ABGRTOAR30ROW_AVX2 > #define HAS_ARGBTOAR30ROW_AVX2 >+// Fix AVX2 b:118386049 segfault >+//#define HAS_ARGBTORAWROW_AVX2 >+//#define HAS_ARGBTORGB24ROW_AVX2 > #define HAS_CONVERT16TO8ROW_AVX2 > #define HAS_CONVERT8TO16ROW_AVX2 >-#define HAS_I210TOARGBROW_AVX2 > #define HAS_I210TOAR30ROW_AVX2 >+#define HAS_I210TOARGBROW_AVX2 > #define HAS_I422TOAR30ROW_AVX2 > #define HAS_I422TOUYVYROW_AVX2 > #define HAS_I422TOYUY2ROW_AVX2 >@@ -282,6 +298,16 @@ extern "C" { > #define HAS_MULTIPLYROW_16_AVX2 > #endif > >+// The following are available for AVX512 clang x86 platforms: >+// TODO(fbarchard): Port to GCC and Visual C >+// TODO(fbarchard): re-enable 
HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789 >+#if !defined(LIBYUV_DISABLE_X86) && \ >+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ >+ (defined(CLANG_HAS_AVX512)) >+// Fix AVX2 b:118386049 segfault >+//#define HAS_ARGBTORGB24ROW_AVX512VBMI >+#endif >+ > // The following are available on Neon platforms: > #if !defined(LIBYUV_DISABLE_NEON) && \ > (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) >@@ -459,6 +485,81 @@ extern "C" { > #define HAS_YUY2TOYROW_MSA > #endif > >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+#define HAS_ABGRTOUVROW_MMI >+#define HAS_ABGRTOYROW_MMI >+#define HAS_ARGB1555TOARGBROW_MMI >+#define HAS_ARGB1555TOUVROW_MMI >+#define HAS_ARGB1555TOYROW_MMI >+#define HAS_ARGB4444TOARGBROW_MMI >+#define HAS_ARGB4444TOUVROW_MMI >+#define HAS_ARGB4444TOYROW_MMI >+#define HAS_ARGBADDROW_MMI >+#define HAS_ARGBATTENUATEROW_MMI >+#define HAS_ARGBBLENDROW_MMI >+#define HAS_ARGBCOLORMATRIXROW_MMI >+#define HAS_ARGBCOPYALPHAROW_MMI >+#define HAS_ARGBCOPYYTOALPHAROW_MMI >+#define HAS_ARGBEXTRACTALPHAROW_MMI >+#define HAS_ARGBGRAYROW_MMI >+#define HAS_ARGBMIRRORROW_MMI >+#define HAS_ARGBMULTIPLYROW_MMI >+#define HAS_ARGBSEPIAROW_MMI >+#define HAS_ARGBSHADEROW_MMI >+#define HAS_ARGBSHUFFLEROW_MMI >+#define HAS_ARGBSUBTRACTROW_MMI >+#define HAS_ARGBTOARGB1555ROW_MMI >+#define HAS_ARGBTOARGB4444ROW_MMI >+#define HAS_ARGBTORAWROW_MMI >+#define HAS_ARGBTORGB24ROW_MMI >+#define HAS_ARGBTORGB565DITHERROW_MMI >+#define HAS_ARGBTORGB565ROW_MMI >+#define HAS_ARGBTOUV444ROW_MMI >+#define HAS_ARGBTOUVJROW_MMI >+#define HAS_ARGBTOUVROW_MMI >+#define HAS_ARGBTOYJROW_MMI >+#define HAS_ARGBTOYROW_MMI >+#define HAS_BGRATOUVROW_MMI >+#define HAS_BGRATOYROW_MMI >+#define HAS_BLENDPLANEROW_MMI >+#define HAS_COMPUTECUMULATIVESUMROW_MMI >+#define HAS_CUMULATIVESUMTOAVERAGEROW_MMI >+#define HAS_HALFFLOATROW_MMI >+#define HAS_I400TOARGBROW_MMI >+#define HAS_I422TOUYVYROW_MMI >+#define HAS_I422TOYUY2ROW_MMI 
>+#define HAS_INTERPOLATEROW_MMI >+#define HAS_J400TOARGBROW_MMI >+#define HAS_MERGERGBROW_MMI >+#define HAS_MERGEUVROW_MMI >+#define HAS_MIRRORROW_MMI >+#define HAS_MIRRORUVROW_MMI >+#define HAS_RAWTOARGBROW_MMI >+#define HAS_RAWTORGB24ROW_MMI >+#define HAS_RAWTOUVROW_MMI >+#define HAS_RAWTOYROW_MMI >+#define HAS_RGB24TOARGBROW_MMI >+#define HAS_RGB24TOUVROW_MMI >+#define HAS_RGB24TOYROW_MMI >+#define HAS_RGB565TOARGBROW_MMI >+#define HAS_RGB565TOUVROW_MMI >+#define HAS_RGB565TOYROW_MMI >+#define HAS_RGBATOUVROW_MMI >+#define HAS_RGBATOYROW_MMI >+#define HAS_SOBELROW_MMI >+#define HAS_SOBELTOPLANEROW_MMI >+#define HAS_SOBELXROW_MMI >+#define HAS_SOBELXYROW_MMI >+#define HAS_SOBELYROW_MMI >+#define HAS_SPLITRGBROW_MMI >+#define HAS_SPLITUVROW_MMI >+#define HAS_UYVYTOUVROW_MMI >+#define HAS_UYVYTOYROW_MMI >+#define HAS_YUY2TOUV422ROW_MMI >+#define HAS_YUY2TOUVROW_MMI >+#define HAS_YUY2TOYROW_MMI >+#endif >+ > #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) > #if defined(VISUALC_HAS_AVX2) > #define SIMD_ALIGNED(var) __declspec(align(32)) var >@@ -813,6 +914,8 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); > void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); > void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); > void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); >+void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); >+void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); > void ARGBToUV444Row_NEON(const uint8_t* src_argb, > uint8_t* dst_u, > uint8_t* dst_v, >@@ -831,6 +934,15 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void ARGBToUV444Row_MMI(const uint8_t* src_argb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ARGBToUVRow_MMI(const uint8_t* src_argb0, >+ int src_stride_argb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void 
ARGBToUVJRow_NEON(const uint8_t* src_argb, > int src_stride_argb, > uint8_t* dst_u, >@@ -916,6 +1028,51 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void ARGBToUVJRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void BGRAToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ABGRToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RGBAToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RGB24ToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RAWToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RGB565ToUVRow_MMI(const uint8_t* src_rgb565, >+ int src_stride_rgb565, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555, >+ int src_stride_argb1555, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444, >+ int src_stride_argb4444, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); > void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); > void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); >@@ -935,6 +1092,15 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); > void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); > void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); > void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); >+void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); 
>+void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); >+void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); >+void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); >+void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); >+void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width); >+void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width); >+void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width); >+ > void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); > void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); > void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); >@@ -977,6 +1143,20 @@ void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void BGRAToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ABGRToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void RGBAToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ARGBToYJRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ARGBToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void RGB24ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void RAWToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void RGB565ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ARGB1555ToYRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); >+void ARGB4444ToYRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > > void ARGBToUVRow_AVX2(const uint8_t* src_argb0, > int src_stride_argb, >@@ -1066,6 +1246,15 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void 
ARGBToUV444Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ARGBToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, > int src_stride_ptr, > uint8_t* dst_u, >@@ -1151,6 +1340,51 @@ void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void ARGBToUVJRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void BGRAToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ABGRToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RGBAToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RGB24ToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RAWToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void RGB565ToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ARGB1555ToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); >+void ARGB4444ToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void ARGBToUVRow_C(const uint8_t* src_rgb0, > int src_stride_rgb, > uint8_t* dst_u, >@@ -1230,12 +1464,14 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); > void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); > void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); > void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); >+void 
MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width); > void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); > void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); > void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void MirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > > void MirrorUVRow_SSSE3(const uint8_t* src, > uint8_t* dst_u, >@@ -1249,6 +1485,10 @@ void MirrorUVRow_MSA(const uint8_t* src_uv, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void MirrorUVRow_MMI(const uint8_t* src_uv, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void MirrorUVRow_C(const uint8_t* src_uv, > uint8_t* dst_u, > uint8_t* dst_v, >@@ -1258,6 +1498,7 @@ void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); > void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); > void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); > void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); >+void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width); > void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); > void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, > uint8_t* dst_ptr, >@@ -1269,6 +1510,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > > void SplitUVRow_C(const uint8_t* src_uv, > uint8_t* dst_u, >@@ -1290,6 +1532,10 @@ void SplitUVRow_MSA(const uint8_t* src_uv, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void SplitUVRow_MMI(const uint8_t* src_uv, >+ uint8_t* dst_u, 
>+ uint8_t* dst_v, >+ int width); > void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_u, > uint8_t* dst_v, >@@ -1306,6 +1552,10 @@ void SplitUVRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void SplitUVRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > > void MergeUVRow_C(const uint8_t* src_u, > const uint8_t* src_v, >@@ -1327,6 +1577,10 @@ void MergeUVRow_MSA(const uint8_t* src_u, > const uint8_t* src_v, > uint8_t* dst_uv, > int width); >+void MergeUVRow_MMI(const uint8_t* src_u, >+ const uint8_t* src_v, >+ uint8_t* dst_uv, >+ int width); > void MergeUVRow_Any_SSE2(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, >@@ -1343,6 +1597,10 @@ void MergeUVRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void MergeUVRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > > void SplitRGBRow_C(const uint8_t* src_rgb, > uint8_t* dst_r, >@@ -1359,6 +1617,11 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb, > uint8_t* dst_g, > uint8_t* dst_b, > int width); >+void SplitRGBRow_MMI(const uint8_t* src_rgb, >+ uint8_t* dst_r, >+ uint8_t* dst_g, >+ uint8_t* dst_b, >+ int width); > void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, > uint8_t* dst_r, > uint8_t* dst_g, >@@ -1369,6 +1632,11 @@ void SplitRGBRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_g, > uint8_t* dst_b, > int width); >+void SplitRGBRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_r, >+ uint8_t* dst_g, >+ uint8_t* dst_b, >+ int width); > > void MergeRGBRow_C(const uint8_t* src_r, > const uint8_t* src_g, >@@ -1385,6 +1653,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r, > const uint8_t* src_b, > uint8_t* dst_rgb, > int width); >+void MergeRGBRow_MMI(const uint8_t* src_r, >+ const uint8_t* src_g, >+ const uint8_t* src_b, >+ uint8_t* dst_rgb, >+ int width); > void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, > const 
uint8_t* u_buf, > const uint8_t* v_buf, >@@ -1395,6 +1668,11 @@ void MergeRGBRow_Any_NEON(const uint8_t* src_r, > const uint8_t* src_b, > uint8_t* dst_rgb, > int width); >+void MergeRGBRow_Any_MMI(const uint8_t* src_r, >+ const uint8_t* src_g, >+ const uint8_t* src_b, >+ uint8_t* dst_rgb, >+ int width); > > void MergeUVRow_16_C(const uint16_t* src_u, > const uint16_t* src_v, >@@ -1473,12 +1751,16 @@ void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count); > void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); > void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); > void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); >+void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width); > void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void ARGBCopyAlphaRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > > void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width); > void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, >@@ -1493,6 +1775,9 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, > void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, > uint8_t* dst_a, > int width); >+void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_a, >+ int width); > void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >@@ -1505,16 +1790,23 @@ void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr, > void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void ARGBExtractAlphaRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > > void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); > void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); > void ARGBCopyYToAlphaRow_AVX2(const 
uint8_t* src, uint8_t* dst, int width); >+void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width); > void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void ARGBCopyYToAlphaRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > > void SetRow_C(uint8_t* dst, uint8_t v8, int width); > void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); >@@ -1552,6 +1844,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb, > uint8_t* dst_argb, > const uint8_t* shuffler, > int width); >+void ARGBShuffleRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_argb, >+ const uint8_t* shuffler, >+ int width); > void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, > uint8_t* dst_ptr, > const uint8_t* param, >@@ -1568,6 +1864,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > const uint8_t* param, > int width); >+void ARGBShuffleRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ const uint8_t* param, >+ int width); > > void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, > uint8_t* dst_argb, >@@ -1591,28 +1891,40 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, > uint8_t* dst_argb, > int width); > void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); >+void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); > void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); > void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); >+void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width); > void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); > void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); >+void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); > void RGB565ToARGBRow_NEON(const uint8_t* 
src_rgb565, > uint8_t* dst_argb, > int width); > void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, > uint8_t* dst_argb, > int width); >+void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565, >+ uint8_t* dst_argb, >+ int width); > void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, > uint8_t* dst_argb, > int width); > void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, > uint8_t* dst_argb, > int width); >+void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555, >+ uint8_t* dst_argb, >+ int width); > void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, > uint8_t* dst_argb, > int width); > void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, > uint8_t* dst_argb, > int width); >+void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444, >+ uint8_t* dst_argb, >+ int width); > void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); > void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); > void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); >@@ -1663,24 +1975,35 @@ void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr, > void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void RGB24ToARGBRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void RAWToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void RAWToRGB24Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void 
RGB565ToARGBRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void ARGB1555ToARGBRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >@@ -1688,6 +2011,9 @@ void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, > void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void ARGB4444ToARGBRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > > void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); > void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); >@@ -1697,6 +2023,11 @@ void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width); > void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); > void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); > >+void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width); >+void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width); >+ >+void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); >+ > void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, > uint8_t* dst_rgb, > const uint32_t dither4, >@@ -1751,6 +2082,20 @@ void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, > const uint32_t dither4, > int width); > >+void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); >+void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); >+void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); >+void ARGBToARGB1555Row_MMI(const uint8_t* src_argb, >+ uint8_t* dst_rgb, >+ int width); >+void ARGBToARGB4444Row_MMI(const uint8_t* src_argb, >+ uint8_t* dst_rgb, >+ int width); >+void 
ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_rgb, >+ const uint32_t dither4, >+ int width); >+ > void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); > void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); > void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); >@@ -1764,6 +2109,7 @@ void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); > void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); > void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); > void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); >+void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width); > void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); > void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_ptr, >@@ -1775,6 +2121,7 @@ void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void J400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > > void I444ToARGBRow_C(const uint8_t* src_y, > const uint8_t* src_u, >@@ -1979,11 +2326,31 @@ void NV12ToARGBRow_AVX2(const uint8_t* y_buf, > uint8_t* dst_argb, > const struct YuvConstants* yuvconstants, > int width); >+void NV12ToRGB24Row_SSSE3(const uint8_t* src_y, >+ const uint8_t* src_uv, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width); >+void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, >+ const uint8_t* src_vu, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width); > void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, > const uint8_t* src_uv, > uint8_t* dst_rgb565, > const struct YuvConstants* yuvconstants, > int width); >+void NV12ToRGB24Row_AVX2(const uint8_t* src_y, >+ const uint8_t* src_uv, >+ uint8_t* dst_rgb24, >+ const struct 
YuvConstants* yuvconstants, >+ int width); >+void NV21ToRGB24Row_AVX2(const uint8_t* src_y, >+ const uint8_t* src_vu, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width); > void NV12ToRGB565Row_AVX2(const uint8_t* src_y, > const uint8_t* src_uv, > uint8_t* dst_rgb565, >@@ -2169,6 +2536,26 @@ void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf, > uint8_t* dst_ptr, > const struct YuvConstants* yuvconstants, > int width); >+void NV12ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ const struct YuvConstants* yuvconstants, >+ int width); >+void NV21ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ const struct YuvConstants* yuvconstants, >+ int width); >+void NV12ToRGB24Row_Any_AVX2(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ const struct YuvConstants* yuvconstants, >+ int width); >+void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ const struct YuvConstants* yuvconstants, >+ int width); > void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, >@@ -2255,6 +2642,7 @@ void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width); > void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width); > void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); > void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); >+void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width); > void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >@@ -2265,6 +2653,7 @@ void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); > void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > > // ARGB preattenuated alpha 
blend. > void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, >@@ -2279,6 +2668,10 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0, > const uint8_t* src_argb1, > uint8_t* dst_argb, > int width); >+void ARGBBlendRow_MMI(const uint8_t* src_argb0, >+ const uint8_t* src_argb1, >+ uint8_t* dst_argb, >+ int width); > void ARGBBlendRow_C(const uint8_t* src_argb0, > const uint8_t* src_argb1, > uint8_t* dst_argb, >@@ -2305,6 +2698,16 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, > const uint8_t* v_buf, > uint8_t* dst_ptr, > int width); >+void BlendPlaneRow_MMI(const uint8_t* src0, >+ const uint8_t* src1, >+ const uint8_t* alpha, >+ uint8_t* dst, >+ int width); >+void BlendPlaneRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* u_buf, >+ const uint8_t* v_buf, >+ uint8_t* dst_ptr, >+ int width); > void BlendPlaneRow_C(const uint8_t* src0, > const uint8_t* src1, > const uint8_t* alpha, >@@ -2349,6 +2752,14 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void ARGBMultiplyRow_MMI(const uint8_t* src_argb0, >+ const uint8_t* src_argb1, >+ uint8_t* dst_argb, >+ int width); >+void ARGBMultiplyRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > > // ARGB add images. > void ARGBAddRow_C(const uint8_t* src_argb0, >@@ -2387,6 +2798,14 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void ARGBAddRow_MMI(const uint8_t* src_argb0, >+ const uint8_t* src_argb1, >+ uint8_t* dst_argb, >+ int width); >+void ARGBAddRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > > // ARGB subtract images. Same API as Blend, but these require > // pointer and width alignment for SSE2. 
>@@ -2426,6 +2845,14 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void ARGBSubtractRow_MMI(const uint8_t* src_argb0, >+ const uint8_t* src_argb1, >+ uint8_t* dst_argb, >+ int width); >+void ARGBSubtractRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > > void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, > uint8_t* dst_ptr, >@@ -2448,7 +2875,13 @@ void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, > void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >- >+void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); >+void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, > uint8_t* dst_ptr, > const uint32_t param, >@@ -2509,6 +2942,24 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, > const uint32_t param, > int width); > >+void ARGBToRGB24Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); >+void ARGBToRAWRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void ARGBToRGB565Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); >+void ARGBToARGB1555Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); >+void ARGBToARGB4444Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); >+void ARGBToRGB565DitherRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ const uint32_t param, >+ int width); >+ > void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, > const uint8_t* u_buf, > const uint8_t* v_buf, >@@ -2695,15 +3146,25 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, > uint8_t* dst_v, > int width); > void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); >+void YUY2ToYRow_MMI(const 
uint8_t* src_yuy2, uint8_t* dst_y, int width); > void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, > int src_stride_yuy2, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void YUY2ToUVRow_MMI(const uint8_t* src_yuy2, >+ int src_stride_yuy2, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); > void YUY2ToUVRow_C(const uint8_t* src_yuy2, > int src_stride_yuy2, >@@ -2745,15 +3206,25 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_v, > int width); > void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void YUY2ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, > int src_stride_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void YUY2ToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void YUY2ToUV422Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); > void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, > int stride_uyvy, >@@ -2795,15 +3266,25 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, > uint8_t* dst_v, > int width); > void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); >+void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width); > void UYVYToUVRow_MSA(const uint8_t* src_uyvy, > int src_stride_uyvy, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void UYVYToUVRow_MMI(const uint8_t* src_uyvy, >+ int src_stride_uyvy, >+ uint8_t* dst_u, >+ uint8_t* 
dst_v, >+ int width); > void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void UYVYToUV422Row_MMI(const uint8_t* src_uyvy, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > > void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); > void UYVYToUVRow_C(const uint8_t* src_uyvy, >@@ -2846,15 +3327,25 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, > uint8_t* dst_v, > int width); > void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); >+void UYVYToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); > void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, > int src_stride_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void UYVYToUVRow_Any_MMI(const uint8_t* src_ptr, >+ int src_stride_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_u, > uint8_t* dst_v, > int width); >+void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width); > > void I422ToYUY2Row_C(const uint8_t* src_y, > const uint8_t* src_u, >@@ -2931,21 +3422,41 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y, > const uint8_t* src_v, > uint8_t* dst_yuy2, > int width); >+void I422ToYUY2Row_MMI(const uint8_t* src_y, >+ const uint8_t* src_u, >+ const uint8_t* src_v, >+ uint8_t* dst_yuy2, >+ int width); > void I422ToUYVYRow_MSA(const uint8_t* src_y, > const uint8_t* src_u, > const uint8_t* src_v, > uint8_t* dst_uyvy, > int width); >+void I422ToUYVYRow_MMI(const uint8_t* src_y, >+ const uint8_t* src_u, >+ const uint8_t* src_v, >+ uint8_t* dst_uyvy, >+ int width); > void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, > const uint8_t* u_buf, > const uint8_t* v_buf, > uint8_t* dst_ptr, > int width); >+void I422ToYUY2Row_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* u_buf, >+ const uint8_t* v_buf, >+ uint8_t* dst_ptr, >+ int width); > void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, > 
const uint8_t* u_buf, > const uint8_t* v_buf, > uint8_t* dst_ptr, > int width); >+void I422ToUYVYRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* u_buf, >+ const uint8_t* v_buf, >+ uint8_t* dst_ptr, >+ int width); > > // Effects related row functions. > void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); >@@ -2961,6 +3472,9 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, > void ARGBAttenuateRow_MSA(const uint8_t* src_argb, > uint8_t* dst_argb, > int width); >+void ARGBAttenuateRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_argb, >+ int width); > void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >@@ -2973,6 +3487,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, > void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int width); >+void ARGBAttenuateRow_Any_MMI(const uint8_t* src_ptr, >+ uint8_t* dst_ptr, >+ int width); > > // Inverse table for unattenuate, shared by C and SSE2. 
> extern const uint32_t fixed_invtbl8[256]; >@@ -2996,11 +3513,13 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); > void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); > void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); > void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); >+void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width); > > void ARGBSepiaRow_C(uint8_t* dst_argb, int width); > void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); > void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); > void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); >+void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width); > > void ARGBColorMatrixRow_C(const uint8_t* src_argb, > uint8_t* dst_argb, >@@ -3018,6 +3537,10 @@ void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, > uint8_t* dst_argb, > const int8_t* matrix_argb, > int width); >+void ARGBColorMatrixRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_argb, >+ const int8_t* matrix_argb, >+ int width); > > void ARGBColorTableRow_C(uint8_t* dst_argb, > const uint8_t* table_argb, >@@ -3070,6 +3593,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb, > uint8_t* dst_argb, > int width, > uint32_t value); >+void ARGBShadeRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_argb, >+ int width, >+ uint32_t value); > > // Used for blur. 
> void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, >@@ -3083,6 +3610,11 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row, > const int32_t* previous_cumsum, > int width); > >+void ComputeCumulativeSumRow_MMI(const uint8_t* row, >+ int32_t* cumsum, >+ const int32_t* previous_cumsum, >+ int width); >+ > void CumulativeSumToAverageRow_C(const int32_t* tl, > const int32_t* bl, > int w, >@@ -3133,6 +3665,11 @@ void InterpolateRow_MSA(uint8_t* dst_ptr, > ptrdiff_t src_stride, > int width, > int source_y_fraction); >+void InterpolateRow_MMI(uint8_t* dst_ptr, >+ const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ int width, >+ int source_y_fraction); > void InterpolateRow_Any_NEON(uint8_t* dst_ptr, > const uint8_t* src_ptr, > ptrdiff_t src_stride_ptr, >@@ -3153,6 +3690,11 @@ void InterpolateRow_Any_MSA(uint8_t* dst_ptr, > ptrdiff_t src_stride_ptr, > int width, > int source_y_fraction); >+void InterpolateRow_Any_MMI(uint8_t* dst_ptr, >+ const uint8_t* src_ptr, >+ ptrdiff_t src_stride_ptr, >+ int width, >+ int source_y_fraction); > > void InterpolateRow_16_C(uint16_t* dst_ptr, > const uint16_t* src_ptr, >@@ -3181,6 +3723,11 @@ void SobelXRow_MSA(const uint8_t* src_y0, > const uint8_t* src_y2, > uint8_t* dst_sobelx, > int width); >+void SobelXRow_MMI(const uint8_t* src_y0, >+ const uint8_t* src_y1, >+ const uint8_t* src_y2, >+ uint8_t* dst_sobelx, >+ int width); > void SobelYRow_C(const uint8_t* src_y0, > const uint8_t* src_y1, > uint8_t* dst_sobely, >@@ -3197,6 +3744,10 @@ void SobelYRow_MSA(const uint8_t* src_y0, > const uint8_t* src_y1, > uint8_t* dst_sobely, > int width); >+void SobelYRow_MMI(const uint8_t* src_y0, >+ const uint8_t* src_y1, >+ uint8_t* dst_sobely, >+ int width); > void SobelRow_C(const uint8_t* src_sobelx, > const uint8_t* src_sobely, > uint8_t* dst_argb, >@@ -3213,6 +3764,10 @@ void SobelRow_MSA(const uint8_t* src_sobelx, > const uint8_t* src_sobely, > uint8_t* dst_argb, > int width); >+void SobelRow_MMI(const uint8_t* src_sobelx, >+ 
const uint8_t* src_sobely, >+ uint8_t* dst_argb, >+ int width); > void SobelToPlaneRow_C(const uint8_t* src_sobelx, > const uint8_t* src_sobely, > uint8_t* dst_y, >@@ -3229,6 +3784,10 @@ void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, > const uint8_t* src_sobely, > uint8_t* dst_y, > int width); >+void SobelToPlaneRow_MMI(const uint8_t* src_sobelx, >+ const uint8_t* src_sobely, >+ uint8_t* dst_y, >+ int width); > void SobelXYRow_C(const uint8_t* src_sobelx, > const uint8_t* src_sobely, > uint8_t* dst_argb, >@@ -3245,6 +3804,10 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx, > const uint8_t* src_sobely, > uint8_t* dst_argb, > int width); >+void SobelXYRow_MMI(const uint8_t* src_sobelx, >+ const uint8_t* src_sobely, >+ uint8_t* dst_argb, >+ int width); > void SobelRow_Any_SSE2(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, >@@ -3257,6 +3820,10 @@ void SobelRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void SobelRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, >@@ -3269,6 +3836,10 @@ void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void SobelToPlaneRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > void SobelXYRow_Any_SSE2(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, >@@ -3281,6 +3852,10 @@ void SobelXYRow_Any_MSA(const uint8_t* y_buf, > const uint8_t* uv_buf, > uint8_t* dst_ptr, > int width); >+void SobelXYRow_Any_MMI(const uint8_t* y_buf, >+ const uint8_t* uv_buf, >+ uint8_t* dst_ptr, >+ int width); > > void ARGBPolynomialRow_C(const uint8_t* src_argb, > uint8_t* dst_argb, >@@ -3358,9 +3933,9 @@ void ByteToFloatRow_NEON(const uint8_t* src, > float* dst, > float scale, > int width); >-void ByteToFloatRow_Any_NEON(const 
uint8_t* src, >- float* dst, >- float scale, >+void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, >+ float* dst_ptr, >+ float param, > int width); > > void ARGBLumaColorTableRow_C(const uint8_t* src_argb, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/scale_row.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/scale_row.h >index 7194ba09f842ba7ffb5b295b06977ddf91237e31..6e207a9c64e119d43a39e77043f994b96ff5ee33 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/scale_row.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/scale_row.h >@@ -58,6 +58,7 @@ extern "C" { > (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) > #define HAS_FIXEDDIV1_X86 > #define HAS_FIXEDDIV_X86 >+#define HAS_SCALEADDROW_SSE2 > #define HAS_SCALEARGBCOLS_SSE2 > #define HAS_SCALEARGBCOLSUP2_SSE2 > #define HAS_SCALEARGBFILTERCOLS_SSSE3 >@@ -69,7 +70,6 @@ extern "C" { > #define HAS_SCALEROWDOWN34_SSSE3 > #define HAS_SCALEROWDOWN38_SSSE3 > #define HAS_SCALEROWDOWN4_SSSE3 >-#define HAS_SCALEADDROW_SSE2 > #endif > > // The following are available on all x86 platforms, but >@@ -86,7 +86,9 @@ extern "C" { > // The following are available on Neon platforms: > #if !defined(LIBYUV_DISABLE_NEON) && \ > (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) >+#define HAS_SCALEADDROW_NEON > #define HAS_SCALEARGBCOLS_NEON >+#define HAS_SCALEARGBFILTERCOLS_NEON > #define HAS_SCALEARGBROWDOWN2_NEON > #define HAS_SCALEARGBROWDOWNEVEN_NEON > #define HAS_SCALEFILTERCOLS_NEON >@@ -94,7 +96,6 @@ extern "C" { > #define HAS_SCALEROWDOWN34_NEON > #define HAS_SCALEROWDOWN38_NEON > #define HAS_SCALEROWDOWN4_NEON >-#define HAS_SCALEARGBFILTERCOLS_NEON > #endif > > #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) >@@ -110,6 +111,23 @@ extern "C" { > #define HAS_SCALEROWDOWN4_MSA > #endif > >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+#define 
HAS_FIXEDDIV1_MIPS >+#define HAS_FIXEDDIV_MIPS >+#define HAS_SCALEADDROW_16_MMI >+#define HAS_SCALEADDROW_MMI >+#define HAS_SCALEARGBCOLS_MMI >+#define HAS_SCALEARGBCOLSUP2_MMI >+#define HAS_SCALEARGBROWDOWN2_MMI >+#define HAS_SCALEARGBROWDOWNEVEN_MMI >+#define HAS_SCALECOLS_16_MMI >+#define HAS_SCALECOLS_MMI >+#define HAS_SCALEROWDOWN2_16_MMI >+#define HAS_SCALEROWDOWN2_MMI >+#define HAS_SCALEROWDOWN4_16_MMI >+#define HAS_SCALEROWDOWN4_MMI >+#endif >+ > // Scale ARGB vertically with bilinear interpolation. > void ScalePlaneVertical(int src_height, > int dst_width, >@@ -147,12 +165,17 @@ enum FilterMode ScaleFilterReduce(int src_width, > // Divide num by div and return as 16.16 fixed point result. > int FixedDiv_C(int num, int div); > int FixedDiv_X86(int num, int div); >+int FixedDiv_MIPS(int num, int div); > // Divide num - 1 by div - 1 and return as 16.16 fixed point result. > int FixedDiv1_C(int num, int div); > int FixedDiv1_X86(int num, int div); >+int FixedDiv1_MIPS(int num, int div); > #ifdef HAS_FIXEDDIV_X86 > #define FixedDiv FixedDiv_X86 > #define FixedDiv1 FixedDiv1_X86 >+#elif defined HAS_FIXEDDIV_MIPS >+#define FixedDiv FixedDiv_MIPS >+#define FixedDiv1 FixedDiv1_MIPS > #else > #define FixedDiv FixedDiv_C > #define FixedDiv1 FixedDiv1_C >@@ -569,6 +592,16 @@ void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr, > int dst_width, > int x, > int dx); >+void ScaleARGBCols_MMI(uint8_t* dst_argb, >+ const uint8_t* src_argb, >+ int dst_width, >+ int x, >+ int dx); >+void ScaleARGBCols_Any_MMI(uint8_t* dst_ptr, >+ const uint8_t* src_ptr, >+ int dst_width, >+ int x, >+ int dx); > > // ARGB Row functions > void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, >@@ -607,6 +640,18 @@ void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, > ptrdiff_t src_stride, > uint8_t* dst_argb, > int dst_width); >+void ScaleARGBRowDown2_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ uint8_t* dst_argb, >+ int dst_width); >+void ScaleARGBRowDown2Linear_MMI(const uint8_t* 
src_argb, >+ ptrdiff_t src_stride, >+ uint8_t* dst_argb, >+ int dst_width); >+void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ uint8_t* dst_argb, >+ int dst_width); > void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr, > ptrdiff_t src_stride, > uint8_t* dst_ptr, >@@ -643,7 +688,18 @@ void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr, > ptrdiff_t src_stride, > uint8_t* dst_ptr, > int dst_width); >- >+void ScaleARGBRowDown2_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleARGBRowDown2Linear_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleARGBRowDown2Box_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); > void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, > ptrdiff_t src_stride, > int src_stepx, >@@ -674,6 +730,16 @@ void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, > int src_stepx, > uint8_t* dst_argb, > int dst_width); >+void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ int32_t src_stepx, >+ uint8_t* dst_argb, >+ int dst_width); >+void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ int src_stepx, >+ uint8_t* dst_argb, >+ int dst_width); > void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, > ptrdiff_t src_stride, > int src_stepx, >@@ -704,6 +770,16 @@ void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, > int src_stepx, > uint8_t* dst_ptr, > int dst_width); >+void ScaleARGBRowDownEven_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ int32_t src_stepx, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleARGBRowDownEvenBox_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ int src_stepx, >+ uint8_t* dst_ptr, >+ int dst_width); > > // ScaleRowDown2Box also used by planar functions > // NEON downscalers with interpolation. 
>@@ -936,6 +1012,93 @@ void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr, > uint8_t* dst_ptr, > int dst_width); > >+void ScaleRowDown2_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width); >+void ScaleRowDown2_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width); >+void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width); >+void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width); >+void ScaleRowDown2Box_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width); >+void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width); >+void ScaleRowDown2Box_Odd_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width); >+void ScaleRowDown4_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width); >+void ScaleRowDown4_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width); >+void ScaleRowDown4Box_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width); >+void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width); >+void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); >+void ScaleAddRow_16_MMI(const uint16_t* src_ptr, >+ uint32_t* dst_ptr, >+ int src_width); >+void ScaleColsUp2_MMI(uint8_t* dst_ptr, >+ const uint8_t* src_ptr, >+ int dst_width, >+ int x, >+ int dx); >+void ScaleColsUp2_16_MMI(uint16_t* dst_ptr, >+ const uint16_t* src_ptr, >+ int dst_width, >+ int x, >+ int dx); >+void ScaleARGBColsUp2_MMI(uint8_t* dst_argb, >+ const uint8_t* src_argb, >+ int dst_width, >+ int x, >+ int dx); >+ >+void ScaleRowDown2_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* 
dst_ptr, >+ int dst_width); >+void ScaleRowDown2Linear_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleRowDown2Box_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleRowDown4_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleRowDown4Box_Any_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst_ptr, >+ int dst_width); >+void ScaleAddRow_Any_MMI(const uint8_t* src_ptr, >+ uint16_t* dst_ptr, >+ int src_width); > #ifdef __cplusplus > } // extern "C" > } // namespace libyuv >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/version.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/version.h >index 21522cf351c8c53610ec2e8430ddc0a42cd58772..1a38ba7d6dd41d9e9c30650587f764aa4878b3c4 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/version.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/version.h >@@ -11,6 +11,6 @@ > #ifndef INCLUDE_LIBYUV_VERSION_H_ > #define INCLUDE_LIBYUV_VERSION_H_ > >-#define LIBYUV_VERSION 1703 >+#define LIBYUV_VERSION 1722 > > #endif // INCLUDE_LIBYUV_VERSION_H_ >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/video_common.h b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/video_common.h >index bcef378b5a4e5b2c3af9f364e153c7753671dd31..ffcbdbf1b0c1455079931e4796e111a497275a94 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/video_common.h >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/include/libyuv/video_common.h >@@ -30,7 +30,8 @@ extern "C" { > #ifdef __cplusplus > #define FOURCC(a, b, c, d) \ > ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \ >- (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24)) >+ 
(static_cast<uint32_t>(c) << 16) | /* NOLINT */ \ >+ (static_cast<uint32_t>(d) << 24)) /* NOLINT */ > #else > #define FOURCC(a, b, c, d) \ > (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \ >@@ -79,7 +80,7 @@ enum FourCC { > // 1 Primary Compressed YUV format. > FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), > >- // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. >+ // 8 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. > FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), > FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), > FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), >@@ -87,6 +88,7 @@ enum FourCC { > FOURCC_J420 = FOURCC('J', '4', '2', '0'), > FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc > FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc >+ FOURCC_H422 = FOURCC('H', '4', '2', '2'), // unofficial fourcc > > // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. > FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. >@@ -155,6 +157,7 @@ enum FourCCBpp { > FOURCC_BPP_J420 = 12, > FOURCC_BPP_J400 = 8, > FOURCC_BPP_H420 = 12, >+ FOURCC_BPP_H422 = 16, > FOURCC_BPP_H010 = 24, > FOURCC_BPP_MJPG = 0, // 0 means unknown. > FOURCC_BPP_H264 = 0, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/PRESUBMIT.py b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/PRESUBMIT.py >new file mode 100644 >index 0000000000000000000000000000000000000000..89eaa5192c2b3333031b3580dfd0eab59e96328f >--- /dev/null >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/PRESUBMIT.py >@@ -0,0 +1,15 @@ >+# Copyright 2018 The LibYuv Project Authors. All rights reserved. >+# >+# Use of this source code is governed by a BSD-style license >+# that can be found in the LICENSE file in the root of the source >+# tree. An additional intellectual property rights grant can be found >+# in the file PATENTS. 
All contributing project authors may >+# be found in the AUTHORS file in the root of the source tree. >+ >+ >+def CheckChangeOnUpload(input_api, output_api): >+ return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api) >+ >+ >+def CheckChangeOnCommit(input_api, output_api): >+ return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api) >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/cq.cfg b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/cq.cfg >index 604de7814d9420614a3b372d377282dd14113412..7bcc05955a38d9bceaf8bce5fb37658dac0e211e 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/cq.cfg >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/infra/config/cq.cfg >@@ -2,7 +2,6 @@ > # at http://luci-config.appspot.com/schemas/projects/refs:cq.cfg. > > version: 1 >-cq_name: "libyuv" > cq_status_url: "https://chromium-cq-status.appspot.com" > git_repo_url: "https://chromium.googlesource.com/libyuv/libyuv.git" > >@@ -16,7 +15,7 @@ verifiers { > > try_job { > buckets { >- name: "master.tryserver.libyuv" >+ name: "luci.libyuv.try" > builders { name: "win" } > builders { name: "win_rel" } > builders { name: "win_x64_rel" } >@@ -47,10 +46,6 @@ verifiers { > builders { name: "android_arm64" } > builders { name: "android_x86" } > builders { name: "android_x64" } >- builders { >- name: "android_mips" >- experiment_percentage: 100 >- } > } > } > } >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gni b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gni >index 89e4d38232767e84c5fa76498a1edf21acfa6087..8df40ba2d77b3b5cf2b6fd27bd69200e93db5390 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gni >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gni >@@ -13,8 +13,11 @@ import("//build/config/mips.gni") > declare_args() { > libyuv_include_tests = !build_with_chromium > 
libyuv_disable_jpeg = false >- libyuv_use_neon = (current_cpu == "arm64" || >- (current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))) >- libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") && >- mips_use_msa >+ libyuv_use_neon = >+ current_cpu == "arm64" || >+ (current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)) >+ libyuv_use_msa = >+ (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa >+ libyuv_use_mmi = >+ (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi > } >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gyp b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gyp >deleted file mode 100644 >index e853ba3197a94bcad91011ac6384ce84daa1db10..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gyp >+++ /dev/null >@@ -1,161 +0,0 @@ >-# Copyright 2011 The LibYuv Project Authors. All rights reserved. >-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >-{ >- 'includes': [ >- 'libyuv.gypi', >- ], >- # Make sure that if we are being compiled to an xcodeproj, nothing tries to >- # include a .pch. >- 'xcode_settings': { >- 'GCC_PREFIX_HEADER': '', >- 'GCC_PRECOMPILE_PREFIX_HEADER': 'NO', >- }, >- 'variables': { >- 'use_system_libjpeg%': 0, >- # Can be enabled if your jpeg has GYP support. >- 'libyuv_disable_jpeg%': 1, >- # 'chromium_code' treats libyuv as internal and increases warning level. >- 'chromium_code': 1, >- # clang compiler default variable usable by other apps that include libyuv. >- 'clang%': 0, >- # Link-Time Optimizations. >- 'use_lto%': 0, >- 'mips_msa%': 0, # Default to msa off. 
>- 'build_neon': 0, >- 'build_msa': 0, >- 'conditions': [ >- ['(target_arch == "armv7" or target_arch == "armv7s" or \ >- (target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\ >- and (arm_neon == 1 or arm_neon_optional == 1)', { >- 'build_neon': 1, >- }], >- ['(target_arch == "mipsel" or target_arch == "mips64el")\ >- and (mips_msa == 1)', >- { >- 'build_msa': 1, >- }], >- ], >- }, >- >- 'targets': [ >- { >- 'target_name': 'libyuv', >- # Change type to 'shared_library' to build .so or .dll files. >- 'type': 'static_library', >- 'variables': { >- 'optimize': 'max', # enable O2 and ltcg. >- }, >- # Allows libyuv.a redistributable library without external dependencies. >- 'standalone_static_library': 1, >- 'conditions': [ >- # Disable -Wunused-parameter >- ['clang == 1', { >- 'cflags': [ >- '-Wno-unused-parameter', >- ], >- }], >- ['build_neon != 0', { >- 'defines': [ >- 'LIBYUV_NEON', >- ], >- 'cflags!': [ >- '-mfpu=vfp', >- '-mfpu=vfpv3', >- '-mfpu=vfpv3-d16', >- # '-mthumb', # arm32 not thumb >- ], >- 'conditions': [ >- # Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug. 
>- ['clang == 0 and use_lto == 1', { >- 'cflags!': [ >- '-flto', >- '-ffat-lto-objects', >- ], >- }], >- # arm64 does not need -mfpu=neon option as neon is not optional >- ['target_arch != "arm64"', { >- 'cflags': [ >- '-mfpu=neon', >- # '-marm', # arm32 not thumb >- ], >- }], >- ], >- }], >- ['build_msa != 0', { >- 'defines': [ >- 'LIBYUV_MSA', >- ], >- }], >- ['OS != "ios" and libyuv_disable_jpeg != 1', { >- 'defines': [ >- 'HAVE_JPEG' >- ], >- 'conditions': [ >- # Caveat system jpeg support may not support motion jpeg >- [ 'use_system_libjpeg == 1', { >- 'dependencies': [ >- '<(DEPTH)/third_party/libjpeg/libjpeg.gyp:libjpeg', >- ], >- }, { >- 'dependencies': [ >- '<(DEPTH)/third_party/libjpeg_turbo/libjpeg.gyp:libjpeg', >- ], >- }], >- [ 'use_system_libjpeg == 1', { >- 'link_settings': { >- 'libraries': [ >- '-ljpeg', >- ], >- } >- }], >- ], >- }], >- ], #conditions >- 'defines': [ >- # Enable the following 3 macros to turn off assembly for specified CPU. >- # 'LIBYUV_DISABLE_X86', >- # 'LIBYUV_DISABLE_NEON', >- # Enable the following macro to build libyuv as a shared library (dll). >- # 'LIBYUV_USING_SHARED_LIBRARY', >- # TODO(fbarchard): Make these into gyp defines. >- ], >- 'include_dirs': [ >- 'include', >- '.', >- ], >- 'direct_dependent_settings': { >- 'include_dirs': [ >- 'include', >- '.', >- ], >- 'conditions': [ >- ['OS == "android" and target_arch == "arm64"', { >- 'ldflags': [ >- '-Wl,--dynamic-linker,/system/bin/linker64', >- ], >- }], >- ['OS == "android" and target_arch != "arm64"', { >- 'ldflags': [ >- '-Wl,--dynamic-linker,/system/bin/linker', >- ], >- }], >- ], #conditions >- }, >- 'sources': [ >- '<@(libyuv_sources)', >- ], >- }, >- ], # targets. 
>-} >- >-# Local Variables: >-# tab-width:2 >-# indent-tabs-mode:nil >-# End: >-# vim: set expandtab tabstop=2 shiftwidth=2: >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gypi b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gypi >deleted file mode 100644 >index 9467adfc515d7e45e5fda706a32b565ddfe492ed..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv.gypi >+++ /dev/null >@@ -1,81 +0,0 @@ >-# Copyright 2014 The LibYuv Project Authors. All rights reserved. >-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >-{ >- 'variables': { >- 'libyuv_sources': [ >- # includes. >- 'include/libyuv.h', >- 'include/libyuv/basic_types.h', >- 'include/libyuv/compare.h', >- 'include/libyuv/convert.h', >- 'include/libyuv/convert_argb.h', >- 'include/libyuv/convert_from.h', >- 'include/libyuv/convert_from_argb.h', >- 'include/libyuv/cpu_id.h', >- 'include/libyuv/macros_msa.h', >- 'include/libyuv/mjpeg_decoder.h', >- 'include/libyuv/planar_functions.h', >- 'include/libyuv/rotate.h', >- 'include/libyuv/rotate_argb.h', >- 'include/libyuv/rotate_row.h', >- 'include/libyuv/row.h', >- 'include/libyuv/scale.h', >- 'include/libyuv/scale_argb.h', >- 'include/libyuv/scale_row.h', >- 'include/libyuv/version.h', >- 'include/libyuv/video_common.h', >- >- # sources. 
>- 'source/compare.cc', >- 'source/compare_common.cc', >- 'source/compare_gcc.cc', >- 'source/compare_msa.cc', >- 'source/compare_neon.cc', >- 'source/compare_neon64.cc', >- 'source/compare_win.cc', >- 'source/convert.cc', >- 'source/convert_argb.cc', >- 'source/convert_from.cc', >- 'source/convert_from_argb.cc', >- 'source/convert_jpeg.cc', >- 'source/convert_to_argb.cc', >- 'source/convert_to_i420.cc', >- 'source/cpu_id.cc', >- 'source/mjpeg_decoder.cc', >- 'source/mjpeg_validate.cc', >- 'source/planar_functions.cc', >- 'source/rotate.cc', >- 'source/rotate_any.cc', >- 'source/rotate_argb.cc', >- 'source/rotate_common.cc', >- 'source/rotate_gcc.cc', >- 'source/rotate_msa.cc', >- 'source/rotate_neon.cc', >- 'source/rotate_neon64.cc', >- 'source/rotate_win.cc', >- 'source/row_any.cc', >- 'source/row_common.cc', >- 'source/row_gcc.cc', >- 'source/row_msa.cc', >- 'source/row_neon.cc', >- 'source/row_neon64.cc', >- 'source/row_win.cc', >- 'source/scale.cc', >- 'source/scale_any.cc', >- 'source/scale_argb.cc', >- 'source/scale_common.cc', >- 'source/scale_gcc.cc', >- 'source/scale_msa.cc', >- 'source/scale_neon.cc', >- 'source/scale_neon64.cc', >- 'source/scale_win.cc', >- 'source/video_common.cc', >- ], >- } >-} >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv_nacl.gyp b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv_nacl.gyp >deleted file mode 100644 >index b8fe57ee3e2116267fa5a6db47f5864662ae4430..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv_nacl.gyp >+++ /dev/null >@@ -1,37 +0,0 @@ >-# Copyright 2014 The LibYuv Project Authors. All rights reserved. >-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. 
All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >-{ >- 'includes': [ >- 'libyuv.gypi', >- '../../native_client/build/untrusted.gypi', >- ], >- 'targets': [ >- { >- 'target_name': 'libyuv_nacl', >- 'type': 'none', >- 'variables': { >- 'nlib_target': 'libyuv_nacl.a', >- 'build_glibc': 0, >- 'build_newlib': 0, >- 'build_pnacl_newlib': 1, >- }, >- 'include_dirs': [ >- 'include', >- ], >- 'direct_dependent_settings': { >- 'include_dirs': [ >- 'include', >- ], >- }, >- 'sources': [ >- '<@(libyuv_sources)', >- ], >- }, # target libyuv_nacl >- ] >-} >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv_test.gyp b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv_test.gyp >deleted file mode 100644 >index 5fe154c610a70ef9acdf0f40076d807d86856094..0000000000000000000000000000000000000000 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/libyuv_test.gyp >+++ /dev/null >@@ -1,203 +0,0 @@ >-# Copyright 2011 The LibYuv Project Authors. All rights reserved. >-# >-# Use of this source code is governed by a BSD-style license >-# that can be found in the LICENSE file in the root of the source >-# tree. An additional intellectual property rights grant can be found >-# in the file PATENTS. All contributing project authors may >-# be found in the AUTHORS file in the root of the source tree. >- >-{ >- 'variables': { >- # Can be enabled if your jpeg has GYP support. >- 'libyuv_disable_jpeg%': 1, >- 'mips_msa%': 0, # Default to msa off. 
>- }, >- 'targets': [ >- { >- 'target_name': 'libyuv_unittest', >- 'type': '<(gtest_target_type)', >- 'dependencies': [ >- 'libyuv.gyp:libyuv', >- 'testing/gtest.gyp:gtest', >- 'third_party/gflags/gflags.gyp:gflags', >- ], >- 'direct_dependent_settings': { >- 'defines': [ >- 'GTEST_RELATIVE_PATH', >- ], >- }, >- 'export_dependent_settings': [ >- '<(DEPTH)/testing/gtest.gyp:gtest', >- ], >- 'sources': [ >- # headers >- 'unit_test/unit_test.h', >- >- # sources >- 'unit_test/basictypes_test.cc', >- 'unit_test/compare_test.cc', >- 'unit_test/color_test.cc', >- 'unit_test/convert_test.cc', >- 'unit_test/cpu_test.cc', >- 'unit_test/cpu_thread_test.cc', >- 'unit_test/math_test.cc', >- 'unit_test/planar_test.cc', >- 'unit_test/rotate_argb_test.cc', >- 'unit_test/rotate_test.cc', >- 'unit_test/scale_argb_test.cc', >- 'unit_test/scale_test.cc', >- 'unit_test/unit_test.cc', >- 'unit_test/video_common_test.cc', >- ], >- 'conditions': [ >- ['OS=="linux"', { >- 'cflags': [ >- '-fexceptions', >- ], >- }], >- [ 'OS == "ios"', { >- 'xcode_settings': { >- 'DEBUGGING_SYMBOLS': 'YES', >- 'DEBUG_INFORMATION_FORMAT' : 'dwarf-with-dsym', >- # Work around compile issue with isosim.mm, see >- # https://code.google.com/p/libyuv/issues/detail?id=548 for details. >- 'WARNING_CFLAGS': [ >- '-Wno-sometimes-uninitialized', >- ], >- }, >- 'cflags': [ >- '-Wno-sometimes-uninitialized', >- ], >- }], >- [ 'OS != "ios" and libyuv_disable_jpeg != 1', { >- 'defines': [ >- 'HAVE_JPEG', >- ], >- }], >- ['OS=="android"', { >- 'dependencies': [ >- '<(DEPTH)/testing/android/native_test.gyp:native_test_native_code', >- ], >- }], >- # TODO(YangZhang): These lines can be removed when high accuracy >- # YUV to RGB to Neon is ported. 
>- [ '(target_arch == "armv7" or target_arch == "armv7s" \ >- or (target_arch == "arm" and arm_version >= 7) \ >- or target_arch == "arm64") \ >- and (arm_neon == 1 or arm_neon_optional == 1)', { >- 'defines': [ >- 'LIBYUV_NEON' >- ], >- }], >- [ '(target_arch == "mipsel" or target_arch == "mips64el") \ >- and (mips_msa == 1)', { >- 'defines': [ >- 'LIBYUV_MSA' >- ], >- }], >- ], # conditions >- 'defines': [ >- # Enable the following 3 macros to turn off assembly for specified CPU. >- # 'LIBYUV_DISABLE_X86', >- # 'LIBYUV_DISABLE_NEON', >- # Enable the following macro to build libyuv as a shared library (dll). >- # 'LIBYUV_USING_SHARED_LIBRARY', >- ], >- }, >- { >- 'target_name': 'compare', >- 'type': 'executable', >- 'dependencies': [ >- 'libyuv.gyp:libyuv', >- ], >- 'sources': [ >- # sources >- 'util/compare.cc', >- ], >- 'conditions': [ >- ['OS=="linux"', { >- 'cflags': [ >- '-fexceptions', >- ], >- }], >- ], # conditions >- }, >- { >- 'target_name': 'yuvconvert', >- 'type': 'executable', >- 'dependencies': [ >- 'libyuv.gyp:libyuv', >- ], >- 'sources': [ >- # sources >- 'util/yuvconvert.cc', >- ], >- 'conditions': [ >- ['OS=="linux"', { >- 'cflags': [ >- '-fexceptions', >- ], >- }], >- ], # conditions >- }, >- # TODO(fbarchard): Enable SSE2 and OpenMP for better performance. 
>- { >- 'target_name': 'psnr', >- 'type': 'executable', >- 'sources': [ >- # sources >- 'util/psnr_main.cc', >- 'util/psnr.cc', >- 'util/ssim.cc', >- ], >- 'dependencies': [ >- 'libyuv.gyp:libyuv', >- ], >- 'conditions': [ >- [ 'OS != "ios" and libyuv_disable_jpeg != 1', { >- 'defines': [ >- 'HAVE_JPEG', >- ], >- }], >- ], # conditions >- }, >- >- { >- 'target_name': 'cpuid', >- 'type': 'executable', >- 'sources': [ >- # sources >- 'util/cpuid.c', >- ], >- 'dependencies': [ >- 'libyuv.gyp:libyuv', >- ], >- }, >- ], # targets >- 'conditions': [ >- ['OS=="android"', { >- 'targets': [ >- { >- 'target_name': 'yuv_unittest_apk', >- 'type': 'none', >- 'variables': { >- 'test_suite_name': 'yuv_unittest', >- 'input_shlib_path': '<(SHARED_LIB_DIR)/(SHARED_LIB_PREFIX)libyuv_unittest<(SHARED_LIB_SUFFIX)', >- }, >- 'includes': [ >- 'build/apk_test.gypi', >- ], >- 'dependencies': [ >- 'libyuv_unittest', >- ], >- }, >- ], >- }], >- ], >-} >- >-# Local Variables: >-# tab-width:2 >-# indent-tabs-mode:nil >-# End: >-# vim: set expandtab tabstop=2 shiftwidth=2: >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/linux.mk b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/linux.mk >index b84c89f912e066416f2a2477ab720a6a477cb66f..e9a26a79b209564b3f4e6e80ac14f1dd83768ae1 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/linux.mk >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/linux.mk >@@ -13,6 +13,8 @@ LOCAL_OBJ_FILES := \ > source/compare.o \ > source/compare_common.o \ > source/compare_gcc.o \ >+ source/compare_mmi.o \ >+ source/compare_msa.o \ > source/compare_neon64.o \ > source/compare_neon.o \ > source/compare_win.o \ >@@ -32,12 +34,16 @@ LOCAL_OBJ_FILES := \ > source/rotate.o \ > source/rotate_common.o \ > source/rotate_gcc.o \ >+ source/rotate_mmi.o \ >+ source/rotate_msa.o \ > source/rotate_neon64.o \ > source/rotate_neon.o \ > source/rotate_win.o \ > source/row_any.o \ > source/row_common.o \ > source/row_gcc.o \ >+ 
source/row_mmi.o \ >+ source/row_msa.o \ > source/row_neon64.o \ > source/row_neon.o \ > source/row_win.o \ >@@ -46,6 +52,8 @@ LOCAL_OBJ_FILES := \ > source/scale.o \ > source/scale_common.o \ > source/scale_gcc.o \ >+ source/scale_mmi.o \ >+ source/scale_msa.o \ > source/scale_neon64.o \ > source/scale_neon.o \ > source/scale_win.o \ >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/public.mk b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/public.mk >index 090d8cb659fa8198ec0ec95cd083c42d7afa2db5..1342307a56e30142ee3128f819d9eabfab9cb8cc 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/public.mk >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/public.mk >@@ -1,13 +1,13 @@ >-# This file contains all the common make variables which are useful for >-# anyone depending on this library. >-# Note that dependencies on NDK are not directly listed since NDK auto adds >-# them. >- >-LIBYUV_INCLUDES := $(LIBYUV_PATH)/include >- >-LIBYUV_C_FLAGS := >- >-LIBYUV_CPP_FLAGS := >- >-LIBYUV_LDLIBS := >-LIBYUV_DEP_MODULES := >+# This file contains all the common make variables which are useful for >+# anyone depending on this library. >+# Note that dependencies on NDK are not directly listed since NDK auto adds >+# them. 
>+ >+LIBYUV_INCLUDES := $(LIBYUV_PATH)/include >+ >+LIBYUV_C_FLAGS := >+ >+LIBYUV_CPP_FLAGS := >+ >+LIBYUV_LDLIBS := >+LIBYUV_DEP_MODULES := >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare.cc >index 50e3abd0556e3862a7da070e4555daa2ed4bf45c..5aa3a4db86da6421f5ecbb1142d25fb5b0352b87 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare.cc >@@ -154,6 +154,12 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a, > HammingDistance = HammingDistance_MSA; > } > #endif >+#if defined(HAS_HAMMINGDISTANCE_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ HammingDistance = HammingDistance_MMI; >+ } >+#endif >+ > #ifdef _OPENMP > #pragma omp parallel for reduction(+ : diff) > #endif >@@ -210,6 +216,11 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a, > SumSquareError = SumSquareError_MSA; > } > #endif >+#if defined(HAS_SUMSQUAREERROR_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SumSquareError = SumSquareError_MMI; >+ } >+#endif > #ifdef _OPENMP > #pragma omp parallel for reduction(+ : sse) > #endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_common.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_common.cc >index 633466addb53e48b4ae839187c6b59cdd23c8eb6..d4b170ad9862b5efe05b7a9fcedca5b305fb6b97 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_common.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_common.cc >@@ -55,7 +55,7 @@ uint32_t HammingDistance_C(const uint8_t* src_a, > > int i; > for (i = 0; i < count - 3; i += 4) { >- uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT >+ uint32_t x = *((const uint32_t*)src_a) ^ *((const uint32_t*)src_b); > uint32_t u = x - ((x >> 1) & 0x55555555); > u = ((u >> 2) & 0x33333333) + (u & 
0x33333333); > diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24); >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_mmi.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_mmi.cc >new file mode 100644 >index 0000000000000000000000000000000000000000..7640d9468cb8986c120cc96ad3b3c83bd3ce3678 >--- /dev/null >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_mmi.cc >@@ -0,0 +1,123 @@ >+/* >+ * Copyright 2012 The LibYuv Project Authors. All rights reserved. >+ * >+ * Use of this source code is governed by a BSD-style license >+ * that can be found in the LICENSE file in the root of the source >+ * tree. An additional intellectual property rights grant can be found >+ * in the file PATENTS. All contributing project authors may >+ * be found in the AUTHORS file in the root of the source tree. >+ */ >+ >+#include "libyuv/basic_types.h" >+ >+#include "libyuv/compare_row.h" >+ >+#ifdef __cplusplus >+namespace libyuv { >+extern "C" { >+#endif >+ >+// This module is for Mips MMI. >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+// Hakmem method for hamming distance. 
>+uint32_t HammingDistance_MMI(const uint8_t* src_a, >+ const uint8_t* src_b, >+ int count) { >+ uint32_t diff = 0u; >+ >+ uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0; >+ uint64_t c1 = 0x5555555555555555; >+ uint64_t c2 = 0x3333333333333333; >+ uint64_t c3 = 0x0f0f0f0f0f0f0f0f; >+ uint32_t c4 = 0x01010101; >+ uint64_t s1 = 1, s2 = 2, s3 = 4; >+ __asm__ volatile( >+ "1: \n\t" >+ "ldc1 %[ta], 0(%[src_a]) \n\t" >+ "ldc1 %[tb], 0(%[src_b]) \n\t" >+ "xor %[temp], %[ta], %[tb] \n\t" >+ "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1 >+ "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1 >+ "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1 >+ "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2) >+ "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2 >+ "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2 >+ "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t >+ "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4 >+ "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4) >+ "and %[temp1], %[temp1], %[c3] \n\t" //&c3 >+ "dmfc1 $t0, %[temp1] \n\t" >+ "dsrl32 $t0, $t0, 0 \n\t " >+ "mul $t0, $t0, %[c4] \n\t" >+ "dsrl $t0, $t0, 24 \n\t" >+ "dadd %[diff], %[diff], $t0 \n\t" >+ "dmfc1 $t0, %[temp1] \n\t" >+ "mul $t0, $t0, %[c4] \n\t" >+ "dsrl $t0, $t0, 24 \n\t" >+ "dadd %[diff], %[diff], $t0 \n\t" >+ "daddiu %[src_a], %[src_a], 8 \n\t" >+ "daddiu %[src_b], %[src_b], 8 \n\t" >+ "addiu %[count], %[count], -8 \n\t" >+ "bgtz %[count], 1b \n\t" >+ "nop \n\t" >+ : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b), >+ [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp), >+ [temp1] "+f"(temp1) >+ : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1), >+ [s2] "f"(s2), [s3] "f"(s3) >+ : "memory"); >+ return diff; >+} >+ >+uint32_t SumSquareError_MMI(const uint8_t* src_a, >+ const uint8_t* src_b, >+ int count) { >+ uint32_t sse = 0u; >+ uint32_t sse_hi = 0u, sse_lo = 0u; >+ >+ uint64_t src1, src2; >+ uint64_t diff, diff_hi, diff_lo; >+ uint64_t sse_sum, 
sse_tmp; >+ >+ const uint64_t mask = 0x0ULL; >+ >+ __asm__ volatile( >+ "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t" >+ >+ "1: \n\t" >+ "ldc1 %[src1], 0x00(%[src_a]) \n\t" >+ "ldc1 %[src2], 0x00(%[src_b]) \n\t" >+ "pasubub %[diff], %[src1], %[src2] \n\t" >+ "punpcklbh %[diff_lo], %[diff], %[mask] \n\t" >+ "punpckhbh %[diff_hi], %[diff], %[mask] \n\t" >+ "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t" >+ "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t" >+ "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t" >+ "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t" >+ >+ "daddiu %[src_a], %[src_a], 0x08 \n\t" >+ "daddiu %[src_b], %[src_b], 0x08 \n\t" >+ "daddiu %[count], %[count], -0x08 \n\t" >+ "bnez %[count], 1b \n\t" >+ >+ "mfc1 %[sse_lo], %[sse_sum] \n\t" >+ "mfhc1 %[sse_hi], %[sse_sum] \n\t" >+ "daddu %[sse], %[sse_hi], %[sse_lo] \n\t" >+ : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1), >+ [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi), >+ [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp), >+ [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo) >+ : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count), >+ [mask] "f"(mask) >+ : "memory"); >+ >+ return sse; >+} >+ >+#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+#ifdef __cplusplus >+} // extern "C" >+} // namespace libyuv >+#endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_msa.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_msa.cc >index e944235f02060baaae70f29da4c427d549dd810c..0b807d37bee353735e70746731c86b57dbf0e739 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_msa.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/compare_msa.cc >@@ -1,97 +1,97 @@ >-/* >- * Copyright 2017 The LibYuv Project Authors. All rights reserved. 
>- * >- * Use of this source code is governed by a BSD-style license >- * that can be found in the LICENSE file in the root of the source >- * tree. An additional intellectual property rights grant can be found >- * in the file PATENTS. All contributing project authors may >- * be found in the AUTHORS file in the root of the source tree. >- */ >- >-#include "libyuv/basic_types.h" >- >-#include "libyuv/compare_row.h" >-#include "libyuv/row.h" >- >-// This module is for GCC MSA >-#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) >-#include "libyuv/macros_msa.h" >- >-#ifdef __cplusplus >-namespace libyuv { >-extern "C" { >-#endif >- >-uint32_t HammingDistance_MSA(const uint8_t* src_a, >- const uint8_t* src_b, >- int count) { >- uint32_t diff = 0u; >- int i; >- v16u8 src0, src1, src2, src3; >- v2i64 vec0 = {0}, vec1 = {0}; >- >- for (i = 0; i < count; i += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); >- src0 ^= src2; >- src1 ^= src3; >- vec0 += __msa_pcnt_d((v2i64)src0); >- vec1 += __msa_pcnt_d((v2i64)src1); >- src_a += 32; >- src_b += 32; >- } >- >- vec0 += vec1; >- diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0); >- diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2); >- return diff; >-} >- >-uint32_t SumSquareError_MSA(const uint8_t* src_a, >- const uint8_t* src_b, >- int count) { >- uint32_t sse = 0u; >- int i; >- v16u8 src0, src1, src2, src3; >- v8i16 vec0, vec1, vec2, vec3; >- v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0}; >- v2i64 tmp0; >- >- for (i = 0; i < count; i += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); >- vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); >- vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); >- vec2 = 
(v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); >- vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); >- vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); >- vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); >- vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); >- vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); >- reg0 = __msa_dpadd_s_w(reg0, vec0, vec0); >- reg1 = __msa_dpadd_s_w(reg1, vec1, vec1); >- reg2 = __msa_dpadd_s_w(reg2, vec2, vec2); >- reg3 = __msa_dpadd_s_w(reg3, vec3, vec3); >- src_a += 32; >- src_b += 32; >- } >- >- reg0 += reg1; >- reg2 += reg3; >- reg0 += reg2; >- tmp0 = __msa_hadd_s_d(reg0, reg0); >- sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0); >- sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2); >- return sse; >-} >- >-#ifdef __cplusplus >-} // extern "C" >-} // namespace libyuv >-#endif >- >-#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) >+/* >+ * Copyright 2017 The LibYuv Project Authors. All rights reserved. >+ * >+ * Use of this source code is governed by a BSD-style license >+ * that can be found in the LICENSE file in the root of the source >+ * tree. An additional intellectual property rights grant can be found >+ * in the file PATENTS. All contributing project authors may >+ * be found in the AUTHORS file in the root of the source tree. 
>+ */ >+ >+#include "libyuv/basic_types.h" >+ >+#include "libyuv/compare_row.h" >+#include "libyuv/row.h" >+ >+// This module is for GCC MSA >+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) >+#include "libyuv/macros_msa.h" >+ >+#ifdef __cplusplus >+namespace libyuv { >+extern "C" { >+#endif >+ >+uint32_t HammingDistance_MSA(const uint8_t* src_a, >+ const uint8_t* src_b, >+ int count) { >+ uint32_t diff = 0u; >+ int i; >+ v16u8 src0, src1, src2, src3; >+ v2i64 vec0 = {0}, vec1 = {0}; >+ >+ for (i = 0; i < count; i += 32) { >+ src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); >+ src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); >+ src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); >+ src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); >+ src0 ^= src2; >+ src1 ^= src3; >+ vec0 += __msa_pcnt_d((v2i64)src0); >+ vec1 += __msa_pcnt_d((v2i64)src1); >+ src_a += 32; >+ src_b += 32; >+ } >+ >+ vec0 += vec1; >+ diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0); >+ diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2); >+ return diff; >+} >+ >+uint32_t SumSquareError_MSA(const uint8_t* src_a, >+ const uint8_t* src_b, >+ int count) { >+ uint32_t sse = 0u; >+ int i; >+ v16u8 src0, src1, src2, src3; >+ v8i16 vec0, vec1, vec2, vec3; >+ v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0}; >+ v2i64 tmp0; >+ >+ for (i = 0; i < count; i += 32) { >+ src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); >+ src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); >+ src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); >+ src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); >+ vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); >+ vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); >+ vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); >+ vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); >+ vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); >+ vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); >+ vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); >+ vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); >+ reg0 = __msa_dpadd_s_w(reg0, vec0, vec0); >+ 
reg1 = __msa_dpadd_s_w(reg1, vec1, vec1); >+ reg2 = __msa_dpadd_s_w(reg2, vec2, vec2); >+ reg3 = __msa_dpadd_s_w(reg3, vec3, vec3); >+ src_a += 32; >+ src_b += 32; >+ } >+ >+ reg0 += reg1; >+ reg2 += reg3; >+ reg0 += reg2; >+ tmp0 = __msa_hadd_s_d(reg0, reg0); >+ sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0); >+ sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2); >+ return sse; >+} >+ >+#ifdef __cplusplus >+} // extern "C" >+} // namespace libyuv >+#endif >+ >+#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert.cc >index 375cc732c1d710ebd8ccbc44ac1d4ff4a8a81e2f..4b8d0dc57f7d4106e39f5a0114036a10ae2bad0f 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert.cc >@@ -215,6 +215,195 @@ int I422ToI420(const uint8_t* src_y, > dst_v, dst_stride_v, width, height, src_uv_width, height); > } > >+// TODO(fbarchard): Implement row conversion. >+LIBYUV_API >+int I422ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height) { >+ int halfwidth = (width + 1) >> 1; >+ int halfheight = (height + 1) >> 1; >+ // Negative height means invert the image. 
>+ if (height < 0) { >+ height = -height; >+ halfheight = (height + 1) >> 1; >+ src_y = src_y + (height - 1) * src_stride_y; >+ src_u = src_u + (height - 1) * src_stride_u; >+ src_v = src_v + (height - 1) * src_stride_v; >+ src_stride_y = -src_stride_y; >+ src_stride_u = -src_stride_u; >+ src_stride_v = -src_stride_v; >+ } >+ >+ // Allocate u and v buffers >+ align_buffer_64(plane_u, halfwidth * halfheight * 2); >+ uint8_t* plane_v = plane_u + halfwidth * halfheight; >+ >+ I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, >+ dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, >+ height); >+ MergeUVPlane(plane_v, halfwidth, plane_u, halfwidth, dst_vu, dst_stride_vu, >+ halfwidth, halfheight); >+ free_aligned_buffer_64(plane_u); >+ return 0; >+} >+ >+#ifdef I422TONV21_ROW_VERSION >+// Unittest fails for this version. >+// 422 chroma is 1/2 width, 1x height >+// 420 chroma is 1/2 width, 1/2 height >+// Swap src_u and src_v to implement I422ToNV12 >+LIBYUV_API >+int I422ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height) { >+ int y; >+ void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, >+ uint8_t* dst_uv, int width) = MergeUVRow_C; >+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, >+ ptrdiff_t src_stride, int dst_width, >+ int source_y_fraction) = InterpolateRow_C; >+ int halfwidth = (width + 1) >> 1; >+ int halfheight = (height + 1) >> 1; >+ if (!src_u || !src_v || !dst_vu || width <= 0 || height == 0) { >+ return -1; >+ } >+ // Negative height means invert the image. 
>+ if (height < 0) { >+ height = -height; >+ halfheight = (height + 1) >> 1; >+ src_y = src_y + (height - 1) * src_stride_y; >+ src_u = src_u + (halfheight - 1) * src_stride_u; >+ src_v = src_v + (halfheight - 1) * src_stride_v; >+ src_stride_y = -src_stride_y; >+ src_stride_u = -src_stride_u; >+ src_stride_v = -src_stride_v; >+ } >+#if defined(HAS_MERGEUVROW_SSE2) >+ if (TestCpuFlag(kCpuHasSSE2)) { >+ MergeUVRow = MergeUVRow_Any_SSE2; >+ if (IS_ALIGNED(halfwidth, 16)) { >+ MergeUVRow = MergeUVRow_SSE2; >+ } >+ } >+#endif >+#if defined(HAS_MERGEUVROW_AVX2) >+ if (TestCpuFlag(kCpuHasAVX2)) { >+ MergeUVRow = MergeUVRow_Any_AVX2; >+ if (IS_ALIGNED(halfwidth, 32)) { >+ MergeUVRow = MergeUVRow_AVX2; >+ } >+ } >+#endif >+#if defined(HAS_MERGEUVROW_NEON) >+ if (TestCpuFlag(kCpuHasNEON)) { >+ MergeUVRow = MergeUVRow_Any_NEON; >+ if (IS_ALIGNED(halfwidth, 16)) { >+ MergeUVRow = MergeUVRow_NEON; >+ } >+ } >+#endif >+#if defined(HAS_MERGEUVROW_MSA) >+ if (TestCpuFlag(kCpuHasMSA)) { >+ MergeUVRow = MergeUVRow_Any_MSA; >+ if (IS_ALIGNED(halfwidth, 16)) { >+ MergeUVRow = MergeUVRow_MSA; >+ } >+ } >+#endif >+#if defined(HAS_MERGEUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MergeUVRow = MergeUVRow_Any_MMI; >+ if (IS_ALIGNED(halfwidth, 8)) { >+ MergeUVRow = MergeUVRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_INTERPOLATEROW_SSSE3) >+ if (TestCpuFlag(kCpuHasSSSE3)) { >+ InterpolateRow = InterpolateRow_Any_SSSE3; >+ if (IS_ALIGNED(width, 16)) { >+ InterpolateRow = InterpolateRow_SSSE3; >+ } >+ } >+#endif >+#if defined(HAS_INTERPOLATEROW_AVX2) >+ if (TestCpuFlag(kCpuHasAVX2)) { >+ InterpolateRow = InterpolateRow_Any_AVX2; >+ if (IS_ALIGNED(width, 32)) { >+ InterpolateRow = InterpolateRow_AVX2; >+ } >+ } >+#endif >+#if defined(HAS_INTERPOLATEROW_NEON) >+ if (TestCpuFlag(kCpuHasNEON)) { >+ InterpolateRow = InterpolateRow_Any_NEON; >+ if (IS_ALIGNED(width, 16)) { >+ InterpolateRow = InterpolateRow_NEON; >+ } >+ } >+#endif >+#if defined(HAS_INTERPOLATEROW_MSA) >+ if 
(TestCpuFlag(kCpuHasMSA)) { >+ InterpolateRow = InterpolateRow_Any_MSA; >+ if (IS_ALIGNED(width, 32)) { >+ InterpolateRow = InterpolateRow_MSA; >+ } >+ } >+#endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } >+#endif >+ >+ if (dst_y) { >+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, halfwidth, height); >+ } >+ { >+ // Allocate 2 rows of vu. >+ int awidth = halfwidth * 2; >+ align_buffer_64(row_vu_0, awidth * 2); >+ uint8_t* row_vu_1 = row_vu_0 + awidth; >+ >+ for (y = 0; y < height - 1; y += 2) { >+ MergeUVRow(src_v, src_u, row_vu_0, halfwidth); >+ MergeUVRow(src_v + src_stride_v, src_u + src_stride_u, row_vu_1, >+ halfwidth); >+ InterpolateRow(dst_vu, row_vu_0, awidth, awidth, 128); >+ src_u += src_stride_u * 2; >+ src_v += src_stride_v * 2; >+ dst_vu += dst_stride_vu; >+ } >+ if (height & 1) { >+ MergeUVRow(src_v, src_u, dst_vu, halfwidth); >+ } >+ free_aligned_buffer_64(row_vu_0); >+ } >+ return 0; >+} >+#endif // I422TONV21_ROW_VERSION >+ > // 444 chroma is 1x width, 1x height > // 420 chroma is 1/2 width, 1/2 height > LIBYUV_API >@@ -237,6 +426,46 @@ int I444ToI420(const uint8_t* src_y, > dst_v, dst_stride_v, width, height, width, height); > } > >+// TODO(fbarchard): Implement row conversion. >+LIBYUV_API >+int I444ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height) { >+ int halfwidth = (width + 1) >> 1; >+ int halfheight = (height + 1) >> 1; >+ // Negative height means invert the image. 
>+ if (height < 0) { >+ height = -height; >+ halfheight = (height + 1) >> 1; >+ src_y = src_y + (height - 1) * src_stride_y; >+ src_u = src_u + (height - 1) * src_stride_u; >+ src_v = src_v + (height - 1) * src_stride_v; >+ src_stride_y = -src_stride_y; >+ src_stride_u = -src_stride_u; >+ src_stride_v = -src_stride_v; >+ } >+ // Allocate u and v buffers >+ align_buffer_64(plane_u, halfwidth * halfheight * 2); >+ uint8_t* plane_v = plane_u + halfwidth * halfheight; >+ >+ I444ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, >+ dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, >+ height); >+ MergeUVPlane(plane_v, halfwidth, plane_u, halfwidth, dst_vu, dst_stride_vu, >+ halfwidth, halfheight); >+ free_aligned_buffer_64(plane_u); >+ return 0; >+} >+ > // I400 is greyscale typically used in MJPG > LIBYUV_API > int I400ToI420(const uint8_t* src_y, >@@ -269,6 +498,35 @@ int I400ToI420(const uint8_t* src_y, > return 0; > } > >+// I400 is greyscale typically used in MJPG >+LIBYUV_API >+int I400ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height) { >+ int halfwidth = (width + 1) >> 1; >+ int halfheight = (height + 1) >> 1; >+ if (!dst_vu || width <= 0 || height == 0) { >+ return -1; >+ } >+ // Negative height means invert the image. 
>+ if (height < 0) { >+ height = -height; >+ halfheight = (height + 1) >> 1; >+ src_y = src_y + (height - 1) * src_stride_y; >+ src_stride_y = -src_stride_y; >+ } >+ if (dst_y) { >+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); >+ } >+ SetPlane(dst_vu, dst_stride_vu, halfwidth * 2, halfheight, 128); >+ return 0; >+} >+ > static void CopyPlane2(const uint8_t* src, > int src_stride_0, > int src_stride_1, >@@ -502,6 +760,18 @@ int YUY2ToI420(const uint8_t* src_yuy2, > } > } > #endif >+#if defined(HAS_YUY2TOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ YUY2ToYRow = YUY2ToYRow_Any_MMI; >+ YUY2ToUVRow = YUY2ToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ YUY2ToYRow = YUY2ToYRow_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ YUY2ToUVRow = YUY2ToUVRow_MMI; >+ } >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); >@@ -583,6 +853,16 @@ int UYVYToI420(const uint8_t* src_uyvy, > } > } > #endif >+#if defined(HAS_UYVYTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ UYVYToYRow = UYVYToYRow_Any_MMI; >+ UYVYToUVRow = UYVYToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ UYVYToYRow = UYVYToYRow_MMI; >+ UYVYToUVRow = UYVYToUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); >@@ -679,6 +959,22 @@ int ARGBToI420(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); >@@ -765,6 +1061,22 @@ int BGRAToI420(const uint8_t* src_bgra, > } > } > #endif >+#if 
defined(HAS_BGRATOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ BGRAToYRow = BGRAToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ BGRAToYRow = BGRAToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_BGRATOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ BGRAToUVRow = BGRAToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ BGRAToUVRow = BGRAToUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); >@@ -851,6 +1163,22 @@ int ABGRToI420(const uint8_t* src_abgr, > } > } > #endif >+#if defined(HAS_ABGRTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ABGRToYRow = ABGRToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ABGRToYRow = ABGRToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ABGRTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ABGRToUVRow = ABGRToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ABGRToUVRow = ABGRToUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); >@@ -937,6 +1265,22 @@ int RGBAToI420(const uint8_t* src_rgba, > } > } > #endif >+#if defined(HAS_RGBATOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RGBAToYRow = RGBAToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ RGBAToYRow = RGBAToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_RGBATOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RGBAToUVRow = RGBAToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ RGBAToUVRow = RGBAToUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); >@@ -967,7 +1311,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, > int width, > int height) { > int y; >-#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) >+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ >+ defined(HAS_RGB24TOYROW_MMI)) > void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, > uint8_t* dst_u, uint8_t* dst_v, int width) = 
> RGB24ToUVRow_C; >@@ -1013,6 +1358,17 @@ int RGB24ToI420(const uint8_t* src_rgb24, > RGB24ToUVRow = RGB24ToUVRow_MSA; > } > } >+#elif defined(HAS_RGB24TOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RGB24ToUVRow = RGB24ToUVRow_Any_MMI; >+ RGB24ToYRow = RGB24ToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ RGB24ToYRow = RGB24ToYRow_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ RGB24ToUVRow = RGB24ToUVRow_MMI; >+ } >+ } >+ } > // Other platforms do intermediate conversion from RGB24 to ARGB. > #else > #if defined(HAS_RGB24TOARGBROW_SSSE3) >@@ -1046,14 +1402,16 @@ int RGB24ToI420(const uint8_t* src_rgb24, > #endif > > { >-#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) >+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ >+ defined(HAS_RGB24TOYROW_MMI)) > // Allocate 2 rows of ARGB. > const int kRowSize = (width * 4 + 31) & ~31; > align_buffer_64(row, kRowSize * 2); > #endif > > for (y = 0; y < height - 1; y += 2) { >-#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) >+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ >+ defined(HAS_RGB24TOYROW_MMI)) > RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); > RGB24ToYRow(src_rgb24, dst_y, width); > RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); >@@ -1070,7 +1428,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, > dst_v += dst_stride_v; > } > if (height & 1) { >-#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) >+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ >+ defined(HAS_RGB24TOYROW_MMI)) > RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); > RGB24ToYRow(src_rgb24, dst_y, width); > #else >@@ -1079,7 +1438,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, > ARGBToYRow(row, dst_y, width); > #endif > } >-#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) >+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ >+ 
defined(HAS_RGB24TOYROW_MMI)) > free_aligned_buffer_64(row); > #endif > } >@@ -1099,7 +1459,8 @@ int RAWToI420(const uint8_t* src_raw, > int width, > int height) { > int y; >-#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) >+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ >+ defined(HAS_RAWTOYROW_MMI)) > void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, > uint8_t* dst_v, int width) = RAWToUVRow_C; > void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = >@@ -1144,6 +1505,17 @@ int RAWToI420(const uint8_t* src_raw, > RAWToUVRow = RAWToUVRow_MSA; > } > } >+#elif defined(HAS_RAWTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RAWToUVRow = RAWToUVRow_Any_MMI; >+ RAWToYRow = RAWToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ RAWToYRow = RAWToYRow_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ RAWToUVRow = RAWToUVRow_MMI; >+ } >+ } >+ } > // Other platforms do intermediate conversion from RAW to ARGB. > #else > #if defined(HAS_RAWTOARGBROW_SSSE3) >@@ -1177,14 +1549,16 @@ int RAWToI420(const uint8_t* src_raw, > #endif > > { >-#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) >+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ >+ defined(HAS_RAWTOYROW_MMI)) > // Allocate 2 rows of ARGB. 
> const int kRowSize = (width * 4 + 31) & ~31; > align_buffer_64(row, kRowSize * 2); > #endif > > for (y = 0; y < height - 1; y += 2) { >-#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) >+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ >+ defined(HAS_RAWTOYROW_MMI)) > RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); > RAWToYRow(src_raw, dst_y, width); > RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); >@@ -1201,7 +1575,8 @@ int RAWToI420(const uint8_t* src_raw, > dst_v += dst_stride_v; > } > if (height & 1) { >-#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) >+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ >+ defined(HAS_RAWTOYROW_MMI)) > RAWToUVRow(src_raw, 0, dst_u, dst_v, width); > RAWToYRow(src_raw, dst_y, width); > #else >@@ -1210,7 +1585,8 @@ int RAWToI420(const uint8_t* src_raw, > ARGBToYRow(row, dst_y, width); > #endif > } >-#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) >+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ >+ defined(HAS_RAWTOYROW_MMI)) > free_aligned_buffer_64(row); > #endif > } >@@ -1230,7 +1606,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, > int width, > int height) { > int y; >-#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) >+#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ >+ defined(HAS_RGB565TOYROW_MMI)) > void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, > uint8_t* dst_u, uint8_t* dst_v, int width) = > RGB565ToUVRow_C; >@@ -1276,6 +1653,17 @@ int RGB565ToI420(const uint8_t* src_rgb565, > RGB565ToUVRow = RGB565ToUVRow_MSA; > } > } >+#elif defined(HAS_RGB565TOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RGB565ToUVRow = RGB565ToUVRow_Any_MMI; >+ RGB565ToYRow = RGB565ToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ RGB565ToYRow = RGB565ToYRow_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ RGB565ToUVRow = RGB565ToUVRow_MMI; >+ } >+ } >+ 
} > // Other platforms do intermediate conversion from RGB565 to ARGB. > #else > #if defined(HAS_RGB565TOARGBROW_SSE2) >@@ -1316,13 +1704,15 @@ int RGB565ToI420(const uint8_t* src_rgb565, > #endif > #endif > { >-#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) >+#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ >+ defined(HAS_RGB565TOYROW_MMI)) > // Allocate 2 rows of ARGB. > const int kRowSize = (width * 4 + 31) & ~31; > align_buffer_64(row, kRowSize * 2); > #endif > for (y = 0; y < height - 1; y += 2) { >-#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) >+#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ >+ defined(HAS_RGB565TOYROW_MMI)) > RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); > RGB565ToYRow(src_rgb565, dst_y, width); > RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); >@@ -1339,7 +1729,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, > dst_v += dst_stride_v; > } > if (height & 1) { >-#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) >+#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ >+ defined(HAS_RGB565TOYROW_MMI)) > RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); > RGB565ToYRow(src_rgb565, dst_y, width); > #else >@@ -1348,7 +1739,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, > ARGBToYRow(row, dst_y, width); > #endif > } >-#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) >+#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ >+ defined(HAS_RGB565TOYROW_MMI)) > free_aligned_buffer_64(row); > #endif > } >@@ -1368,7 +1760,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, > int width, > int height) { > int y; >-#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) >+#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ >+ defined(HAS_ARGB1555TOYROW_MMI)) > void 
(*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, > uint8_t* dst_u, uint8_t* dst_v, int width) = > ARGB1555ToUVRow_C; >@@ -1415,6 +1808,17 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, > ARGB1555ToUVRow = ARGB1555ToUVRow_MSA; > } > } >+#elif defined(HAS_ARGB1555TOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MMI; >+ ARGB1555ToYRow = ARGB1555ToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGB1555ToYRow = ARGB1555ToYRow_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGB1555ToUVRow = ARGB1555ToUVRow_MMI; >+ } >+ } >+ } > // Other platforms do intermediate conversion from ARGB1555 to ARGB. > #else > #if defined(HAS_ARGB1555TOARGBROW_SSE2) >@@ -1455,14 +1859,16 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, > #endif > #endif > { >-#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) >+#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ >+ defined(HAS_ARGB1555TOYROW_MMI)) > // Allocate 2 rows of ARGB. 
> const int kRowSize = (width * 4 + 31) & ~31; > align_buffer_64(row, kRowSize * 2); > #endif > > for (y = 0; y < height - 1; y += 2) { >-#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) >+#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ >+ defined(HAS_ARGB1555TOYROW_MMI)) > ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); > ARGB1555ToYRow(src_argb1555, dst_y, width); > ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, >@@ -1481,7 +1887,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, > dst_v += dst_stride_v; > } > if (height & 1) { >-#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) >+#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ >+ defined(HAS_ARGB1555TOYROW_MMI)) > ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); > ARGB1555ToYRow(src_argb1555, dst_y, width); > #else >@@ -1490,7 +1897,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, > ARGBToYRow(row, dst_y, width); > #endif > } >-#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) >+#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ >+ defined(HAS_ARGB1555TOYROW_MMI)) > free_aligned_buffer_64(row); > #endif > } >@@ -1510,7 +1918,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, > int width, > int height) { > int y; >-#if defined(HAS_ARGB4444TOYROW_NEON) >+#if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) > void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, > uint8_t* dst_u, uint8_t* dst_v, int width) = > ARGB4444ToUVRow_C; >@@ -1548,6 +1956,17 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, > } > } > } >+#elif defined(HAS_ARGB4444TOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGB4444ToUVRow = ARGB4444ToUVRow_Any_MMI; >+ ARGB4444ToYRow = ARGB4444ToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGB4444ToYRow = ARGB4444ToYRow_MMI; >+ if 
(IS_ALIGNED(width, 16)) { >+ ARGB4444ToUVRow = ARGB4444ToUVRow_MMI; >+ } >+ } >+ } > // Other platforms do intermediate conversion from ARGB4444 to ARGB. > #else > #if defined(HAS_ARGB4444TOARGBROW_SSE2) >@@ -1606,17 +2025,29 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+ } >+#endif > #endif > > { >-#if !defined(HAS_ARGB4444TOYROW_NEON) >+#if !(defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) > // Allocate 2 rows of ARGB. > const int kRowSize = (width * 4 + 31) & ~31; > align_buffer_64(row, kRowSize * 2); > #endif > > for (y = 0; y < height - 1; y += 2) { >-#if defined(HAS_ARGB4444TOYROW_NEON) >+#if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) > ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); > ARGB4444ToYRow(src_argb4444, dst_y, width); > ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, >@@ -1635,7 +2066,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, > dst_v += dst_stride_v; > } > if (height & 1) { >-#if defined(HAS_ARGB4444TOYROW_NEON) >+#if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) > ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); > ARGB4444ToYRow(src_argb4444, dst_y, width); > #else >@@ -1644,7 +2075,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, > ARGBToYRow(row, dst_y, width); > #endif > } >-#if !defined(HAS_ARGB4444TOYROW_NEON) >+#if !(defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) > free_aligned_buffer_64(row); > #endif > } >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_argb.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_argb.cc >index 
4c317ae30c68d61d00fad58c4b3e8a62d8c33b04..b376a0f38767652afe736c324ffb46a5e68f5034 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_argb.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_argb.cc >@@ -885,6 +885,14 @@ static int I420AlphaToARGBMatrix(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_ARGBATTENUATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBAttenuateRow = ARGBAttenuateRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, >@@ -1004,6 +1012,14 @@ int I400ToARGB(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_I400TOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ I400ToARGBRow = I400ToARGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ I400ToARGBRow = I400ToARGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > I400ToARGBRow(src_y, dst_argb, width); >@@ -1070,6 +1086,14 @@ int J400ToARGB(const uint8_t* src_y, > J400ToARGBRow = J400ToARGBRow_MSA; > } > } >+#endif >+#if defined(HAS_J400TOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ J400ToARGBRow = J400ToARGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ J400ToARGBRow = J400ToARGBRow_MMI; >+ } >+ } > #endif > for (y = 0; y < height; ++y) { > J400ToARGBRow(src_y, dst_argb, width); >@@ -1201,6 +1225,14 @@ int RGB24ToARGB(const uint8_t* src_rgb24, > } > } > #endif >+#if defined(HAS_RGB24TOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RGB24ToARGBRow = RGB24ToARGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ RGB24ToARGBRow = RGB24ToARGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > RGB24ToARGBRow(src_rgb24, dst_argb, width); >@@ -1260,6 +1292,14 @@ int RAWToARGB(const uint8_t* src_raw, > } > } > #endif >+#if defined(HAS_RAWTOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RAWToARGBRow = RAWToARGBRow_Any_MMI; >+ if 
(IS_ALIGNED(width, 4)) { >+ RAWToARGBRow = RAWToARGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > RAWToARGBRow(src_raw, dst_argb, width); >@@ -1327,6 +1367,14 @@ int RGB565ToARGB(const uint8_t* src_rgb565, > } > } > #endif >+#if defined(HAS_RGB565TOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RGB565ToARGBRow = RGB565ToARGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ RGB565ToARGBRow = RGB565ToARGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > RGB565ToARGBRow(src_rgb565, dst_argb, width); >@@ -1394,6 +1442,14 @@ int ARGB1555ToARGB(const uint8_t* src_argb1555, > } > } > #endif >+#if defined(HAS_ARGB1555TOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGB1555ToARGBRow(src_argb1555, dst_argb, width); >@@ -1461,6 +1517,14 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444, > } > } > #endif >+#if defined(HAS_ARGB4444TOARGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGB4444ToARGBRow(src_argb4444, dst_argb, width); >@@ -1789,6 +1853,22 @@ static int NV12ToRGB24Matrix(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_NV12TORGB24ROW_SSSE3) >+ if (TestCpuFlag(kCpuHasSSSE3)) { >+ NV12ToRGB24Row = NV12ToRGB24Row_Any_SSSE3; >+ if (IS_ALIGNED(width, 16)) { >+ NV12ToRGB24Row = NV12ToRGB24Row_SSSE3; >+ } >+ } >+#endif >+#if defined(HAS_NV12TORGB24ROW_AVX2) >+ if (TestCpuFlag(kCpuHasAVX2)) { >+ NV12ToRGB24Row = NV12ToRGB24Row_Any_AVX2; >+ if (IS_ALIGNED(width, 32)) { >+ NV12ToRGB24Row = NV12ToRGB24Row_AVX2; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width); >@@ -1832,6 +1912,22 @@ static int 
NV21ToRGB24Matrix(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_NV21TORGB24ROW_SSSE3) >+ if (TestCpuFlag(kCpuHasSSSE3)) { >+ NV21ToRGB24Row = NV21ToRGB24Row_Any_SSSE3; >+ if (IS_ALIGNED(width, 16)) { >+ NV21ToRGB24Row = NV21ToRGB24Row_SSSE3; >+ } >+ } >+#endif >+#if defined(HAS_NV21TORGB24ROW_AVX2) >+ if (TestCpuFlag(kCpuHasAVX2)) { >+ NV21ToRGB24Row = NV21ToRGB24Row_Any_AVX2; >+ if (IS_ALIGNED(width, 32)) { >+ NV21ToRGB24Row = NV21ToRGB24Row_AVX2; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width); >@@ -1844,8 +1940,7 @@ static int NV21ToRGB24Matrix(const uint8_t* src_y, > return 0; > } > >-// TODO(fbarchard): \(fbarchard): NV12ToRAW can be implemented by mirrored >-// matrix. Convert NV12 to RGB24. >+// Convert NV12 to RGB24. > LIBYUV_API > int NV12ToRGB24(const uint8_t* src_y, > int src_stride_y, >@@ -1875,6 +1970,34 @@ int NV21ToRGB24(const uint8_t* src_y, > width, height); > } > >+// Convert NV12 to RAW. >+LIBYUV_API >+int NV12ToRAW(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_uv, >+ int src_stride_uv, >+ uint8_t* dst_raw, >+ int dst_stride_raw, >+ int width, >+ int height) { >+ return NV21ToRGB24Matrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_raw, >+ dst_stride_raw, &kYvuI601Constants, width, height); >+} >+ >+// Convert NV21 to RAW. >+LIBYUV_API >+int NV21ToRAW(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_vu, >+ int src_stride_vu, >+ uint8_t* dst_raw, >+ int dst_stride_raw, >+ int width, >+ int height) { >+ return NV12ToRGB24Matrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_raw, >+ dst_stride_raw, &kYvuI601Constants, width, height); >+} >+ > // Convert M420 to ARGB. 
> LIBYUV_API > int M420ToARGB(const uint8_t* src_m420, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from.cc >index b5587ced625702f13d0bb3b353fa1ecedf61ffe6..706067bb29ba501016f685ee2e71bb48b5ae25dd 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from.cc >@@ -302,6 +302,14 @@ int I420ToYUY2(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_I422TOYUY2ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ I422ToYUY2Row = I422ToYUY2Row_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ I422ToYUY2Row = I422ToYUY2Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); >@@ -381,6 +389,14 @@ int I422ToUYVY(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_I422TOUYVYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ I422ToUYVYRow = I422ToUYVYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ I422ToUYVYRow = I422ToUYVYRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); >@@ -448,6 +464,14 @@ int I420ToUYVY(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_I422TOUYVYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ I422ToUYVYRow = I422ToUYVYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ I422ToUYVYRow = I422ToUYVYRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); >@@ -906,18 +930,19 @@ int I420ToARGB4444(const uint8_t* src_y, > return 0; > } > >-// Convert I420 to RGB565. >+// Convert I420 to RGB565 with specified color matrix. 
> LIBYUV_API >-int I420ToRGB565(const uint8_t* src_y, >- int src_stride_y, >- const uint8_t* src_u, >- int src_stride_u, >- const uint8_t* src_v, >- int src_stride_v, >- uint8_t* dst_rgb565, >- int dst_stride_rgb565, >- int width, >- int height) { >+int I420ToRGB565Matrix(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_rgb565, >+ int dst_stride_rgb565, >+ const struct YuvConstants* yuvconstants, >+ int width, >+ int height) { > int y; > void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, > const uint8_t* v_buf, uint8_t* rgb_buf, >@@ -966,7 +991,7 @@ int I420ToRGB565(const uint8_t* src_y, > #endif > > for (y = 0; y < height; ++y) { >- I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); >+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, yuvconstants, width); > dst_rgb565 += dst_stride_rgb565; > src_y += src_stride_y; > if (y & 1) { >@@ -977,6 +1002,57 @@ int I420ToRGB565(const uint8_t* src_y, > return 0; > } > >+// Convert I420 to RGB565. >+LIBYUV_API >+int I420ToRGB565(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_rgb565, >+ int dst_stride_rgb565, >+ int width, >+ int height) { >+ return I420ToRGB565Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, >+ src_stride_v, dst_rgb565, dst_stride_rgb565, >+ &kYuvI601Constants, width, height); >+} >+ >+// Convert J420 to RGB565. >+LIBYUV_API >+int J420ToRGB565(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_rgb565, >+ int dst_stride_rgb565, >+ int width, >+ int height) { >+ return I420ToRGB565Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, >+ src_stride_v, dst_rgb565, dst_stride_rgb565, >+ &kYuvJPEGConstants, width, height); >+} >+ >+// Convert H420 to RGB565. 
>+LIBYUV_API >+int H420ToRGB565(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* src_u, >+ int src_stride_u, >+ const uint8_t* src_v, >+ int src_stride_v, >+ uint8_t* dst_rgb565, >+ int dst_stride_rgb565, >+ int width, >+ int height) { >+ return I420ToRGB565Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, >+ src_stride_v, dst_rgb565, dst_stride_rgb565, >+ &kYuvH709Constants, width, height); >+} >+ > // Convert I422 to RGB565. > LIBYUV_API > int I422ToRGB565(const uint8_t* src_y, >@@ -1154,8 +1230,8 @@ int I420ToRGB565Dither(const uint8_t* src_y, > for (y = 0; y < height; ++y) { > I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); > ARGBToRGB565DitherRow(row_argb, dst_rgb565, >- *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT >- width); // NOLINT >+ *(const uint32_t*)(dither4x4 + ((y & 3) << 2)), >+ width); > dst_rgb565 += dst_stride_rgb565; > src_y += src_stride_y; > if (y & 1) { >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from_argb.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from_argb.cc >index 16b838458f02d07767e23468d94b612d5dab2849..1b070c10f756fcba861c5ab1b6a292073994c740 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from_argb.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_from_argb.cc >@@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOUV444ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUV444Row = ARGBToUV444Row_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToUV444Row = ARGBToUV444Row_MMI; >+ } >+ } >+#endif > #if defined(HAS_ARGBTOYROW_SSSE3) > if (TestCpuFlag(kCpuHasSSSE3)) { > ARGBToYRow = ARGBToYRow_Any_SSSE3; >@@ -108,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { 
>+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToUV444Row(src_argb, dst_u, dst_v, width); >@@ -208,6 +224,23 @@ int ARGBToI422(const uint8_t* src_argb, > } > #endif > >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+#endif >+ > for (y = 0; y < height; ++y) { > ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); > ARGBToYRow(src_argb, dst_y, width); >@@ -298,6 +331,22 @@ int ARGBToNV12(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_MERGEUVROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > MergeUVRow_ = MergeUVRow_Any_SSE2; >@@ -329,6 +378,14 @@ int ARGBToNV12(const uint8_t* src_argb, > MergeUVRow_ = MergeUVRow_MSA; > } > } >+#endif >+#if defined(HAS_MERGEUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MergeUVRow_ = MergeUVRow_Any_MMI; >+ if (IS_ALIGNED(halfwidth, 8)) { >+ MergeUVRow_ = MergeUVRow_MMI; >+ } >+ } > #endif > { > // Allocate a rows of uv. 
>@@ -434,6 +491,23 @@ int ARGBToNV21(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+#endif >+ > #if defined(HAS_MERGEUVROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > MergeUVRow_ = MergeUVRow_Any_SSE2; >@@ -465,6 +539,14 @@ int ARGBToNV21(const uint8_t* src_argb, > MergeUVRow_ = MergeUVRow_MSA; > } > } >+#endif >+#if defined(HAS_MERGEUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MergeUVRow_ = MergeUVRow_Any_MMI; >+ if (IS_ALIGNED(halfwidth, 8)) { >+ MergeUVRow_ = MergeUVRow_MMI; >+ } >+ } > #endif > { > // Allocate a rows of uv. >@@ -575,6 +657,22 @@ int ARGBToYUY2(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_I422TOYUY2ROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; >@@ -607,6 +705,14 @@ int ARGBToYUY2(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_I422TOYUY2ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ I422ToYUY2Row = I422ToYUY2Row_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ I422ToYUY2Row = I422ToYUY2Row_MMI; >+ } >+ } >+#endif > > { > // Allocate a rows of yuv. 
>@@ -712,6 +818,22 @@ int ARGBToUYVY(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTOUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVRow = ARGBToUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVRow = ARGBToUVRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_I422TOUYVYROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; >@@ -744,6 +866,14 @@ int ARGBToUYVY(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_I422TOUYVYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ I422ToUYVYRow = I422ToUYVYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ I422ToUYVYRow = I422ToUYVYRow_MMI; >+ } >+ } >+#endif > > { > // Allocate a rows of yuv. >@@ -821,6 +951,14 @@ int ARGBToI400(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYRow = ARGBToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYRow = ARGBToYRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToYRow(src_argb, dst_y, width); >@@ -879,6 +1017,22 @@ int ARGBToRGB24(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTORGB24ROW_AVX2) >+ if (TestCpuFlag(kCpuHasAVX2)) { >+ ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; >+ if (IS_ALIGNED(width, 32)) { >+ ARGBToRGB24Row = ARGBToRGB24Row_AVX2; >+ } >+ } >+#endif >+#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) >+ if (TestCpuFlag(kCpuHasAVX512VBMI)) { >+ ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; >+ if (IS_ALIGNED(width, 32)) { >+ ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; >+ } >+ } >+#endif > #if defined(HAS_ARGBTORGB24ROW_NEON) > if (TestCpuFlag(kCpuHasNEON)) { > ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; >@@ -895,6 +1049,14 @@ int ARGBToRGB24(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTORGB24ROW_MMI) >+ if 
(TestCpuFlag(kCpuHasMMI)) { >+ ARGBToRGB24Row = ARGBToRGB24Row_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGBToRGB24Row = ARGBToRGB24Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToRGB24Row(src_argb, dst_rgb24, width); >@@ -937,6 +1099,14 @@ int ARGBToRAW(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTORAWROW_AVX2) >+ if (TestCpuFlag(kCpuHasAVX2)) { >+ ARGBToRAWRow = ARGBToRAWRow_Any_AVX2; >+ if (IS_ALIGNED(width, 32)) { >+ ARGBToRAWRow = ARGBToRAWRow_AVX2; >+ } >+ } >+#endif > #if defined(HAS_ARGBTORAWROW_NEON) > if (TestCpuFlag(kCpuHasNEON)) { > ARGBToRAWRow = ARGBToRAWRow_Any_NEON; >@@ -953,6 +1123,14 @@ int ARGBToRAW(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTORAWROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToRAWRow = ARGBToRAWRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGBToRAWRow = ARGBToRAWRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToRAWRow(src_argb, dst_raw, width); >@@ -1023,11 +1201,19 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTORGB565DITHERROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToRGB565DitherRow(src_argb, dst_rgb565, >- *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT >- width); /* NOLINT */ >+ *(const uint32_t*)(dither4x4 + ((y & 3) << 2)), >+ width); > src_argb += src_stride_argb; > dst_rgb565 += dst_stride_rgb565; > } >@@ -1092,6 +1278,14 @@ int ARGBToRGB565(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTORGB565ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToRGB565Row = ARGBToRGB565Row_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGBToRGB565Row = ARGBToRGB565Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToRGB565Row(src_argb, dst_rgb565, width); >@@ -1158,6 
+1352,14 @@ int ARGBToARGB1555(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOARGB1555ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGBToARGB1555Row = ARGBToARGB1555Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToARGB1555Row(src_argb, dst_argb1555, width); >@@ -1224,6 +1426,14 @@ int ARGBToARGB4444(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOARGB4444ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ ARGBToARGB4444Row = ARGBToARGB4444Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToARGB4444Row(src_argb, dst_argb4444, width); >@@ -1400,6 +1610,14 @@ int ARGBToJ420(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYJROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYJRow = ARGBToYJRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYJRow = ARGBToYJRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_ARGBTOUVJROW_MSA) > if (TestCpuFlag(kCpuHasMSA)) { > ARGBToUVJRow = ARGBToUVJRow_Any_MSA; >@@ -1408,6 +1626,14 @@ int ARGBToJ420(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOUVJROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVJRow = ARGBToUVJRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVJRow = ARGBToUVJRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height - 1; y += 2) { > ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); >@@ -1501,6 +1727,14 @@ int ARGBToJ422(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYJROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYJRow = ARGBToYJRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYJRow = ARGBToYJRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_ARGBTOUVJROW_MSA) > if (TestCpuFlag(kCpuHasMSA)) { > ARGBToUVJRow = ARGBToUVJRow_Any_MSA; >@@ -1509,6 +1743,14 @@ int ARGBToJ422(const uint8_t* src_argb, > } > } > #endif 
>+#if defined(HAS_ARGBTOUVJROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToUVJRow = ARGBToUVJRow_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ ARGBToUVJRow = ARGBToUVJRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); >@@ -1578,6 +1820,14 @@ int ARGBToJ400(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYJROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYJRow = ARGBToYJRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYJRow = ARGBToYJRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBToYJRow(src_argb, dst_yj, width); >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_jpeg.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_jpeg.cc >index ae3cc18cd247c2fe976ef7d7ebe48b8da67a6727..56a95c5770787accf1e76f66b0e9e3e5bd385133 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_jpeg.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_jpeg.cc >@@ -190,7 +190,144 @@ int MJPGToI420(const uint8_t* sample, > return ret ? 
0 : 1; > } > >-#ifdef HAVE_JPEG >+struct NV21Buffers { >+ uint8_t* y; >+ int y_stride; >+ uint8_t* vu; >+ int vu_stride; >+ int w; >+ int h; >+}; >+ >+static void JpegI420ToNV21(void* opaque, >+ const uint8_t* const* data, >+ const int* strides, >+ int rows) { >+ NV21Buffers* dest = (NV21Buffers*)(opaque); >+ I420ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2], >+ dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows); >+ dest->y += rows * dest->y_stride; >+ dest->vu += ((rows + 1) >> 1) * dest->vu_stride; >+ dest->h -= rows; >+} >+ >+static void JpegI422ToNV21(void* opaque, >+ const uint8_t* const* data, >+ const int* strides, >+ int rows) { >+ NV21Buffers* dest = (NV21Buffers*)(opaque); >+ I422ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2], >+ dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows); >+ dest->y += rows * dest->y_stride; >+ dest->vu += ((rows + 1) >> 1) * dest->vu_stride; >+ dest->h -= rows; >+} >+ >+static void JpegI444ToNV21(void* opaque, >+ const uint8_t* const* data, >+ const int* strides, >+ int rows) { >+ NV21Buffers* dest = (NV21Buffers*)(opaque); >+ I444ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2], >+ dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows); >+ dest->y += rows * dest->y_stride; >+ dest->vu += ((rows + 1) >> 1) * dest->vu_stride; >+ dest->h -= rows; >+} >+ >+static void JpegI400ToNV21(void* opaque, >+ const uint8_t* const* data, >+ const int* strides, >+ int rows) { >+ NV21Buffers* dest = (NV21Buffers*)(opaque); >+ I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu, >+ dest->vu_stride, dest->w, rows); >+ dest->y += rows * dest->y_stride; >+ dest->vu += ((rows + 1) >> 1) * dest->vu_stride; >+ dest->h -= rows; >+} >+ >+// MJPG (Motion JPeg) to NV21 >+LIBYUV_API >+int MJPGToNV21(const uint8_t* sample, >+ size_t sample_size, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int 
src_width, >+ int src_height, >+ int dst_width, >+ int dst_height) { >+ if (sample_size == kUnknownDataSize) { >+ // ERROR: MJPEG frame size unknown >+ return -1; >+ } >+ >+ // TODO(fbarchard): Port MJpeg to C. >+ MJpegDecoder mjpeg_decoder; >+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); >+ if (ret && (mjpeg_decoder.GetWidth() != src_width || >+ mjpeg_decoder.GetHeight() != src_height)) { >+ // ERROR: MJPEG frame has unexpected dimensions >+ mjpeg_decoder.UnloadFrame(); >+ return 1; // runtime failure >+ } >+ if (ret) { >+ NV21Buffers bufs = {dst_y, dst_stride_y, dst_vu, >+ dst_stride_vu, dst_width, dst_height}; >+ // YUV420 >+ if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && >+ mjpeg_decoder.GetNumComponents() == 3 && >+ mjpeg_decoder.GetVertSampFactor(0) == 2 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 2 && >+ mjpeg_decoder.GetVertSampFactor(1) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(1) == 1 && >+ mjpeg_decoder.GetVertSampFactor(2) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(2) == 1) { >+ ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToNV21, &bufs, dst_width, >+ dst_height); >+ // YUV422 >+ } else if (mjpeg_decoder.GetColorSpace() == >+ MJpegDecoder::kColorSpaceYCbCr && >+ mjpeg_decoder.GetNumComponents() == 3 && >+ mjpeg_decoder.GetVertSampFactor(0) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 2 && >+ mjpeg_decoder.GetVertSampFactor(1) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(1) == 1 && >+ mjpeg_decoder.GetVertSampFactor(2) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(2) == 1) { >+ ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToNV21, &bufs, dst_width, >+ dst_height); >+ // YUV444 >+ } else if (mjpeg_decoder.GetColorSpace() == >+ MJpegDecoder::kColorSpaceYCbCr && >+ mjpeg_decoder.GetNumComponents() == 3 && >+ mjpeg_decoder.GetVertSampFactor(0) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 1 && >+ mjpeg_decoder.GetVertSampFactor(1) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(1) == 1 && >+ 
mjpeg_decoder.GetVertSampFactor(2) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(2) == 1) { >+ ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToNV21, &bufs, dst_width, >+ dst_height); >+ // YUV400 >+ } else if (mjpeg_decoder.GetColorSpace() == >+ MJpegDecoder::kColorSpaceGrayscale && >+ mjpeg_decoder.GetNumComponents() == 1 && >+ mjpeg_decoder.GetVertSampFactor(0) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 1) { >+ ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToNV21, &bufs, dst_width, >+ dst_height); >+ } else { >+ // Unknown colorspace. >+ mjpeg_decoder.UnloadFrame(); >+ return 1; >+ } >+ } >+ return ret ? 0 : 1; >+} >+ > struct ARGBBuffers { > uint8_t* argb; > int argb_stride; >@@ -322,9 +459,8 @@ int MJPGToARGB(const uint8_t* sample, > } > return ret ? 0 : 1; > } >-#endif > >-#endif >+#endif // HAVE_JPEG > > #ifdef __cplusplus > } // extern "C" >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_argb.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_argb.cc >index 677e5d56fcc98481af504b10a2708435a6cebf9d..bde1aa8891bc87a20182c0f31dde1ee4c50505fe 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_argb.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_argb.cc >@@ -28,6 +28,14 @@ extern "C" { > // src_height is used to compute location of planes, and indicate inversion > // sample_size is measured in bytes and is the size of the frame. > // With MJPEG it is the compressed size of the frame. 
>+ >+// TODO(fbarchard): Add the following: >+// H010ToARGB >+// I010ToARGB >+// J400ToARGB >+// J422ToARGB >+// J444ToARGB >+ > LIBYUV_API > int ConvertToARGB(const uint8_t* sample, > size_t sample_size, >@@ -123,6 +131,16 @@ int ConvertToARGB(const uint8_t* sample, > r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, > inv_crop_height); > break; >+ case FOURCC_AR30: >+ src = sample + (src_width * crop_y + crop_x) * 4; >+ r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, >+ inv_crop_height); >+ break; >+ case FOURCC_AB30: >+ src = sample + (src_width * crop_y + crop_x) * 4; >+ r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, >+ inv_crop_height); >+ break; > case FOURCC_RGBP: > src = sample + (src_width * crop_y + crop_x) * 2; > r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, >@@ -147,13 +165,15 @@ int ConvertToARGB(const uint8_t* sample, > // Biplanar formats > case FOURCC_NV12: > src = sample + (src_width * crop_y + crop_x); >- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; >+ src_uv = >+ sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; > r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, > dst_stride_argb, crop_width, inv_crop_height); > break; > case FOURCC_NV21: > src = sample + (src_width * crop_y + crop_x); >- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; >+ src_uv = >+ sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; > // Call NV12 but with u and v parameters swapped. 
> r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, > dst_stride_argb, crop_width, inv_crop_height); >@@ -163,6 +183,7 @@ int ConvertToARGB(const uint8_t* sample, > r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, > inv_crop_height); > break; >+ > // Triplanar formats > case FOURCC_I420: > case FOURCC_YV12: { >@@ -187,16 +208,27 @@ int ConvertToARGB(const uint8_t* sample, > break; > } > >- case FOURCC_J420: { >+ case FOURCC_H420: { >+ int halfwidth = (src_width + 1) / 2; >+ int halfheight = (abs_src_height + 1) / 2; > const uint8_t* src_y = sample + (src_width * crop_y + crop_x); >- const uint8_t* src_u; >- const uint8_t* src_v; >+ const uint8_t* src_u = sample + src_width * abs_src_height + >+ (halfwidth * crop_y + crop_x) / 2; >+ const uint8_t* src_v = sample + src_width * abs_src_height + >+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2; >+ r = H420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, >+ dst_argb, dst_stride_argb, crop_width, inv_crop_height); >+ break; >+ } >+ >+ case FOURCC_J420: { > int halfwidth = (src_width + 1) / 2; > int halfheight = (abs_src_height + 1) / 2; >- src_u = sample + src_width * abs_src_height + >- (halfwidth * crop_y + crop_x) / 2; >- src_v = sample + src_width * abs_src_height + >- halfwidth * (halfheight + crop_y / 2) + crop_x / 2; >+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x); >+ const uint8_t* src_u = sample + src_width * abs_src_height + >+ (halfwidth * crop_y + crop_x) / 2; >+ const uint8_t* src_v = sample + src_width * abs_src_height + >+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2; > r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, > dst_argb, dst_stride_argb, crop_width, inv_crop_height); > break; >@@ -204,10 +236,10 @@ int ConvertToARGB(const uint8_t* sample, > > case FOURCC_I422: > case FOURCC_YV16: { >+ int halfwidth = (src_width + 1) / 2; > const uint8_t* src_y = sample + src_width * crop_y + crop_x; > const uint8_t* 
src_u; > const uint8_t* src_v; >- int halfwidth = (src_width + 1) / 2; > if (format == FOURCC_YV16) { > src_v = sample + src_width * abs_src_height + halfwidth * crop_y + > crop_x / 2; >@@ -223,6 +255,19 @@ int ConvertToARGB(const uint8_t* sample, > dst_argb, dst_stride_argb, crop_width, inv_crop_height); > break; > } >+ >+ case FOURCC_H422: { >+ int halfwidth = (src_width + 1) / 2; >+ const uint8_t* src_y = sample + src_width * crop_y + crop_x; >+ const uint8_t* src_u = >+ sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; >+ const uint8_t* src_v = sample + src_width * abs_src_height + >+ halfwidth * (abs_src_height + crop_y) + crop_x / 2; >+ r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, >+ dst_argb, dst_stride_argb, crop_width, inv_crop_height); >+ break; >+ } >+ > case FOURCC_I444: > case FOURCC_YV24: { > const uint8_t* src_y = sample + src_width * crop_y + crop_x; >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_i420.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_i420.cc >index 1bed9d6440db92a696b2167c88873963e4c29f05..584be0ac33aded539ebd4ea26fa43b002522e2dd 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_i420.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/convert_to_i420.cc >@@ -155,6 +155,7 @@ int ConvertToI420(const uint8_t* sample, > dst_stride_u, dst_v, dst_stride_v, crop_width, > inv_crop_height); > break; >+ // TODO(fbarchard): Add AR30 and AB30 > case FOURCC_I400: > src = sample + src_width * crop_y + crop_x; > r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, >@@ -163,7 +164,7 @@ int ConvertToI420(const uint8_t* sample, > // Biplanar formats > case FOURCC_NV12: > src = sample + (src_width * crop_y + crop_x); >- src_uv = sample + (src_width * src_height) + >+ src_uv = sample + (src_width * abs_src_height) + > ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 
2); > r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, > dst_stride_y, dst_u, dst_stride_u, dst_v, >@@ -171,7 +172,7 @@ int ConvertToI420(const uint8_t* sample, > break; > case FOURCC_NV21: > src = sample + (src_width * crop_y + crop_x); >- src_uv = sample + (src_width * src_height) + >+ src_uv = sample + (src_width * abs_src_height) + > ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); > // Call NV12 but with dst_u and dst_v parameters swapped. > r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, >@@ -192,15 +193,15 @@ int ConvertToI420(const uint8_t* sample, > int halfwidth = (src_width + 1) / 2; > int halfheight = (abs_src_height + 1) / 2; > if (format == FOURCC_YV12) { >- src_v = sample + src_width * abs_src_height + >- (halfwidth * crop_y + crop_x) / 2; >+ src_v = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) + >+ (crop_x / 2); > src_u = sample + src_width * abs_src_height + >- halfwidth * (halfheight + crop_y / 2) + crop_x / 2; >+ halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2); > } else { >- src_u = sample + src_width * abs_src_height + >- (halfwidth * crop_y + crop_x) / 2; >+ src_u = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) + >+ (crop_x / 2); > src_v = sample + src_width * abs_src_height + >- halfwidth * (halfheight + crop_y / 2) + crop_x / 2; >+ halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2); > } > r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, > dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, >@@ -215,14 +216,14 @@ int ConvertToI420(const uint8_t* sample, > int halfwidth = (src_width + 1) / 2; > if (format == FOURCC_YV16) { > src_v = sample + src_width * abs_src_height + halfwidth * crop_y + >- crop_x / 2; >+ (crop_x / 2); > src_u = sample + src_width * abs_src_height + >- halfwidth * (abs_src_height + crop_y) + crop_x / 2; >+ halfwidth * (abs_src_height + crop_y) + (crop_x / 2); > } else { > src_u = sample + src_width * 
abs_src_height + halfwidth * crop_y + >- crop_x / 2; >+ (crop_x / 2); > src_v = sample + src_width * abs_src_height + >- halfwidth * (abs_src_height + crop_y) + crop_x / 2; >+ halfwidth * (abs_src_height + crop_y) + (crop_x / 2); > } > r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, > dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/cpu_id.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/cpu_id.cc >index 446aad12078b4e4a963ff58335efb8018c32c713..48e2b61526d1325136d738425b1f34993d7352d9 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/cpu_id.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/cpu_id.cc >@@ -19,10 +19,6 @@ > #include <immintrin.h> // For _xgetbv() > #endif > >-#if !defined(__native_client__) >-#include <stdlib.h> // For getenv() >-#endif >- > // For ArmCpuCaps() but unittested on all platforms > #include <stdio.h> > #include <string.h> >@@ -177,6 +173,9 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, > if (strcmp(ase, " msa") == 0) { > return kCpuHasMSA; > } >+ if (strcmp(ase, " mmi") == 0) { >+ return kCpuHasMMI; >+ } > return 0; > } > while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { >@@ -189,31 +188,21 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, > } > return 0; > } >+ } else if (memcmp(cpuinfo_line, "cpu model", 9) == 0) { >+ char* p = strstr(cpuinfo_line, "Loongson-3"); >+ if (p) { >+ fclose(f); >+ if (strcmp(ase, " mmi") == 0) { >+ return kCpuHasMMI; >+ } >+ return 0; >+ } > } > } > fclose(f); > return 0; > } > >-// Test environment variable for disabling CPU features. Any non-zero value >-// to disable. Zero ignored to make it easy to set the variable on/off. 
>-#if !defined(__native_client__) && !defined(_M_ARM) >- >-static LIBYUV_BOOL TestEnv(const char* name) { >- const char* var = getenv(name); >- if (var) { >- if (var[0] != '0') { >- return LIBYUV_TRUE; >- } >- } >- return LIBYUV_FALSE; >-} >-#else // nacl does not support getenv(). >-static LIBYUV_BOOL TestEnv(const char*) { >- return LIBYUV_FALSE; >-} >-#endif >- > static SAFEBUFFERS int GetCpuFlags(void) { > int cpu_info = 0; > #if !defined(__pnacl__) && !defined(__CLR_VER) && \ >@@ -251,52 +240,14 @@ static SAFEBUFFERS int GetCpuFlags(void) { > cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0; > } > } >- >- // TODO(fbarchard): Consider moving these to gtest >- // Environment variable overrides for testing. >- if (TestEnv("LIBYUV_DISABLE_X86")) { >- cpu_info &= ~kCpuHasX86; >- } >- if (TestEnv("LIBYUV_DISABLE_SSE2")) { >- cpu_info &= ~kCpuHasSSE2; >- } >- if (TestEnv("LIBYUV_DISABLE_SSSE3")) { >- cpu_info &= ~kCpuHasSSSE3; >- } >- if (TestEnv("LIBYUV_DISABLE_SSE41")) { >- cpu_info &= ~kCpuHasSSE41; >- } >- if (TestEnv("LIBYUV_DISABLE_SSE42")) { >- cpu_info &= ~kCpuHasSSE42; >- } >- if (TestEnv("LIBYUV_DISABLE_AVX")) { >- cpu_info &= ~kCpuHasAVX; >- } >- if (TestEnv("LIBYUV_DISABLE_AVX2")) { >- cpu_info &= ~kCpuHasAVX2; >- } >- if (TestEnv("LIBYUV_DISABLE_ERMS")) { >- cpu_info &= ~kCpuHasERMS; >- } >- if (TestEnv("LIBYUV_DISABLE_FMA3")) { >- cpu_info &= ~kCpuHasFMA3; >- } >- if (TestEnv("LIBYUV_DISABLE_F16C")) { >- cpu_info &= ~kCpuHasF16C; >- } >- if (TestEnv("LIBYUV_DISABLE_AVX512BW")) { >- cpu_info &= ~kCpuHasAVX512BW; >- } >- > #endif > #if defined(__mips__) && defined(__linux__) > #if defined(__mips_msa) > cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); >+#elif defined(_MIPS_ARCH_LOONGSON3A) >+ cpu_info = MipsCpuCaps("/proc/cpuinfo", " mmi"); > #endif > cpu_info |= kCpuHasMIPS; >- if (getenv("LIBYUV_DISABLE_MSA")) { >- cpu_info &= ~kCpuHasMSA; >- } > #endif > #if defined(__arm__) || defined(__aarch64__) > // gcc -mfpu=neon defines __ARM_NEON__ >@@ 
-315,13 +266,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { > cpu_info = ArmCpuCaps("/proc/cpuinfo"); > #endif > cpu_info |= kCpuHasARM; >- if (TestEnv("LIBYUV_DISABLE_NEON")) { >- cpu_info &= ~kCpuHasNEON; >- } > #endif // __arm__ >- if (TestEnv("LIBYUV_DISABLE_ASM")) { >- cpu_info = 0; >- } > cpu_info |= kCpuInitialized; > return cpu_info; > } >@@ -330,11 +275,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { > LIBYUV_API > int MaskCpuFlags(int enable_flags) { > int cpu_info = GetCpuFlags() & enable_flags; >-#ifdef __ATOMIC_RELAXED >- __atomic_store_n(&cpu_info_, cpu_info, __ATOMIC_RELAXED); >-#else >- cpu_info_ = cpu_info; >-#endif >+ SetCpuFlags(cpu_info); > return cpu_info; > } > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/planar_functions.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/planar_functions.cc >index 5eae3f763a7290aa83b0031aa6725bbb367a6f18..b49bf0a0b4b6e34cf8464db6e4ce1da3318a43ec 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/planar_functions.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/planar_functions.cc >@@ -410,6 +410,14 @@ void SplitUVPlane(const uint8_t* src_uv, > } > } > #endif >+#if defined(HAS_SPLITUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SplitUVRow = SplitUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ SplitUVRow = SplitUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > // Copy a row of UV. >@@ -478,6 +486,14 @@ void MergeUVPlane(const uint8_t* src_u, > } > } > #endif >+#if defined(HAS_MERGEUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MergeUVRow = MergeUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ MergeUVRow = MergeUVRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > // Merge a row of U and V into a row of UV. 
>@@ -537,6 +553,14 @@ void SplitRGBPlane(const uint8_t* src_rgb, > } > } > #endif >+#if defined(HAS_SPLITRGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SplitRGBRow = SplitRGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ SplitRGBRow = SplitRGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > // Copy a row of RGB. >@@ -593,6 +617,14 @@ void MergeRGBPlane(const uint8_t* src_r, > } > } > #endif >+#if defined(HAS_MERGERGBROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MergeRGBRow = MergeRGBRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ MergeRGBRow = MergeRGBRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > // Merge a row of U and V into a row of RGB. >@@ -651,6 +683,14 @@ void MirrorPlane(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_MIRRORROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MirrorRow = MirrorRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ MirrorRow = MirrorRow_MMI; >+ } >+ } >+#endif > > // Mirror plane > for (y = 0; y < height; ++y) { >@@ -734,6 +774,16 @@ int YUY2ToI422(const uint8_t* src_yuy2, > } > } > #endif >+#if defined(HAS_YUY2TOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ YUY2ToYRow = YUY2ToYRow_Any_MMI; >+ YUY2ToUV422Row = YUY2ToUV422Row_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ YUY2ToYRow = YUY2ToYRow_MMI; >+ YUY2ToUV422Row = YUY2ToUV422Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); >@@ -820,6 +870,16 @@ int UYVYToI422(const uint8_t* src_uyvy, > } > } > #endif >+#if defined(HAS_UYVYTOYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ UYVYToYRow = UYVYToYRow_Any_MMI; >+ UYVYToUV422Row = UYVYToUV422Row_Any_MMI; >+ if (IS_ALIGNED(width, 16)) { >+ UYVYToYRow = UYVYToYRow_MMI; >+ UYVYToUV422Row = UYVYToUV422Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); >@@ -890,6 +950,14 @@ int YUY2ToY(const uint8_t* src_yuy2, > } > } > #endif >+#if defined(HAS_YUY2TOYROW_MMI) >+ if 
(TestCpuFlag(kCpuHasMMI)) { >+ YUY2ToYRow = YUY2ToYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ YUY2ToYRow = YUY2ToYRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > YUY2ToYRow(src_yuy2, dst_y, width); >@@ -1015,6 +1083,14 @@ int ARGBMirror(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBMIRRORROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBMirrorRow = ARGBMirrorRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBMirrorRow = ARGBMirrorRow_MMI; >+ } >+ } >+#endif > > // Mirror plane > for (y = 0; y < height; ++y) { >@@ -1047,6 +1123,11 @@ ARGBBlendRow GetARGBBlend() { > if (TestCpuFlag(kCpuHasMSA)) { > ARGBBlendRow = ARGBBlendRow_MSA; > } >+#endif >+#if defined(HAS_ARGBBLENDROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBBlendRow = ARGBBlendRow_MMI; >+ } > #endif > return ARGBBlendRow; > } >@@ -1140,6 +1221,14 @@ int BlendPlane(const uint8_t* src_y0, > } > } > #endif >+#if defined(HAS_BLENDPLANEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ BlendPlaneRow = BlendPlaneRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ BlendPlaneRow = BlendPlaneRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width); >@@ -1215,6 +1304,14 @@ int I420Blend(const uint8_t* src_y0, > BlendPlaneRow = BlendPlaneRow_AVX2; > } > } >+#endif >+#if defined(HAS_BLENDPLANEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ BlendPlaneRow = BlendPlaneRow_Any_MMI; >+ if (IS_ALIGNED(halfwidth, 8)) { >+ BlendPlaneRow = BlendPlaneRow_MMI; >+ } >+ } > #endif > if (!IS_ALIGNED(width, 2)) { > ScaleRowDown2 = ScaleRowDown2Box_Odd_C; >@@ -1252,6 +1349,17 @@ int I420Blend(const uint8_t* src_y0, > } > } > #endif >+#if defined(HAS_SCALEROWDOWN2_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleRowDown2 = ScaleRowDown2Box_Odd_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ScaleRowDown2 = ScaleRowDown2Box_Any_MMI; >+ if (IS_ALIGNED(halfwidth, 8)) { >+ ScaleRowDown2 = ScaleRowDown2Box_MMI; >+ } >+ } >+ } >+#endif > > // Row buffer for 
intermediate alpha pixels. > align_buffer_64(halfalpha, halfwidth); >@@ -1337,6 +1445,14 @@ int ARGBMultiply(const uint8_t* src_argb0, > } > } > #endif >+#if defined(HAS_ARGBMULTIPLYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBMultiplyRow = ARGBMultiplyRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBMultiplyRow = ARGBMultiplyRow_MMI; >+ } >+ } >+#endif > > // Multiply plane > for (y = 0; y < height; ++y) { >@@ -1414,6 +1530,14 @@ int ARGBAdd(const uint8_t* src_argb0, > } > } > #endif >+#if defined(HAS_ARGBADDROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBAddRow = ARGBAddRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBAddRow = ARGBAddRow_MMI; >+ } >+ } >+#endif > > // Add plane > for (y = 0; y < height; ++y) { >@@ -1486,6 +1610,14 @@ int ARGBSubtract(const uint8_t* src_argb0, > } > } > #endif >+#if defined(HAS_ARGBSUBTRACTROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBSubtractRow = ARGBSubtractRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBSubtractRow = ARGBSubtractRow_MMI; >+ } >+ } >+#endif > > // Subtract plane > for (y = 0; y < height; ++y) { >@@ -1718,6 +1850,14 @@ int RAWToRGB24(const uint8_t* src_raw, > } > } > #endif >+#if defined(HAS_RAWTORGB24ROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ RAWToRGB24Row = RAWToRGB24Row_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ RAWToRGB24Row = RAWToRGB24Row_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > RAWToRGB24Row(src_raw, dst_rgb24, width); >@@ -1939,6 +2079,14 @@ int ARGBAttenuate(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBATTENUATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBAttenuateRow = ARGBAttenuateRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBAttenuateRow(src_argb, dst_argb, width); >@@ -2039,6 +2187,11 @@ int ARGBGrayTo(const uint8_t* src_argb, > ARGBGrayRow = ARGBGrayRow_MSA; > } > #endif >+#if defined(HAS_ARGBGRAYROW_MMI) >+ if 
(TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { >+ ARGBGrayRow = ARGBGrayRow_MMI; >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBGrayRow(src_argb, dst_argb, width); >@@ -2084,6 +2237,11 @@ int ARGBGray(uint8_t* dst_argb, > ARGBGrayRow = ARGBGrayRow_MSA; > } > #endif >+#if defined(HAS_ARGBGRAYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { >+ ARGBGrayRow = ARGBGrayRow_MMI; >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBGrayRow(dst, dst, width); >@@ -2127,6 +2285,11 @@ int ARGBSepia(uint8_t* dst_argb, > ARGBSepiaRow = ARGBSepiaRow_MSA; > } > #endif >+#if defined(HAS_ARGBSEPIAROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { >+ ARGBSepiaRow = ARGBSepiaRow_MMI; >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBSepiaRow(dst, width); >@@ -2177,6 +2340,11 @@ int ARGBColorMatrix(const uint8_t* src_argb, > if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { > ARGBColorMatrixRow = ARGBColorMatrixRow_MSA; > } >+#endif >+#if defined(HAS_ARGBCOLORMATRIXROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { >+ ARGBColorMatrixRow = ARGBColorMatrixRow_MMI; >+ } > #endif > for (y = 0; y < height; ++y) { > ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); >@@ -2372,6 +2540,12 @@ int ARGBComputeCumulativeSum(const uint8_t* src_argb, > ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; > } > #endif >+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI; >+ } >+#endif >+ > memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. 
> for (y = 0; y < height; ++y) { > ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); >@@ -2429,6 +2603,11 @@ int ARGBBlur(const uint8_t* src_argb, > ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; > CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; > } >+#endif >+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI; >+ } > #endif > // Compute enough CumulativeSum for first row to be blurred. After this > // one row of CumulativeSum is updated at a time. >@@ -2536,6 +2715,11 @@ int ARGBShade(const uint8_t* src_argb, > ARGBShadeRow = ARGBShadeRow_MSA; > } > #endif >+#if defined(HAS_ARGBSHADEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { >+ ARGBShadeRow = ARGBShadeRow_MMI; >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBShadeRow(src_argb, dst_argb, width, value); >@@ -2607,6 +2791,14 @@ int InterpolatePlane(const uint8_t* src0, > } > } > #endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > InterpolateRow(dst, src0, src1 - src0, width, interpolation); >@@ -2730,6 +2922,14 @@ int ARGBShuffle(const uint8_t* src_bgra, > } > } > #endif >+#if defined(HAS_ARGBSHUFFLEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBShuffleRow = ARGBShuffleRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBShuffleRow = ARGBShuffleRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); >@@ -2801,6 +3001,14 @@ static int ARGBSobelize(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBTOYJROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBToYJRow = ARGBToYJRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBToYJRow = ARGBToYJRow_MMI; >+ } >+ } >+#endif > > #if 
defined(HAS_SOBELYROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { >@@ -2817,6 +3025,11 @@ static int ARGBSobelize(const uint8_t* src_argb, > SobelYRow = SobelYRow_MSA; > } > #endif >+#if defined(HAS_SOBELYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SobelYRow = SobelYRow_MMI; >+ } >+#endif > #if defined(HAS_SOBELXROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > SobelXRow = SobelXRow_SSE2; >@@ -2831,6 +3044,11 @@ static int ARGBSobelize(const uint8_t* src_argb, > if (TestCpuFlag(kCpuHasMSA)) { > SobelXRow = SobelXRow_MSA; > } >+#endif >+#if defined(HAS_SOBELXROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SobelXRow = SobelXRow_MMI; >+ } > #endif > { > // 3 rows with edges before/after. >@@ -2913,6 +3131,14 @@ int ARGBSobel(const uint8_t* src_argb, > SobelRow = SobelRow_MSA; > } > } >+#endif >+#if defined(HAS_SOBELROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SobelRow = SobelRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ SobelRow = SobelRow_MMI; >+ } >+ } > #endif > return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, > width, height, SobelRow); >@@ -2951,6 +3177,14 @@ int ARGBSobelToPlane(const uint8_t* src_argb, > SobelToPlaneRow = SobelToPlaneRow_MSA; > } > } >+#endif >+#if defined(HAS_SOBELTOPLANEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SobelToPlaneRow = SobelToPlaneRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ SobelToPlaneRow = SobelToPlaneRow_MMI; >+ } >+ } > #endif > return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width, > height, SobelToPlaneRow); >@@ -2990,6 +3224,14 @@ int ARGBSobelXY(const uint8_t* src_argb, > SobelXYRow = SobelXYRow_MSA; > } > } >+#endif >+#if defined(HAS_SOBELXYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SobelXYRow = SobelXYRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ SobelXYRow = SobelXYRow_MMI; >+ } >+ } > #endif > return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, > width, height, SobelXYRow); >@@ -3228,6 +3470,14 @@ int ARGBCopyAlpha(const uint8_t* src_argb, > } > } > 
#endif >+#if defined(HAS_ARGBCOPYALPHAROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBCopyAlphaRow(src_argb, dst_argb, width); >@@ -3286,6 +3536,12 @@ int ARGBExtractAlpha(const uint8_t* src_argb, > : ARGBExtractAlphaRow_Any_MSA; > } > #endif >+#if defined(HAS_ARGBEXTRACTALPHAROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_MMI >+ : ARGBExtractAlphaRow_Any_MMI; >+ } >+#endif > > for (int y = 0; y < height; ++y) { > ARGBExtractAlphaRow(src_argb, dst_a, width); >@@ -3337,6 +3593,14 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, > } > } > #endif >+#if defined(HAS_ARGBCOPYYTOALPHAROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_MMI; >+ } >+ } >+#endif > > for (y = 0; y < height; ++y) { > ARGBCopyYToAlphaRow(src_y, dst_argb, width); >@@ -3406,6 +3670,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2, > } > } > #endif >+#if defined(HAS_SPLITUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SplitUVRow = SplitUVRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ SplitUVRow = SplitUVRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_INTERPOLATEROW_SSSE3) > if (TestCpuFlag(kCpuHasSSSE3)) { > InterpolateRow = InterpolateRow_Any_SSSE3; >@@ -3438,6 +3710,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2, > } > } > #endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } >+#endif > > { > int awidth = halfwidth * 2; >@@ -3522,6 +3802,14 @@ int UYVYToNV12(const uint8_t* src_uyvy, > } > } > #endif >+#if defined(HAS_SPLITUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ SplitUVRow = SplitUVRow_Any_MMI; >+ 
if (IS_ALIGNED(width, 8)) { >+ SplitUVRow = SplitUVRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_INTERPOLATEROW_SSSE3) > if (TestCpuFlag(kCpuHasSSSE3)) { > InterpolateRow = InterpolateRow_Any_SSSE3; >@@ -3554,6 +3842,14 @@ int UYVYToNV12(const uint8_t* src_uyvy, > } > } > #endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } >+#endif > > { > int awidth = halfwidth * 2; >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate.cc >index f2bed85b7552f250d419ec2889c1920ba540298a..f28a06d38a9756674edfcab0f68cd405e33bd4c3 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate.cc >@@ -49,6 +49,11 @@ void TransposePlane(const uint8_t* src, > } > } > #endif >+#if defined(HAS_TRANSPOSEWX8_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ TransposeWx8 = TransposeWx8_MMI; >+ } >+#endif > #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) > if (TestCpuFlag(kCpuHasSSSE3)) { > TransposeWx8 = TransposeWx8_Fast_Any_SSSE3; >@@ -166,6 +171,14 @@ void RotatePlane180(const uint8_t* src, > } > } > #endif >+#if defined(HAS_MIRRORROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ MirrorRow = MirrorRow_Any_MMI; >+ if (IS_ALIGNED(width, 8)) { >+ MirrorRow = MirrorRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_COPYROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; >@@ -186,6 +199,11 @@ void RotatePlane180(const uint8_t* src, > CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; > } > #endif >+#if defined(HAS_COPYROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ CopyRow = IS_ALIGNED(width, 8) ? 
CopyRow_MMI : CopyRow_Any_MMI; >+ } >+#endif > > // Odd height will harmlessly mirror the middle row twice. > for (y = 0; y < half_height; ++y) { >@@ -232,6 +250,14 @@ void TransposeUV(const uint8_t* src, > } > } > #endif >+#if defined(HAS_TRANSPOSEUVWX8_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ TransposeUVWx8 = TransposeUVWx8_Any_MMI; >+ if (IS_ALIGNED(width, 4)) { >+ TransposeUVWx8 = TransposeUVWx8_MMI; >+ } >+ } >+#endif > #if defined(HAS_TRANSPOSEUVWX16_MSA) > if (TestCpuFlag(kCpuHasMSA)) { > TransposeUVWx16 = TransposeUVWx16_Any_MSA; >@@ -331,6 +357,11 @@ void RotateUV180(const uint8_t* src, > MirrorUVRow = MirrorUVRow_MSA; > } > #endif >+#if defined(HAS_MIRRORUVROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 8)) { >+ MirrorUVRow = MirrorUVRow_MMI; >+ } >+#endif > > dst_a += dst_stride_a * (height - 1); > dst_b += dst_stride_b * (height - 1); >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_any.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_any.cc >index c2752e6222c02df6824aa8511fc3d0c5832de3a7..b3baf084d0c8281fa99801620a4724ece95235d6 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_any.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_any.cc >@@ -35,6 +35,9 @@ TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7) > #ifdef HAS_TRANSPOSEWX8_SSSE3 > TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) > #endif >+#ifdef HAS_TRANSPOSEWX8_MMI >+TANY(TransposeWx8_Any_MMI, TransposeWx8_MMI, 7) >+#endif > #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 > TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) > #endif >@@ -62,6 +65,9 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) > #ifdef HAS_TRANSPOSEUVWX8_SSE2 > TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) > #endif >+#ifdef HAS_TRANSPOSEUVWX8_MMI >+TUVANY(TransposeUVWx8_Any_MMI, TransposeUVWx8_MMI, 7) >+#endif > #ifdef HAS_TRANSPOSEUVWX16_MSA > 
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) > #endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_argb.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_argb.cc >index 5a6e05376f17690040900a2c49afb13344828f50..a93fd55f9214fa058c4bf53f9ae863a30a13596b 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_argb.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_argb.cc >@@ -56,6 +56,14 @@ static void ARGBTranspose(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MMI; >+ if (IS_ALIGNED(height, 4)) { // Width of dest. >+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_MMI; >+ } >+ } >+#endif > > for (i = 0; i < width; ++i) { // column of source to row of dest. > ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height); >@@ -142,6 +150,14 @@ void ARGBRotate180(const uint8_t* src_argb, > } > } > #endif >+#if defined(HAS_ARGBMIRRORROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ARGBMirrorRow = ARGBMirrorRow_Any_MMI; >+ if (IS_ALIGNED(width, 2)) { >+ ARGBMirrorRow = ARGBMirrorRow_MMI; >+ } >+ } >+#endif > #if defined(HAS_COPYROW_SSE2) > if (TestCpuFlag(kCpuHasSSE2)) { > CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_mmi.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_mmi.cc >new file mode 100644 >index 0000000000000000000000000000000000000000..f8de60834d9cab7a33e0d2ed5205a2ab00972e19 >--- /dev/null >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/rotate_mmi.cc >@@ -0,0 +1,291 @@ >+/* >+ * Copyright 2011 The LibYuv Project Authors. All rights reserved. 
>+ * >+ * Use of this source code is governed by a BSD-style license >+ * that can be found in the LICENSE file in the root of the source >+ * tree. An additional intellectual property rights grant can be found >+ * in the file PATENTS. All contributing project authors may >+ * be found in the AUTHORS file in the root of the source tree. >+ */ >+ >+#include "libyuv/rotate_row.h" >+#include "libyuv/row.h" >+ >+#ifdef __cplusplus >+namespace libyuv { >+extern "C" { >+#endif >+ >+// This module is for Mips MMI. >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+void TransposeWx8_MMI(const uint8_t* src, >+ int src_stride, >+ uint8_t* dst, >+ int dst_stride, >+ int width) { >+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; >+ uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13; >+ uint8_t* src_tmp = nullptr; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "ldc1 %[tmp12], 0x00(%[src]) \n\t" >+ "dadd %[src_tmp], %[src], %[src_stride] \n\t" >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp0 = (00 10 01 11 02 12 03 13) */ >+ "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" >+ /* tmp1 = (04 14 05 15 06 16 07 17) */ >+ "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" >+ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp2 = (20 30 21 31 22 32 23 33) */ >+ "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" >+ /* tmp3 = (24 34 25 35 26 36 27 37) */ >+ "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" >+ >+ /* tmp4 = (00 10 20 30 01 11 21 31) */ >+ "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t" >+ /* tmp5 = (02 12 22 32 03 13 23 33) */ >+ "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t" >+ /* tmp6 = (04 14 24 34 05 15 25 35) */ >+ "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t" >+ /* tmp7 = (06 16 26 36 07 17 27 37) */ >+ "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t" >+ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp12], 
0x00(%[src_tmp]) \n\t" >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp0 = (40 50 41 51 42 52 43 53) */ >+ "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" >+ /* tmp1 = (44 54 45 55 46 56 47 57) */ >+ "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" >+ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp2 = (60 70 61 71 62 72 63 73) */ >+ "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" >+ /* tmp3 = (64 74 65 75 66 76 67 77) */ >+ "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" >+ >+ /* tmp8 = (40 50 60 70 41 51 61 71) */ >+ "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t" >+ /* tmp9 = (42 52 62 72 43 53 63 73) */ >+ "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t" >+ /* tmp10 = (44 54 64 74 45 55 65 75) */ >+ "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t" >+ /* tmp11 = (46 56 66 76 47 57 67 77) */ >+ "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t" >+ >+ /* tmp0 = (00 10 20 30 40 50 60 70) */ >+ "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t" >+ /* tmp1 = (01 11 21 31 41 51 61 71) */ >+ "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" >+ >+ /* tmp0 = (02 12 22 32 42 52 62 72) */ >+ "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t" >+ /* tmp1 = (03 13 23 33 43 53 63 73) */ >+ "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" >+ >+ /* tmp0 = (04 14 24 34 44 54 64 74) */ >+ "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t" >+ /* tmp1 = (05 15 25 35 45 55 65 75) */ >+ "punpckhwd 
%[tmp1], %[tmp6], %[tmp10] \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" >+ >+ /* tmp0 = (06 16 26 36 46 56 66 76) */ >+ "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t" >+ /* tmp1 = (07 17 27 37 47 57 67 77) */ >+ "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" >+ >+ "dadd %[dst], %[dst], %[dst_stride] \n\t" >+ "daddi %[src], %[src], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ >+ : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), >+ [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5), >+ [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8), >+ [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11), >+ [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst), >+ [src_tmp] "+&r"(src_tmp) >+ : [src] "r"(src), [width] "r"(width), [src_stride] "r"(src_stride), >+ [dst_stride] "r"(dst_stride) >+ : "memory"); >+} >+ >+void TransposeUVWx8_MMI(const uint8_t* src, >+ int src_stride, >+ uint8_t* dst_a, >+ int dst_stride_a, >+ uint8_t* dst_b, >+ int dst_stride_b, >+ int width) { >+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; >+ uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13; >+ uint8_t* src_tmp = nullptr; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */ >+ "ldc1 %[tmp12], 0x00(%[src]) \n\t" >+ "dadd %[src_tmp], %[src], %[src_stride] \n\t" >+ /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */ >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */ >+ "punpcklbh %[tmp0], 
%[tmp12], %[tmp13] \n\t" >+ /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */ >+ "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" >+ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */ >+ "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */ >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */ >+ "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" >+ /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */ >+ "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" >+ >+ /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */ >+ "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t" >+ /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */ >+ "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t" >+ /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */ >+ "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t" >+ /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */ >+ "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t" >+ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */ >+ "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" >+ /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */ >+ "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" >+ /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */ >+ "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" >+ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */ >+ "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" >+ /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */ >+ "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" >+ "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" >+ >+ /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */ >+ "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" >+ /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */ >+ "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" >+ >+ 
/* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */ >+ "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t" >+ /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */ >+ "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t" >+ /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */ >+ "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t" >+ /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */ >+ "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t" >+ >+ /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */ >+ "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t" >+ /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */ >+ "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" >+ >+ /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */ >+ "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t" >+ /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */ >+ "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t" >+ "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" >+ "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" >+ >+ /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */ >+ "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t" >+ /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */ >+ "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t" >+ "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" >+ "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" >+ "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" >+ >+ /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */ >+ "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t" >+ /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */ >+ "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t" >+ "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" >+ "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" >+ "gssdrc1 %[tmp0], 
0x00(%[dst_a]) \n\t" >+ "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" >+ "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" >+ "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" >+ >+ "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" >+ "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" >+ "daddiu %[src], %[src], 0x08 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ >+ : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), >+ [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5), >+ [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8), >+ [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11), >+ [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a), >+ [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp) >+ : [src] "r"(src), [width] "r"(width), [dst_stride_a] "r"(dst_stride_a), >+ [dst_stride_b] "r"(dst_stride_b), [src_stride] "r"(src_stride) >+ : "memory"); >+} >+ >+#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+#ifdef __cplusplus >+} // extern "C" >+} // namespace libyuv >+#endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_any.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_any.cc >index cc5914dd29d0d0106177e583598b87053f6dc721..031a8f6490ef15e080d877855ab1aac1d7010697 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_any.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_any.cc >@@ -92,6 +92,9 @@ ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15) > #ifdef HAS_MERGERGBROW_NEON > ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15) > #endif >+#ifdef HAS_MERGERGBROW_MMI >+ANY31(MergeRGBRow_Any_MMI, MergeRGBRow_MMI, 0, 0, 3, 7) >+#endif > #ifdef HAS_I422TOYUY2ROW_SSE2 > ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) > ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) >@@ -106,44 +109,54 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 
15) > #ifdef HAS_I422TOYUY2ROW_MSA > ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31) > #endif >+#ifdef HAS_I422TOYUY2ROW_MMI >+ANY31(I422ToYUY2Row_Any_MMI, I422ToYUY2Row_MMI, 1, 1, 4, 7) >+#endif > #ifdef HAS_I422TOUYVYROW_NEON > ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) > #endif > #ifdef HAS_I422TOUYVYROW_MSA > ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31) > #endif >+#ifdef HAS_I422TOUYVYROW_MMI >+ANY31(I422ToUYVYRow_Any_MMI, I422ToUYVYRow_MMI, 1, 1, 4, 7) >+#endif > #ifdef HAS_BLENDPLANEROW_AVX2 > ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) > #endif > #ifdef HAS_BLENDPLANEROW_SSSE3 > ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) > #endif >+#ifdef HAS_BLENDPLANEROW_MMI >+ANY31(BlendPlaneRow_Any_MMI, BlendPlaneRow_MMI, 0, 0, 1, 7) >+#endif > #undef ANY31 > > // Note that odd width replication includes 444 due to implementation > // on arm that subsamples 444 to 422 internally. > // Any 3 planes to 1 with yuvconstants >-#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ >- void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ >- const uint8_t* v_buf, uint8_t* dst_ptr, \ >- const struct YuvConstants* yuvconstants, int width) { \ >- SIMD_ALIGNED(uint8_t temp[64 * 4]); \ >- memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ >- int r = width & MASK; \ >- int n = width & ~MASK; \ >- if (n > 0) { \ >- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ >- } \ >- memcpy(temp, y_buf + n, r); \ >- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ >- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ >- if (width & 1) { \ >- temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \ >- temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ >- } \ >- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, yuvconstants, MASK + 1); \ >- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ >- SS(r, DUVSHIFT) * BPP); \ >+#define 
ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ >+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ >+ const uint8_t* v_buf, uint8_t* dst_ptr, \ >+ const struct YuvConstants* yuvconstants, int width) { \ >+ SIMD_ALIGNED(uint8_t temp[128 * 4]); \ >+ memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \ >+ int r = width & MASK; \ >+ int n = width & ~MASK; \ >+ if (n > 0) { \ >+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ >+ } \ >+ memcpy(temp, y_buf + n, r); \ >+ memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ >+ memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ >+ if (width & 1) { \ >+ temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ >+ temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \ >+ } \ >+ ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \ >+ MASK + 1); \ >+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \ >+ SS(r, DUVSHIFT) * BPP); \ > } > > #ifdef HAS_I422TOARGBROW_SSSE3 >@@ -161,10 +174,10 @@ ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) > ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) > ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) > ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) >-ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) >+ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15) > #endif // HAS_I444TOARGBROW_SSSE3 > #ifdef HAS_I422TORGB24ROW_AVX2 >-ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) >+ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) > #endif > #ifdef HAS_I422TOARGBROW_AVX2 > ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) >@@ -270,6 +283,9 @@ ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) > #ifdef HAS_MERGEUVROW_MSA > ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15) > #endif >+#ifdef HAS_MERGEUVROW_MMI >+ANY21(MergeUVRow_Any_MMI, 
MergeUVRow_MMI, 0, 1, 1, 2, 7) >+#endif > > // Math functions. > #ifdef HAS_ARGBMULTIPLYROW_SSE2 >@@ -302,12 +318,21 @@ ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7) > #ifdef HAS_ARGBMULTIPLYROW_MSA > ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) > #endif >+#ifdef HAS_ARGBMULTIPLYROW_MMI >+ANY21(ARGBMultiplyRow_Any_MMI, ARGBMultiplyRow_MMI, 0, 4, 4, 4, 1) >+#endif > #ifdef HAS_ARGBADDROW_MSA > ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) > #endif >+#ifdef HAS_ARGBADDROW_MMI >+ANY21(ARGBAddRow_Any_MMI, ARGBAddRow_MMI, 0, 4, 4, 4, 1) >+#endif > #ifdef HAS_ARGBSUBTRACTROW_MSA > ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) > #endif >+#ifdef HAS_ARGBSUBTRACTROW_MMI >+ANY21(ARGBSubtractRow_Any_MMI, ARGBSubtractRow_MMI, 0, 4, 4, 4, 1) >+#endif > #ifdef HAS_SOBELROW_SSE2 > ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) > #endif >@@ -317,6 +342,9 @@ ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7) > #ifdef HAS_SOBELROW_MSA > ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15) > #endif >+#ifdef HAS_SOBELROW_MMI >+ANY21(SobelRow_Any_MMI, SobelRow_MMI, 0, 1, 1, 4, 7) >+#endif > #ifdef HAS_SOBELTOPLANEROW_SSE2 > ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15) > #endif >@@ -326,6 +354,9 @@ ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) > #ifdef HAS_SOBELTOPLANEROW_MSA > ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31) > #endif >+#ifdef HAS_SOBELTOPLANEROW_MMI >+ANY21(SobelToPlaneRow_Any_MMI, SobelToPlaneRow_MMI, 0, 1, 1, 1, 7) >+#endif > #ifdef HAS_SOBELXYROW_SSE2 > ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) > #endif >@@ -335,24 +366,27 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) > #ifdef HAS_SOBELXYROW_MSA > ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15) > #endif >+#ifdef HAS_SOBELXYROW_MMI >+ANY21(SobelXYRow_Any_MMI, SobelXYRow_MMI, 0, 1, 1, 4, 7) >+#endif > #undef ANY21 > > 
// Any 2 planes to 1 with yuvconstants > #define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ > void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ > const struct YuvConstants* yuvconstants, int width) { \ >- SIMD_ALIGNED(uint8_t temp[64 * 3]); \ >- memset(temp, 0, 64 * 2); /* for msan */ \ >+ SIMD_ALIGNED(uint8_t temp[128 * 3]); \ >+ memset(temp, 0, 128 * 2); /* for msan */ \ > int r = width & MASK; \ > int n = width & ~MASK; \ > if (n > 0) { \ > ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ > } \ > memcpy(temp, y_buf + n * SBPP, r * SBPP); \ >- memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ >+ memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ > SS(r, UVSHIFT) * SBPP2); \ >- ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ >- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ >+ ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \ >+ memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \ > } > > // Biplanar to RGB. 
>@@ -386,6 +420,18 @@ ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7) > #ifdef HAS_NV21TORGB24ROW_NEON > ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7) > #endif >+#ifdef HAS_NV12TORGB24ROW_SSSE3 >+ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15) >+#endif >+#ifdef HAS_NV21TORGB24ROW_SSSE3 >+ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15) >+#endif >+#ifdef HAS_NV12TORGB24ROW_AVX2 >+ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31) >+#endif >+#ifdef HAS_NV21TORGB24ROW_AVX2 >+ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31) >+#endif > #ifdef HAS_NV12TORGB565ROW_SSSE3 > ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) > #endif >@@ -431,6 +477,15 @@ ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) > ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) > ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) > #endif >+#if defined(HAS_ARGBTORGB24ROW_AVX2) >+ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31) >+#endif >+#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) >+ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31) >+#endif >+#if defined(HAS_ARGBTORAWROW_AVX2) >+ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31) >+#endif > #if defined(HAS_ARGBTORGB565ROW_AVX2) > ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) > #endif >@@ -499,12 +554,24 @@ ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7) > ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) > ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15) > #endif >+#if defined(HAS_ARGBTORGB24ROW_MMI) >+ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3) >+ANY11(ARGBToRAWRow_Any_MMI, ARGBToRAWRow_MMI, 0, 4, 3, 3) >+ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3) >+ANY11(ARGBToARGB1555Row_Any_MMI, 
ARGBToARGB1555Row_MMI, 0, 4, 2, 3) >+ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3) >+ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3) >+ANY11(I400ToARGBRow_Any_MMI, I400ToARGBRow_MMI, 0, 1, 4, 7) >+#endif > #if defined(HAS_RAWTORGB24ROW_NEON) > ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) > #endif > #if defined(HAS_RAWTORGB24ROW_MSA) > ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15) > #endif >+#if defined(HAS_RAWTORGB24ROW_MMI) >+ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3) >+#endif > #ifdef HAS_ARGBTOYROW_AVX2 > ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) > #endif >@@ -536,57 +603,87 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7) > #ifdef HAS_ARGBTOYROW_MSA > ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) > #endif >+#ifdef HAS_ARGBTOYROW_MMI >+ANY11(ARGBToYRow_Any_MMI, ARGBToYRow_MMI, 0, 4, 1, 7) >+#endif > #ifdef HAS_ARGBTOYJROW_NEON > ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) > #endif > #ifdef HAS_ARGBTOYJROW_MSA > ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) > #endif >+#ifdef HAS_ARGBTOYJROW_MMI >+ANY11(ARGBToYJRow_Any_MMI, ARGBToYJRow_MMI, 0, 4, 1, 7) >+#endif > #ifdef HAS_BGRATOYROW_NEON > ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7) > #endif > #ifdef HAS_BGRATOYROW_MSA > ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15) > #endif >+#ifdef HAS_BGRATOYROW_MMI >+ANY11(BGRAToYRow_Any_MMI, BGRAToYRow_MMI, 0, 4, 1, 7) >+#endif > #ifdef HAS_ABGRTOYROW_NEON > ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7) > #endif > #ifdef HAS_ABGRTOYROW_MSA > ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) > #endif >+#ifdef HAS_ABGRTOYROW_MMI >+ANY11(ABGRToYRow_Any_MMI, ABGRToYRow_MMI, 0, 4, 1, 7) >+#endif > #ifdef HAS_RGBATOYROW_NEON > ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7) > #endif > #ifdef HAS_RGBATOYROW_MSA > ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) > #endif >+#ifdef 
HAS_RGBATOYROW_MMI >+ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7) >+#endif > #ifdef HAS_RGB24TOYROW_NEON > ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) > #endif > #ifdef HAS_RGB24TOYROW_MSA > ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) > #endif >+#ifdef HAS_RGB24TOYROW_MMI >+ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7) >+#endif > #ifdef HAS_RAWTOYROW_NEON > ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) > #endif > #ifdef HAS_RAWTOYROW_MSA > ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) > #endif >+#ifdef HAS_RAWTOYROW_MMI >+ANY11(RAWToYRow_Any_MMI, RAWToYRow_MMI, 0, 3, 1, 7) >+#endif > #ifdef HAS_RGB565TOYROW_NEON > ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) > #endif > #ifdef HAS_RGB565TOYROW_MSA > ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15) > #endif >+#ifdef HAS_RGB565TOYROW_MMI >+ANY11(RGB565ToYRow_Any_MMI, RGB565ToYRow_MMI, 0, 2, 1, 7) >+#endif > #ifdef HAS_ARGB1555TOYROW_NEON > ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7) > #endif > #ifdef HAS_ARGB1555TOYROW_MSA > ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15) > #endif >+#ifdef HAS_ARGB1555TOYROW_MMI >+ANY11(ARGB1555ToYRow_Any_MMI, ARGB1555ToYRow_MMI, 0, 2, 1, 7) >+#endif > #ifdef HAS_ARGB4444TOYROW_NEON > ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) > #endif >+#ifdef HAS_ARGB4444TOYROW_MMI >+ANY11(ARGB4444ToYRow_Any_MMI, ARGB4444ToYRow_MMI, 0, 2, 1, 7) >+#endif > #ifdef HAS_YUY2TOYROW_NEON > ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) > #endif >@@ -596,39 +693,60 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) > #ifdef HAS_YUY2TOYROW_MSA > ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) > #endif >+#ifdef HAS_YUY2TOYROW_MMI >+ANY11(YUY2ToYRow_Any_MMI, YUY2ToYRow_MMI, 1, 4, 1, 7) >+#endif > #ifdef HAS_UYVYTOYROW_MSA > ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) > #endif >+#ifdef HAS_UYVYTOYROW_MMI >+ANY11(UYVYToYRow_Any_MMI, 
UYVYToYRow_MMI, 1, 4, 1, 15) >+#endif > #ifdef HAS_RGB24TOARGBROW_NEON > ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) > #endif > #ifdef HAS_RGB24TOARGBROW_MSA > ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15) > #endif >+#ifdef HAS_RGB24TOARGBROW_MMI >+ANY11(RGB24ToARGBRow_Any_MMI, RGB24ToARGBRow_MMI, 0, 3, 4, 3) >+#endif > #ifdef HAS_RAWTOARGBROW_NEON > ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) > #endif > #ifdef HAS_RAWTOARGBROW_MSA > ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) > #endif >+#ifdef HAS_RAWTOARGBROW_MMI >+ANY11(RAWToARGBRow_Any_MMI, RAWToARGBRow_MMI, 0, 3, 4, 3) >+#endif > #ifdef HAS_RGB565TOARGBROW_NEON > ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7) > #endif > #ifdef HAS_RGB565TOARGBROW_MSA > ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15) > #endif >+#ifdef HAS_RGB565TOARGBROW_MMI >+ANY11(RGB565ToARGBRow_Any_MMI, RGB565ToARGBRow_MMI, 0, 2, 4, 3) >+#endif > #ifdef HAS_ARGB1555TOARGBROW_NEON > ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) > #endif > #ifdef HAS_ARGB1555TOARGBROW_MSA > ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) > #endif >+#ifdef HAS_ARGB1555TOARGBROW_MMI >+ANY11(ARGB1555ToARGBRow_Any_MMI, ARGB1555ToARGBRow_MMI, 0, 2, 4, 3) >+#endif > #ifdef HAS_ARGB4444TOARGBROW_NEON > ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) > #endif > #ifdef HAS_ARGB4444TOARGBROW_MSA > ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) > #endif >+#ifdef HAS_ARGB4444TOARGBROW_MMI >+ANY11(ARGB4444ToARGBRow_Any_MMI, ARGB4444ToARGBRow_MMI, 0, 2, 4, 3) >+#endif > #ifdef HAS_ARGBATTENUATEROW_SSSE3 > ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) > #endif >@@ -647,11 +765,14 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) > #ifdef HAS_ARGBATTENUATEROW_MSA > ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) > #endif 
>+#ifdef HAS_ARGBATTENUATEROW_MMI >+ANY11(ARGBAttenuateRow_Any_MMI, ARGBAttenuateRow_MMI, 0, 4, 4, 1) >+#endif > #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 > ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) > #endif > #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 >-ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 32) >+ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31) > #endif > #ifdef HAS_ARGBEXTRACTALPHAROW_NEON > ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) >@@ -659,6 +780,9 @@ ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) > #ifdef HAS_ARGBEXTRACTALPHAROW_MSA > ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) > #endif >+#ifdef HAS_ARGBEXTRACTALPHAROW_MMI >+ANY11(ARGBExtractAlphaRow_Any_MMI, ARGBExtractAlphaRow_MMI, 0, 4, 1, 7) >+#endif > #undef ANY11 > > // Any 1 to 1 blended. Destination is read, modify, write. >@@ -683,12 +807,18 @@ ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15) > #ifdef HAS_ARGBCOPYALPHAROW_SSE2 > ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7) > #endif >+#ifdef HAS_ARGBCOPYALPHAROW_MMI >+ANY11B(ARGBCopyAlphaRow_Any_MMI, ARGBCopyAlphaRow_MMI, 0, 4, 4, 1) >+#endif > #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 > ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15) > #endif > #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 > ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) > #endif >+#ifdef HAS_ARGBCOPYYTOALPHAROW_MMI >+ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7) >+#endif > #undef ANY11B > > // Any 1 to 1 with parameter. 
>@@ -738,6 +868,14 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA, > 2, > 7) > #endif >+#if defined(HAS_ARGBTORGB565DITHERROW_MMI) >+ANY11P(ARGBToRGB565DitherRow_Any_MMI, >+ ARGBToRGB565DitherRow_MMI, >+ const uint32_t, >+ 4, >+ 2, >+ 3) >+#endif > #ifdef HAS_ARGBSHUFFLEROW_SSSE3 > ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) > #endif >@@ -750,6 +888,10 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) > #ifdef HAS_ARGBSHUFFLEROW_MSA > ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) > #endif >+#ifdef HAS_ARGBSHUFFLEROW_MMI >+ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1) >+#endif >+#undef ANY11P > #undef ANY11P > > // Any 1 to 1 with parameter and shorts. BPP measures in shorts. >@@ -918,6 +1060,9 @@ ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) > #ifdef HAS_INTERPOLATEROW_MSA > ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) > #endif >+#ifdef HAS_INTERPOLATEROW_MMI >+ANY11T(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7) >+#endif > #undef ANY11T > > // Any 1 to 1 mirror. >@@ -947,6 +1092,9 @@ ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) > #ifdef HAS_MIRRORROW_MSA > ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) > #endif >+#ifdef HAS_MIRRORROW_MMI >+ANY11M(MirrorRow_Any_MMI, MirrorRow_MMI, 1, 7) >+#endif > #ifdef HAS_ARGBMIRRORROW_AVX2 > ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) > #endif >@@ -959,6 +1107,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3) > #ifdef HAS_ARGBMIRRORROW_MSA > ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) > #endif >+#ifdef HAS_ARGBMIRRORROW_MMI >+ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1) >+#endif > #undef ANY11M > > // Any 1 plane. 
(memset) >@@ -1017,6 +1168,9 @@ ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) > #ifdef HAS_SPLITUVROW_MSA > ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) > #endif >+#ifdef HAS_SPLITUVROW_MMI >+ANY12(SplitUVRow_Any_MMI, SplitUVRow_MMI, 0, 2, 0, 7) >+#endif > #ifdef HAS_ARGBTOUV444ROW_SSSE3 > ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) > #endif >@@ -1038,6 +1192,11 @@ ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15) > ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) > ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) > #endif >+#ifdef HAS_YUY2TOUV422ROW_MMI >+ANY12(ARGBToUV444Row_Any_MMI, ARGBToUV444Row_MMI, 0, 4, 0, 7) >+ANY12(UYVYToUV422Row_Any_MMI, UYVYToUV422Row_MMI, 1, 4, 1, 15) >+ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15) >+#endif > #undef ANY12 > > // Any 1 to 3. Outputs RGB planes. >@@ -1064,6 +1223,9 @@ ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15) > #ifdef HAS_SPLITRGBROW_NEON > ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15) > #endif >+#ifdef HAS_SPLITRGBROW_MMI >+ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3) >+#endif > > // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. > // 128 byte row allows for 32 avx ARGB pixels. 
>@@ -1118,57 +1280,87 @@ ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15) > #ifdef HAS_ARGBTOUVROW_MSA > ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) > #endif >+#ifdef HAS_ARGBTOUVROW_MMI >+ANY12S(ARGBToUVRow_Any_MMI, ARGBToUVRow_MMI, 0, 4, 15) >+#endif > #ifdef HAS_ARGBTOUVJROW_NEON > ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) > #endif > #ifdef HAS_ARGBTOUVJROW_MSA > ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) > #endif >+#ifdef HAS_ARGBTOUVJROW_MMI >+ANY12S(ARGBToUVJRow_Any_MMI, ARGBToUVJRow_MMI, 0, 4, 15) >+#endif > #ifdef HAS_BGRATOUVROW_NEON > ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15) > #endif > #ifdef HAS_BGRATOUVROW_MSA > ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31) > #endif >+#ifdef HAS_BGRATOUVROW_MMI >+ANY12S(BGRAToUVRow_Any_MMI, BGRAToUVRow_MMI, 0, 4, 15) >+#endif > #ifdef HAS_ABGRTOUVROW_NEON > ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15) > #endif > #ifdef HAS_ABGRTOUVROW_MSA > ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31) > #endif >+#ifdef HAS_ABGRTOUVROW_MMI >+ANY12S(ABGRToUVRow_Any_MMI, ABGRToUVRow_MMI, 0, 4, 15) >+#endif > #ifdef HAS_RGBATOUVROW_NEON > ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15) > #endif > #ifdef HAS_RGBATOUVROW_MSA > ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31) > #endif >+#ifdef HAS_RGBATOUVROW_MMI >+ANY12S(RGBAToUVRow_Any_MMI, RGBAToUVRow_MMI, 0, 4, 15) >+#endif > #ifdef HAS_RGB24TOUVROW_NEON > ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) > #endif > #ifdef HAS_RGB24TOUVROW_MSA > ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15) > #endif >+#ifdef HAS_RGB24TOUVROW_MMI >+ANY12S(RGB24ToUVRow_Any_MMI, RGB24ToUVRow_MMI, 0, 3, 15) >+#endif > #ifdef HAS_RAWTOUVROW_NEON > ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) > #endif > #ifdef HAS_RAWTOUVROW_MSA > ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15) > #endif >+#ifdef HAS_RAWTOUVROW_MMI >+ANY12S(RAWToUVRow_Any_MMI, RAWToUVRow_MMI, 0, 3, 15) 
>+#endif > #ifdef HAS_RGB565TOUVROW_NEON > ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) > #endif > #ifdef HAS_RGB565TOUVROW_MSA > ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15) > #endif >+#ifdef HAS_RGB565TOUVROW_MMI >+ANY12S(RGB565ToUVRow_Any_MMI, RGB565ToUVRow_MMI, 0, 2, 15) >+#endif > #ifdef HAS_ARGB1555TOUVROW_NEON > ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) > #endif > #ifdef HAS_ARGB1555TOUVROW_MSA > ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15) > #endif >+#ifdef HAS_ARGB1555TOUVROW_MMI >+ANY12S(ARGB1555ToUVRow_Any_MMI, ARGB1555ToUVRow_MMI, 0, 2, 15) >+#endif > #ifdef HAS_ARGB4444TOUVROW_NEON > ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15) > #endif >+#ifdef HAS_ARGB4444TOUVROW_MMI >+ANY12S(ARGB4444ToUVRow_Any_MMI, ARGB4444ToUVRow_MMI, 0, 2, 15) >+#endif > #ifdef HAS_YUY2TOUVROW_NEON > ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) > #endif >@@ -1178,9 +1370,15 @@ ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) > #ifdef HAS_YUY2TOUVROW_MSA > ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) > #endif >+#ifdef HAS_YUY2TOUVROW_MMI >+ANY12S(YUY2ToUVRow_Any_MMI, YUY2ToUVRow_MMI, 1, 4, 15) >+#endif > #ifdef HAS_UYVYTOUVROW_MSA > ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) > #endif >+#ifdef HAS_UYVYTOUVROW_MMI >+ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15) >+#endif > #undef ANY12S > > #ifdef __cplusplus >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_common.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_common.cc >index da97821f796440dd12beb6e05f4ff626842d2bd6..2bbc5adbf145f36231b203bf7f1cbea4468b9965 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_common.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_common.cc >@@ -181,7 +181,7 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, > void AR30ToARGBRow_C(const uint8_t* 
src_ar30, uint8_t* dst_argb, int width) { > int x; > for (x = 0; x < width; ++x) { >- uint32_t ar30 = *(uint32_t*)src_ar30; >+ uint32_t ar30 = *(const uint32_t*)src_ar30; > uint32_t b = (ar30 >> 2) & 0xff; > uint32_t g = (ar30 >> 12) & 0xff; > uint32_t r = (ar30 >> 22) & 0xff; >@@ -195,7 +195,7 @@ void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) { > void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) { > int x; > for (x = 0; x < width; ++x) { >- uint32_t ar30 = *(uint32_t*)src_ar30; >+ uint32_t ar30 = *(const uint32_t*)src_ar30; > uint32_t b = (ar30 >> 2) & 0xff; > uint32_t g = (ar30 >> 12) & 0xff; > uint32_t r = (ar30 >> 22) & 0xff; >@@ -209,7 +209,7 @@ void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) { > void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) { > int x; > for (x = 0; x < width; ++x) { >- uint32_t ar30 = *(uint32_t*)src_ar30; >+ uint32_t ar30 = *(const uint32_t*)src_ar30; > uint32_t b = ar30 & 0x3ff; > uint32_t ga = ar30 & 0xc00ffc00; > uint32_t r = (ar30 >> 20) & 0x3ff; >@@ -2762,6 +2762,13 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb, > // simply extract the low bits of the exponent and the high > // bits of the mantissa from our float and we're done. 
> >+// Work around GCC 7 punning warning -Wstrict-aliasing >+#if defined(__GNUC__) >+typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t; >+#else >+typedef uint32_t uint32_alias_t; >+#endif >+ > void HalfFloatRow_C(const uint16_t* src, > uint16_t* dst, > float scale, >@@ -2770,7 +2777,7 @@ void HalfFloatRow_C(const uint16_t* src, > float mult = 1.9259299444e-34f * scale; > for (i = 0; i < width; ++i) { > float value = src[i] * mult; >- dst[i] = (uint16_t)((*(uint32_t*)&value) >> 13); >+ dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13); > } > } > >@@ -2953,6 +2960,94 @@ void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, > } > #endif > >+#if defined(HAS_NV12TORGB24ROW_SSSE3) >+void NV12ToRGB24Row_SSSE3(const uint8_t* src_y, >+ const uint8_t* src_uv, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width) { >+ // Row buffer for intermediate ARGB pixels. >+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); >+ while (width > 0) { >+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; >+ NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); >+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); >+ src_y += twidth; >+ src_uv += twidth; >+ dst_rgb24 += twidth * 3; >+ width -= twidth; >+ } >+} >+#endif >+ >+#if defined(HAS_NV21TORGB24ROW_SSSE3) >+void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, >+ const uint8_t* src_vu, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width) { >+ // Row buffer for intermediate ARGB pixels. >+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); >+ while (width > 0) { >+ int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; >+ NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth); >+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); >+ src_y += twidth; >+ src_vu += twidth; >+ dst_rgb24 += twidth * 3; >+ width -= twidth; >+ } >+} >+#endif >+ >+#if defined(HAS_NV12TORGB24ROW_AVX2) >+void NV12ToRGB24Row_AVX2(const uint8_t* src_y, >+ const uint8_t* src_uv, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width) { >+ // Row buffer for intermediate ARGB pixels. >+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); >+ while (width > 0) { >+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; >+ NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); >+#if defined(HAS_ARGBTORGB24ROW_AVX2) >+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); >+#else >+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); >+#endif >+ src_y += twidth; >+ src_uv += twidth; >+ dst_rgb24 += twidth * 3; >+ width -= twidth; >+ } >+} >+#endif >+ >+#if defined(HAS_NV21TORGB24ROW_AVX2) >+void NV21ToRGB24Row_AVX2(const uint8_t* src_y, >+ const uint8_t* src_vu, >+ uint8_t* dst_rgb24, >+ const struct YuvConstants* yuvconstants, >+ int width) { >+ // Row buffer for intermediate ARGB pixels. >+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); >+ while (width > 0) { >+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; >+ NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth); >+#if defined(HAS_ARGBTORGB24ROW_AVX2) >+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); >+#else >+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); >+#endif >+ src_y += twidth; >+ src_vu += twidth; >+ dst_rgb24 += twidth * 3; >+ width -= twidth; >+ } >+} >+#endif >+ > #if defined(HAS_I422TORGB565ROW_AVX2) > void I422ToRGB565Row_AVX2(const uint8_t* src_y, > const uint8_t* src_u, >@@ -3042,8 +3137,11 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, > while (width > 0) { > int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; > I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); >- // TODO(fbarchard): ARGBToRGB24Row_AVX2 >+#if defined(HAS_ARGBTORGB24ROW_AVX2) >+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); >+#else > ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); >+#endif > src_y += twidth; > src_u += twidth / 2; > src_v += twidth / 2; >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_gcc.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_gcc.cc >index 95845c2592f7c84307c37b00bf18274ea83b8cc9..8d3cb81cec2e25e2addc7f6e52f351bb616a4a6b 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_gcc.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_gcc.cc >@@ -505,6 +505,149 @@ void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { > : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); > } > >+#ifdef HAS_ARGBTORGB24ROW_AVX2 >+// vpermd for 12+12 to 24 >+static const lvec32 kPermdRGB24_AVX = {0, 1, 2, 4, 5, 6, 3, 7}; >+ >+void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { >+ asm volatile( >+ "vbroadcastf128 %3,%%ymm6 \n" >+ "vmovdqa %4,%%ymm7 \n" >+ >+ LABELALIGN >+ "1: \n" >+ "vmovdqu (%0),%%ymm0 \n" >+ "vmovdqu 0x20(%0),%%ymm1 \n" >+ "vmovdqu 0x40(%0),%%ymm2 \n" >+ "vmovdqu 0x60(%0),%%ymm3 \n" >+ "lea 0x80(%0),%0 \n" >+ "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0 >+ "vpshufb %%ymm6,%%ymm1,%%ymm1 \n" >+ "vpshufb %%ymm6,%%ymm2,%%ymm2 \n" >+ "vpshufb %%ymm6,%%ymm3,%%ymm3 \n" >+ "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes >+ "vpermd %%ymm1,%%ymm7,%%ymm1 \n" >+ "vpermd %%ymm2,%%ymm7,%%ymm2 \n" >+ "vpermd %%ymm3,%%ymm7,%%ymm3 \n" >+ "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8 >+ "vpor %%ymm4,%%ymm0,%%ymm0 \n" >+ "vmovdqu %%ymm0,(%1) \n" >+ "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16 >+ "vpermq $0x4f,%%ymm2,%%ymm4 \n" >+ "vpor %%ymm4,%%ymm1,%%ymm1 \n" >+ "vmovdqu %%ymm1,0x20(%1) \n" >+ 
"vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24 >+ "vpermq $0x93,%%ymm3,%%ymm3 \n" >+ "vpor %%ymm3,%%ymm2,%%ymm2 \n" >+ "vmovdqu %%ymm2,0x40(%1) \n" >+ "lea 0x60(%1),%1 \n" >+ "sub $0x20,%2 \n" >+ "jg 1b \n" >+ "vzeroupper \n" >+ : "+r"(src), // %0 >+ "+r"(dst), // %1 >+ "+r"(width) // %2 >+ : "m"(kShuffleMaskARGBToRGB24), // %3 >+ "m"(kPermdRGB24_AVX) // %4 >+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", >+ "xmm7"); >+} >+#endif >+ >+#ifdef HAS_ARGBTORGB24ROW_AVX512VBMI >+// Shuffle table for converting ARGBToRGB24 >+static const ulvec8 kPermARGBToRGB24_0 = { >+ 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, >+ 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, 25u, 26u, 28u, >+ 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, 40u, 41u}; >+static const ulvec8 kPermARGBToRGB24_1 = { >+ 10u, 12u, 13u, 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, >+ 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, >+ 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, 50u, 52u}; >+static const ulvec8 kPermARGBToRGB24_2 = { >+ 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, >+ 36u, 37u, 38u, 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, >+ 50u, 52u, 53u, 54u, 56u, 57u, 58u, 60u, 61u, 62u}; >+ >+void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width) { >+ asm volatile( >+ "vmovdqa %3,%%ymm5 \n" >+ "vmovdqa %4,%%ymm6 \n" >+ "vmovdqa %5,%%ymm7 \n" >+ >+ LABELALIGN >+ "1: \n" >+ "vmovdqu (%0),%%ymm0 \n" >+ "vmovdqu 0x20(%0),%%ymm1 \n" >+ "vmovdqu 0x40(%0),%%ymm2 \n" >+ "vmovdqu 0x60(%0),%%ymm3 \n" >+ "lea 0x80(%0),%0 \n" >+ "vpermt2b %%ymm1,%%ymm5,%%ymm0 \n" >+ "vpermt2b %%ymm2,%%ymm6,%%ymm1 \n" >+ "vpermt2b %%ymm3,%%ymm7,%%ymm2 \n" >+ "vmovdqu %%ymm0,(%1) \n" >+ "vmovdqu %%ymm1,0x20(%1) \n" >+ "vmovdqu %%ymm2,0x40(%1) \n" >+ "lea 0x60(%1),%1 \n" >+ "sub $0x20,%2 \n" >+ "jg 1b \n" >+ "vzeroupper \n" >+ : "+r"(src), // %0 >+ "+r"(dst), // %1 >+ "+r"(width) // %2 >+ : "m"(kPermARGBToRGB24_0), // %3 >+ "m"(kPermARGBToRGB24_1), // %4 >+ "m"(kPermARGBToRGB24_2) // %5 >+ : 
"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6", "xmm7"); >+} >+#endif >+ >+#ifdef HAS_ARGBTORAWROW_AVX2 >+void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { >+ asm volatile( >+ "vbroadcastf128 %3,%%ymm6 \n" >+ "vmovdqa %4,%%ymm7 \n" >+ >+ LABELALIGN >+ "1: \n" >+ "vmovdqu (%0),%%ymm0 \n" >+ "vmovdqu 0x20(%0),%%ymm1 \n" >+ "vmovdqu 0x40(%0),%%ymm2 \n" >+ "vmovdqu 0x60(%0),%%ymm3 \n" >+ "lea 0x80(%0),%0 \n" >+ "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0 >+ "vpshufb %%ymm6,%%ymm1,%%ymm1 \n" >+ "vpshufb %%ymm6,%%ymm2,%%ymm2 \n" >+ "vpshufb %%ymm6,%%ymm3,%%ymm3 \n" >+ "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes >+ "vpermd %%ymm1,%%ymm7,%%ymm1 \n" >+ "vpermd %%ymm2,%%ymm7,%%ymm2 \n" >+ "vpermd %%ymm3,%%ymm7,%%ymm3 \n" >+ "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8 >+ "vpor %%ymm4,%%ymm0,%%ymm0 \n" >+ "vmovdqu %%ymm0,(%1) \n" >+ "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16 >+ "vpermq $0x4f,%%ymm2,%%ymm4 \n" >+ "vpor %%ymm4,%%ymm1,%%ymm1 \n" >+ "vmovdqu %%ymm1,0x20(%1) \n" >+ "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24 >+ "vpermq $0x93,%%ymm3,%%ymm3 \n" >+ "vpor %%ymm3,%%ymm2,%%ymm2 \n" >+ "vmovdqu %%ymm2,0x40(%1) \n" >+ "lea 0x60(%1),%1 \n" >+ "sub $0x20,%2 \n" >+ "jg 1b \n" >+ "vzeroupper \n" >+ : "+r"(src), // %0 >+ "+r"(dst), // %1 >+ "+r"(width) // %2 >+ : "m"(kShuffleMaskARGBToRAW), // %3 >+ "m"(kPermdRGB24_AVX) // %4 >+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", >+ "xmm7"); >+} >+#endif >+ > void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { > asm volatile( > "pcmpeqb %%xmm3,%%xmm3 \n" >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_mmi.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_mmi.cc >new file mode 100644 >index 0000000000000000000000000000000000000000..d8726d09374d90457ea9de67f28dbc9c9db262f4 >--- /dev/null >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_mmi.cc >@@ 
-0,0 +1,6042 @@ >+/* >+ * Copyright 2011 The LibYuv Project Authors. All rights reserved. >+ * >+ * Use of this source code is governed by a BSD-style license >+ * that can be found in the LICENSE file in the root of the source >+ * tree. An additional intellectual property rights grant can be found >+ * in the file PATENTS. All contributing project authors may >+ * be found in the AUTHORS file in the root of the source tree. >+ */ >+#include "libyuv/row.h" >+ >+#include <string.h> // For memcpy and memset. >+ >+#include "libyuv/basic_types.h" >+ >+#ifdef __cplusplus >+namespace libyuv { >+extern "C" { >+#endif >+ >+// This module is for Mips MMI. >+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, >+ uint8_t* dst_argb, >+ int width) { >+ uint64_t src0, src1, dest; >+ const uint64_t mask = 0xff000000ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" >+ "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" >+ "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" >+ >+ "or %[src0], %[src0], %[mask] \n\t" >+ "or %[src1], %[src1], %[mask] \n\t" >+ "punpcklwd %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t" >+ "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t" >+ "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t" >+ "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t" >+ >+ "or %[src0], %[src0], %[mask] \n\t" >+ "or %[src1], %[src1], %[mask] \n\t" >+ "punpcklwd %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), 
[width] "r"(width), >+ [mask] "f"(mask) >+ : "memory"); >+} >+ >+void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) { >+ uint64_t src0, src1, dest; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = 0xff000000ULL; >+ const uint64_t mask2 = 0xc6; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" >+ "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" >+ "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" >+ >+ "or %[src0], %[src0], %[mask1] \n\t" >+ "punpcklbh %[src0], %[src0], %[mask0] \n\t" >+ "pshufh %[src0], %[src0], %[mask2] \n\t" >+ "or %[src1], %[src1], %[mask1] \n\t" >+ "punpcklbh %[src1], %[src1], %[mask0] \n\t" >+ "pshufh %[src1], %[src1], %[mask2] \n\t" >+ "packushb %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t" >+ "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t" >+ "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t" >+ "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t" >+ >+ "or %[src0], %[src0], %[mask1] \n\t" >+ "punpcklbh %[src0], %[src0], %[mask0] \n\t" >+ "pshufh %[src0], %[src0], %[mask2] \n\t" >+ "or %[src1], %[src1], %[mask1] \n\t" >+ "punpcklbh %[src1], %[src1], %[mask0] \n\t" >+ "pshufh %[src1], %[src1], %[mask2] \n\t" >+ "packushb %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), >+ [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width) >+ : "memory"); >+} >+ >+void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { >+ uint64_t src0, src1; >+ uint64_t ftmp[4]; >+ uint64_t mask0 = 
0xc6; >+ uint64_t mask1 = 0x6c; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t" >+ "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t" >+ "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t" >+ >+ "punpcklbh %[ftmp0], %[src0], %[zero] \n\t" >+ "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t" >+ "punpckhbh %[ftmp1], %[src0], %[zero] \n\t" >+ "punpcklbh %[src1], %[src1], %[zero] \n\t" >+ "pextrh %[ftmp2], %[ftmp0], %[three] \n\t" >+ "pextrh %[ftmp3], %[ftmp1], %[one] \n\t" >+ "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t" >+ "pextrh %[ftmp3], %[ftmp1], %[two] \n\t" >+ "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t" >+ "pshufh %[src1], %[src1], %[mask1] \n\t" >+ "pextrh %[ftmp2], %[src1], %[zero] \n\t" >+ "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t" >+ "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t" >+ "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" >+ "packushb %[src1], %[src1], %[zero] \n\t" >+ >+ "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t" >+ "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t" >+ "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t" >+ "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t" >+ >+ "daddiu %[src_raw], %[src_raw], 0x0c \n\t" >+ "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]), >+ [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]) >+ : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width), >+ [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00), >+ [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03) >+ : "memory"); >+} >+ >+void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565, >+ uint8_t* dst_argb, >+ int width) { >+ uint64_t ftmp[5]; >+ uint64_t c0 = 0x001f001f001f001f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t c2 = 0x0007000700070007; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 
0x07(%[src_rgb565]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g], %[src1], %[c2] \n\t" >+ "psllh %[g], %[g], %[three] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "psrlh %[r], %[src1], %[three] \n\t" >+ "psllh %[src0], %[b], %[three] \n\t" >+ "psrlh %[src1], %[b], %[two] \n\t" >+ "or %[b], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[g], %[two] \n\t" >+ "psrlh %[src1], %[g], %[four] \n\t" >+ "or %[g], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[r], %[three] \n\t" >+ "psrlh %[src1], %[r], %[two] \n\t" >+ "or %[r], %[src0], %[src1] \n\t" >+ "packushb %[b], %[b], %[r] \n\t" >+ "packushb %[g], %[g], %[c1] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklhw %[r], %[src0], %[src1] \n\t" >+ "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" >+ "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" >+ "punpckhhw %[r], %[src0], %[src1] \n\t" >+ "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" >+ "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" >+ "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t" >+ "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), >+ [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]) >+ : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb), >+ [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), >+ [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), >+ [four] "f"(0x04) >+ : "memory"); >+} >+ >+void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555, >+ uint8_t* dst_argb, >+ int width) { >+ uint64_t ftmp[6]; >+ uint64_t c0 = 0x001f001f001f001f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t c2 = 0x0003000300030003; >+ uint64_t c3 = 0x007c007c007c007c; >+ uint64_t c4 = 0x0001000100010001; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" >+ 
"gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g], %[src1], %[c2] \n\t" >+ "psllh %[g], %[g], %[three] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "and %[r], %[src1], %[c3] \n\t" >+ "psrlh %[r], %[r], %[two] \n\t" >+ "psrlh %[a], %[src1], %[seven] \n\t" >+ "psllh %[src0], %[b], %[three] \n\t" >+ "psrlh %[src1], %[b], %[two] \n\t" >+ "or %[b], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[g], %[three] \n\t" >+ "psrlh %[src1], %[g], %[two] \n\t" >+ "or %[g], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[r], %[three] \n\t" >+ "psrlh %[src1], %[r], %[two] \n\t" >+ "or %[r], %[src0], %[src1] \n\t" >+ "xor %[a], %[a], %[c1] \n\t" >+ "paddb %[a], %[a], %[c4] \n\t" >+ "packushb %[b], %[b], %[r] \n\t" >+ "packushb %[g], %[g], %[a] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklhw %[r], %[src0], %[src1] \n\t" >+ "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" >+ "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" >+ "punpckhhw %[r], %[src0], %[src1] \n\t" >+ "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" >+ "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" >+ "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t" >+ "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), >+ [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5]) >+ : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb), >+ [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), >+ [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05), >+ [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07) >+ : "memory"); >+} >+ >+void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444, >+ uint8_t* dst_argb, >+ int width) { >+ uint64_t ftmp[6]; >+ uint64_t c0 = 0x000f000f000f000f; >+ uint64_t c1 = 
0x00ff00ff00ff00ff; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g], %[src0], %[four] \n\t" >+ "and %[r], %[src1], %[c0] \n\t" >+ "psrlh %[a], %[src1], %[four] \n\t" >+ "psllh %[src0], %[b], %[four] \n\t" >+ "or %[b], %[src0], %[b] \n\t" >+ "psllh %[src0], %[g], %[four] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "psllh %[src0], %[r], %[four] \n\t" >+ "or %[r], %[src0], %[r] \n\t" >+ "psllh %[src0], %[a], %[four] \n\t" >+ "or %[a], %[src0], %[a] \n\t" >+ "packushb %[b], %[b], %[r] \n\t" >+ "packushb %[g], %[g], %[a] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklhw %[r], %[src0], %[src1] \n\t" >+ "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" >+ "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" >+ "punpckhhw %[r], %[src0], %[src1] \n\t" >+ "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" >+ "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" >+ "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t" >+ "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), >+ [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5]) >+ : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb), >+ [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08), >+ [four] "f"(0x04) >+ : "memory"); >+} >+ >+void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { >+ uint64_t src; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t" >+ "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t" >+ "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t" >+ >+ "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t" >+ "gsswlc1 %[src], 0x06(%[dst_ptr]) 
\n\t" >+ "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t" >+ >+ "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t" >+ "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t" >+ "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t" >+ "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t" >+ >+ "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t" >+ "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t" >+ "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t" >+ "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width) >+ : "memory"); >+} >+ >+void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { >+ uint64_t src0, src1; >+ uint64_t ftmp[3]; >+ uint64_t mask0 = 0xc6; >+ uint64_t mask1 = 0x18; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" >+ >+ "punpcklbh %[ftmp0], %[src0], %[zero] \n\t" >+ "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t" >+ "punpckhbh %[ftmp1], %[src0], %[zero] \n\t" >+ "punpcklbh %[ftmp2], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ >+ "pextrh %[src0], %[ftmp1], %[two] \n\t" >+ "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t" >+ "pshufh %[ftmp1], %[ftmp1], %[one] \n\t" >+ >+ "pextrh %[src0], %[ftmp2], %[two] \n\t" >+ "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t" >+ "pextrh %[src0], %[ftmp2], %[one] \n\t" >+ "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t" >+ "pextrh %[src0], %[ftmp2], %[zero] \n\t" >+ "pshufh %[src1], %[src1], %[mask1] \n\t" >+ "pinsrh_0 %[src1], %[src1], %[src0] \n\t" >+ "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" >+ "packushb %[src1], %[src1], %[zero] \n\t" >+ >+ "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t" >+ "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t" >+ "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t" >+ 
"gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t" >+ >+ "daddiu %[src_argb], %[src_argb], 0x10 \n\t" >+ "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]), >+ [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]) >+ : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), >+ [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00), >+ [one] "f"(0x01), [two] "f"(0x02) >+ : "memory"); >+} >+ >+void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { >+ uint64_t src0, src1; >+ uint64_t ftmp[3]; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" >+ >+ "punpcklbh %[b], %[src0], %[src1] \n\t" >+ "punpckhbh %[g], %[src0], %[src1] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklbh %[b], %[src0], %[zero] \n\t" >+ "punpckhbh %[g], %[src0], %[zero] \n\t" >+ "punpcklbh %[r], %[src1], %[zero] \n\t" >+ >+ "psrlh %[b], %[b], %[three] \n\t" >+ "psrlh %[g], %[g], %[two] \n\t" >+ "psrlh %[r], %[r], %[three] \n\t" >+ >+ "psllh %[g], %[g], %[five] \n\t" >+ "psllh %[r], %[r], %[eleven] \n\t" >+ "or %[b], %[b], %[g] \n\t" >+ "or %[b], %[b], %[r] \n\t" >+ >+ "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" >+ "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" >+ >+ "daddiu %[src_argb], %[src_argb], 0x10 \n\t" >+ "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), >+ [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]) >+ : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), >+ [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05), >+ [eleven] "f"(0x0b) >+ : "memory"); >+} >+ >+// dither4 is a row of 4 
values from 4x4 dither matrix. >+// The 4x4 matrix contains values to increase RGB. When converting to >+// fewer bits (565) this provides an ordered dither. >+// The order in the 4x4 matrix in first byte is upper left. >+// The 4 values are passed as an int, then referenced as an array, so >+// endian will not affect order of the original matrix. But the dither4 >+// will contain the first pixel in the lower byte for little endian >+// or the upper byte for big endian. >+void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_rgb, >+ const uint32_t dither4, >+ int width) { >+ uint64_t src0, src1; >+ uint64_t ftmp[3]; >+ uint64_t c0 = 0x00ff00ff00ff00ff; >+ >+ __asm__ volatile( >+ "punpcklbh %[dither], %[dither], %[zero] \n\t" >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" >+ >+ "punpcklbh %[b], %[src0], %[src1] \n\t" >+ "punpckhbh %[g], %[src0], %[src1] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklbh %[b], %[src0], %[zero] \n\t" >+ "punpckhbh %[g], %[src0], %[zero] \n\t" >+ "punpcklbh %[r], %[src1], %[zero] \n\t" >+ >+ "paddh %[b], %[b], %[dither] \n\t" >+ "paddh %[g], %[g], %[dither] \n\t" >+ "paddh %[r], %[r], %[dither] \n\t" >+ "pcmpgth %[src0], %[b], %[c0] \n\t" >+ "or %[src0], %[src0], %[b] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "pcmpgth %[src0], %[g], %[c0] \n\t" >+ "or %[src0], %[src0], %[g] \n\t" >+ "and %[g], %[src0], %[c0] \n\t" >+ "pcmpgth %[src0], %[r], %[c0] \n\t" >+ "or %[src0], %[src0], %[r] \n\t" >+ "and %[r], %[src0], %[c0] \n\t" >+ >+ "psrlh %[b], %[b], %[three] \n\t" >+ "psrlh %[g], %[g], %[two] \n\t" >+ "psrlh %[r], %[r], %[three] \n\t" >+ >+ "psllh %[g], %[g], %[five] \n\t" >+ "psllh %[r], %[r], %[eleven] \n\t" >+ "or %[b], %[b], %[g] \n\t" >+ "or %[b], %[b], %[r] \n\t" >+ >+ "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" >+ "gssdlc1 
%[b], 0x07(%[dst_rgb]) \n\t" >+ >+ "daddiu %[src_argb], %[src_argb], 0x10 \n\t" >+ "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), >+ [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]) >+ : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), >+ [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02), >+ [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b) >+ : "memory"); >+} >+ >+void ARGBToARGB1555Row_MMI(const uint8_t* src_argb, >+ uint8_t* dst_rgb, >+ int width) { >+ uint64_t src0, src1; >+ uint64_t ftmp[4]; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" >+ >+ "punpcklbh %[b], %[src0], %[src1] \n\t" >+ "punpckhbh %[g], %[src0], %[src1] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklbh %[b], %[src0], %[zero] \n\t" >+ "punpckhbh %[g], %[src0], %[zero] \n\t" >+ "punpcklbh %[r], %[src1], %[zero] \n\t" >+ "punpckhbh %[a], %[src1], %[zero] \n\t" >+ >+ "psrlh %[b], %[b], %[three] \n\t" >+ "psrlh %[g], %[g], %[three] \n\t" >+ "psrlh %[r], %[r], %[three] \n\t" >+ "psrlh %[a], %[a], %[seven] \n\t" >+ >+ "psllh %[g], %[g], %[five] \n\t" >+ "psllh %[r], %[r], %[ten] \n\t" >+ "psllh %[a], %[a], %[fifteen] \n\t" >+ "or %[b], %[b], %[g] \n\t" >+ "or %[b], %[b], %[r] \n\t" >+ "or %[b], %[b], %[a] \n\t" >+ >+ "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" >+ "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" >+ >+ "daddiu %[src_argb], %[src_argb], 0x10 \n\t" >+ "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), >+ [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3]) >+ : [src_argb] "r"(src_argb), 
[dst_rgb] "r"(dst_rgb), [width] "r"(width), >+ [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05), >+ [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f) >+ : "memory"); >+} >+ >+void ARGBToARGB4444Row_MMI(const uint8_t* src_argb, >+ uint8_t* dst_rgb, >+ int width) { >+ uint64_t src0, src1; >+ uint64_t ftmp[4]; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" >+ >+ "punpcklbh %[b], %[src0], %[src1] \n\t" >+ "punpckhbh %[g], %[src0], %[src1] \n\t" >+ "punpcklbh %[src0], %[b], %[g] \n\t" >+ "punpckhbh %[src1], %[b], %[g] \n\t" >+ "punpcklbh %[b], %[src0], %[zero] \n\t" >+ "punpckhbh %[g], %[src0], %[zero] \n\t" >+ "punpcklbh %[r], %[src1], %[zero] \n\t" >+ "punpckhbh %[a], %[src1], %[zero] \n\t" >+ >+ "psrlh %[b], %[b], %[four] \n\t" >+ "psrlh %[g], %[g], %[four] \n\t" >+ "psrlh %[r], %[r], %[four] \n\t" >+ "psrlh %[a], %[a], %[four] \n\t" >+ >+ "psllh %[g], %[g], %[four] \n\t" >+ "psllh %[r], %[r], %[eight] \n\t" >+ "psllh %[a], %[a], %[twelve] \n\t" >+ "or %[b], %[b], %[g] \n\t" >+ "or %[b], %[b], %[r] \n\t" >+ "or %[b], %[b], %[a] \n\t" >+ >+ "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" >+ "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" >+ >+ "daddiu %[src_argb], %[src_argb], 0x10 \n\t" >+ "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x04 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), >+ [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3]) >+ : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), >+ [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08), >+ [twelve] "f"(0x0c) >+ : "memory"); >+} >+ >+void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest0, dest1, dest2, dest3; >+ const uint64_t value = 0x1080; >+ const uint64_t mask = 
0x0001004200810019; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[dest0], %[src] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[dest1], %[src] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[dest2], %[src] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] 
\n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[dest3], %[src] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3) >+ : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width), >+ [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), >+ [zero] "f"(0x00) >+ : "memory"); >+} >+ >+void ARGBToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x0026004a00700002; >+ const uint64_t mask_v = 0x00020070005e0012; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], 
%[src0], %[two] \n\t" >+ "dsll %[dest0_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" >+ "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], 
%[two] \n\t" >+ "dsll %[dest1_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" >+ "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] 
\n\t" >+ "dsll %[dest2_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" >+ "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ 
"dsll %[dest3_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" >+ "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t" >+ "daddiu %[dst_u], 
%[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest0, dest1, dest2, dest3; >+ const uint64_t value = 0x1080; >+ const uint64_t mask = 0x0019008100420001; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[dest0], %[src] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] 
\n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[dest1], %[src] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[dest2], %[src] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[dest3], %[src] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3) >+ : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width), >+ [mask] "f"(mask), 
[value] "f"(value), [eight] "f"(0x08), >+ [zero] "f"(0x00) >+ : "memory"); >+} >+ >+void BGRAToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x00020070004a0026; >+ const uint64_t mask_v = 0x0012005e00700002; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[dest0_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t" >+ "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], 
%[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src1], %[src0] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src0], %[src1] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[dest1_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t" >+ "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], 
%[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src1], %[src0] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src0], %[src1] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[dest2_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t" >+ "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], 
%[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src1], %[src0] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src0], %[src1] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[dest3_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t" >+ "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsrl %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], 
%[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src1], %[src0] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src0], %[src1] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest0, dest1, dest2, dest3; >+ const uint64_t value = 
0x1080; >+ const uint64_t mask = 0x0001001900810042; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[dest0], %[src] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[dest1], %[src] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[dest2], %[src] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ 
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[dest3], %[src] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3) >+ : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width), >+ [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), >+ [zero] "f"(0x00) >+ : "memory"); >+} >+ >+void ABGRToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x00020070004a0026; >+ const uint64_t mask_v = 0x0012005e00700002; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], 
%[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t" >+ "dsll %[dest0_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src1], %[src0] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src0], %[src1] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], 
%[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t" >+ "dsll %[dest1_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src1], %[src0] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src0], %[src1] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], 
%[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t" >+ "dsll %[dest2_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src1], %[src0] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src0], %[src1] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], 
%[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t" >+ "dsll %[dest3_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src1], %[src0] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src0], %[src1] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu 
%[src_rgb0], %[src_rgb0], 0x40 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest0, dest1, dest2, dest3; >+ const uint64_t value = 0x1080; >+ const uint64_t mask = 0x0042008100190001; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[dest0], %[src] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], 
%[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[dest1], %[src] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[dest2], %[src] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[dest3], %[src] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3) >+ : [src_argb0] "r"(src_argb0), [dst_y] 
"r"(dst_y), [width] "r"(width), >+ [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), >+ [zero] "f"(0x00) >+ : "memory"); >+} >+ >+void RGBAToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x0026004a00700002; >+ const uint64_t mask_v = 0x00020070005e0012; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t" >+ "dsrl %[dest0_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" >+ "dsrl %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_hi], 
%[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t" >+ "dsrl %[dest1_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" >+ "dsrl %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_hi], 
%[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t" >+ "dsrl %[dest2_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" >+ "dsrl %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_hi], 
%[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t" >+ "dsrl %[dest3_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" >+ "dsrl %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_3 %[src_hi], 
%[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest0, 
dest1, dest2, dest3; >+ const uint64_t value = 0x1080; >+ const uint64_t mask = 0x0001004200810019; >+/* Converts packed 3-byte pixels at src_argb0 to 8-bit luma at dst_y, eight pixels (0x18 source bytes) per pass of the loop below. Each unaligned 8-byte load pair feeds two pixels: bytes are widened to halfwords against zero, halfword 3 is replaced by value (0x1080), and pmaddhw applies the halfword weights held in mask (0x19, 0x81, 0x42 and 1, lowest halfword first). As read from these constants, Y = (25*b0 + 129*b1 + 66*b2 + 0x1080) >> 8, with b0..b2 the pixel bytes in memory order. The loop only ends when width is exactly zero (bnez), so width presumably has to be a positive multiple of 8 - TODO confirm with callers. NOTE(review): src_argb0, dst_y and width are declared as input-only operands but are updated by daddiu and daddi; GCC documents that input-only operands must not be modified - confirm whether they should be read-write operands. NOTE(review): the last load pair spans offsets 0x12..0x19, two bytes beyond the 0x18 consumed per pass - confirm the source buffer has readable slack. */ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[dest0], %[src] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[dest1], %[src] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[dest2], %[src] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 
0x19(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[dest3], %[src] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3) >+ : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width), >+ [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), >+ [zero] "f"(0x00) >+ : "memory"); >+} >+ >+void RGB24ToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x0026004a00700002; >+ const uint64_t mask_v = 0x00020070005e0012; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ 
"punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[dest0_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" >+ "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+/* RGB24ToUVRow_MMI, first 2x2 block done: dest0_u and dest0_v now hold weighted partial sums. Two adjacent 3-byte pixels from the row at src_rgb0 and two from the row at src_rgb1 (src_rgb0 + src_stride_rgb, recomputed at the top of every pass) were summed per channel and shifted right by two to average them; value (0x4040) was placed in halfword 0 of the U operand (after a 16-bit left shift) and in halfword 3 of the V operand, and pmaddhw applied mask_u and mask_v. The group below repeats this for the next 2x2 block at offset 0x06 into src_lo and src_hi; the punpck, psubw and psraw sequence after it forms two signed results per plane, shifted right by eight. Eight such groups per pass consume 0x30 bytes of each row and store 8 bytes to dst_u and 8 bytes to dst_v; width drops by 0x10 and the loop repeats while it stays positive (bgtz). NOTE(review): src_rgb0, dst_u, dst_v and width are input-only operands that the asm modifies - confirm against the GCC extended asm rules. NOTE(review): the final load pair spans 0x2a..0x31, two bytes beyond the 0x30 consumed per pass in both rows - confirm readable slack. */ >+ "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 
0x0c(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[dest1_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" >+ "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ 
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[dest2_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" >+ "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], 
%[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[dest3_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" >+ "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd 
%[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest0, dest1, dest2, dest3; >+ const uint64_t value = 0x1080; >+ const uint64_t mask = 0x0001001900810042; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ 
"gsldlc1 %[src], 0x07(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[dest0], %[src] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[dest1], %[src] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[dest2], %[src] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t" >+ "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_lo], 
%[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "dsll %[src], %[src], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src], %[zero] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[dest3], %[src] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3) >+ : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width), >+ [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), >+ [zero] "f"(0x00) >+ : "memory"); >+} >+ >+void RAWToUVRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x00020070004a0026; >+ const uint64_t mask_v = 0x0012005e00700002; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ 
"paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t" >+ "dsll %[dest0_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src1], %[src0] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src0], %[src1] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], 
%[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t" >+ "dsll %[dest1_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src1], %[src0] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src0], %[src1] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ 
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t" >+ "dsll %[dest2_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src1], %[src0] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src0], 
%[src1] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t" >+ "dsll %[dest3_v], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "dsll %[src0], %[src0], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "paddh %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpcklbh %[src_lo], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_lo] \n\t" >+ "dsll %[src1], %[src1], %[eight] \n\t" >+ "punpckhbh %[src_hi], %[src1], %[zero] \n\t" >+ "paddh %[src0], %[src0], %[src_hi] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" >+ "dsll %[src_hi], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src1], %[src0] \n\t" >+ "psraw %[dest3_u], 
%[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src0], %[src1] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest, dest0, dest1, dest2, dest3; >+ uint64_t tmp0, tmp1; >+ const uint64_t shift = 0x07; >+ const uint64_t value = 0x0040; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = 0x00010026004B000FULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 
0x00(%[src_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" >+ "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[tmp0], %[tmp1] \n\t" >+ "psrlw %[dest0], %[dest0], %[shift] \n\t" >+ >+ "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" >+ "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[tmp0], %[tmp1] \n\t" >+ "psrlw %[dest1], %[dest1], %[shift] \n\t" >+ >+ "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" >+ "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[tmp0], %[tmp1] \n\t" >+ "psrlw %[dest2], %[dest2], %[shift] \n\t" >+ >+ "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw 
%[src_hi], %[src_hi], %[mask1] \n\t" >+ "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[tmp0], %[tmp1] \n\t" >+ "psrlw %[dest3], %[dest3], %[shift] \n\t" >+ >+ "packsswh %[tmp0], %[dest0], %[dest1] \n\t" >+ "packsswh %[tmp1], %[dest2], %[dest3] \n\t" >+ "packushb %[dest], %[tmp0], %[tmp1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), >+ [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), >+ [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0), >+ [tmp1] "=&f"(tmp1) >+ : [src_ptr] "r"(src_argb0), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0), >+ [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value), >+ [width] "r"(width) >+ : "memory"); >+} >+ >+void ARGBToUVJRow_MMI(const uint8_t* src_rgb0, >+ int src_stride_rgb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t src_rgb1; >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x002b0054007f0002; >+ const uint64_t mask_v = 0x0002007f006b0014; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[dest0_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest0_u], 
%[dest0_u], %[value] \n\t" >+ "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[dest1_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" >+ "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest1_u], 
%[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[dest2_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" >+ "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 
0x28(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[dest3_u], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" >+ "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t" >+ "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t" >+ "gsldrc1 %[src1], 
0x38(%[src_rgb1]) \n\t" >+ "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "punpcklbh %[src0], %[src1], %[zero] \n\t" >+ "punpckhbh %[src1], %[src1], %[zero] \n\t" >+ "pavgh %[src0], %[src_lo], %[src0] \n\t" >+ "pavgh %[src1], %[src_hi], %[src1] \n\t" >+ "pavgh %[src0], %[src0], %[src1] \n\t" >+ "dsll %[src_lo], %[src0], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), >+ [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), >+ [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), >+ [dest2_u] "=&f"(ftmp[8]), [dest2_v] 
"=&f"(ftmp[9]), >+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) >+ : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), >+ [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02), >+ [sixteen] "f"(0x10) >+ : "memory"); >+} >+ >+void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { >+ uint64_t ftmp[11]; >+ const uint64_t value = 0x1080108010801080; >+ const uint64_t mask = 0x0001004200810019; >+ uint64_t c0 = 0x001f001f001f001f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t c2 = 0x0007000700070007; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g], %[src1], %[c2] \n\t" >+ "psllh %[g], %[g], %[three] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "psrlh %[r], %[src1], %[three] \n\t" >+ "psllh %[src0], %[b], %[three] \n\t" >+ "psrlh %[src1], %[b], %[two] \n\t" >+ "or %[b], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[g], %[two] \n\t" >+ "psrlh %[src1], %[g], %[four] \n\t" >+ "or %[g], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[r], %[three] \n\t" >+ "psrlh %[src1], %[r], %[two] \n\t" >+ "or %[r], %[src0], %[src1] \n\t" >+ "punpcklhw %[src0], %[b], %[r] \n\t" >+ "punpcklhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[src0], %[src1] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "punpckhhw %[src0], %[b], %[r] \n\t" >+ "punpckhhw %[src1], %[g], %[value] 
\n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[src0], %[src1] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g], %[src1], %[c2] \n\t" >+ "psllh %[g], %[g], %[three] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "psrlh %[r], %[src1], %[three] \n\t" >+ "psllh %[src0], %[b], %[three] \n\t" >+ "psrlh %[src1], %[b], %[two] \n\t" >+ "or %[b], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[g], %[two] \n\t" >+ "psrlh %[src1], %[g], %[four] \n\t" >+ "or %[g], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[r], %[three] \n\t" >+ "psrlh %[src1], %[r], %[two] \n\t" >+ "or %[r], %[src0], %[src1] \n\t" >+ "punpcklhw %[src0], %[b], %[r] \n\t" >+ "punpcklhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[src0], %[src1] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "punpckhhw %[src0], %[b], %[r] \n\t" >+ "punpckhhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[src0], 
%[src1] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x08 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), >+ [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), >+ [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), >+ [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) >+ : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value), >+ [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), >+ [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05), >+ [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04) >+ : "memory"); >+} >+ >+void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, >+ uint8_t* dst_y, >+ int width) { >+ uint64_t ftmp[11]; >+ const uint64_t value = 0x1080108010801080; >+ const uint64_t mask = 0x0001004200810019; >+ uint64_t c0 = 0x001f001f001f001f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t c2 = 0x0003000300030003; >+ uint64_t c3 = 0x007c007c007c007c; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g], %[src1], %[c2] \n\t" >+ "psllh %[g], %[g], %[three] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "and %[r], %[src1], %[c3] \n\t" >+ "psrlh %[r], %[r], %[two] \n\t" >+ "psllh %[src0], %[b], %[three] \n\t" >+ "psrlh %[src1], %[b], %[two] \n\t" >+ "or %[b], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[g], %[three] \n\t" >+ "psrlh %[src1], 
%[g], %[two] \n\t" >+ "or %[g], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[r], %[three] \n\t" >+ "psrlh %[src1], %[r], %[two] \n\t" >+ "or %[r], %[src0], %[src1] \n\t" >+ "punpcklhw %[src0], %[b], %[r] \n\t" >+ "punpcklhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[src0], %[src1] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "punpckhhw %[src0], %[b], %[r] \n\t" >+ "punpckhhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[src0], %[src1] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g], %[src1], %[c2] \n\t" >+ "psllh %[g], %[g], %[three] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "and %[r], %[src1], %[c3] \n\t" >+ "psrlh %[r], %[r], %[two] \n\t" >+ "psllh %[src0], %[b], %[three] \n\t" >+ "psrlh %[src1], %[b], %[two] \n\t" >+ "or %[b], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[g], %[three] \n\t" >+ "psrlh %[src1], %[g], %[two] \n\t" >+ "or %[g], %[src0], %[src1] \n\t" >+ "psllh %[src0], %[r], %[three] \n\t" >+ "psrlh %[src1], %[r], %[two] \n\t" >+ "or %[r], %[src0], %[src1] \n\t" >+ "punpcklhw %[src0], %[b], %[r] \n\t" >+ "punpcklhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], 
%[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[src0], %[src1] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "punpckhhw %[src0], %[b], %[r] \n\t" >+ "punpckhhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[src0], %[src1] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x08 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), >+ [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), >+ [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), >+ [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) >+ : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y), >+ [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0), >+ [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08), >+ [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07) >+ : "memory"); >+} >+ >+void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, >+ uint8_t* dst_y, >+ int width) { >+ uint64_t ftmp[11]; >+ uint64_t value = 0x1080108010801080; >+ uint64_t mask = 0x0001004200810019; >+ uint64_t c0 = 
0x000f000f000f000f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g], %[src0], %[four] \n\t" >+ "and %[r], %[src1], %[c0] \n\t" >+ "psllh %[src0], %[b], %[four] \n\t" >+ "or %[b], %[src0], %[b] \n\t" >+ "psllh %[src0], %[g], %[four] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "psllh %[src0], %[r], %[four] \n\t" >+ "or %[r], %[src0], %[r] \n\t" >+ "punpcklhw %[src0], %[b], %[r] \n\t" >+ "punpcklhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest0], %[src0], %[src1] \n\t" >+ "psrlw %[dest0], %[dest0], %[eight] \n\t" >+ >+ "punpckhhw %[src0], %[b], %[r] \n\t" >+ "punpckhhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest1], %[src0], %[src1] \n\t" >+ "psrlw %[dest1], %[dest1], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t" >+ "psrlh %[src1], %[src0], %[eight] \n\t" >+ "and %[b], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g], %[src0], %[four] \n\t" >+ "and %[r], %[src1], %[c0] \n\t" >+ "psllh %[src0], %[b], %[four] \n\t" >+ "or %[b], %[src0], %[b] \n\t" >+ "psllh %[src0], %[g], %[four] \n\t" >+ "or %[g], %[src0], %[g] \n\t" >+ "psllh %[src0], %[r], %[four] \n\t" 
>+ "or %[r], %[src0], %[r] \n\t" >+ "punpcklhw %[src0], %[b], %[r] \n\t" >+ "punpcklhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest2], %[src0], %[src1] \n\t" >+ "psrlw %[dest2], %[dest2], %[eight] \n\t" >+ >+ "punpckhhw %[src0], %[b], %[r] \n\t" >+ "punpckhhw %[src1], %[g], %[value] \n\t" >+ "punpcklhw %[src_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[src_hi], %[src0], %[src1] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" >+ "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" >+ "paddw %[dest3], %[src0], %[src1] \n\t" >+ "psrlw %[dest3], %[dest3], %[eight] \n\t" >+ >+ "packsswh %[src_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[src_hi], %[dest2], %[dest3] \n\t" >+ "packushb %[dest0], %[src_lo], %[src_hi] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" >+ >+ "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t" >+ "daddiu %[dst_y], %[dst_y], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x08 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), >+ [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), >+ [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), >+ [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) >+ : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y), >+ [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0), >+ [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04) >+ : "memory"); >+} >+ >+void RGB565ToUVRow_MMI(const uint8_t* src_rgb565, >+ int src_stride_rgb565, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t 
ftmp[13]; >+ uint64_t value = 0x2020202020202020; >+ uint64_t mask_u = 0x0026004a00700002; >+ uint64_t mask_v = 0x00020070005e0012; >+ uint64_t mask = 0x93; >+ uint64_t c0 = 0x001f001f001f001f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t c2 = 0x0007000700070007; >+ __asm__ volatile( >+ "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t" >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t" >+ "psrlh %[dest0_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest0_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "psrlh %[r0], %[dest0_u], %[three] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest0_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest0_v], %[src0], %[c2] \n\t" >+ "psllh %[dest0_v], %[dest0_v], %[three] \n\t" >+ "or %[dest0_v], %[src1], %[dest0_v] \n\t" >+ "psrlh %[src0], %[src0], %[three] \n\t" >+ "paddh %[b0], %[b0], %[dest0_u] \n\t" >+ "paddh %[g0], %[g0], %[dest0_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ 
"pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest0_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t" >+ "psrlh %[dest1_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest1_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "psrlh %[r0], %[dest1_u], %[three] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest1_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest1_v], %[src0], %[c2] \n\t" >+ "psllh %[dest1_v], %[dest1_v], %[three] \n\t" >+ "or %[dest1_v], %[src1], %[dest1_v] \n\t" >+ "psrlh %[src0], %[src0], %[three] \n\t" >+ "paddh %[b0], %[b0], %[dest1_u] \n\t" >+ "paddh %[g0], %[g0], %[dest1_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], 
%[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest1_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t" >+ "psrlh %[dest2_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest2_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "psrlh %[r0], %[dest2_u], %[three] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest2_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest2_v], %[src0], %[c2] \n\t" >+ "psllh %[dest2_v], %[dest2_v], %[three] \n\t" >+ "or %[dest2_v], %[src1], %[dest2_v] \n\t" >+ "psrlh %[src0], %[src0], %[three] \n\t" >+ "paddh %[b0], %[b0], %[dest2_u] \n\t" >+ "paddh %[g0], %[g0], %[dest2_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] 
\n\t" >+ "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest2_u], %[dest2_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest2_u], %[dest2_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest2_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[g0] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t" >+ "psrlh %[dest3_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest3_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "psrlh %[r0], %[dest3_u], %[three] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest3_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest3_v], %[src0], %[c2] \n\t" >+ "psllh %[dest3_v], %[dest3_v], %[three] 
\n\t" >+ "or %[dest3_v], %[src1], %[dest3_v] \n\t" >+ "psrlh %[src0], %[src0], %[three] \n\t" >+ "paddh %[b0], %[b0], %[dest3_u] \n\t" >+ "paddh %[g0], %[g0], %[dest3_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest3_u], %[dest3_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest3_u], %[dest3_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest3_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[g0] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 
0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t" >+ "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), >+ [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), >+ [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), >+ [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]), >+ [dest3_v] "=&f"(ftmp[12]) >+ : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), >+ [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), >+ [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03), >+ [one] "f"(0x01) >+ : "memory"); >+} >+ >+void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555, >+ int src_stride_argb1555, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t ftmp[11]; >+ uint64_t value = 0x2020202020202020; >+ uint64_t mask_u = 0x0026004a00700002; >+ uint64_t mask_v = 0x00020070005e0012; >+ uint64_t mask = 0x93; >+ uint64_t c0 = 0x001f001f001f001f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t c2 = 0x0003000300030003; >+ uint64_t c3 = 0x007c007c007c007c; >+ __asm__ volatile( >+ "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t" >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t" >+ "psrlh %[dest0_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest0_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] 
\n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "and %[r0], %[dest0_u], %[c3] \n\t" >+ "psrlh %[r0], %[r0], %[two] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest0_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest0_v], %[src0], %[c2] \n\t" >+ "psllh %[dest0_v], %[dest0_v], %[three] \n\t" >+ "or %[dest0_v], %[src1], %[dest0_v] \n\t" >+ "and %[src0], %[src0], %[c3] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "paddh %[b0], %[b0], %[dest0_u] \n\t" >+ "paddh %[g0], %[g0], %[dest0_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[six] \n\t" >+ "psllh %[g0], %[g0], %[one] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest0_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw 
%[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t" >+ "psrlh %[dest1_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest1_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "and %[r0], %[dest1_u], %[c3] \n\t" >+ "psrlh %[r0], %[r0], %[two] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest1_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest1_v], %[src0], %[c2] \n\t" >+ "psllh %[dest1_v], %[dest1_v], %[three] \n\t" >+ "or %[dest1_v], %[src1], %[dest1_v] \n\t" >+ "and %[src0], %[src0], %[c3] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "paddh %[b0], %[b0], %[dest1_u] \n\t" >+ "paddh %[g0], %[g0], %[dest1_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[six] \n\t" >+ "psllh %[g0], %[g0], %[one] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest1_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] 
\n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t" >+ "psrlh %[dest2_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest2_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "and %[r0], %[dest2_u], %[c3] \n\t" >+ "psrlh %[r0], %[r0], %[two] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest2_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest0_v], %[src0], %[c2] \n\t" >+ "psllh %[dest0_v], %[dest0_v], %[three] \n\t" >+ "or %[dest0_v], %[src1], %[dest0_v] \n\t" >+ "and %[src0], %[src0], %[c3] \n\t" >+ "psrlh %[src0], %[src0], %[two] \n\t" >+ "paddh %[b0], %[b0], %[dest2_u] \n\t" >+ "paddh %[g0], %[g0], %[dest0_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest2_u], %[dest0_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], 
%[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[six] \n\t" >+ "psllh %[g0], %[g0], %[one] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest2_u], %[dest0_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest2_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t" >+ "psrlh %[dest3_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[src0], %[src0], %[five] \n\t" >+ "and %[g0], %[dest3_u], %[c2] \n\t" >+ "psllh %[g0], %[g0], %[three] \n\t" >+ "or %[g0], %[src0], %[g0] \n\t" >+ "and %[r0], %[dest3_u], %[c3] \n\t" >+ "psrlh %[r0], %[r0], %[two] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest3_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[src1], %[src1], %[five] \n\t" >+ "and %[dest1_v], %[src0], %[c2] \n\t" >+ "psllh %[dest1_v], %[dest1_v], %[three] \n\t" >+ "or %[dest1_v], %[src1], %[dest1_v] \n\t" >+ "and %[src0], %[src0], %[c3] \n\t" >+ "psrlh 
%[src0], %[src0], %[two] \n\t" >+ "paddh %[b0], %[b0], %[dest3_u] \n\t" >+ "paddh %[g0], %[g0], %[dest1_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest3_u], %[dest1_v] \n\t" >+ "psrlh %[b0], %[src0], %[six] \n\t" >+ "psllh %[r0], %[src0], %[one] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[six] \n\t" >+ "psllh %[g0], %[g0], %[one] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest3_u], %[dest1_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest3_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[dest0_u], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t" >+ "packushb %[dest0_v], %[dest1_u], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_argb1555], 
%[src_argb1555], 0x20 \n\t" >+ "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), >+ [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), >+ [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), >+ [dest1_v] "=&f"(ftmp[10]) >+ : [src_argb1555] "r"(src_argb1555), >+ [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u), >+ [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value), >+ [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), >+ [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), >+ [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03), >+ [two] "f"(0x02), [one] "f"(0x01) >+ : "memory"); >+} >+ >+void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444, >+ int src_stride_argb4444, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t ftmp[13]; >+ uint64_t value = 0x2020202020202020; >+ uint64_t mask_u = 0x0026004a00700002; >+ uint64_t mask_v = 0x00020070005e0012; >+ uint64_t mask = 0x93; >+ uint64_t c0 = 0x000f000f000f000f; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ __asm__ volatile( >+ "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t" >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t" >+ "psrlh %[dest0_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g0], %[src0], %[four] \n\t" >+ "and %[r0], %[dest0_u], %[c0] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest0_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[dest0_v], %[src1], %[four] \n\t" >+ 
"and %[src0], %[src0], %[c0] \n\t" >+ "paddh %[b0], %[b0], %[dest0_u] \n\t" >+ "paddh %[g0], %[g0], %[dest0_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" >+ "psrlh %[b0], %[src0], %[four] \n\t" >+ "psllh %[r0], %[src0], %[two] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[four] \n\t" >+ "psllh %[g0], %[g0], %[two] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest0_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t" >+ "psrlh %[dest1_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g0], %[src0], %[four] \n\t" >+ "and %[r0], %[dest1_u], %[c0] \n\t" >+ "psrlh %[src0], 
%[src1], %[eight] \n\t" >+ "and %[dest1_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[dest1_v], %[src1], %[four] \n\t" >+ "and %[src0], %[src0], %[c0] \n\t" >+ "paddh %[b0], %[b0], %[dest1_u] \n\t" >+ "paddh %[g0], %[g0], %[dest1_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" >+ "psrlh %[b0], %[src0], %[four] \n\t" >+ "psllh %[r0], %[src0], %[two] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[four] \n\t" >+ "psllh %[g0], %[g0], %[two] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest1_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t" >+ "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t" >+ "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t" >+ "psrlh %[dest2_u], %[src0], %[eight] \n\t" >+ "and %[b0], 
%[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g0], %[src0], %[four] \n\t" >+ "and %[r0], %[dest2_u], %[c0] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest2_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[dest2_v], %[src1], %[four] \n\t" >+ "and %[src0], %[src0], %[c0] \n\t" >+ "paddh %[b0], %[b0], %[dest2_u] \n\t" >+ "paddh %[g0], %[g0], %[dest2_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest2_u], %[dest2_v] \n\t" >+ "psrlh %[b0], %[src0], %[four] \n\t" >+ "psllh %[r0], %[src0], %[two] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[four] \n\t" >+ "psllh %[g0], %[g0], %[two] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest2_u], %[dest2_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest2_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[g0] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t" >+ "gsldrc1 
%[src1], 0x18(%[next_argb4444]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t" >+ "psrlh %[dest3_u], %[src0], %[eight] \n\t" >+ "and %[b0], %[src0], %[c0] \n\t" >+ "and %[src0], %[src0], %[c1] \n\t" >+ "psrlh %[g0], %[src0], %[four] \n\t" >+ "and %[r0], %[dest3_u], %[c0] \n\t" >+ "psrlh %[src0], %[src1], %[eight] \n\t" >+ "and %[dest3_u], %[src1], %[c0] \n\t" >+ "and %[src1], %[src1], %[c1] \n\t" >+ "psrlh %[dest3_v], %[src1], %[four] \n\t" >+ "and %[src0], %[src0], %[c0] \n\t" >+ "paddh %[b0], %[b0], %[dest3_u] \n\t" >+ "paddh %[g0], %[g0], %[dest3_v] \n\t" >+ "paddh %[r0], %[r0], %[src0] \n\t" >+ "punpcklhw %[src0], %[b0], %[r0] \n\t" >+ "punpckhhw %[src1], %[b0], %[r0] \n\t" >+ "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" >+ "paddh %[src0], %[dest3_u], %[dest3_v] \n\t" >+ "psrlh %[b0], %[src0], %[four] \n\t" >+ "psllh %[r0], %[src0], %[two] \n\t" >+ "or %[b0], %[b0], %[r0] \n\t" >+ "psrlh %[r0], %[g0], %[four] \n\t" >+ "psllh %[g0], %[g0], %[two] \n\t" >+ "or %[g0], %[g0], %[r0] \n\t" >+ "punpcklhw %[src0], %[g0], %[value] \n\t" >+ "punpckhhw %[src1], %[g0], %[value] \n\t" >+ "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" >+ "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" >+ "paddh %[g0], %[dest3_u], %[dest3_v] \n\t" >+ "punpcklhw %[src0], %[b0], %[g0] \n\t" >+ "punpckhhw %[src1], %[b0], %[g0] \n\t" >+ >+ "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t" >+ "pshufh %[dest3_u], %[src0], %[mask] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[g0], %[src1], %[mask_v] \n\t" >+ "pshufh %[b0], %[src1], %[mask] \n\t" >+ "pmaddhw %[b0], %[b0], %[mask_u] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[g0] \n\t" >+ "punpckhwd %[src1], %[dest3_v], %[g0] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw 
%[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t" >+ "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddiu %[width], %[width], -0x10 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), >+ [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), >+ [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), >+ [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), >+ [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]), >+ [dest3_v] "=&f"(ftmp[12]) >+ : [src_argb4444] "r"(src_argb4444), >+ [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u), >+ [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value), >+ [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u), >+ [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04), >+ [two] "f"(0x02) >+ : "memory"); >+} >+ >+void ARGBToUV444Row_MMI(const uint8_t* src_argb, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t ftmp[12]; >+ const uint64_t value = 0x4040; >+ const uint64_t mask_u = 0x0026004a00700002; >+ const uint64_t mask_v = 0x00020070005e0012; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "dsll %[dest0_u], %[src_lo], %[sixteen] 
\n\t" >+ "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" >+ "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t" >+ "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" >+ "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" >+ >+ "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" >+ "psubw %[dest0_u], %[src0], %[src1] \n\t" >+ "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" >+ "psubw %[dest0_v], %[src1], %[src0] \n\t" >+ "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t" >+ "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" >+ "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t" >+ "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" >+ "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" >+ "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" >+ "psubw %[dest1_u], %[src0], %[src1] \n\t" >+ "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" >+ "psubw %[dest1_v], %[src1], %[src0] \n\t" >+ "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t" >+ 
"gsldlc1 %[src0], 0x17(%[src_argb]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t" >+ "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" >+ "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t" >+ "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" >+ "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" >+ "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" >+ "psubw %[dest2_u], %[src0], %[src1] \n\t" >+ "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" >+ "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" >+ "psubw %[dest2_v], %[src1], %[src0] \n\t" >+ "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" >+ >+ "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t" >+ "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t" >+ "punpcklbh %[src_lo], %[src0], %[zero] \n\t" >+ "punpckhbh %[src_hi], %[src0], %[zero] \n\t" >+ "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t" >+ "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" >+ "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t" >+ "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" >+ "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" >+ "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" >+ "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" >+ "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" >+ "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" >+ >+ "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" >+ "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" >+ "psubw %[dest3_u], %[src0], %[src1] \n\t" >+ "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" >+ "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" >+ "punpckhwd 
%[src1], %[dest3_v], %[src_hi] \n\t" >+ "psubw %[dest3_v], %[src1], %[src0] \n\t" >+ "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" >+ >+ "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" >+ "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" >+ "packushb %[dest0_u], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" >+ "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" >+ >+ "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" >+ "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" >+ "packushb %[dest0_v], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" >+ "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" >+ >+ "daddiu %[src_argb], %[src_argb], 0x20 \n\t" >+ "daddiu %[dst_u], %[dst_u], 0x08 \n\t" >+ "daddiu %[dst_v], %[dst_v], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bgtz %[width], 1b \n\t" >+ : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), >+ [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]), >+ [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]), >+ [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]), >+ [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]), >+ [dest3_v] "=&f"(ftmp[11]) >+ : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), >+ [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), >+ [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10), >+ [eight] "f"(0x08) >+ : "memory"); >+} >+ >+void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) { >+ uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi; >+ uint64_t tmp0, tmp1; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = 0x01; >+ const uint64_t mask2 = 0x00400026004B000FULL; >+ const uint64_t mask3 = 0xFF000000FF000000ULL; >+ const uint64_t mask4 = ~mask3; >+ const uint64_t shift = 0x07; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ >+ "and %[src37], %[src], %[mask3] \n\t" >+ >+ "punpcklbh %[src_lo], %[src], %[mask0] 
\n\t" >+ "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t" >+ "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t" >+ "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t" >+ "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t" >+ "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t" >+ "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t" >+ "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t" >+ >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" >+ "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t" >+ "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t" >+ "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t" >+ "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t" >+ "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t" >+ "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t" >+ "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "and %[dest], %[dest], %[mask4] \n\t" >+ "or %[dest], %[dest], %[src37] \n\t" >+ >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), >+ [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0), >+ [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest), >+ [src37] "=&f"(src37) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width), >+ [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1), >+ [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4) >+ : "memory"); >+} >+ >+// Convert a row of image to Sepia tone. 
// Converts a row of ARGB pixels to sepia tone in place.
//
// Two pixels (8 bytes) are processed per iteration. Output bytes 0..2 of each
// pixel are the weighted sums of input bytes 0..2 with the weights
// (17, 68, 35), (22, 88, 45) and (24, 98, 50), each shifted right by 7 and
// saturated. Bytes 3 and 7 of every 8-byte group are preserved (mask4).
//
// The loop subtracts 2 from |width| and exits only when it reaches exactly
// zero, so |width| must be a positive multiple of 2.
void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
  uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
  uint64_t tmp0, tmp1;
  const uint64_t mask0 = 0x0;
  const uint64_t mask1 = 0x002300440011ULL;
  const uint64_t mask2 = 0x002D00580016ULL;
  const uint64_t mask3 = 0x003200620018ULL;
  const uint64_t mask4 = 0xFF000000FF000000ULL;
  const uint64_t shift = 0x07;

  // dst_argb and width are advanced by the assembly, so they are declared as
  // read-write ("+r") operands rather than inputs.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      // Keep bytes 3 and 7 to merge back after the colour math.
      "and %[dest37], %[dest], %[mask4] \n\t"

      // First pixel.
      "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"
      "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"
      "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"
      "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"
      "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
      "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
      "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest0], %[dest0], %[shift] \n\t"
      "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
      "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
      "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest1], %[dest1], %[shift] \n\t"
      "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"

      // Second pixel.
      "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"
      "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
      "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
      "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
      "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
      "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
      "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest0], %[dest0], %[shift] \n\t"
      "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
      "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
      "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest1], %[dest1], %[shift] \n\t"
      "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"

      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "or %[dest], %[dest], %[dest37] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [dst_ptr] "+r"(dst_argb), [width] "+r"(width),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
        [dest] "=&f"(dest)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
        [mask3] "f"(mask3), [mask4] "f"(mask4), [shift] "f"(shift)
      : "memory");
}

// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
//
// Reads 16 signed bytes from |matrix_argb| (four rows of four coefficients).
// Each output channel is the dot product of the four source channels with
// one matrix row, arithmetically shifted right by 6 and saturated to 8 bits.
// Two pixels are processed per iteration; |width| must be a positive
// multiple of 2 because the loop exits only when the counter is exactly 0.
void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            const int8_t* matrix_argb,
                            int width) {
  uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
      dest3;
  uint64_t matrix, matrix_hi, matrix_lo;
  uint64_t tmp0, tmp1;
  const uint64_t shift0 = 0x06;
  const uint64_t shift1 = 0x08;
  const uint64_t mask0 = 0x0;

  // src_argb, dst_argb and width are advanced by the assembly, so they are
  // read-write ("+r") operands. matrix_argb is only read and stays an input.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"

      "punpcklbh %[src_lo], %[src], %[mask0] \n\t"

      // First pixel, matrix rows 0 and 1. The psllh/psrah pair sign-extends
      // each int8 coefficient to 16 bits.
      "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest0], %[dest0], %[shift0] \n\t"

      // First pixel, matrix rows 2 and 3.
      "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest1], %[dest1], %[shift0] \n\t"

      "punpckhbh %[src_hi], %[src], %[mask0] \n\t"

      // Second pixel, matrix rows 0 and 1.
      "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest2], %[dest2], %[shift0] \n\t"

      // Second pixel, matrix rows 2 and 3.
      "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest3], %[dest3], %[shift0] \n\t"

      "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
      "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
      "packushb %[dest], %[tmp0], %[tmp1] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src_argb), [dst_ptr] "+r"(dst_argb),
        [width] "+r"(width), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
        [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
        [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix)
      : [matrix_ptr] "r"(matrix_argb), [shift0] "f"(shift0),
        [shift1] "f"(shift1), [mask0] "f"(mask0)
      : "memory");
}

// Scales every channel of a row of ARGB pixels by the matching byte of
// |value|: byte 0 of |value| scales byte 0 of each pixel, and so on.
//
// Both the pixel byte and the scale byte are widened to 16 bits by byte
// duplication (x * 0x0101). The result is the high 16 bits of their product,
// shifted right by a further 8.
//
// Two pixels are processed per iteration; |width| must be a positive
// multiple of 2 because the loop exits only when the counter is exactly 0.
void ARGBShadeRow_MMI(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      int width,
                      uint32_t value) {
  uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
  const uint64_t shift = 0x08;

  // Expand |value| once, outside the loop: each of its four bytes is
  // duplicated into one 16-bit lane. Doing this expansion on the operand
  // inside the loop re-expanded the already expanded register on every
  // iteration, which corrupted the per-channel scales from the third pixel
  // onwards.
  uint64_t scale = 0;
  for (int i = 0; i < 4; ++i) {
    const uint64_t channel = (value >> (8 * i)) & 0xff;
    scale |= (channel * 0x0101) << (16 * i);
  }

  // src_argb, dst_argb and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
      "punpcklbh %[src_lo], %[src], %[src] \n\t"
      "punpckhbh %[src_hi], %[src], %[src] \n\t"

      "pmulhuh %[dest_lo], %[src_lo], %[scale] \n\t"
      "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
      "pmulhuh %[dest_hi], %[src_hi], %[scale] \n\t"
      "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src_argb), [dst_ptr] "+r"(dst_argb),
        [width] "+r"(width), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
        [dest] "=&f"(dest)
      : [scale] "f"(scale), [shift] "f"(shift)
      : "memory");
}

// Multiplies two rows of ARGB pixels channel by channel.
//
// Bytes of |src_argb0| are widened by duplication (x * 0x0101) and bytes of
// |src_argb1| by zero extension. Each output byte is the high 16 bits of
// their product, saturated to 8 bits.
//
// Two pixels are processed per iteration; |width| must be a positive
// multiple of 2 because the loop exits only when the counter is exactly 0.
void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
                         const uint8_t* src_argb1,
                         uint8_t* dst_argb,
                         int width) {
  uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
  uint64_t dest, dest_lo, dest_hi;
  const uint64_t mask = 0x0;

  // All three pointers and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
      "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
      "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
      "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"

      "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
      "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
      "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"

      "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
      "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
      "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src0_ptr] "+r"(src_argb0), [src1_ptr] "+r"(src_argb1),
        [dst_ptr] "+r"(dst_argb), [width] "+r"(width),
        [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
        [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
        [src1] "=&f"(src1), [dest] "=&f"(dest)
      : [mask] "f"(mask)
      : "memory");
}

// Adds two rows of ARGB pixels byte by byte with unsigned saturation
// (paddusb).
//
// Two pixels are processed per iteration; |width| must be a positive
// multiple of 2 because the loop exits only when the counter is exactly 0.
void ARGBAddRow_MMI(const uint8_t* src_argb0,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  uint64_t src0, src1, dest;

  // All three pointers and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
      "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
      "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
      "paddusb %[dest], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
      "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src0_ptr] "+r"(src_argb0), [src1_ptr] "+r"(src_argb1),
        [dst_ptr] "+r"(dst_argb), [width] "+r"(width), [src0] "=&f"(src0),
        [src1] "=&f"(src1), [dest] "=&f"(dest)
      :
      : "memory");
}

// Subtracts |src_argb1| from |src_argb0| byte by byte with unsigned
// saturation (psubusb).
//
// Two pixels are processed per iteration; |width| must be a positive
// multiple of 2 because the loop exits only when the counter is exactly 0.
void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
                         const uint8_t* src_argb1,
                         uint8_t* dst_argb,
                         int width) {
  uint64_t src0, src1, dest;

  // All three pointers and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
      "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
      "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
      "psubusb %[dest], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
      "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src0_ptr] "+r"(src_argb0), [src1_ptr] "+r"(src_argb1),
        [dst_ptr] "+r"(dst_argb), [width] "+r"(width), [src0] "=&f"(src0),
        [src1] "=&f"(src1), [dest] "=&f"(dest)
      :
      : "memory");
}

// Sobel functions which mimic SSSE3.
// Computes the horizontal Sobel response for a row.
//
// For every output i: |(a + 2b + c) - (a_sub + 2b_sub + c_sub)| saturated to
// 8 bits, where a/b/c are src_y0/src_y1/src_y2 at column i and the *_sub
// values are the same rows at column i + 2.
//
// Eight outputs are produced per iteration, and the loop continues while the
// remaining |width| is greater than zero. Each iteration reads source bytes
// at offsets 0 through 13 from the current position in every row.
void SobelXRow_MMI(const uint8_t* src_y0,
                   const uint8_t* src_y1,
                   const uint8_t* src_y2,
                   uint8_t* dst_sobelx,
                   int width) {
  uint64_t y00 = 0, y10 = 0, y20 = 0;
  uint64_t y02 = 0, y12 = 0, y22 = 0;
  uint64_t zero = 0x0;
  uint64_t sobel = 0x0;

  // The row pointers, dst_sobelx and width are advanced by the assembly, so
  // they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      // Outputs 0..3.
      "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t"  // a=src_y0[i]
      "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t"  // a_sub=src_y0[i+2]
      "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t"  // b=src_y1[i]
      "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t"  // b_sub=src_y1[i+2]
      "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"

      "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t"  // c=src_y2[i]
      "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
      "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t"  // c_sub=src_y2[i+2]
      "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"

      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y20], %[y20], %[zero] \n\t"

      "punpcklbh %[y02], %[y02], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"
      "punpcklbh %[y22], %[y22], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y10] \n\t"  // a+b
      "paddh %[y20], %[y20], %[y10] \n\t"  // c+b
      "paddh %[y00], %[y00], %[y20] \n\t"  // a+2b+c

      "paddh %[y02], %[y02], %[y12] \n\t"  // a_sub+b_sub
      "paddh %[y22], %[y22], %[y12] \n\t"  // c_sub+b_sub
      "paddh %[y02], %[y02], %[y22] \n\t"  // a_sub+2b_sub+c_sub

      "pmaxsh %[y10], %[y00], %[y02] \n\t"
      "pminsh %[y20], %[y00], %[y02] \n\t"
      "psubh %[sobel], %[y10], %[y20] \n\t"  // Abs

      // Outputs 4..7: same computation, four columns further on.
      "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
      "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
      "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
      "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
      "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"

      "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
      "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
      "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
      "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"

      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y20], %[y20], %[zero] \n\t"

      "punpcklbh %[y02], %[y02], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"
      "punpcklbh %[y22], %[y22], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y10] \n\t"
      "paddh %[y20], %[y20], %[y10] \n\t"
      "paddh %[y00], %[y00], %[y20] \n\t"

      "paddh %[y02], %[y02], %[y12] \n\t"
      "paddh %[y22], %[y22], %[y12] \n\t"
      "paddh %[y02], %[y02], %[y22] \n\t"

      "pmaxsh %[y10], %[y00], %[y02] \n\t"
      "pminsh %[y20], %[y00], %[y02] \n\t"
      "psubh %[y00], %[y10], %[y20] \n\t"

      "packushb %[sobel], %[sobel], %[y00] \n\t"  // clamp255
      "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
      "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"

      "daddiu %[src_y0], %[src_y0], 8 \n\t"
      "daddiu %[src_y1], %[src_y1], 8 \n\t"
      "daddiu %[src_y2], %[src_y2], 8 \n\t"
      "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [src_y0] "+r"(src_y0), [src_y1] "+r"(src_y1), [src_y2] "+r"(src_y2),
        [dst_sobelx] "+r"(dst_sobelx), [width] "+r"(width),
        [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
        [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
      : [zero] "f"(zero)
      : "memory");
}

// Computes the vertical Sobel response for a row.
//
// For every output i: |(a + 2b + c) - (a_sub + 2b_sub + c_sub)| saturated to
// 8 bits, where a/b/c are src_y0 at columns i, i + 1, i + 2 and the *_sub
// values are src_y1 at the same columns.
//
// Eight outputs are produced per iteration, and the loop continues while the
// remaining |width| is greater than zero. Each iteration reads source bytes
// at offsets 0 through 13 from the current position in both rows.
void SobelYRow_MMI(const uint8_t* src_y0,
                   const uint8_t* src_y1,
                   uint8_t* dst_sobely,
                   int width) {
  uint64_t y00 = 0, y01 = 0, y02 = 0;
  uint64_t y10 = 0, y11 = 0, y12 = 0;
  uint64_t zero = 0x0;
  uint64_t sobel = 0x0;

  // The row pointers, dst_sobely and width are advanced by the assembly, so
  // they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      // Outputs 0..3.
      "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t"  // a=src_y0[i]
      "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
      "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t"  // b=src_y0[i+1]
      "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t"  // c=src_y0[i+2]
      "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t"  // a_sub=src_y1[i]
      "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
      "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t"  // b_sub=src_y1[i+1]
      "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t"  // c_sub=src_y1[i+2]
      "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"

      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y01], %[y01], %[zero] \n\t"
      "punpcklbh %[y02], %[y02], %[zero] \n\t"

      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y11], %[y11], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y01] \n\t"  // a+b
      "paddh %[y02], %[y02], %[y01] \n\t"  // c+b
      "paddh %[y00], %[y00], %[y02] \n\t"  // a+2b+c

      "paddh %[y10], %[y10], %[y11] \n\t"  // a_sub+b_sub
      "paddh %[y12], %[y12], %[y11] \n\t"  // c_sub+b_sub
      "paddh %[y10], %[y10], %[y12] \n\t"  // a_sub+2b_sub+c_sub

      "pmaxsh %[y02], %[y00], %[y10] \n\t"
      "pminsh %[y12], %[y00], %[y10] \n\t"
      "psubh %[sobel], %[y02], %[y12] \n\t"  // Abs

      // Outputs 4..7: same computation, four columns further on.
      "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
      "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
      "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
      "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
      "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
      "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
      "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
      "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
      "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"

      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y01], %[y01], %[zero] \n\t"
      "punpcklbh %[y02], %[y02], %[zero] \n\t"

      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y11], %[y11], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y01] \n\t"
      "paddh %[y02], %[y02], %[y01] \n\t"
      "paddh %[y00], %[y00], %[y02] \n\t"

      "paddh %[y10], %[y10], %[y11] \n\t"
      "paddh %[y12], %[y12], %[y11] \n\t"
      "paddh %[y10], %[y10], %[y12] \n\t"

      "pmaxsh %[y02], %[y00], %[y10] \n\t"
      "pminsh %[y12], %[y00], %[y10] \n\t"
      "psubh %[y00], %[y02], %[y12] \n\t"

      "packushb %[sobel], %[sobel], %[y00] \n\t"  // clamp255
      "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
      "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"

      "daddiu %[src_y0], %[src_y0], 8 \n\t"
      "daddiu %[src_y1], %[src_y1], 8 \n\t"
      "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [src_y0] "+r"(src_y0), [src_y1] "+r"(src_y1),
        [dst_sobely] "+r"(dst_sobely), [width] "+r"(width),
        [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
        [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
      : [zero] "f"(zero)
      : "memory");
}

// Combines the X and Y Sobel planes into ARGB pixels.
//
// For every input column, s = saturating_add(sobelx, sobely) is written to
// bytes 0..2 of the output pixel and byte 3 is forced to 0xff.
//
// Eight input columns (32 output bytes) are processed per iteration, and the
// loop continues while the remaining |width| is greater than zero.
void SobelRow_MMI(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  double temp[3];
  uint64_t c1 = 0xff000000ff000000;

  // The source pointers, dst_argb and width are advanced by the assembly, so
  // they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t"  // a=src_sobelx[i]
      "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
      "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t"  // b=src_sobely[i]
      "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
      // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
      "paddusb %[t2], %[t0], %[t1] \n\t"

      // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
      "punpcklbh %[t0], %[t2], %[t2] \n\t"

      // s1 s1 s0 s0->s1 s1 s1 s1 s0 s0 s0 s0
      "punpcklbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      // 255 s1 s1 s1 255 s0 s0 s0
      "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"

      // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
      "punpckhbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      // 255 s3 s3 s3 255 s2 s2 s2
      "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"

      // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
      "punpckhbh %[t0], %[t2], %[t2] \n\t"

      // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
      "punpcklbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"

      // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
      "punpckhbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"

      "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
      "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
      "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [src_sobelx] "+r"(src_sobelx), [src_sobely] "+r"(src_sobely),
        [dst_argb] "+r"(dst_argb), [width] "+r"(width), [t0] "=&f"(temp[0]),
        [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
      : [c1] "f"(c1)
      : "memory");
}

// Writes the saturating byte-wise sum of the X and Y Sobel planes to |dst_y|.
//
// Eight bytes are processed per iteration, and the loop continues while the
// remaining |width| is greater than zero.
void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
                         const uint8_t* src_sobely,
                         uint8_t* dst_y,
                         int width) {
  uint64_t tr = 0;
  uint64_t tb = 0;

  // The source pointers, dst_y and width are advanced by the assembly, so
  // they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
      "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t"  // r=src_sobelx[i]
      "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
      "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t"  // b=src_sobely[i]
      "paddusb %[tr], %[tr], %[tb] \n\t"        // g
      "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
      "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"

      "daddiu %[dst_y], %[dst_y], 8 \n\t"
      "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
      "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [src_sobelx] "+r"(src_sobelx), [src_sobely] "+r"(src_sobely),
        [dst_y] "+r"(dst_y), [width] "+r"(width), [tr] "=&f"(tr),
        [tb] "=&f"(tb)
      :
      : "memory");
}

// Packs the X and Y Sobel planes into ARGB pixels.
//
// Per output pixel: byte 0 = sobely, byte 1 = saturating_add(sobelx, sobely),
// byte 2 = sobelx, byte 3 = 0xff.
//
// Eight input columns (32 output bytes) are processed per iteration, and the
// loop continues while the remaining |width| is greater than zero.
void SobelXYRow_MMI(const uint8_t* src_sobelx,
                    const uint8_t* src_sobely,
                    uint8_t* dst_argb,
                    int width) {
  uint64_t temp[3];
  uint64_t result = 0;
  uint64_t gb = 0;
  uint64_t cr = 0;
  uint64_t c1 = 0xffffffffffffffff;

  // The source pointers, dst_argb and width are advanced by the assembly, so
  // they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t"  // r=src_sobelx[i]
      "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
      "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t"  // b=src_sobely[i]
      "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
      "paddusb %[tg], %[tr], %[tb] \n\t"         // g

      // g3 b3 g2 b2 g1 b1 g0 b0
      "punpcklbh %[gb], %[tb], %[tg] \n\t"
      // c3 r3 c2 r2 c1 r1 c0 r0
      "punpcklbh %[cr], %[tr], %[c1] \n\t"
      // c1 r1 g1 b1 c0 r0 g0 b0
      "punpcklhw %[result], %[gb], %[cr] \n\t"
      "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
      "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
      // c3 r3 g3 b3 c2 r2 g2 b2
      "punpckhhw %[result], %[gb], %[cr] \n\t"
      "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
      "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"

      // g7 b7 g6 b6 g5 b5 g4 b4
      "punpckhbh %[gb], %[tb], %[tg] \n\t"
      // c7 r7 c6 r6 c5 r5 c4 r4
      "punpckhbh %[cr], %[tr], %[c1] \n\t"
      // c5 r5 g5 b5 c4 r4 g4 b4
      "punpcklhw %[result], %[gb], %[cr] \n\t"
      "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
      "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
      // c7 r7 g7 b7 c6 r6 g6 b6
      "punpckhhw %[result], %[gb], %[cr] \n\t"
      "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
      "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"

      "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
      "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
      "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [src_sobelx] "+r"(src_sobelx), [src_sobely] "+r"(src_sobely),
        [dst_argb] "+r"(dst_argb), [width] "+r"(width), [tr] "=&f"(temp[0]),
        [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]), [gb] "=&f"(gb),
        [cr] "=&f"(cr), [result] "=&f"(result)
      : [c1] "f"(c1)
      : "memory");
}

// Expands a row of 8-bit Y samples to ARGB: each sample is replicated into
// bytes 0..2 of its pixel and byte 3 is set to 0xff.
//
// Four samples (16 output bytes) are processed per iteration; |width| must
// be a positive multiple of 4 because the loop exits only when the counter
// is exactly 0.
void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  // Copy a Y to RGB.
  uint64_t src, dest;
  const uint64_t mask0 = 0x00ffffff00ffffffULL;
  const uint64_t mask1 = ~mask0;

  // src_y, dst_argb and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
      "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
      "punpcklbh %[src], %[src], %[src] \n\t"
      "punpcklhw %[dest], %[src], %[src] \n\t"
      "and %[dest], %[dest], %[mask0] \n\t"
      "or %[dest], %[dest], %[mask1] \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"

      "punpckhhw %[dest], %[src], %[src] \n\t"
      "and %[dest], %[dest], %[mask0] \n\t"
      "or %[dest], %[dest], %[mask1] \n\t"
      "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
      "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"

      "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src_y), [dst_ptr] "+r"(dst_argb), [width] "+r"(width),
        [src] "=&f"(src), [dest] "=&f"(dest)
      : [mask0] "f"(mask0), [mask1] "f"(mask1)
      : "memory");
}

// Converts a row of 8-bit Y samples to ARGB with a fixed-point scale and
// offset.
//
// For each sample y: v = ((((y << 8) + y) * 0x4A35) >> 16) - 0x0488,
// arithmetically shifted right by 6 and saturated to 8 bits. v is written to
// bytes 0..2 of the pixel and byte 3 is set to 0xff.
//
// Eight samples (32 output bytes) are processed per iteration; |width| must
// be a positive multiple of 8 because the loop exits only when the counter
// is exactly 0.
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
  uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
  const uint64_t mask0 = 0x0;   // pshufh selector: broadcast halfword 0
  const uint64_t mask1 = 0x55;  // pshufh selector: broadcast halfword 1
  const uint64_t mask2 = 0xAA;  // pshufh selector: broadcast halfword 2
  const uint64_t mask3 = 0xFF;  // pshufh selector: broadcast halfword 3; also
                                // the alpha value inserted by pinsrh_3
  const uint64_t mask4 = 0x4A354A354A354A35ULL;
  const uint64_t mask5 = 0x0488048804880488ULL;
  const uint64_t shift0 = 0x08;
  const uint64_t shift1 = 0x06;

  // src_y, rgb_buf and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
      "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
      "punpckhbh %[src_hi], %[src], %[mask0] \n\t"

      // Samples 0 and 1.
      "pshufh %[src], %[src_lo], %[mask0] \n\t"
      "psllh %[dest_lo], %[src], %[shift0] \n\t"
      "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
      "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
      "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
      "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
      "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
      "pshufh %[src], %[src_lo], %[mask1] \n\t"
      "psllh %[dest_hi], %[src], %[shift0] \n\t"
      "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
      "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
      "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
      "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
      "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      // Samples 2 and 3.
      "pshufh %[src], %[src_lo], %[mask2] \n\t"
      "psllh %[dest_lo], %[src], %[shift0] \n\t"
      "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
      "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
      "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
      "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
      "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
      "pshufh %[src], %[src_lo], %[mask3] \n\t"
      "psllh %[dest_hi], %[src], %[shift0] \n\t"
      "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
      "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
      "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
      "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
      "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"

      // Samples 4 and 5.
      "pshufh %[src], %[src_hi], %[mask0] \n\t"
      "psllh %[dest_lo], %[src], %[shift0] \n\t"
      "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
      "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
      "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
      "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
      "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
      "pshufh %[src], %[src_hi], %[mask1] \n\t"
      "psllh %[dest_hi], %[src], %[shift0] \n\t"
      "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
      "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
      "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
      "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
      "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"

      // Samples 6 and 7.
      "pshufh %[src], %[src_hi], %[mask2] \n\t"
      "psllh %[dest_lo], %[src], %[shift0] \n\t"
      "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
      "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
      "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
      "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
      "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
      "pshufh %[src], %[src_hi], %[mask3] \n\t"
      "psllh %[dest_hi], %[src], %[shift0] \n\t"
      "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
      "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
      "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
      "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
      "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"

      "daddi %[src_ptr], %[src_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src_y), [dst_ptr] "+r"(rgb_buf), [width] "+r"(width),
        [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
        [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
        [dest_lo] "=&f"(dest_lo)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
        [mask3] "f"(mask3), [mask4] "f"(mask4), [mask5] "f"(mask5),
        [shift0] "f"(shift0), [shift1] "f"(shift1)
      : "memory");
}

// Copies a row of bytes in reverse order.
//
// |src| is first moved to the last byte of the row; each iteration then
// reads the 8 bytes ending at the current position and writes them reversed
// to |dst|. |width| must be a positive multiple of 8 because the loop exits
// only when the counter is exactly 0.
void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  uint64_t source, src0, src1, dest;
  const uint64_t mask0 = 0x0;
  const uint64_t mask1 = 0x1b;  // pshufh selector 00 01 10 11: reverse lanes

  src += width - 1;

  // src, dst and width are advanced by the assembly, so they are read-write
  // ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
      "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
      "punpcklbh %[src0], %[source], %[mask0] \n\t"
      "pshufh %[src0], %[src0], %[mask1] \n\t"
      "punpckhbh %[src1], %[source], %[mask0] \n\t"
      "pshufh %[src1], %[src1], %[mask1] \n\t"
      "packushb %[dest], %[src1], %[src0] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddi %[src_ptr], %[src_ptr], -0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src), [dst_ptr] "+r"(dst), [width] "+r"(width),
        [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
        [src1] "=&f"(src1)
      : [mask0] "f"(mask0), [mask1] "f"(mask1)
      : "memory");
}

// Splits a row of interleaved UV pairs into separate U and V rows, reversing
// the order of the pairs.
//
// |src_uv| is first moved to the last pair; each iteration consumes 8 pairs
// (16 bytes) walking backwards and writes 8 bytes to each of |dst_u| and
// |dst_v|. |width| (in pairs) must be a positive multiple of 8 because the
// loop exits only when the counter is exactly 0.
void MirrorUVRow_MMI(const uint8_t* src_uv,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  uint64_t src0, src1, dest0, dest1;
  const uint64_t mask0 = 0x00ff00ff00ff00ffULL;
  const uint64_t mask1 = 0x1b;  // pshufh selector 00 01 10 11: reverse lanes
  const uint64_t shift = 0x08;

  src_uv += (width - 1) << 1;

  // src_uv, dst_u, dst_v and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
      "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
      "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
      "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"

      // Even bytes (U).
      "and %[dest0], %[src0], %[mask0] \n\t"
      "pshufh %[dest0], %[dest0], %[mask1] \n\t"
      "and %[dest1], %[src1], %[mask0] \n\t"
      "pshufh %[dest1], %[dest1], %[mask1] \n\t"
      "packushb %[dest0], %[dest0], %[dest1] \n\t"
      "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
      "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"

      // Odd bytes (V).
      "psrlh %[dest0], %[src0], %[shift] \n\t"
      "pshufh %[dest0], %[dest0], %[mask1] \n\t"
      "psrlh %[dest1], %[src1], %[shift] \n\t"
      "pshufh %[dest1], %[dest1], %[mask1] \n\t"
      "packushb %[dest0], %[dest0], %[dest1] \n\t"
      "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
      "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"

      "daddi %[src_ptr], %[src_ptr], -0x10 \n\t"
      "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
      "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src_uv), [dstu_ptr] "+r"(dst_u),
        [dstv_ptr] "+r"(dst_v), [width] "+r"(width), [dest0] "=&f"(dest0),
        [dest1] "=&f"(dest1), [src0] "=&f"(src0), [src1] "=&f"(src1)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [shift] "f"(shift)
      : "memory");
}

// Copies a row of 4-byte ARGB pixels in reverse pixel order.
//
// |src| is first moved to the last pixel; each iteration reads the two
// pixels ending there, swaps them and writes them to |dst|. |width| must be
// a positive multiple of 2 because the loop exits only when the counter is
// exactly 0.
void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  src += (width - 1) * 4;
  uint64_t temp = 0x0;
  uint64_t shuff = 0x4e;  // 01 00 11 10

  // src, dst and width are advanced by the assembly, so they are read-write
  // ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[temp], 3(%[src]) \n\t"
      "gsldrc1 %[temp], -4(%[src]) \n\t"
      "pshufh %[temp], %[temp], %[shuff] \n\t"
      "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
      "gssdlc1 %[temp], 0x7(%[dst]) \n\t"

      "daddiu %[src], %[src], -0x08 \n\t"
      "daddiu %[dst], %[dst], 0x08 \n\t"
      "daddiu %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width),
        [temp] "=&f"(temp)
      : [shuff] "f"(shuff)
      : "memory");
}

// Splits a row of interleaved UV pairs into separate U (even bytes) and V
// (odd bytes) rows.
//
// Eight pairs (16 input bytes) are processed per iteration, and the loop
// continues while the remaining |width| is greater than zero.
void SplitUVRow_MMI(const uint8_t* src_uv,
                    uint8_t* dst_u,
                    uint8_t* dst_v,
                    int width) {
  uint64_t c0 = 0x00ff00ff00ff00ff;
  uint64_t temp[4];
  uint64_t shift = 0x08;

  // src_uv, dst_u, dst_v and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
      "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
      "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
      "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"

      "and %[t2], %[t0], %[c0] \n\t"
      "and %[t3], %[t1], %[c0] \n\t"
      "packushb %[t2], %[t2], %[t3] \n\t"
      "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
      "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"

      "psrlh %[t2], %[t0], %[shift] \n\t"
      "psrlh %[t3], %[t1], %[shift] \n\t"
      "packushb %[t2], %[t2], %[t3] \n\t"
      "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
      "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"

      "daddiu %[src_uv], %[src_uv], 16 \n\t"
      "daddiu %[dst_u], %[dst_u], 8 \n\t"
      "daddiu %[dst_v], %[dst_v], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
        [width] "+r"(width), [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]),
        [t2] "=&f"(temp[2]), [t3] "=&f"(temp[3])
      : [c0] "f"(c0), [shift] "f"(shift)
      : "memory");
}

// Interleaves separate U and V rows into a row of UV pairs.
//
// Eight pairs (16 output bytes) are produced per iteration, and the loop
// continues while the remaining |width| is greater than zero.
void MergeUVRow_MMI(const uint8_t* src_u,
                    const uint8_t* src_v,
                    uint8_t* dst_uv,
                    int width) {
  uint64_t temp[3];

  // src_u, src_v, dst_uv and width are advanced by the assembly, so they are
  // read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
      "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
      "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
      "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
      "punpcklbh %[t2], %[t0], %[t1] \n\t"
      "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
      "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
      "punpckhbh %[t2], %[t0], %[t1] \n\t"
      "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
      "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"

      "daddiu %[src_u], %[src_u], 8 \n\t"
      "daddiu %[src_v], %[src_v], 8 \n\t"
      "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"
      : [dst_uv] "+r"(dst_uv), [src_u] "+r"(src_u), [src_v] "+r"(src_v),
        [width] "+r"(width), [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]),
        [t2] "=&f"(temp[2])
      :
      : "memory");
}

// Splits a row of packed 3-byte pixels into three planes: byte 0 of each
// pixel goes to |dst_r|, byte 1 to |dst_g| and byte 2 to |dst_b|.
//
// Four pixels (12 input bytes) are processed per iteration; |width| must be
// a positive multiple of 4 because the loop exits only when the counter is
// exactly 0.
//
// NOTE(review): the fourth 32-bit load covers source offsets 0x09..0x0c,
// i.e. one byte beyond the 12 bytes consumed per iteration. The extra byte
// is not used, but the row must be readable one byte past its end.
void SplitRGBRow_MMI(const uint8_t* src_rgb,
                     uint8_t* dst_r,
                     uint8_t* dst_g,
                     uint8_t* dst_b,
                     int width) {
  uint64_t src[4];
  uint64_t dest_hi, dest_lo, dest;

  // The source and destination pointers and width are advanced by the
  // assembly, so they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
      "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
      "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
      "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
      "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
      "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
      "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
      "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
      "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
      "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"

      "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
      "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
      "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
      "punpckhwd %[dest], %[dest], %[dest] \n\t"
      "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
      "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
      "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
      "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
      "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"

      "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
      "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
      "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
      "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_ptr] "+r"(src_rgb), [dstr_ptr] "+r"(dst_r),
        [dstg_ptr] "+r"(dst_g), [dstb_ptr] "+r"(dst_b), [width] "+r"(width),
        [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]),
        [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi),
        [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
      :
      : "memory");
}

// Interleaves three planes into packed 3-byte pixels: byte 0 of each pixel
// comes from |src_r|, byte 1 from |src_g| and byte 2 from |src_b|.
//
// Eight pixels (24 output bytes) are produced per iteration; |width| must be
// a positive multiple of 8 because the loop exits only when the counter is
// exactly 0.
//
// NOTE(review): every pixel is stored as a 32-bit word whose top byte is
// zero and is overwritten by the next pixel's store. The final store of an
// iteration covers offsets 0x15..0x18, so one zero byte is written beyond
// the 24 bytes produced. At the end of a row that byte lies past width * 3,
// so callers need one byte of slack after the row.
void MergeRGBRow_MMI(const uint8_t* src_r,
                     const uint8_t* src_g,
                     const uint8_t* src_b,
                     uint8_t* dst_rgb,
                     int width) {
  uint64_t srcr, srcg, srcb, dest;
  uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
  const uint64_t temp = 0x0;

  // The source and destination pointers and width are advanced by the
  // assembly, so they are read-write ("+r") operands.
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
      "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
      "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
      "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
      "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
      "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"

      "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
      "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
      "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
      "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"

      // Pixels 0..3.
      "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
      "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
      "punpckhwd %[dest], %[dest], %[dest] \n\t"
      "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
      "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
      "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
      "punpckhwd %[dest], %[dest], %[dest] \n\t"
      "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"

      // Pixels 4..7.
      "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
      "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
      "punpckhwd %[dest], %[dest], %[dest] \n\t"
      "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
      "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
      "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
      "punpckhwd %[dest], %[dest], %[dest] \n\t"
      "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
      "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"

      "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
      "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
      "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"
      : [srcr_ptr] "+r"(src_r), [srcg_ptr] "+r"(src_g),
        [srcb_ptr] "+r"(src_b), [dst_ptr] "+r"(dst_rgb), [width] "+r"(width),
        [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
        [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
        [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
        [srcbz_lo] "=&f"(srcbz_lo)
      : [temp] "f"(temp)
      : "memory");
}

// Filter 2 rows of YUY2 UV's (422) into U and V (420).
>+void YUY2ToUVRow_MMI(const uint8_t* src_yuy2, >+ int src_stride_yuy2, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t c0 = 0xff00ff00ff00ff00; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t temp[3]; >+ uint64_t data[4]; >+ uint64_t shift = 0x08; >+ uint64_t src_stride = 0x0; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" >+ "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t" >+ "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t" >+ "pavgb %[t0], %[t0], %[t1] \n\t" >+ >+ "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t" >+ "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t" >+ "pavgb %[t1], %[t2], %[t1] \n\t" >+ >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "psrlh %[t0], %[t0], %[shift] \n\t" >+ "psrlh %[t1], %[t1], %[shift] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d0], %[t0], %[c1] \n\t" >+ "psrlh %[d1], %[t1], %[shift] \n\t" >+ >+ "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t" >+ "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t" >+ "pavgb %[t0], %[t0], %[t1] \n\t" >+ >+ "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t" >+ "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t" >+ "pavgb %[t1], %[t2], %[t1] \n\t" >+ >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "psrlh %[t0], %[t0], %[shift] \n\t" >+ "psrlh %[t1], %[t1], %[shift] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d2], %[t0], %[c1] \n\t" >+ "psrlh %[d3], %[t1], %[shift] \n\t" >+ >+ "packushb %[d0], %[d0], %[d2] \n\t" >+ "packushb %[d1], %[d1], %[d3] \n\t" >+ "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" >+ "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" >+ 
"gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" >+ "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" >+ "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t" >+ "daddiu %[dst_u], %[dst_u], 8 \n\t" >+ "daddiu %[dst_v], %[dst_v], 8 \n\t" >+ "daddiu %[width], %[width], -16 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), >+ [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), >+ [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride) >+ : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift) >+ : "memory"); >+} >+ >+// Copy row of YUY2 UV's (422) into U and V (422). >+void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ uint64_t c0 = 0xff00ff00ff00ff00; >+ uint64_t c1 = 0x00ff00ff00ff00ff; >+ uint64_t temp[2]; >+ uint64_t data[4]; >+ uint64_t shift = 0x08; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" >+ "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t" >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "psrlh %[t0], %[t0], %[shift] \n\t" >+ "psrlh %[t1], %[t1], %[shift] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d0], %[t0], %[c1] \n\t" >+ "psrlh %[d1], %[t1], %[shift] \n\t" >+ >+ "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t" >+ "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t" >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "psrlh %[t0], %[t0], %[shift] \n\t" >+ "psrlh %[t1], %[t1], %[shift] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d2], %[t0], %[c1] \n\t" >+ "psrlh %[d3], %[t1], %[shift] \n\t" >+ >+ "packushb %[d0], %[d0], %[d2] \n\t" >+ "packushb %[d1], 
%[d1], %[d3] \n\t" >+ "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" >+ "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" >+ "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" >+ "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" >+ "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t" >+ "daddiu %[dst_u], %[dst_u], 8 \n\t" >+ "daddiu %[dst_v], %[dst_v], 8 \n\t" >+ "daddiu %[width], %[width], -16 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]), >+ [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) >+ : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), >+ [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift) >+ : "memory"); >+} >+ >+// Copy row of YUY2 Y's (422) into Y (420/422). >+void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { >+ uint64_t c0 = 0x00ff00ff00ff00ff; >+ uint64_t temp[2]; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" >+ "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t" >+ "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t" >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t" >+ "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t" >+ "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t" >+ "daddiu %[dst_y], %[dst_y], 8 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]) >+ : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width), >+ [c0] "f"(c0) >+ : "memory"); >+} >+ >+// Filter 2 rows of UYVY UV's (422) into U and V (420). >+void UYVYToUVRow_MMI(const uint8_t* src_uyvy, >+ int src_stride_uyvy, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ // Output a row of UV values. 
>+ uint64_t c0 = 0x00ff00ff00ff00ff; >+ uint64_t temp[3]; >+ uint64_t data[4]; >+ uint64_t shift = 0x08; >+ uint64_t src_stride = 0x0; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" >+ "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t" >+ "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t" >+ "pavgb %[t0], %[t0], %[t1] \n\t" >+ >+ "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t" >+ "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t" >+ "pavgb %[t1], %[t2], %[t1] \n\t" >+ >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d0], %[t0], %[c0] \n\t" >+ "psrlh %[d1], %[t1], %[shift] \n\t" >+ >+ "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t" >+ "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t" >+ "pavgb %[t0], %[t0], %[t1] \n\t" >+ >+ "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t" >+ "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t" >+ "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t" >+ "pavgb %[t1], %[t2], %[t1] \n\t" >+ >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d2], %[t0], %[c0] \n\t" >+ "psrlh %[d3], %[t1], %[shift] \n\t" >+ >+ "packushb %[d0], %[d0], %[d2] \n\t" >+ "packushb %[d1], %[d1], %[d3] \n\t" >+ "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" >+ "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" >+ "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" >+ "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" >+ "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t" >+ "daddiu %[dst_u], %[dst_u], 8 \n\t" >+ "daddiu %[dst_v], %[dst_v], 8 \n\t" >+ "daddiu %[width], %[width], -16 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp[0]), [t1] 
"=&f"(temp[1]), [t2] "=&f"(temp[2]), >+ [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), >+ [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride) >+ : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy), >+ [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), >+ [c0] "f"(c0), [shift] "f"(shift) >+ : "memory"); >+} >+ >+// Copy row of UYVY UV's (422) into U and V (422). >+void UYVYToUV422Row_MMI(const uint8_t* src_uyvy, >+ uint8_t* dst_u, >+ uint8_t* dst_v, >+ int width) { >+ // Output a row of UV values. >+ uint64_t c0 = 0x00ff00ff00ff00ff; >+ uint64_t temp[2]; >+ uint64_t data[4]; >+ uint64_t shift = 0x08; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" >+ "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t" >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d0], %[t0], %[c0] \n\t" >+ "psrlh %[d1], %[t1], %[shift] \n\t" >+ >+ "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t" >+ "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t" >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "mov.s %[t1], %[t0] \n\t" >+ "and %[d2], %[t0], %[c0] \n\t" >+ "psrlh %[d3], %[t1], %[shift] \n\t" >+ >+ "packushb %[d0], %[d0], %[d2] \n\t" >+ "packushb %[d1], %[d1], %[d3] \n\t" >+ "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" >+ "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" >+ "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" >+ "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" >+ "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t" >+ "daddiu %[dst_u], %[dst_u], 8 \n\t" >+ "daddiu %[dst_v], %[dst_v], 8 \n\t" >+ "daddiu %[width], %[width], -16 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]), >+ [d1] "=&f"(data[1]), [d2] 
"=&f"(data[2]), [d3] "=&f"(data[3]) >+ : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), >+ [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift) >+ : "memory"); >+} >+ >+// Copy row of UYVY Y's (422) into Y (420/422). >+void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { >+ // Output a row of Y values. >+ uint64_t c0 = 0x00ff00ff00ff00ff; >+ uint64_t shift = 0x08; >+ uint64_t temp[2]; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" >+ "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t" >+ "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t" >+ "dsrl %[t0], %[t0], %[shift] \n\t" >+ "dsrl %[t1], %[t1], %[shift] \n\t" >+ "and %[t0], %[t0], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "and %[t1], %[t1], %[c0] \n\t" >+ "packushb %[t0], %[t0], %[t1] \n\t" >+ "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t" >+ "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t" >+ "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t" >+ "daddiu %[dst_y], %[dst_y], 8 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]) >+ : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width), >+ [c0] "f"(c0), [shift] "f"(shift) >+ : "memory"); >+} >+ >+// Blend src_argb0 over src_argb1 and store to dst_argb. >+// dst_argb may be src_argb0 or src_argb1. >+// This code mimics the SSSE3 version for better testability. 
>+void ARGBBlendRow_MMI(const uint8_t* src_argb0, >+ const uint8_t* src_argb1, >+ uint8_t* dst_argb, >+ int width) { >+ uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi, >+ dest_lo; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL; >+ const uint64_t mask2 = 0x00FF00FF00FF00FFULL; >+ const uint64_t mask3 = 0xFF; >+ const uint64_t mask4 = ~mask1; >+ const uint64_t shift = 0x08; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" >+ "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" >+ "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t" >+ >+ "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" >+ "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t" >+ >+ "psubush %[alpha], %[mask2], %[src0_lo] \n\t" >+ "pshufh %[alpha], %[alpha], %[mask3] \n\t" >+ "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t" >+ "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" >+ "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t" >+ >+ "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t" >+ "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t" >+ >+ "psubush %[alpha], %[mask2], %[src0_hi] \n\t" >+ "pshufh %[alpha], %[alpha], %[mask3] \n\t" >+ "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t" >+ "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" >+ "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "and %[dest], %[dest], %[mask1] \n\t" >+ "or %[dest], %[dest], %[mask4] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" >+ "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha), >+ [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), >+ [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), >+ 
[dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo) >+ : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1), >+ [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1), >+ [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4), >+ [shift] "f"(shift), [width] "r"(width) >+ : "memory"); >+} >+ >+void BlendPlaneRow_MMI(const uint8_t* src0, >+ const uint8_t* src1, >+ const uint8_t* alpha, >+ uint8_t* dst, >+ int width) { >+ uint64_t source0, source1, dest, alph; >+ uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi, >+ dest_lo; >+ uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL; >+ const uint64_t mask2 = 0x00FF00FF00FF00FFULL; >+ const uint64_t shift = 0x08; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" >+ "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" >+ "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t" >+ "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t" >+ >+ "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" >+ "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" >+ "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t" >+ "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t" >+ >+ "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t" >+ "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t" >+ "psubusb %[alpha_r], %[mask1], %[alpha] \n\t" >+ "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t" >+ "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t" >+ "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t" >+ "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t" >+ >+ "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t" >+ "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t" >+ "paddush %[dest_lo], %[dest_lo], %[dest] \n\t" >+ "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t" >+ "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" >+ >+ "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t" >+ "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t" >+ "paddush %[dest_hi], %[dest_hi], %[dest] \n\t" >+ "paddush %[dest_hi], 
%[dest_hi], %[mask2] \n\t" >+ "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" >+ "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" >+ "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph), >+ [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), >+ [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), >+ [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo), >+ [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), >+ [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi), >+ [alpha_r] "=&f"(alpha_rev) >+ : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha), >+ [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1), >+ [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width) >+ : "memory"); >+} >+ >+// Multiply source RGB by alpha and store to destination. >+// This code mimics the SSSE3 version for better testability. 
>+void ARGBAttenuateRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_argb, >+ int width) { >+ uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha; >+ const uint64_t mask0 = 0xFF; >+ const uint64_t mask1 = 0xFF000000FF000000ULL; >+ const uint64_t mask2 = ~mask1; >+ const uint64_t shift = 0x08; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[src] \n\t" >+ "punpckhbh %[src_hi], %[src], %[src] \n\t" >+ >+ "pshufh %[alpha], %[src_lo], %[mask0] \n\t" >+ "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t" >+ "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" >+ "pshufh %[alpha], %[src_hi], %[mask0] \n\t" >+ "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t" >+ "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "and %[dest], %[dest], %[mask2] \n\t" >+ "and %[src], %[src], %[mask1] \n\t" >+ "or %[dest], %[dest], %[src] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), >+ [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi), >+ [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), >+ [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift), >+ [width] "r"(width) >+ : "memory"); >+} >+ >+void ComputeCumulativeSumRow_MMI(const uint8_t* row, >+ int32_t* cumsum, >+ const int32_t* previous_cumsum, >+ int width) { >+ int64_t row_sum[2] = {0, 0}; >+ uint64_t src, dest0, dest1, presrc0, presrc1, dest; >+ const uint64_t mask = 0x0; >+ >+ __asm__ volatile( >+ "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t" >+ "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t" >+ >+ "1: \n\t" >+ "gslwlc1 %[src], 
0x03(%[row_ptr]) \n\t" >+ "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t" >+ >+ "punpcklbh %[src], %[src], %[mask] \n\t" >+ "punpcklhw %[dest0], %[src], %[mask] \n\t" >+ "punpckhhw %[dest1], %[src], %[mask] \n\t" >+ >+ "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t" >+ "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t" >+ >+ "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t" >+ "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t" >+ "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t" >+ "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t" >+ >+ "paddw %[dest0], %[row_sum0], %[presrc0] \n\t" >+ "paddw %[dest1], %[row_sum1], %[presrc1] \n\t" >+ >+ "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" >+ "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t" >+ "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x01 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0), >+ [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]), >+ [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0), >+ [presrc1] "=&f"(presrc1) >+ : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum), >+ [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask) >+ : "memory"); >+} >+ >+// C version 2x2 -> 2x1. 
>+void InterpolateRow_MMI(uint8_t* dst_ptr, >+ const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ int width, >+ int source_y_fraction) { >+ if (source_y_fraction == 0) { >+ __asm__ volatile( >+ "1: \n\t" >+ "ld $t0, 0x0(%[src_ptr]) \n\t" >+ "sd $t0, 0x0(%[dst_ptr]) \n\t" >+ "daddiu %[src_ptr], %[src_ptr], 8 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : >+ : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width) >+ : "memory"); >+ return; >+ } >+ if (source_y_fraction == 128) { >+ uint64_t uv = 0x0; >+ uint64_t uv_stride = 0x0; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t" >+ "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t" >+ "daddu $t0, %[src_ptr], %[stride] \n\t" >+ "gsldrc1 %[uv_stride], 0x0($t0) \n\t" >+ "gsldlc1 %[uv_stride], 0x7($t0) \n\t" >+ >+ "pavgb %[uv], %[uv], %[uv_stride] \n\t" >+ "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t" >+ "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 8 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width), >+ [stride] "r"((int64_t)src_stride) >+ : "memory"); >+ return; >+ } >+ const uint8_t* src_ptr1 = src_ptr + src_stride; >+ uint64_t temp; >+ uint64_t data[4]; >+ uint64_t zero = 0x0; >+ uint64_t c0 = 0x0080008000800080; >+ uint64_t fy0 = 0x0100010001000100; >+ uint64_t shift = 0x8; >+ __asm__ volatile( >+ "pshufh %[fy1], %[fy1], %[zero] \n\t" >+ "psubh %[fy0], %[fy0], %[fy1] \n\t" >+ "1: \n\t" >+ "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t" >+ "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t" >+ "punpcklbh %[d0], %[t0], %[zero] \n\t" >+ "punpckhbh %[d1], %[t0], %[zero] \n\t" >+ "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t" >+ "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t" >+ "punpcklbh %[d2], %[t0], %[zero] \n\t" >+ 
"punpckhbh %[d3], %[t0], %[zero] \n\t" >+ >+ "pmullh %[d0], %[d0], %[fy0] \n\t" >+ "pmullh %[d2], %[d2], %[fy1] \n\t" >+ "paddh %[d0], %[d0], %[d2] \n\t" >+ "paddh %[d0], %[d0], %[c0] \n\t" >+ "psrlh %[d0], %[d0], %[shift] \n\t" >+ >+ "pmullh %[d1], %[d1], %[fy0] \n\t" >+ "pmullh %[d3], %[d3], %[fy1] \n\t" >+ "paddh %[d1], %[d1], %[d3] \n\t" >+ "paddh %[d1], %[d1], %[c0] \n\t" >+ "psrlh %[d1], %[d1], %[shift] \n\t" >+ >+ "packushb %[d0], %[d0], %[d1] \n\t" >+ "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t" >+ "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t" >+ "daddiu %[src_ptr], %[src_ptr], 8 \n\t" >+ "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), >+ [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) >+ : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1), >+ [dst_ptr] "r"(dst_ptr), [width] "r"(width), >+ [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0), >+ [shift] "f"(shift), [zero] "f"(zero) >+ : "memory"); >+} >+ >+// Use first 4 shuffler values to reorder ARGB channels. 
>+void ARGBShuffleRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_argb, >+ const uint8_t* shuffler, >+ int width) { >+ uint64_t source, dest0, dest1, dest; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) | >+ ((shuffler[2] & 0x03) << 4) | >+ ((shuffler[3] & 0x03) << 6); >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ >+ "punpcklbh %[dest0], %[src], %[mask0] \n\t" >+ "pshufh %[dest0], %[dest0], %[mask1] \n\t" >+ "punpckhbh %[dest1], %[src], %[mask0] \n\t" >+ "pshufh %[dest1], %[dest1], %[mask1] \n\t" >+ "packushb %[dest], %[dest0], %[dest1] \n\t" >+ >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0), >+ [dest1] "=&f"(dest1) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), >+ [mask1] "f"(mask1), [width] "r"(width) >+ : "memory"); >+} >+ >+void I422ToYUY2Row_MMI(const uint8_t* src_y, >+ const uint8_t* src_u, >+ const uint8_t* src_v, >+ uint8_t* dst_frame, >+ int width) { >+ uint64_t temp[3]; >+ uint64_t vu = 0x0; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i] >+ "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i] >+ "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i] >+ "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i] >+ "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i] >+ "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i] >+ "punpcklbh %[vu], %[tu], %[tv] \n\t" // g >+ "punpcklbh %[tu], %[ty], %[vu] \n\t" // g >+ "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t" >+ "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t" >+ "punpckhbh %[tu], %[ty], %[vu] \n\t" // g >+ "gssdlc1 %[tu], 0x0F(%[dst_frame]) 
\n\t" >+ "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t" >+ "daddiu %[src_y], %[src_y], 8 \n\t" >+ "daddiu %[src_u], %[src_u], 4 \n\t" >+ "daddiu %[src_v], %[src_v], 4 \n\t" >+ "daddiu %[dst_frame], %[dst_frame], 16 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]), >+ [vu] "=&f"(vu) >+ : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), >+ [dst_frame] "r"(dst_frame), [width] "r"(width) >+ : "memory"); >+} >+ >+void I422ToUYVYRow_MMI(const uint8_t* src_y, >+ const uint8_t* src_u, >+ const uint8_t* src_v, >+ uint8_t* dst_frame, >+ int width) { >+ uint64_t temp[3]; >+ uint64_t vu = 0x0; >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i] >+ "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i] >+ "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i] >+ "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i] >+ "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i] >+ "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i] >+ "punpcklbh %[vu], %[tu], %[tv] \n\t" // g >+ "punpcklbh %[tu], %[vu], %[ty] \n\t" // g >+ "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t" >+ "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t" >+ "punpckhbh %[tu], %[vu], %[ty] \n\t" // g >+ "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t" >+ "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t" >+ "daddiu %[src_y], %[src_y], 8 \n\t" >+ "daddiu %[src_u], %[src_u], 4 \n\t" >+ "daddiu %[src_v], %[src_v], 4 \n\t" >+ "daddiu %[dst_frame], %[dst_frame], 16 \n\t" >+ "daddiu %[width], %[width], -8 \n\t" >+ "bgtz %[width], 1b \n\t" >+ "nop \n\t" >+ : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]), >+ [vu] "=&f"(vu) >+ : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), >+ [dst_frame] "r"(dst_frame), [width] "r"(width) >+ : "memory"); >+} >+ >+void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) { >+ uint64_t source, dest; >+ const uint64_t mask0 = 
0xff000000ff000000ULL; >+ const uint64_t mask1 = ~mask0; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "and %[src], %[src], %[mask0] \n\t" >+ "and %[dest], %[dest], %[mask1] \n\t" >+ "or %[dest], %[src], %[dest] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(source), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), >+ [mask1] "f"(mask1), [width] "r"(width) >+ : "memory"); >+} >+ >+void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb, >+ uint8_t* dst_a, >+ int width) { >+ uint64_t src, dest0, dest1, dest_lo, dest_hi, dest; >+ const uint64_t mask = 0xff000000ff000000ULL; >+ const uint64_t shift = 0x18; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "and %[dest0], %[src], %[mask] \n\t" >+ "psrlw %[dest0], %[dest0], %[shift] \n\t" >+ "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t" >+ "and %[dest1], %[src], %[mask] \n\t" >+ "psrlw %[dest1], %[dest1], %[shift] \n\t" >+ "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" >+ >+ "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t" >+ "and %[dest0], %[src], %[mask] \n\t" >+ "psrlw %[dest0], %[dest0], %[shift] \n\t" >+ "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t" >+ "and %[dest1], %[src], %[mask] \n\t" >+ "psrlw %[dest1], %[dest1], %[shift] \n\t" >+ "packsswh %[dest_hi], %[dest0], %[dest1] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 
0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0), >+ [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask), >+ [shift] "f"(shift), [width] "r"(width) >+ : "memory"); >+} >+ >+void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) { >+ uint64_t source, dest0, dest1, dest; >+ const uint64_t mask0 = 0x0; >+ const uint64_t mask1 = 0x00ffffff00ffffffULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ >+ "punpcklbh %[dest0], %[mask0], %[src] \n\t" >+ "punpcklhw %[dest1], %[mask0], %[dest0] \n\t" >+ "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ "and %[dest], %[dest], %[mask1] \n\t" >+ "or %[dest], %[dest], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ "punpckhhw %[dest1], %[mask0], %[dest0] \n\t" >+ "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" >+ "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t" >+ "and %[dest], %[dest], %[mask1] \n\t" >+ "or %[dest], %[dest], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" >+ >+ "punpckhbh %[dest0], %[mask0], %[src] \n\t" >+ "punpcklhw %[dest1], %[mask0], %[dest0] \n\t" >+ "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t" >+ "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t" >+ "and %[dest], %[dest], %[mask1] \n\t" >+ "or %[dest], %[dest], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t" >+ "punpckhhw %[dest1], %[mask0], %[dest0] \n\t" >+ "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" >+ "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t" >+ "and %[dest], %[dest], %[mask1] \n\t" >+ "or %[dest], 
%[dest], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0), >+ [dest1] "=&f"(dest1) >+ : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), >+ [mask1] "f"(mask1), [width] "r"(width) >+ : "memory"); >+} >+ >+#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+#ifdef __cplusplus >+} // extern "C" >+} // namespace libyuv >+#endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_msa.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_msa.cc >index 66666cefcd98c4f2314bf27ac142d34f1864cfcf..5c0239a37f0f646d4fd8ebbd42cff08d9ee16d4c 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_msa.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_msa.cc >@@ -163,14 +163,14 @@ extern "C" { > v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \ > v8u16 reg8_m, reg9_m; \ > \ >- src0_m = (v16u8)__msa_ld_b((v16i8*)s, 0); \ >- src1_m = (v16u8)__msa_ld_b((v16i8*)s, 16); \ >- src2_m = (v16u8)__msa_ld_b((v16i8*)s, 32); \ >- src3_m = (v16u8)__msa_ld_b((v16i8*)s, 48); \ >- src4_m = (v16u8)__msa_ld_b((v16i8*)t, 0); \ >- src5_m = (v16u8)__msa_ld_b((v16i8*)t, 16); \ >- src6_m = (v16u8)__msa_ld_b((v16i8*)t, 32); \ >- src7_m = (v16u8)__msa_ld_b((v16i8*)t, 48); \ >+ src0_m = (v16u8)__msa_ld_b((void*)s, 0); \ >+ src1_m = (v16u8)__msa_ld_b((void*)s, 16); \ >+ src2_m = (v16u8)__msa_ld_b((void*)s, 32); \ >+ src3_m = (v16u8)__msa_ld_b((void*)s, 48); \ >+ src4_m = (v16u8)__msa_ld_b((void*)t, 0); \ >+ src5_m = (v16u8)__msa_ld_b((void*)t, 16); \ >+ src6_m = (v16u8)__msa_ld_b((void*)t, 32); \ >+ src7_m = (v16u8)__msa_ld_b((void*)t, 48); \ > vec0_m = 
(v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \ > vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \ > vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \ >@@ -201,14 +201,14 @@ extern "C" { > reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \ > argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \ > argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ >- src0_m = (v16u8)__msa_ld_b((v16i8*)s, 64); \ >- src1_m = (v16u8)__msa_ld_b((v16i8*)s, 80); \ >- src2_m = (v16u8)__msa_ld_b((v16i8*)s, 96); \ >- src3_m = (v16u8)__msa_ld_b((v16i8*)s, 112); \ >- src4_m = (v16u8)__msa_ld_b((v16i8*)t, 64); \ >- src5_m = (v16u8)__msa_ld_b((v16i8*)t, 80); \ >- src6_m = (v16u8)__msa_ld_b((v16i8*)t, 96); \ >- src7_m = (v16u8)__msa_ld_b((v16i8*)t, 112); \ >+ src0_m = (v16u8)__msa_ld_b((void*)s, 64); \ >+ src1_m = (v16u8)__msa_ld_b((void*)s, 80); \ >+ src2_m = (v16u8)__msa_ld_b((void*)s, 96); \ >+ src3_m = (v16u8)__msa_ld_b((void*)s, 112); \ >+ src4_m = (v16u8)__msa_ld_b((void*)t, 64); \ >+ src5_m = (v16u8)__msa_ld_b((void*)t, 80); \ >+ src6_m = (v16u8)__msa_ld_b((void*)t, 96); \ >+ src7_m = (v16u8)__msa_ld_b((void*)t, 112); \ > vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \ > vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \ > vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \ >@@ -942,10 +942,10 @@ void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { > 21, 22, 24, 25, 26, 28, 29, 30}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); > dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); > 
dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); > dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); >@@ -966,10 +966,10 @@ void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { > 21, 20, 26, 25, 24, 30, 29, 28}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); > dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); > dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); > dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); >@@ -987,8 +987,8 @@ void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { > v16i8 zero = {0}; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); > vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); > vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3); > vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5); >@@ -1023,8 +1023,8 @@ void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, > v16i8 zero = {0}; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); > vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); > vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2); > vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3); >@@ -1066,8 +1066,8 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, > v16i8 zero = {0}; > > for (x = 0; x < width; x 
+= 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); > vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4); > vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4); > src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1); >@@ -1098,10 +1098,10 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb, > v16i8 zero = {0}; > > for (x = width; x > 0; x -= 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); > reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); > reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); > reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); >@@ -1164,8 +1164,8 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, > v8i16 zero = {0}; > > for (x = 0; x < width; x += 4) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0); > vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); > vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); > vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); >@@ -1200,10 +1200,10 @@ void ARGBAddRow_MSA(const uint8_t* src_argb0, > v16u8 src0, src1, src2, src3, dst0, dst1; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = 
(v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); > dst0 = __msa_adds_u_b(src0, src2); > dst1 = __msa_adds_u_b(src1, src3); > ST_UB2(dst0, dst1, dst_argb, 16); >@@ -1221,10 +1221,10 @@ void ARGBSubtractRow_MSA(const uint8_t* src_argb0, > v16u8 src0, src1, src2, src3, dst0, dst1; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); > dst0 = __msa_subs_u_b(src0, src2); > dst1 = __msa_subs_u_b(src1, src3); > ST_UB2(dst0, dst1, dst_argb, 16); >@@ -1245,8 +1245,8 @@ void ARGBAttenuateRow_MSA(const uint8_t* src_argb, > v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); > vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); > vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); > vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1); >@@ -1316,8 +1316,8 @@ void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, > vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0); > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); > vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); > vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); > reg0 = (v8i16)__msa_ilvev_b(zero, 
(v16i8)vec0); >@@ -1514,8 +1514,8 @@ void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, > v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); > > for (x = 0; x < width; x += 16) { >- src0 = (v8u16)__msa_ld_h((v8u16*)src_argb1555, 0); >- src1 = (v8u16)__msa_ld_h((v8u16*)src_argb1555, 16); >+ src0 = (v8u16)__msa_ld_h((void*)src_argb1555, 0); >+ src1 = (v8u16)__msa_ld_h((void*)src_argb1555, 16); > vec0 = src0 & const_0x1F; > vec1 = src1 & const_0x1F; > src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); >@@ -1566,8 +1566,8 @@ void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, > v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); > > for (x = 0; x < width; x += 16) { >- src0 = (v8u16)__msa_ld_h((v8u16*)src_rgb565, 0); >- src1 = (v8u16)__msa_ld_h((v8u16*)src_rgb565, 16); >+ src0 = (v8u16)__msa_ld_h((void*)src_rgb565, 0); >+ src1 = (v8u16)__msa_ld_h((void*)src_rgb565, 16); > vec0 = src0 & const_0x1F; > vec1 = src0 & const_0x7E0; > vec2 = src0 & const_0xF800; >@@ -1611,9 +1611,9 @@ void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, > v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 32); >+ src0 = (v16u8)__msa_ld_b((void*)src_rgb24, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_rgb24, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_rgb24, 32); > vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); > vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); > vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); >@@ -1636,9 +1636,9 @@ void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) { > v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_raw, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_raw, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_raw, 32); >+ src0 = 
(v16u8)__msa_ld_b((void*)src_raw, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_raw, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_raw, 32); > vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); > vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); > vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); >@@ -1666,8 +1666,8 @@ void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, > v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); > > for (x = 0; x < width; x += 16) { >- src0 = (v8u16)__msa_ld_b((v8i16*)src_argb1555, 0); >- src1 = (v8u16)__msa_ld_b((v8i16*)src_argb1555, 16); >+ src0 = (v8u16)__msa_ld_b((void*)src_argb1555, 0); >+ src1 = (v8u16)__msa_ld_b((void*)src_argb1555, 16); > vec0 = src0 & const_0x1F; > vec1 = src1 & const_0x1F; > src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); >@@ -1725,8 +1725,8 @@ void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { > v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); > > for (x = 0; x < width; x += 16) { >- src0 = (v8u16)__msa_ld_b((v8i16*)src_rgb565, 0); >- src1 = (v8u16)__msa_ld_b((v8i16*)src_rgb565, 16); >+ src0 = (v8u16)__msa_ld_b((void*)src_rgb565, 0); >+ src1 = (v8u16)__msa_ld_b((void*)src_rgb565, 16); > vec0 = src0 & const_0x1F; > vec1 = src0 & const_0x7E0; > vec2 = src0 & const_0xF800; >@@ -1789,9 +1789,9 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { > v16i8 zero = {0}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); > reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); > reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); > reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); >@@ -1830,9 +1830,9 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, 
uint8_t* dst_y, int width) { > v16i8 zero = {0}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); > reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); > reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); > reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); >@@ -1877,10 +1877,10 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, > v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); > > for (x = 0; x < width; x += 16) { >- src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); >- src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); >- src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); >- src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); >+ src0 = (v8u16)__msa_ld_b((void*)s, 0); >+ src1 = (v8u16)__msa_ld_b((void*)s, 16); >+ src2 = (v8u16)__msa_ld_b((void*)t, 0); >+ src3 = (v8u16)__msa_ld_b((void*)t, 16); > vec0 = src0 & const_0x1F; > vec1 = src1 & const_0x1F; > vec0 += src2 & const_0x1F; >@@ -1959,10 +1959,10 @@ void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, > v8u16 const_0x3F = (v8u16)__msa_fill_h(0x3F); > > for (x = 0; x < width; x += 16) { >- src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); >- src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); >- src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); >- src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); >+ src0 = (v8u16)__msa_ld_b((void*)s, 0); >+ src1 = (v8u16)__msa_ld_b((void*)s, 16); >+ src2 = (v8u16)__msa_ld_b((void*)t, 0); >+ src3 = (v8u16)__msa_ld_b((void*)t, 16); > vec0 = src0 & const_0x1F; > vec1 = src1 & const_0x1F; > vec0 += src2 & const_0x1F; >@@ -2041,12 +2041,12 @@ void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, > v16i8 zero = {0}; > > for (x = 0; x < width; x += 16) { >- inp0 = (v16u8)__msa_ld_b((v16i8*)s, 0); >- inp1 = (v16u8)__msa_ld_b((v16i8*)s, 16); >- inp2 = 
(v16u8)__msa_ld_b((v16i8*)s, 32); >- inp3 = (v16u8)__msa_ld_b((v16i8*)t, 0); >- inp4 = (v16u8)__msa_ld_b((v16i8*)t, 16); >- inp5 = (v16u8)__msa_ld_b((v16i8*)t, 32); >+ inp0 = (v16u8)__msa_ld_b((void*)s, 0); >+ inp1 = (v16u8)__msa_ld_b((void*)s, 16); >+ inp2 = (v16u8)__msa_ld_b((void*)s, 32); >+ inp3 = (v16u8)__msa_ld_b((void*)t, 0); >+ inp4 = (v16u8)__msa_ld_b((void*)t, 16); >+ inp5 = (v16u8)__msa_ld_b((void*)t, 32); > src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); > src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); > src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); >@@ -2146,12 +2146,12 @@ void RAWToUVRow_MSA(const uint8_t* src_rgb0, > v16i8 zero = {0}; > > for (x = 0; x < width; x += 16) { >- inp0 = (v16u8)__msa_ld_b((v16i8*)s, 0); >- inp1 = (v16u8)__msa_ld_b((v16i8*)s, 16); >- inp2 = (v16u8)__msa_ld_b((v16i8*)s, 32); >- inp3 = (v16u8)__msa_ld_b((v16i8*)t, 0); >- inp4 = (v16u8)__msa_ld_b((v16i8*)t, 16); >- inp5 = (v16u8)__msa_ld_b((v16i8*)t, 32); >+ inp0 = (v16u8)__msa_ld_b((void*)s, 0); >+ inp1 = (v16u8)__msa_ld_b((void*)s, 16); >+ inp2 = (v16u8)__msa_ld_b((void*)s, 32); >+ inp3 = (v16u8)__msa_ld_b((void*)t, 0); >+ inp4 = (v16u8)__msa_ld_b((void*)t, 16); >+ inp5 = (v16u8)__msa_ld_b((void*)t, 32); > src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); > src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); > src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); >@@ -2353,8 +2353,8 @@ void SobelRow_MSA(const uint8_t* src_sobelx, > v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_sobely, 0); > vec0 = __msa_adds_u_b(src0, src1); > dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0); > dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0); >@@ -2375,10 +2375,10 @@ void 
SobelToPlaneRow_MSA(const uint8_t* src_sobelx, > v16u8 src0, src1, src2, src3, dst0, dst1; > > for (x = 0; x < width; x += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_sobelx, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_sobely, 0); >+ src3 = (v16u8)__msa_ld_b((void*)src_sobely, 16); > dst0 = __msa_adds_u_b(src0, src2); > dst1 = __msa_adds_u_b(src1, src3); > ST_UB2(dst0, dst1, dst_y, 16); >@@ -2398,8 +2398,8 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx, > v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_sobely, 0); > vec0 = __msa_adds_u_b(src0, src1); > vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1); > vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1); >@@ -2424,10 +2424,10 @@ void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { > v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); > ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7, > dst0); > ST_UB(dst0, dst_y); >@@ -2444,10 +2444,10 @@ void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { > v8u16 const_0x1080 = 
(v8u16)__msa_fill_h(0x1080); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); > ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8, > dst0); > ST_UB(dst0, dst_y); >@@ -2464,10 +2464,10 @@ void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { > v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); > ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8, > dst0); > ST_UB(dst0, dst_y); >@@ -2484,10 +2484,10 @@ void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { > v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); > ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8, > dst0); > ST_UB(dst0, 
dst_y); >@@ -2518,14 +2518,14 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, > v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); > > for (x = 0; x < width; x += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); >- src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); >- src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); >- src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); >- src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); >+ src0 = (v16u8)__msa_ld_b((void*)s, 0); >+ src1 = (v16u8)__msa_ld_b((void*)s, 16); >+ src2 = (v16u8)__msa_ld_b((void*)s, 32); >+ src3 = (v16u8)__msa_ld_b((void*)s, 48); >+ src4 = (v16u8)__msa_ld_b((void*)t, 0); >+ src5 = (v16u8)__msa_ld_b((void*)t, 16); >+ src6 = (v16u8)__msa_ld_b((void*)t, 32); >+ src7 = (v16u8)__msa_ld_b((void*)t, 48); > src0 = __msa_aver_u_b(src0, src4); > src1 = __msa_aver_u_b(src1, src5); > src2 = __msa_aver_u_b(src2, src6); >@@ -2536,14 +2536,14 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, > src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); > vec0 = __msa_aver_u_b(src4, src6); > vec1 = __msa_aver_u_b(src5, src7); >- src0 = (v16u8)__msa_ld_b((v16i8*)s, 64); >- src1 = (v16u8)__msa_ld_b((v16i8*)s, 80); >- src2 = (v16u8)__msa_ld_b((v16i8*)s, 96); >- src3 = (v16u8)__msa_ld_b((v16i8*)s, 112); >- src4 = (v16u8)__msa_ld_b((v16i8*)t, 64); >- src5 = (v16u8)__msa_ld_b((v16i8*)t, 80); >- src6 = (v16u8)__msa_ld_b((v16i8*)t, 96); >- src7 = (v16u8)__msa_ld_b((v16i8*)t, 112); >+ src0 = (v16u8)__msa_ld_b((void*)s, 64); >+ src1 = (v16u8)__msa_ld_b((void*)s, 80); >+ src2 = (v16u8)__msa_ld_b((void*)s, 96); >+ src3 = (v16u8)__msa_ld_b((void*)s, 112); >+ src4 = (v16u8)__msa_ld_b((void*)t, 64); >+ src5 = (v16u8)__msa_ld_b((void*)t, 80); >+ src6 = (v16u8)__msa_ld_b((void*)t, 96); >+ src7 = (v16u8)__msa_ld_b((void*)t, 112); > src0 = __msa_aver_u_b(src0, src4); > src1 = __msa_aver_u_b(src1, src5); > src2 = __msa_aver_u_b(src2, src6); >@@ -2746,7 
+2746,7 @@ void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { > v8i16 zero = {0}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_y, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_y, 0); > vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); > vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); > reg0 = (v4i32)__msa_ilvr_h(zero, vec0); >@@ -2792,7 +2792,7 @@ void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { > v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_y, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_y, 0); > vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0); > vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0); > vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0); >@@ -2824,7 +2824,7 @@ void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, > vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_yuy2, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_yuy2, 0); > src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); > src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); > YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, >@@ -2852,7 +2852,7 @@ void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, > vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_uyvy, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_uyvy, 0); > src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); > src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); > YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, >@@ -2884,10 +2884,10 @@ void InterpolateRow_MSA(uint8_t* dst_ptr, > > if (128 == y1_fraction) { > for (x = 0; x < width; x += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); >- src2 = 
(v16u8)__msa_ld_b((v16i8*)t, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); >+ src0 = (v16u8)__msa_ld_b((void*)s, 0); >+ src1 = (v16u8)__msa_ld_b((void*)s, 16); >+ src2 = (v16u8)__msa_ld_b((void*)t, 0); >+ src3 = (v16u8)__msa_ld_b((void*)t, 16); > dst0 = __msa_aver_u_b(src0, src2); > dst1 = __msa_aver_u_b(src1, src3); > ST_UB2(dst0, dst1, dst_ptr, 16); >@@ -2902,10 +2902,10 @@ void InterpolateRow_MSA(uint8_t* dst_ptr, > y_frac = (v8u16)__msa_fill_h(y_fractions); > > for (x = 0; x < width; x += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); >+ src0 = (v16u8)__msa_ld_b((void*)s, 0); >+ src1 = (v16u8)__msa_ld_b((void*)s, 16); >+ src2 = (v16u8)__msa_ld_b((void*)t, 0); >+ src3 = (v16u8)__msa_ld_b((void*)t, 16); > vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); > vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); > vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); >@@ -2947,9 +2947,9 @@ void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { > 24, 23, 28, 27, 26, 31, 30, 29}; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_raw, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_raw, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_raw, 32); >+ src0 = (v16u8)__msa_ld_b((void*)src_raw, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_raw, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_raw, 32); > src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8); > src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); > dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); >@@ -2970,8 +2970,8 @@ void MergeUVRow_MSA(const uint8_t* src_u, > v16u8 src0, src1, dst0, dst1; > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_u, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_v, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_u, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_v, 0); > 
dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0); > dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0); > ST_UB2(dst0, dst1, dst_uv, 16); >@@ -2988,10 +2988,10 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, > v16u8 src0, src1, src2, src3, vec0, vec1, dst0; > > for (i = 0; i < width; i += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); > vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); > vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); > dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); >@@ -3015,10 +3015,10 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0, > v16i8 zero = {0}; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); >+ src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); > vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); > vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); > vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); >@@ -3083,10 +3083,10 @@ void ARGBQuantizeRow_MSA(uint8_t* dst_argb, > v16i8 zero = {0}; > > for (x = 0; x < width; x += 8) { >- src0 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 48); >+ src0 = (v16u8)__msa_ld_b((void*)dst_argb, 0); >+ src1 = (v16u8)__msa_ld_b((void*)dst_argb, 16); >+ 
src2 = (v16u8)__msa_ld_b((void*)dst_argb, 32); >+ src3 = (v16u8)__msa_ld_b((void*)dst_argb, 48); > vec0 = (v8i16)__msa_ilvr_b(zero, (v16i8)src0); > vec1 = (v8i16)__msa_ilvl_b(zero, (v16i8)src0); > vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); >@@ -3186,13 +3186,13 @@ void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, > v16i8 zero = {0}; > v8i16 max = __msa_ldi_h(255); > >- src0 = __msa_ld_b((v16i8*)matrix_argb, 0); >+ src0 = __msa_ld_b((void*)matrix_argb, 0); > vec0 = (v8i16)__msa_ilvr_b(zero, src0); > vec1 = (v8i16)__msa_ilvl_b(zero, src0); > > for (x = 0; x < width; x += 8) { >- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); >+ src1 = (v16u8)__msa_ld_b((void*)src_argb, 0); >+ src2 = (v16u8)__msa_ld_b((void*)src_argb, 16); > vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); > vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); > vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); >@@ -3289,10 +3289,10 @@ void SplitUVRow_MSA(const uint8_t* src_uv, > v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; > > for (x = 0; x < width; x += 32) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_uv, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_uv, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_uv, 32); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_uv, 48); >+ src0 = (v16u8)__msa_ld_b((void*)src_uv, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_uv, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_uv, 32); >+ src3 = (v16u8)__msa_ld_b((void*)src_uv, 48); > dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); > dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); > dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); >@@ -3329,10 +3329,10 @@ void MirrorUVRow_MSA(const uint8_t* src_uv, > > for (x = 0; x < width; x += 32) { > src_uv -= 64; >- src2 = (v16u8)__msa_ld_b((v16i8*)src_uv, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_uv, 16); >- src0 = (v16u8)__msa_ld_b((v16i8*)src_uv, 32); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_uv, 48); >+ src2 = 
(v16u8)__msa_ld_b((void*)src_uv, 0); >+ src3 = (v16u8)__msa_ld_b((void*)src_uv, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_uv, 32); >+ src1 = (v16u8)__msa_ld_b((void*)src_uv, 48); > dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); > dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); > dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); >@@ -3359,12 +3359,12 @@ void SobelXRow_MSA(const uint8_t* src_y0, > v8i16 max = __msa_ldi_h(255); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_y0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_y0, 16); >- src2 = (v16u8)__msa_ld_b((v16i8*)src_y1, 0); >- src3 = (v16u8)__msa_ld_b((v16i8*)src_y1, 16); >- src4 = (v16u8)__msa_ld_b((v16i8*)src_y2, 0); >- src5 = (v16u8)__msa_ld_b((v16i8*)src_y2, 16); >+ src0 = (v16u8)__msa_ld_b((void*)src_y0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_y0, 16); >+ src2 = (v16u8)__msa_ld_b((void*)src_y1, 0); >+ src3 = (v16u8)__msa_ld_b((void*)src_y1, 16); >+ src4 = (v16u8)__msa_ld_b((void*)src_y2, 0); >+ src5 = (v16u8)__msa_ld_b((void*)src_y2, 16); > vec0 = (v8i16)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); > vec1 = (v8i16)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); > vec2 = (v8i16)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); >@@ -3409,8 +3409,8 @@ void SobelYRow_MSA(const uint8_t* src_y0, > v8i16 max = __msa_ldi_h(255); > > for (x = 0; x < width; x += 16) { >- src0 = (v16u8)__msa_ld_b((v16i8*)src_y0, 0); >- src1 = (v16u8)__msa_ld_b((v16i8*)src_y1, 0); >+ src0 = (v16u8)__msa_ld_b((void*)src_y0, 0); >+ src1 = (v16u8)__msa_ld_b((void*)src_y1, 0); > vec0 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src0); > vec1 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src0); > vec2 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); >@@ -3457,10 +3457,10 @@ void HalfFloatRow_MSA(const uint16_t* src, > mult_vec = (v4f32)__msa_splati_w((v4i32)mult_vec, 0); > > for (i = 0; i < width; i += 32) { >- src0 = (v8u16)__msa_ld_h((v8i16*)src, 0); >- src1 = 
(v8u16)__msa_ld_h((v8i16*)src, 16); >- src2 = (v8u16)__msa_ld_h((v8i16*)src, 32); >- src3 = (v8u16)__msa_ld_h((v8i16*)src, 48); >+ src0 = (v8u16)__msa_ld_h((void*)src, 0); >+ src1 = (v8u16)__msa_ld_h((void*)src, 16); >+ src2 = (v8u16)__msa_ld_h((void*)src, 32); >+ src3 = (v8u16)__msa_ld_h((void*)src, 48); > vec0 = (v4u32)__msa_ilvr_h(zero, (v8i16)src0); > vec1 = (v4u32)__msa_ilvl_h(zero, (v8i16)src0); > vec2 = (v4u32)__msa_ilvr_h(zero, (v8i16)src1); >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_neon.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_neon.cc >index 8b6c19520722988b59ba061e1b0ec5a9d889c66d..ff87e74c62c9f909b096d84ae53edf33bad3ffe0 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_neon.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/row_neon.cc >@@ -2604,12 +2604,15 @@ void SobelYRow_NEON(const uint8_t* src_y0, > ); > } > >+// %y passes a float as a scalar vector for vector * scalar multiply. >+// the register must be d0 to d15 and indexed with [0] or [1] to access >+// the float in the first or second float of the d-reg >+ > void HalfFloat1Row_NEON(const uint16_t* src, > uint16_t* dst, > float /*unused*/, > int width) { > asm volatile( >- "vdup.32 q0, %3 \n" > > "1: \n" > "vld1.8 {q1}, [%0]! \n" // load 8 shorts >@@ -2618,8 +2621,8 @@ void HalfFloat1Row_NEON(const uint16_t* src, > "vmovl.u16 q3, d3 \n" > "vcvt.f32.u32 q2, q2 \n" // 8 floats > "vcvt.f32.u32 q3, q3 \n" >- "vmul.f32 q2, q2, q0 \n" // adjust exponent >- "vmul.f32 q3, q3, q0 \n" >+ "vmul.f32 q2, q2, %y3 \n" // adjust exponent >+ "vmul.f32 q3, q3, %y3 \n" > "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat > "vqshrn.u32 d3, q3, #13 \n" > "vst1.8 {q1}, [%1]! 
\n" >@@ -2627,17 +2630,15 @@ void HalfFloat1Row_NEON(const uint16_t* src, > : "+r"(src), // %0 > "+r"(dst), // %1 > "+r"(width) // %2 >- : "r"(1.9259299444e-34f) // %3 >- : "cc", "memory", "q0", "q1", "q2", "q3"); >+ : "w"(1.9259299444e-34f) // %3 >+ : "cc", "memory", "q1", "q2", "q3"); > } > >-// TODO(fbarchard): multiply by element. > void HalfFloatRow_NEON(const uint16_t* src, > uint16_t* dst, > float scale, > int width) { > asm volatile( >- "vdup.32 q0, %3 \n" > > "1: \n" > "vld1.8 {q1}, [%0]! \n" // load 8 shorts >@@ -2646,8 +2647,8 @@ void HalfFloatRow_NEON(const uint16_t* src, > "vmovl.u16 q3, d3 \n" > "vcvt.f32.u32 q2, q2 \n" // 8 floats > "vcvt.f32.u32 q3, q3 \n" >- "vmul.f32 q2, q2, q0 \n" // adjust exponent >- "vmul.f32 q3, q3, q0 \n" >+ "vmul.f32 q2, q2, %y3 \n" // adjust exponent >+ "vmul.f32 q3, q3, %y3 \n" > "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat > "vqshrn.u32 d3, q3, #13 \n" > "vst1.8 {q1}, [%1]! \n" >@@ -2655,8 +2656,8 @@ void HalfFloatRow_NEON(const uint16_t* src, > : "+r"(src), // %0 > "+r"(dst), // %1 > "+r"(width) // %2 >- : "r"(scale * 1.9259299444e-34f) // %3 >- : "cc", "memory", "q0", "q1", "q2", "q3"); >+ : "w"(scale * 1.9259299444e-34f) // %3 >+ : "cc", "memory", "q1", "q2", "q3"); > } > > void ByteToFloatRow_NEON(const uint8_t* src, >@@ -2664,7 +2665,6 @@ void ByteToFloatRow_NEON(const uint8_t* src, > float scale, > int width) { > asm volatile( >- "vdup.32 q0, %3 \n" > > "1: \n" > "vld1.8 {d2}, [%0]! \n" // load 8 bytes >@@ -2674,15 +2674,15 @@ void ByteToFloatRow_NEON(const uint8_t* src, > "vmovl.u16 q3, d3 \n" > "vcvt.f32.u32 q2, q2 \n" // 8 floats > "vcvt.f32.u32 q3, q3 \n" >- "vmul.f32 q2, q2, d0[0] \n" // scale >- "vmul.f32 q3, q3, d0[0] \n" >+ "vmul.f32 q2, q2, %y3 \n" // scale >+ "vmul.f32 q3, q3, %y3 \n" > "vst1.8 {q2, q3}, [%1]! 
\n" // store 8 floats > "bgt 1b \n" > : "+r"(src), // %0 > "+r"(dst), // %1 > "+r"(width) // %2 >- : "r"(scale) // %3 >- : "cc", "memory", "q0", "q1", "q2", "q3"); >+ : "w"(scale) // %3 >+ : "cc", "memory", "q1", "q2", "q3"); > } > > #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale.cc >index 2cfa1c6cb1c4b8a4be72bb590372aa7b54ccd7ee..a8db93fde48bb2ea0d9f44ccf61eea74767a9946 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale.cc >@@ -118,6 +118,21 @@ static void ScalePlaneDown2(int src_width, > } > } > #endif >+#if defined(HAS_SCALEROWDOWN2_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleRowDown2 = >+ filtering == kFilterNone >+ ? ScaleRowDown2_Any_MMI >+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI >+ : ScaleRowDown2Box_Any_MMI); >+ if (IS_ALIGNED(dst_width, 8)) { >+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI >+ : (filtering == kFilterLinear >+ ? ScaleRowDown2Linear_MMI >+ : ScaleRowDown2Box_MMI); >+ } >+ } >+#endif > > if (filtering == kFilterLinear) { > src_stride = 0; >@@ -169,6 +184,14 @@ static void ScalePlaneDown2_16(int src_width, > : ScaleRowDown2Box_16_SSE2); > } > #endif >+#if defined(HAS_SCALEROWDOWN2_16_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) { >+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_MMI >+ : (filtering == kFilterLinear >+ ? ScaleRowDown2Linear_16_MMI >+ : ScaleRowDown2Box_16_MMI); >+ } >+#endif > > if (filtering == kFilterLinear) { > src_stride = 0; >@@ -241,6 +264,15 @@ static void ScalePlaneDown4(int src_width, > } > } > #endif >+#if defined(HAS_SCALEROWDOWN4_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleRowDown4 = >+ filtering ? 
ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI; >+ if (IS_ALIGNED(dst_width, 8)) { >+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI; >+ } >+ } >+#endif > > if (filtering == kFilterLinear) { > src_stride = 0; >@@ -284,6 +316,11 @@ static void ScalePlaneDown4_16(int src_width, > filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; > } > #endif >+#if defined(HAS_SCALEROWDOWN4_16_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) { >+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI; >+ } >+#endif > > if (filtering == kFilterLinear) { > src_stride = 0; >@@ -849,6 +886,14 @@ static void ScalePlaneBox(int src_width, > } > } > #endif >+#if defined(HAS_SCALEADDROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleAddRow = ScaleAddRow_Any_MMI; >+ if (IS_ALIGNED(src_width, 8)) { >+ ScaleAddRow = ScaleAddRow_MMI; >+ } >+ } >+#endif > > for (j = 0; j < dst_height; ++j) { > int boxheight; >@@ -904,6 +949,11 @@ static void ScalePlaneBox_16(int src_width, > } > #endif > >+#if defined(HAS_SCALEADDROW_16_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) { >+ ScaleAddRow = ScaleAddRow_16_MMI; >+ } >+#endif > for (j = 0; j < dst_height; ++j) { > int boxheight; > int iy = y >> 16; >@@ -988,6 +1038,14 @@ void ScalePlaneBilinearDown(int src_width, > } > } > #endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(src_width, 16)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } >+#endif > > #if defined(HAS_SCALEFILTERCOLS_SSSE3) > if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { >@@ -1206,6 +1264,11 @@ void ScalePlaneBilinearUp(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleFilterCols = ScaleColsUp2_SSE2; > } >+#endif >+#if defined(HAS_SCALECOLS_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) { >+ ScaleFilterCols = ScaleColsUp2_MMI; >+ } > #endif > } > >@@ 
-1333,6 +1396,11 @@ void ScalePlaneBilinearUp_16(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleFilterCols = ScaleColsUp2_16_SSE2; > } >+#endif >+#if defined(HAS_SCALECOLS_16_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) { >+ ScaleFilterCols = ScaleColsUp2_16_MMI; >+ } > #endif > } > >@@ -1418,6 +1486,11 @@ static void ScalePlaneSimple(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleCols = ScaleColsUp2_SSE2; > } >+#endif >+#if defined(HAS_SCALECOLS_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) { >+ ScaleCols = ScaleColsUp2_MMI; >+ } > #endif > } > >@@ -1454,6 +1527,11 @@ static void ScalePlaneSimple_16(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleCols = ScaleColsUp2_16_SSE2; > } >+#endif >+#if defined(HAS_SCALECOLS_16_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) { >+ ScaleCols = ScaleColsUp2_16_MMI; >+ } > #endif > } > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_any.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_any.cc >index 53ad136404995333e572364b70076334c5eb8d69..17831372cf0a02632524138a30d9dd004a255813 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_any.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_any.cc >@@ -8,6 +8,8 @@ > * be found in the AUTHORS file in the root of the source tree. 
> */ > >+#include <string.h> // For memset/memcpy >+ > #include "libyuv/scale.h" > #include "libyuv/scale_row.h" > >@@ -42,6 +44,9 @@ CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) > #ifdef HAS_SCALEARGBCOLS_MSA > CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) > #endif >+#ifdef HAS_SCALEARGBCOLS_MMI >+CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0) >+#endif > #ifdef HAS_SCALEARGBFILTERCOLS_NEON > CANY(ScaleARGBFilterCols_Any_NEON, > ScaleARGBFilterCols_NEON, >@@ -165,6 +170,27 @@ SDANY(ScaleRowDown2Box_Any_MSA, > 1, > 31) > #endif >+#ifdef HAS_SCALEROWDOWN2_MMI >+SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7) >+SDANY(ScaleRowDown2Linear_Any_MMI, >+ ScaleRowDown2Linear_MMI, >+ ScaleRowDown2Linear_C, >+ 2, >+ 1, >+ 7) >+SDANY(ScaleRowDown2Box_Any_MMI, >+ ScaleRowDown2Box_MMI, >+ ScaleRowDown2Box_C, >+ 2, >+ 1, >+ 7) >+SDODD(ScaleRowDown2Box_Odd_MMI, >+ ScaleRowDown2Box_MMI, >+ ScaleRowDown2Box_Odd_C, >+ 2, >+ 1, >+ 7) >+#endif > #ifdef HAS_SCALEROWDOWN4_SSSE3 > SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) > SDANY(ScaleRowDown4Box_Any_SSSE3, >@@ -201,6 +227,15 @@ SDANY(ScaleRowDown4Box_Any_MSA, > 1, > 15) > #endif >+#ifdef HAS_SCALEROWDOWN4_MMI >+SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7) >+SDANY(ScaleRowDown4Box_Any_MMI, >+ ScaleRowDown4Box_MMI, >+ ScaleRowDown4Box_C, >+ 4, >+ 1, >+ 7) >+#endif > #ifdef HAS_SCALEROWDOWN34_SSSE3 > SDANY(ScaleRowDown34_Any_SSSE3, > ScaleRowDown34_SSSE3, >@@ -382,6 +417,26 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA, > 4, > 3) > #endif >+#ifdef HAS_SCALEARGBROWDOWN2_MMI >+SDANY(ScaleARGBRowDown2_Any_MMI, >+ ScaleARGBRowDown2_MMI, >+ ScaleARGBRowDown2_C, >+ 2, >+ 4, >+ 1) >+SDANY(ScaleARGBRowDown2Linear_Any_MMI, >+ ScaleARGBRowDown2Linear_MMI, >+ ScaleARGBRowDown2Linear_C, >+ 2, >+ 4, >+ 1) >+SDANY(ScaleARGBRowDown2Box_Any_MMI, >+ ScaleARGBRowDown2Box_MMI, >+ ScaleARGBRowDown2Box_C, >+ 
2, >+ 4, >+ 1) >+#endif > #undef SDANY > > // Scale down by even scale factor. >@@ -433,6 +488,57 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, > 4, > 3) > #endif >+#ifdef HAS_SCALEARGBROWDOWNEVEN_MMI >+SDAANY(ScaleARGBRowDownEven_Any_MMI, >+ ScaleARGBRowDownEven_MMI, >+ ScaleARGBRowDownEven_C, >+ 4, >+ 1) >+SDAANY(ScaleARGBRowDownEvenBox_Any_MMI, >+ ScaleARGBRowDownEvenBox_MMI, >+ ScaleARGBRowDownEvenBox_C, >+ 4, >+ 1) >+#endif >+ >+#ifdef SASIMDONLY >+// This also works and uses memcpy and SIMD instead of C, but is slower on ARM >+ >+// Add rows box filter scale down. Using macro from row_any >+#define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ >+ void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \ >+ SIMD_ALIGNED(uint16_t dst_temp[32]); \ >+ SIMD_ALIGNED(uint8_t src_temp[32]); \ >+ memset(dst_temp, 0, 32 * 2); /* for msan */ \ >+ int r = width & MASK; \ >+ int n = width & ~MASK; \ >+ if (n > 0) { \ >+ ANY_SIMD(src_ptr, dst_ptr, n); \ >+ } \ >+ memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \ >+ memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \ >+ ANY_SIMD(src_temp, dst_temp, MASK + 1); \ >+ memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \ >+ } >+ >+#ifdef HAS_SCALEADDROW_SSE2 >+SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15) >+#endif >+#ifdef HAS_SCALEADDROW_AVX2 >+SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31) >+#endif >+#ifdef HAS_SCALEADDROW_NEON >+SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15) >+#endif >+#ifdef HAS_SCALEADDROW_MSA >+SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15) >+#endif >+#ifdef HAS_SCALEADDROW_MMI >+SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7) >+#endif >+#undef SAANY >+ >+#else > > // Add rows box filter scale down. 
> #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ >@@ -456,8 +562,13 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) > #ifdef HAS_SCALEADDROW_MSA > SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) > #endif >+#ifdef HAS_SCALEADDROW_MMI >+SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7) >+#endif > #undef SAANY > >+#endif // SASIMDONLY >+ > #ifdef __cplusplus > } // extern "C" > } // namespace libyuv >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_argb.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_argb.cc >index 53a22e8b41ea1273fc2ae15215dbf91aa409aec6..beef380a8e61dba28300f88e59cec2c234690a10 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_argb.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_argb.cc >@@ -111,6 +111,22 @@ static void ScaleARGBDown2(int src_width, > } > } > #endif >+#if defined(HAS_SCALEARGBROWDOWN2_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleARGBRowDown2 = >+ filtering == kFilterNone >+ ? ScaleARGBRowDown2_Any_MMI >+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI >+ : ScaleARGBRowDown2Box_Any_MMI); >+ if (IS_ALIGNED(dst_width, 2)) { >+ ScaleARGBRowDown2 = >+ filtering == kFilterNone >+ ? ScaleARGBRowDown2_MMI >+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI >+ : ScaleARGBRowDown2Box_MMI); >+ } >+ } >+#endif > > if (filtering == kFilterLinear) { > src_stride = 0; >@@ -237,6 +253,16 @@ static void ScaleARGBDownEven(int src_width, > } > } > #endif >+#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI >+ : ScaleARGBRowDownEven_Any_MMI; >+ if (IS_ALIGNED(dst_width, 2)) { >+ ScaleARGBRowDownEven = >+ filtering ? 
ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI; >+ } >+ } >+#endif > > if (filtering == kFilterLinear) { > src_stride = 0; >@@ -417,6 +443,14 @@ static void ScaleARGBBilinearUp(int src_width, > InterpolateRow = InterpolateRow_MSA; > } > } >+#endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(dst_width, 2)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } > #endif > if (src_width >= 32768) { > ScaleARGBFilterCols = >@@ -463,6 +497,14 @@ static void ScaleARGBBilinearUp(int src_width, > ScaleARGBFilterCols = ScaleARGBCols_MSA; > } > } >+#endif >+#if defined(HAS_SCALEARGBCOLS_MMI) >+ if (!filtering && TestCpuFlag(kCpuHasMMI)) { >+ ScaleARGBFilterCols = ScaleARGBCols_Any_MMI; >+ if (IS_ALIGNED(dst_width, 1)) { >+ ScaleARGBFilterCols = ScaleARGBCols_MMI; >+ } >+ } > #endif > if (!filtering && src_width * 2 == dst_width && x < 0x8000) { > ScaleARGBFilterCols = ScaleARGBColsUp2_C; >@@ -470,6 +512,11 @@ static void ScaleARGBBilinearUp(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; > } >+#endif >+#if defined(HAS_SCALEARGBCOLSUP2_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) { >+ ScaleARGBFilterCols = ScaleARGBColsUp2_MMI; >+ } > #endif > } > >@@ -665,6 +712,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, > ScaleARGBFilterCols = ScaleARGBCols_MSA; > } > } >+#endif >+#if defined(HAS_SCALEARGBCOLS_MMI) >+ if (!filtering && TestCpuFlag(kCpuHasMMI)) { >+ ScaleARGBFilterCols = ScaleARGBCols_Any_MMI; >+ if (IS_ALIGNED(dst_width, 1)) { >+ ScaleARGBFilterCols = ScaleARGBCols_MMI; >+ } >+ } > #endif > if (!filtering && src_width * 2 == dst_width && x < 0x8000) { > ScaleARGBFilterCols = ScaleARGBColsUp2_C; >@@ -672,6 +727,11 @@ static void ScaleYUVToARGBBilinearUp(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleARGBFilterCols = 
ScaleARGBColsUp2_SSE2; > } >+#endif >+#if defined(HAS_SCALEARGBCOLSUP2_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) { >+ ScaleARGBFilterCols = ScaleARGBColsUp2_MMI; >+ } > #endif > } > >@@ -796,6 +856,14 @@ static void ScaleARGBSimple(int src_width, > ScaleARGBCols = ScaleARGBCols_MSA; > } > } >+#endif >+#if defined(HAS_SCALEARGBCOLS_MMI) >+ if (TestCpuFlag(kCpuHasMMI)) { >+ ScaleARGBCols = ScaleARGBCols_Any_MMI; >+ if (IS_ALIGNED(dst_width, 1)) { >+ ScaleARGBCols = ScaleARGBCols_MMI; >+ } >+ } > #endif > if (src_width * 2 == dst_width && x < 0x8000) { > ScaleARGBCols = ScaleARGBColsUp2_C; >@@ -803,6 +871,11 @@ static void ScaleARGBSimple(int src_width, > if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { > ScaleARGBCols = ScaleARGBColsUp2_SSE2; > } >+#endif >+#if defined(HAS_SCALEARGBCOLSUP2_MMI) >+ if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) { >+ ScaleARGBCols = ScaleARGBColsUp2_MMI; >+ } > #endif > } > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_common.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_common.cc >index b28d7da41fce9b6230aa3ccf6c110bdebe072e10..6369027175f1605e550ad83b57828fcb3dcf3b24 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_common.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_common.cc >@@ -542,7 +542,9 @@ void ScaleFilterCols64_C(uint8_t* dst_ptr, > > // Same as 8 bit arm blender but return is cast to uint16_t > #define BLENDER(a, b, f) \ >- (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) >+ (uint16_t)( \ >+ (int)(a) + \ >+ (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16)) > > void ScaleFilterCols_16_C(uint16_t* dst_ptr, > const uint16_t* src_ptr, >@@ -1072,6 +1074,14 @@ void ScalePlaneVertical(int src_height, > InterpolateRow = InterpolateRow_MSA; > } > } >+#endif >+#if defined(HAS_INTERPOLATEROW_MMI) >+ if 
(TestCpuFlag(kCpuHasMMI)) { >+ InterpolateRow = InterpolateRow_Any_MMI; >+ if (IS_ALIGNED(dst_width_bytes, 8)) { >+ InterpolateRow = InterpolateRow_MMI; >+ } >+ } > #endif > for (j = 0; j < dst_height; ++j) { > int yi; >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_mmi.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_mmi.cc >new file mode 100644 >index 0000000000000000000000000000000000000000..e12c6bb79ba839a94ced53faf5eee317151452dd >--- /dev/null >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_mmi.cc >@@ -0,0 +1,1109 @@ >+/* >+ * Copyright 2013 The LibYuv Project Authors. All rights reserved. >+ * >+ * Use of this source code is governed by a BSD-style license >+ * that can be found in the LICENSE file in the root of the source >+ * tree. An additional intellectual property rights grant can be found >+ * in the file PATENTS. All contributing project authors may >+ * be found in the AUTHORS file in the root of the source tree. >+ */ >+ >+#include "libyuv/scale.h" >+ >+#include <assert.h> >+#include <string.h> >+ >+#include "libyuv/cpu_id.h" >+#include "libyuv/planar_functions.h" // For CopyARGB >+#include "libyuv/row.h" >+#include "libyuv/scale_row.h" >+ >+#ifdef __cplusplus >+namespace libyuv { >+extern "C" { >+#endif >+ >+// This module is for Mips MMI. 
>+#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+// CPU agnostic row functions >+void ScaleRowDown2_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1, dest; >+ const uint64_t shift = 0x8ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "psrlh %[src0], %[src0], %[shift] \n\t" >+ >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "psrlh %[src1], %[src1], %[shift] \n\t" >+ >+ "packushb %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), >+ [shift] "f"(shift) >+ : "memory"); >+} >+ >+void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1; >+ uint64_t dest, dest0, dest1; >+ >+ const uint64_t mask = 0x00ff00ff00ff00ffULL; >+ const uint64_t shift = 0x8ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "and %[dest0], %[src0], %[mask] \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "and %[dest1], %[src1], %[mask] \n\t" >+ "packushb %[dest0], %[dest0], %[dest1] \n\t" >+ >+ "psrlh %[src0], %[src0], %[shift] \n\t" >+ "psrlh %[src1], %[src1], %[shift] \n\t" >+ "packushb %[dest1], %[src0], %[src1] \n\t" >+ >+ "pavgb %[dest], %[dest0], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu 
%[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest0] "=&f"(dest0), >+ [dest1] "=&f"(dest1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [mask] "f"(mask), >+ [shift] "f"(shift), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleRowDown2Box_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width) { >+ const uint8_t* s = src_ptr; >+ const uint8_t* t = src_ptr + src_stride; >+ >+ uint64_t s0, s1, t0, t1; >+ uint64_t dest, dest0, dest1; >+ >+ const uint64_t ph = 0x0002000200020002ULL; >+ const uint64_t mask = 0x00ff00ff00ff00ffULL; >+ const uint64_t shift0 = 0x2ULL; >+ const uint64_t shift1 = 0x8ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[s0], 0x00(%[s]) \n\t" >+ "gsldlc1 %[s0], 0x07(%[s]) \n\t" >+ "psrlh %[s1], %[s0], %[shift1] \n\t" >+ "and %[s0], %[s0], %[mask] \n\t" >+ >+ "gsldrc1 %[t0], 0x00(%[t]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[t]) \n\t" >+ "psrlh %[t1], %[t0], %[shift1] \n\t" >+ "and %[t0], %[t0], %[mask] \n\t" >+ >+ "paddh %[dest0], %[s0], %[s1] \n\t" >+ "paddh %[dest0], %[dest0], %[t0] \n\t" >+ "paddh %[dest0], %[dest0], %[t1] \n\t" >+ "paddh %[dest0], %[dest0], %[ph] \n\t" >+ "psrlh %[dest0], %[dest0], %[shift0] \n\t" >+ >+ "gsldrc1 %[s0], 0x08(%[s]) \n\t" >+ "gsldlc1 %[s0], 0x0f(%[s]) \n\t" >+ "psrlh %[s1], %[s0], %[shift1] \n\t" >+ "and %[s0], %[s0], %[mask] \n\t" >+ >+ "gsldrc1 %[t0], 0x08(%[t]) \n\t" >+ "gsldlc1 %[t0], 0x0f(%[t]) \n\t" >+ "psrlh %[t1], %[t0], %[shift1] \n\t" >+ "and %[t0], %[t0], %[mask] \n\t" >+ >+ "paddh %[dest1], %[s0], %[s1] \n\t" >+ "paddh %[dest1], %[dest1], %[t0] \n\t" >+ "paddh %[dest1], %[dest1], %[t1] \n\t" >+ "paddh %[dest1], %[dest1], %[ph] \n\t" >+ "psrlh %[dest1], %[dest1], %[shift0] \n\t" >+ >+ "packushb %[dest], %[dest0], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 
0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[s], %[s], 0x10 \n\t" >+ "daddiu %[t], %[t], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest] "=&f"(dest) >+ : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width), >+ [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph), >+ [mask] "f"(mask) >+ : "memory"); >+} >+ >+void ScaleARGBRowDown2_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ uint8_t* dst_argb, >+ int dst_width) { >+ (void)src_stride; >+ >+ const uint32_t* src = (const uint32_t*)(src_argb); >+ uint32_t* dst = (uint32_t*)(dst_argb); >+ >+ uint64_t src0, src1, dest; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "punpckhwd %[dest], %[src0], %[src1] \n\t" >+ >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ uint8_t* dst_argb, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1; >+ uint64_t dest, dest_hi, dest_lo; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "lwc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "lwc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "punpcklwd %[dest_lo], %[src0], %[src1] \n\t" >+ "lwc1 %[src0], 0x04(%[src_ptr]) \n\t" >+ "lwc1 %[src1], 0x0c(%[src_ptr]) \n\t" >+ "punpcklwd %[dest_hi], %[src0], %[src1] \n\t" >+ >+ "pavgb %[dest], 
%[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), >+ [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ uint8_t* dst_argb, >+ int dst_width) { >+ const uint8_t* s = src_argb; >+ const uint8_t* t = src_argb + src_stride; >+ >+ uint64_t s0, s_hi, s_lo; >+ uint64_t t0, t_hi, t_lo; >+ uint64_t dest, dest_hi, dest_lo; >+ >+ const uint64_t mask = 0x0ULL; >+ const uint64_t ph = 0x0002000200020002ULL; >+ const uint64_t shfit = 0x2ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[s0], 0x00(%[s]) \n\t" >+ "gsldlc1 %[s0], 0x07(%[s]) \n\t" >+ "punpcklbh %[s_lo], %[s0], %[mask] \n\t" >+ "punpckhbh %[s_hi], %[s0], %[mask] \n\t" >+ "paddh %[dest_lo], %[s_lo], %[s_hi] \n\t" >+ >+ "gsldrc1 %[t0], 0x00(%[t]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[t]) \n\t" >+ "punpcklbh %[t_lo], %[t0], %[mask] \n\t" >+ "punpckhbh %[t_hi], %[t0], %[mask] \n\t" >+ "paddh %[dest_lo], %[dest_lo], %[t_lo] \n\t" >+ "paddh %[dest_lo], %[dest_lo], %[t_hi] \n\t" >+ >+ "paddh %[dest_lo], %[dest_lo], %[ph] \n\t" >+ "psrlh %[dest_lo], %[dest_lo], %[shfit] \n\t" >+ >+ "gsldrc1 %[s0], 0x08(%[s]) \n\t" >+ "gsldlc1 %[s0], 0x0f(%[s]) \n\t" >+ "punpcklbh %[s_lo], %[s0], %[mask] \n\t" >+ "punpckhbh %[s_hi], %[s0], %[mask] \n\t" >+ "paddh %[dest_hi], %[s_lo], %[s_hi] \n\t" >+ >+ "gsldrc1 %[t0], 0x08(%[t]) \n\t" >+ "gsldlc1 %[t0], 0x0f(%[t]) \n\t" >+ "punpcklbh %[t_lo], %[t0], %[mask] \n\t" >+ "punpckhbh %[t_hi], %[t0], %[mask] \n\t" >+ "paddh %[dest_hi], %[dest_hi], %[t_lo] \n\t" >+ "paddh %[dest_hi], %[dest_hi], %[t_hi] \n\t" >+ >+ "paddh 
%[dest_hi], %[dest_hi], %[ph] \n\t" >+ "psrlh %[dest_hi], %[dest_hi], %[shfit] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[s], %[s], 0x10 \n\t" >+ "daddiu %[t], %[t], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [s0] "=&f"(s0), [t0] "=&f"(t0), [dest_hi] "=&f"(dest_hi), >+ [dest_lo] "=&f"(dest_lo), [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), >+ [t_hi] "=&f"(t_hi), [t_lo] "=&f"(t_lo), [dest] "=&f"(dest) >+ : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width), >+ [mask] "f"(mask), [ph] "f"(ph), [shfit] "f"(shfit) >+ : "memory"); >+} >+ >+void ScaleRowDown2_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1, dest; >+ const uint64_t shift = 0x10ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "psrlw %[src0], %[src0], %[shift] \n\t" >+ >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "psrlw %[src1], %[src1], %[shift] \n\t" >+ >+ "packsswh %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), >+ [shift] "f"(shift) >+ : "memory"); >+} >+ >+void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1; >+ uint64_t dest, dest_hi, dest_lo; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ 
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "punpcklhw %[dest_lo], %[src0], %[src1] \n\t" >+ "punpckhhw %[dest_hi], %[src0], %[src1] \n\t" >+ >+ "punpcklhw %[src0], %[dest_lo], %[dest_hi] \n\t" >+ "punpckhhw %[src1], %[dest_lo], %[dest_hi] \n\t" >+ >+ "pavgh %[dest], %[src0], %[src1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), >+ [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width) { >+ const uint16_t* s = src_ptr; >+ const uint16_t* t = src_ptr + src_stride; >+ >+ uint64_t s0, s1, s_hi, s_lo; >+ uint64_t t0, t1, t_hi, t_lo; >+ uint64_t dest, dest0, dest1; >+ >+ const uint64_t ph = 0x0000000200000002ULL; >+ const uint64_t mask = 0x0000ffff0000ffffULL; >+ const uint64_t shift0 = 0x10ULL; >+ const uint64_t shift1 = 0x2ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[s0], 0x00(%[s]) \n\t" >+ "gsldlc1 %[s0], 0x07(%[s]) \n\t" >+ "psrlw %[s1], %[s0], %[shift0] \n\t" >+ "and %[s0], %[s0], %[mask] \n\t" >+ >+ "gsldrc1 %[t0], 0x00(%[t]) \n\t" >+ "gsldlc1 %[t0], 0x07(%[t]) \n\t" >+ "psrlw %[t1], %[t0], %[shift0] \n\t" >+ "and %[t0], %[t0], %[mask] \n\t" >+ >+ "paddw %[dest0], %[s0], %[s1] \n\t" >+ "paddw %[dest0], %[dest0], %[t0] \n\t" >+ "paddw %[dest0], %[dest0], %[t1] \n\t" >+ "paddw %[dest0], %[dest0], %[ph] \n\t" >+ "psrlw %[dest0], %[dest0], %[shift1] \n\t" >+ >+ "gsldrc1 %[s0], 0x08(%[s]) \n\t" >+ "gsldlc1 %[s0], 0x0f(%[s]) \n\t" >+ "psrlw %[s1], 
%[s0], %[shift0] \n\t" >+ "and %[s0], %[s0], %[mask] \n\t" >+ >+ "gsldrc1 %[t0], 0x08(%[t]) \n\t" >+ "gsldlc1 %[t0], 0x0f(%[t]) \n\t" >+ "psrlw %[t1], %[t0], %[shift0] \n\t" >+ "and %[t0], %[t0], %[mask] \n\t" >+ >+ "paddw %[dest1], %[s0], %[s1] \n\t" >+ "paddw %[dest1], %[dest1], %[t0] \n\t" >+ "paddw %[dest1], %[dest1], %[t1] \n\t" >+ "paddw %[dest1], %[dest1], %[ph] \n\t" >+ "psrlw %[dest1], %[dest1], %[shift1] \n\t" >+ >+ "packsswh %[dest], %[dest0], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[s], %[s], 0x10 \n\t" >+ "daddiu %[t], %[t], 0x10 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1), >+ [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), [t_hi] "=&f"(t_hi), >+ [t_lo] "=&f"(t_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), >+ [dest] "=&f"(dest) >+ : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width), >+ [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph), >+ [mask] "f"(mask) >+ : "memory"); >+} >+ >+void ScaleRowDown4_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1; >+ uint64_t dest, dest_hi, dest_lo; >+ >+ const uint64_t shift = 0x10ULL; >+ const uint64_t mask = 0x000000ff000000ffULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "psrlw %[src0], %[src0], %[shift] \n\t" >+ "and %[src0], %[src0], %[mask] \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "psrlw %[src1], %[src1], %[shift] \n\t" >+ "and %[src1], %[src1], %[mask] \n\t" >+ "packsswh %[dest_lo], %[src0], %[src1] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t" >+ "psrlw %[src0], %[src0], %[shift] \n\t" >+ "and 
%[src0], %[src0], %[mask] \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t" >+ "psrlw %[src1], %[src1], %[shift] \n\t" >+ "and %[src1], %[src1], %[mask] \n\t" >+ "packsswh %[dest_hi], %[src0], %[src1] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), >+ [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), >+ [shift] "f"(shift), [mask] "f"(mask) >+ : "memory"); >+} >+ >+void ScaleRowDown4_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1; >+ uint64_t dest, dest_hi, dest_lo; >+ >+ const uint64_t mask = 0x0ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" >+ "punpckhhw %[dest_lo], %[src0], %[src1] \n\t" >+ "punpcklhw %[dest_lo], %[dest_lo], %[mask] \n\t" >+ >+ "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t" >+ "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t" >+ "punpckhhw %[dest_hi], %[src0], %[src1] \n\t" >+ "punpcklhw %[dest_hi], %[dest_hi], %[mask] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), 
[dest_hi] "=&f"(dest_hi), >+ [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), >+ [mask] "f"(mask) >+ : "memory"); >+} >+ >+#define DO_SCALEROWDOWN4BOX_PUNPCKADD() \ >+ "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \ >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \ >+ "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \ >+ "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" >+ >+#define DO_SCALEROWDOWN4BOX_LOOP(reg) \ >+ "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \ >+ "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \ >+ "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \ >+ \ >+ "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \ >+ DO_SCALEROWDOWN4BOX_PUNPCKADD() \ >+ \ >+ "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \ >+ DO_SCALEROWDOWN4BOX_PUNPCKADD() \ >+ \ >+ "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \ >+ DO_SCALEROWDOWN4BOX_PUNPCKADD() \ >+ \ >+ "pmaddhw %[dest_lo], %[dest_lo], %[mask1] \n\t" \ >+ "pmaddhw %[dest_hi], %[dest_hi], %[mask1] \n\t" \ >+ "packsswh " #reg ", %[dest_lo], %[dest_hi] \n\t" \ >+ "pmaddhw " #reg ", " #reg ", %[mask1] \n\t" \ >+ "paddh " #reg ", " #reg ", %[ph] \n\t" \ >+ "psrlh " #reg ", " #reg ", %[shift] \n\t" \ >+ \ >+ "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \ >+ "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \ >+ "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \ >+ "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t" >+ >+/* LibYUVScaleTest.ScaleDownBy4_Box */ >+void ScaleRowDown4Box_MMI(const uint8_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint8_t* dst, >+ int dst_width) { >+ const uint8_t* src0_ptr = src_ptr; >+ const uint8_t* src1_ptr = src_ptr + src_stride; >+ const uint8_t* src2_ptr = src_ptr + src_stride * 2; >+ const uint8_t* src3_ptr = src_ptr + src_stride * 3; >+ >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3; >+ >+ const uint64_t mask0 = 0x0ULL; >+ const uint64_t mask1 = 0x0001000100010001ULL; >+ const uint64_t ph = 0x0008000800080008ULL; >+ const uint64_t 
shift = 0x4ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ >+ DO_SCALEROWDOWN4BOX_LOOP(%[dest0]) >+ DO_SCALEROWDOWN4BOX_LOOP(%[dest1]) >+ DO_SCALEROWDOWN4BOX_LOOP(%[dest2]) >+ DO_SCALEROWDOWN4BOX_LOOP(%[dest3]) >+ >+ "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" >+ "packsswh %[dest_hi], %[dest2], %[dest3] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest) >+ : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), >+ [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst), >+ [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0), >+ [ph] "f"(ph), [mask1] "f"(mask1) >+ : "memory"); >+} >+ >+#define DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ >+ "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \ >+ "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \ >+ "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \ >+ "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" >+ >+#define DO_SCALEROWDOWN4BOX_16_LOOP(reg) \ >+ "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \ >+ "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \ >+ "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \ >+ \ >+ "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \ >+ DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ >+ \ >+ "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \ >+ DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ >+ \ >+ "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \ >+ DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ >+ \ >+ "paddw %[dest], %[dest_lo], %[dest_hi] \n\t" \ >+ "punpckhwd %[dest_hi], %[dest], %[dest] \n\t" \ >+ "paddw %[dest], %[dest_hi], %[dest] \n\t" \ >+ "paddw %[dest], %[dest], %[ph] \n\t" \ >+ "psraw %[dest], %[dest], 
%[shift] \n\t" \ >+ "and " #reg ", %[dest], %[mask1] \n\t" \ >+ \ >+ "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \ >+ "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \ >+ "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \ >+ "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t" >+ >+/* LibYUVScaleTest.ScaleDownBy4_Box_16 */ >+void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width) { >+ const uint16_t* src0_ptr = src_ptr; >+ const uint16_t* src1_ptr = src_ptr + src_stride; >+ const uint16_t* src2_ptr = src_ptr + src_stride * 2; >+ const uint16_t* src3_ptr = src_ptr + src_stride * 3; >+ >+ uint64_t src, src_hi, src_lo; >+ uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3; >+ >+ const uint64_t mask0 = 0x0ULL; >+ const uint64_t mask1 = 0x00000000ffffffffULL; >+ const uint64_t ph = 0x0000000800000008ULL; >+ const uint64_t shift = 0x04ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ >+ DO_SCALEROWDOWN4BOX_16_LOOP(%[dest0]) >+ DO_SCALEROWDOWN4BOX_16_LOOP(%[dest1]) >+ DO_SCALEROWDOWN4BOX_16_LOOP(%[dest2]) >+ DO_SCALEROWDOWN4BOX_16_LOOP(%[dest3]) >+ "punpcklwd %[dest_lo], %[dest0], %[dest1] \n\t" >+ "punpcklwd %[dest_hi], %[dest2], %[dest3] \n\t" >+ >+ "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), >+ [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest) >+ : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), >+ [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst), >+ [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0), >+ [ph] "f"(ph), [mask1] "f"(mask1) >+ : "memory"); >+} >+ >+// Scales a single row of pixels 
up by 2x using point sampling. >+void ScaleColsUp2_MMI(uint8_t* dst_ptr, >+ const uint8_t* src_ptr, >+ int dst_width, >+ int x, >+ int dx) { >+ uint64_t src, dest; >+ >+ (void)x; >+ (void)dx; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "lwc1 %[src], 0x00(%[src_ptr]) \n\t" >+ >+ "punpcklbh %[dest], %[src], %[src] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleColsUp2_16_MMI(uint16_t* dst_ptr, >+ const uint16_t* src_ptr, >+ int dst_width, >+ int x, >+ int dx) { >+ uint64_t src, dest; >+ >+ (void)x; >+ (void)dx; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ >+ "punpcklhw %[dest], %[src], %[src] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "punpckhhw %[dest], %[src], %[src] \n\t" >+ "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src] "=&f"(src), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { >+ uint64_t src, src_hi, src_lo, dest0, dest1; >+ const uint64_t mask = 0x0ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src], %[mask] \n\t" >+ "punpckhbh %[src_hi], %[src], %[mask] \n\t" >+ >+ "gsldrc1 %[dest0], 0x00(%[dst_ptr]) 
\n\t" >+ "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" >+ "paddush %[dest0], %[dest0], %[src_lo] \n\t" >+ "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" >+ "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" >+ "paddush %[dest1], %[dest1], %[src_hi] \n\t" >+ >+ "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" >+ "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi), >+ [src_lo] "=&f"(src_lo), [src] "=&f"(src) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width), >+ [mask] "f"(mask) >+ : "memory"); >+} >+ >+void ScaleAddRow_16_MMI(const uint16_t* src_ptr, >+ uint32_t* dst_ptr, >+ int src_width) { >+ uint64_t src, src_hi, src_lo, dest0, dest1; >+ const uint64_t mask = 0x0ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" >+ "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" >+ "punpcklhw %[src_lo], %[src], %[mask] \n\t" >+ "punpckhhw %[src_hi], %[src], %[mask] \n\t" >+ >+ "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" >+ "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" >+ "paddw %[dest0], %[dest0], %[src_lo] \n\t" >+ "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" >+ >+ "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" >+ "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" >+ "paddw %[dest1], %[dest1], %[src_hi] \n\t" >+ "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x04 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi), >+ [src_lo] "=&f"(src_lo), [src] "=&f"(src) >+ : [src_ptr] "r"(src_ptr), [dst_ptr] 
"r"(dst_ptr), [width] "r"(src_width), >+ [mask] "f"(mask) >+ : "memory"); >+} >+ >+void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ int src_stepx, >+ uint8_t* dst_argb, >+ int dst_width) { >+ (void)src_stride; >+ >+ uint64_t src0, src1, dest; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "lwc1 %[src0], 0x00(%[src_ptr]) \n\t" >+ "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t" >+ "lwc1 %[src1], 0x00(%[src_ptr]) \n\t" >+ "punpcklwd %[dest], %[src0], %[src1] \n\t" >+ >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), >+ [src_stepx_4] "r"(src_stepx << 2), [width] "r"(dst_width) >+ : "memory"); >+} >+ >+void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb, >+ ptrdiff_t src_stride, >+ int src_stepx, >+ uint8_t* dst_argb, >+ int dst_width) { >+ const uint8_t* src0_ptr = src_argb; >+ const uint8_t* src1_ptr = src_argb + src_stride; >+ >+ uint64_t src0, src1, src_hi, src_lo; >+ uint64_t dest, dest_hi, dest_lo, dest0, dest1; >+ >+ const uint64_t mask = 0x0ULL; >+ const uint64_t ph = 0x0002000200020002ULL; >+ const uint64_t shift = 0x2ULL; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ >+ "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t" >+ "punpcklbh %[dest_lo], %[src0], %[mask] \n\t" >+ "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t" >+ "punpcklbh %[dest_hi], %[src0], %[mask] \n\t" >+ >+ "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src1], %[mask] \n\t" >+ "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t" >+ "punpcklbh %[src_hi], %[src1], %[mask] \n\t" >+ "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" >+ "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" >+ "paddh %[dest0], %[dest_hi], %[dest_lo] \n\t" >+ "paddh %[dest0], %[dest0], %[ph] 
\n\t" >+ "psrlh %[dest0], %[dest0], %[shift] \n\t" >+ >+ "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t" >+ "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t" >+ >+ "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t" >+ "punpcklbh %[dest_lo], %[src0], %[mask] \n\t" >+ "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t" >+ "punpcklbh %[dest_hi], %[src0], %[mask] \n\t" >+ >+ "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t" >+ "punpcklbh %[src_lo], %[src1], %[mask] \n\t" >+ "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t" >+ "punpcklbh %[src_hi], %[src1], %[mask] \n\t" >+ "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" >+ "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" >+ "paddh %[dest1], %[dest_hi], %[dest_lo] \n\t" >+ "paddh %[dest1], %[dest1], %[ph] \n\t" >+ "psrlh %[dest1], %[dest1], %[shift] \n\t" >+ >+ "packushb %[dest], %[dest0], %[dest1] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t" >+ "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" >+ "daddi %[width], %[width], -0x02 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), >+ [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), >+ [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0), >+ [src1] "=&f"(src1), [dest] "=&f"(dest) >+ : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), >+ [dst_ptr] "r"(dst_argb), [width] "r"(dst_width), >+ [src_stepx_4] "r"(src_stepx << 2), [shift] "f"(shift), [mask] "f"(mask), >+ [ph] "f"(ph) >+ : "memory"); >+} >+ >+// Scales a single row of pixels using point sampling. 
// x is a fixed-point source position with 16 fractional bits: every iteration
// copies the 32-bit pixel at index (x >> 16) to the next destination pixel and
// then advances x by dx.
// The pixel count is tested at the bottom of the loop and the loop only exits
// when it reaches exactly zero, so dst_width must be >= 1.
void ScaleARGBCols_MMI(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x,
                       int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);

  const uint32_t* src_tmp;

  uint64_t dest, offset;

  const uint64_t shift0 = 16;  // drops the fractional bits of x
  const uint64_t shift1 = 2;   // pixel index -> byte offset (4 bytes per pixel)

  __asm__ volatile(
      "1:                                                      \n\t"
      "srav       %[offset],   %[x],         %[shift0]         \n\t"
      "sllv       %[offset],   %[offset],    %[shift1]         \n\t"
      "dadd       %[src_tmp],  %[src_ptr],   %[offset]         \n\t"
      "lwc1       %[dest],     0x00(%[src_tmp])                \n\t"
      "swc1       %[dest],     0x00(%[dst_ptr])                \n\t"

      "dadd       %[x],        %[x],         %[dx]             \n\t"

      "daddiu     %[dst_ptr],  %[dst_ptr],   0x04              \n\t"
      "daddi      %[width],    %[width],     -0x01             \n\t"
      "bnez       %[width],    1b                              \n\t"
      // dst_ptr, width and x are rewritten by the loop, so they must be
      // read-write operands; an input-only operand may not be modified.
      // The early-clobber marker keeps them from sharing a register with a
      // pure input (e.g. dx) that is still read on later iterations.
      : [dest] "=&f"(dest), [offset] "=&r"(offset), [src_tmp] "=&r"(src_tmp),
        [dst_ptr] "+&r"(dst), [width] "+&r"(dst_width), [x] "+&r"(x)
      : [src_ptr] "r"(src), [dx] "r"(dx), [shift0] "r"(shift0),
        [shift1] "r"(shift1)
      : "memory");
}

// Scales a single row of pixels up by 2x using point sampling.
// Each iteration loads 8 source bytes (two 32-bit pixels), duplicates each
// 32-bit word and stores 16 bytes (four pixels). x and dx are unused.
// The counter drops by 4 per iteration and the loop only exits when it is
// exactly zero, so dst_width must be a positive multiple of 4.
void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
                          const uint8_t* src_argb,
                          int dst_width,
                          int x,
                          int dx) {
  uint64_t src, dest0, dest1;
  (void)x;
  (void)dx;

  __asm__ volatile(
      "1:                                                      \n\t"
      "gsldrc1    %[src],      0x00(%[src_ptr])                \n\t"
      "gsldlc1    %[src],      0x07(%[src_ptr])                \n\t"
      "punpcklwd  %[dest0],    %[src],       %[src]            \n\t"
      "gssdlc1    %[dest0],    0x07(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest0],    0x00(%[dst_ptr])                \n\t"
      "punpckhwd  %[dest1],    %[src],       %[src]            \n\t"
      "gssdlc1    %[dest1],    0x0f(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest1],    0x08(%[dst_ptr])                \n\t"

      "daddiu     %[src_ptr],  %[src_ptr],   0x08              \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x10              \n\t"
      "daddi      %[width],    %[width],     -0x04             \n\t"
      "bnez       %[width],    1b                              \n\t"
      // The two pointers and the counter are advanced inside the loop, so
      // they are read-write operands rather than input-only ones.
      : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src] "=&f"(src),
        [src_ptr] "+&r"(src_argb), [dst_ptr] "+&r"(dst_argb),
        [width] "+&r"(dst_width)
      :
      : "memory");
}

// Divide num by div and return as 16.16 fixed point result.
// Computes ((int64_t)num << 16) / div and returns the value truncated to int.
// No check for div == 0 is performed.
/* LibYUVBaseTest.TestFixedDiv */
int FixedDiv_MIPS(int num, int div) {
  // The asm shifts the dividend in place, so it lives in a 64-bit read-write
  // temporary instead of the input-only 32-bit parameter.
  int64_t dividend = num;
  int64_t quotient;
  const int shift = 16;

  asm("dsll       %[num],      %[num],       %[shift]          \n\t"
      "ddiv       %[num],      %[div]                          \n\t"
      "mflo       %[quo]                                       \n\t"
      : [quo] "=&r"(quotient), [num] "+&r"(dividend)
      : [div] "r"(div), [shift] "r"(shift)
      // ddiv writes the HI/LO register pair.
      : "hi", "lo");

  return (int)quotient;
}

// Divide num by div and return as 16.16 fixed point result.
// Computes (((int64_t)num << 16) - 0x00010001) / (div - 1) and returns the
// value truncated to int. No check for div == 1 (zero divisor) is performed.
/* LibYUVScaleTest.ARGBScaleTo320x240_Linear */
int FixedDiv1_MIPS(int num, int div) {
  // Both values are adjusted in place by the asm, so they are kept in 64-bit
  // read-write temporaries instead of the input-only parameters.
  int64_t dividend = num;
  int64_t divisor = div;
  int64_t quotient;
  const int shift = 16;
  const int val1 = 1;
  const int64_t val11 = 0x00010001ULL;

  asm("dsll       %[num],      %[num],       %[shift]          \n\t"
      "dsub       %[num],      %[num],       %[val11]          \n\t"
      "dsub       %[div],      %[div],       %[val1]           \n\t"
      "ddiv       %[num],      %[div]                          \n\t"
      "mflo       %[quo]                                       \n\t"
      : [quo] "=&r"(quotient), [num] "+&r"(dividend), [div] "+&r"(divisor)
      : [val1] "r"(val1), [val11] "r"(val11), [shift] "r"(shift)
      // ddiv writes the HI/LO register pair.
      : "hi", "lo");

  return (int)quotient;
}

// Read 8x2 upsample with filtering and write 16x1.
// actually reads an extra pixel, so 9x2.
>+void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width) { >+ const uint16_t* src2_ptr = src_ptr + src_stride; >+ >+ uint64_t src0, src1; >+ uint64_t dest, dest04, dest15, dest26, dest37; >+ uint64_t tmp0, tmp1, tmp2, tmp3; >+ >+ const uint64_t mask0 = 0x0003000900030009ULL; >+ const uint64_t mask1 = 0x0001000300010003ULL; >+ const uint64_t mask2 = 0x0009000300090003ULL; >+ const uint64_t mask3 = 0x0003000100030001ULL; >+ const uint64_t ph = 0x0000000800000008ULL; >+ const uint64_t shift = 4; >+ >+ __asm__ volatile( >+ "1: \n\t" >+ "gsldrc1 %[src0], 0x00(%[src1_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x07(%[src1_ptr]) \n\t" >+ "pmaddhw %[dest04], %[src0], %[mask0] \n\t" >+ "gsldrc1 %[src1], 0x00(%[src2_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x07(%[src2_ptr]) \n\t" >+ "pmaddhw %[dest], %[src1], %[mask1] \n\t" >+ "paddw %[dest04], %[dest04], %[dest] \n\t" >+ "paddw %[dest04], %[dest04], %[ph] \n\t" >+ "psrlw %[dest04], %[dest04], %[shift] \n\t" >+ >+ "pmaddhw %[dest15], %[src0], %[mask2] \n\t" >+ "pmaddhw %[dest], %[src1], %[mask3] \n\t" >+ "paddw %[dest15], %[dest15], %[dest] \n\t" >+ "paddw %[dest15], %[dest15], %[ph] \n\t" >+ "psrlw %[dest15], %[dest15], %[shift] \n\t" >+ >+ "gsldrc1 %[src0], 0x02(%[src1_ptr]) \n\t" >+ "gsldlc1 %[src0], 0x09(%[src1_ptr]) \n\t" >+ "pmaddhw %[dest26], %[src0], %[mask0] \n\t" >+ "gsldrc1 %[src1], 0x02(%[src2_ptr]) \n\t" >+ "gsldlc1 %[src1], 0x09(%[src2_ptr]) \n\t" >+ "pmaddhw %[dest], %[src1], %[mask1] \n\t" >+ "paddw %[dest26], %[dest26], %[dest] \n\t" >+ "paddw %[dest26], %[dest26], %[ph] \n\t" >+ "psrlw %[dest26], %[dest26], %[shift] \n\t" >+ >+ "pmaddhw %[dest37], %[src0], %[mask2] \n\t" >+ "pmaddhw %[dest], %[src1], %[mask3] \n\t" >+ "paddw %[dest37], %[dest37], %[dest] \n\t" >+ "paddw %[dest37], %[dest37], %[ph] \n\t" >+ "psrlw %[dest37], %[dest37], %[shift] \n\t" >+ >+ /* tmp0 = ( 00 04 02 06 ) */ >+ "packsswh %[tmp0], %[dest04], %[dest26] \n\t" >+ /* tmp1 = ( 01 05 03 07 ) */ >+ 
"packsswh %[tmp1], %[dest15], %[dest37] \n\t" >+ >+ /* tmp2 = ( 00 01 04 05 )*/ >+ "punpcklhw %[tmp2], %[tmp0], %[tmp1] \n\t" >+ /* tmp3 = ( 02 03 06 07 )*/ >+ "punpckhhw %[tmp3], %[tmp0], %[tmp1] \n\t" >+ >+ /* ( 00 01 02 03 ) */ >+ "punpcklwd %[dest], %[tmp2], %[tmp3] \n\t" >+ "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" >+ >+ /* ( 04 05 06 07 ) */ >+ "punpckhwd %[dest], %[tmp2], %[tmp3] \n\t" >+ "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" >+ "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" >+ >+ "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" >+ "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" >+ "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" >+ "daddi %[width], %[width], -0x08 \n\t" >+ "bnez %[width], 1b \n\t" >+ : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest04] "=&f"(dest04), >+ [dest15] "=&f"(dest15), [dest26] "=&f"(dest26), [dest37] "=&f"(dest37), >+ [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), >+ [tmp3] "=&f"(tmp3), [dest] "=&f"(dest) >+ : [src1_ptr] "r"(src_ptr), [src2_ptr] "r"(src2_ptr), [dst_ptr] "r"(dst), >+ [width] "r"(dst_width), [mask0] "f"(mask0), [mask1] "f"(mask1), >+ [mask2] "f"(mask2), [mask3] "f"(mask3), [shift] "f"(shift), [ph] "f"(ph) >+ : "memory"); >+} >+ >+#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ >+#ifdef __cplusplus >+} // extern "C" >+} // namespace libyuv >+#endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon.cc >index 459a2995dfe7530eaac460864436387d2130224c..46f5ba4cd4fddb1fab08e3347c5c857f66b8b03f 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon.cc >@@ -504,37 +504,25 @@ void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, > : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"); > } > >-void ScaleAddRows_NEON(const uint8_t* 
src_ptr, >- ptrdiff_t src_stride, >- uint16_t* dst_ptr, >- int src_width, >- int src_height) { >- const uint8_t* src_tmp; >+// Add a row of bytes to a row of shorts. Used for box filter. >+// Reads 16 bytes and accumulates to 16 shorts at a time. >+void ScaleAddRow_NEON(const uint8_t* src_ptr, >+ uint16_t* dst_ptr, >+ int src_width) { > asm volatile( > "1: \n" >- "mov %0, %1 \n" >- "mov r12, %5 \n" >- "veor q2, q2, q2 \n" >- "veor q3, q3, q3 \n" >- "2: \n" >- // load 16 pixels into q0 >- "vld1.8 {q0}, [%0], %3 \n" >- "vaddw.u8 q3, q3, d1 \n" >- "vaddw.u8 q2, q2, d0 \n" >- "subs r12, r12, #1 \n" >- "bgt 2b \n" >- "vst1.16 {q2, q3}, [%2]! \n" // store pixels >- "add %1, %1, #16 \n" >- "subs %4, %4, #16 \n" // 16 processed per loop >+ "vld1.16 {q1, q2}, [%1] \n" // load accumulator >+ "vld1.8 {q0}, [%0]! \n" // load 16 bytes >+ "vaddw.u8 q2, q2, d1 \n" // add >+ "vaddw.u8 q1, q1, d0 \n" >+ "vst1.16 {q1, q2}, [%1]! \n" // store accumulator >+ "subs %2, %2, #16 \n" // 16 processed per loop > "bgt 1b \n" >- : "=&r"(src_tmp), // %0 >- "+r"(src_ptr), // %1 >- "+r"(dst_ptr), // %2 >- "+r"(src_stride), // %3 >- "+r"(src_width), // %4 >- "+r"(src_height) // %5 >+ : "+r"(src_ptr), // %0 >+ "+r"(dst_ptr), // %1 >+ "+r"(src_width) // %2 > : >- : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List >+ : "memory", "cc", "q0", "q1", "q2" // Clobber List > ); > } > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon64.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon64.cc >index 494a9cfbfbe3c383e5ebf872c098a832fb82b238..f4aed5fc92f4f043c3f597b7b11b07f155d76283 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon64.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/source/scale_neon64.cc >@@ -515,37 +515,25 @@ void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, > "v19", "v30", "v31", "memory", "cc"); > } > >-void ScaleAddRows_NEON(const uint8_t* src_ptr, >- ptrdiff_t 
src_stride, >- uint16_t* dst_ptr, >- int src_width, >- int src_height) { >- const uint8_t* src_tmp; >+// Add a row of bytes to a row of shorts. Used for box filter. >+// Reads 16 bytes and accumulates to 16 shorts at a time. >+void ScaleAddRow_NEON(const uint8_t* src_ptr, >+ uint16_t* dst_ptr, >+ int src_width) { > asm volatile( > "1: \n" >- "mov %0, %1 \n" >- "mov w12, %w5 \n" >- "eor v2.16b, v2.16b, v2.16b \n" >- "eor v3.16b, v3.16b, v3.16b \n" >- "2: \n" >- // load 16 pixels into q0 >- "ld1 {v0.16b}, [%0], %3 \n" >- "uaddw2 v3.8h, v3.8h, v0.16b \n" >- "uaddw v2.8h, v2.8h, v0.8b \n" >- "subs w12, w12, #1 \n" >- "b.gt 2b \n" >- "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels >- "add %1, %1, #16 \n" >- "subs %w4, %w4, #16 \n" // 16 processed per loop >+ "ld1 {v1.8h, v2.8h}, [%1] \n" // load accumulator >+ "ld1 {v0.16b}, [%0], #16 \n" // load 16 bytes >+ "uaddw2 v2.8h, v2.8h, v0.16b \n" // add >+ "uaddw v1.8h, v1.8h, v0.8b \n" >+ "st1 {v1.8h, v2.8h}, [%1], #32 \n" // store accumulator >+ "subs %w2, %w2, #16 \n" // 16 processed per loop > "b.gt 1b \n" >- : "=&r"(src_tmp), // %0 >- "+r"(src_ptr), // %1 >- "+r"(dst_ptr), // %2 >- "+r"(src_stride), // %3 >- "+r"(src_width), // %4 >- "+r"(src_height) // %5 >+ : "+r"(src_ptr), // %0 >+ "+r"(dst_ptr), // %1 >+ "+r"(src_width) // %2 > : >- : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List >+ : "memory", "cc", "v0", "v1", "v2" // Clobber List > ); > } > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py >index 09ddc40e730ff9c4fb93e8d4bcac5e4492b9a071..ea8f69f18825b5a4d92e917a8ee3c642c5b0976f 100755 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py >@@ -8,7 +8,7 @@ > # be found in the AUTHORS file in the root of the 
source tree. > > # This is a modified copy of the script in >-# https://chromium.googlesource.com/external/webrtc/+/master/tools-webrtc/autoroller/roll_deps.py >+# https://webrtc.googlesource.com/src/+/master/tools_webrtc/autoroller/roll_deps.py > # customized for libyuv. > > >@@ -22,7 +22,7 @@ import os > import re > import subprocess > import sys >-import urllib >+import urllib2 > > > # Skip these dependencies (list without solution name prefix). >@@ -90,7 +90,7 @@ def ParseCommitPosition(commit_message): > for line in reversed(commit_message.splitlines()): > m = COMMIT_POSITION_RE.match(line.strip()) > if m: >- return m.group(1) >+ return int(m.group(1)) > logging.error('Failed to parse commit position id from:\n%s\n', > commit_message) > sys.exit(-1) >@@ -109,7 +109,7 @@ def _RunCommand(command, working_dir=None, ignore_exit_code=False, > logging.debug('CMD: %s CWD: %s', ' '.join(command), working_dir) > env = os.environ.copy() > if extra_env: >- assert all(type(value) == str for value in extra_env.values()) >+ assert all(isinstance(value, str) for value in extra_env.values()) > logging.debug('extra env: %s', extra_env) > env.update(extra_env) > p = subprocess.Popen(command, stdout=subprocess.PIPE, >@@ -169,7 +169,7 @@ def ReadRemoteCrCommit(revision): > > def ReadUrlContent(url): > """Connect to a remote host and read the contents. Returns a list of lines.""" >- conn = urllib.urlopen(url) >+ conn = urllib2.urlopen(url) > try: > return conn.readlines() > except IOError as e: >@@ -210,6 +210,8 @@ def BuildDepsentryDict(deps_dict): > for path, deps_url_spec in deps_subdict.iteritems(): > # The deps url is either an URL and a condition, or just the URL. 
> if isinstance(deps_url_spec, dict): >+ if deps_url_spec.get('dep_type') == 'cipd': >+ continue > deps_url = deps_url_spec['url'] > else: > deps_url = deps_url_spec >@@ -345,11 +347,9 @@ def UpdateDepsFile(deps_filename, old_cr_revision, new_cr_revision, > 'contains all platforms in the target_os list, i.e.\n' > 'target_os = ["android", "unix", "mac", "ios", "win"];\n' > 'Then run "gclient sync" again.' % local_dep_dir) >- _, stderr = _RunCommand( >- ['roll-dep-svn', '--no-verify-revision', dep.path, dep.new_rev], >- working_dir=CHECKOUT_SRC_DIR, ignore_exit_code=True) >- if stderr: >- logging.warning('roll-dep-svn: %s', stderr) >+ _RunCommand( >+ ['gclient', 'setdep', '--revision', '%s@%s' % (dep.path, dep.new_rev)], >+ working_dir=CHECKOUT_SRC_DIR) > > > def _IsTreeClean(): >@@ -397,20 +397,36 @@ def _LocalCommit(commit_msg, dry_run): > _RunCommand(['git', 'commit', '-m', commit_msg]) > > >-def _UploadCL(dry_run, rietveld_email=None): >- logging.info('Uploading CL...') >- if not dry_run: >- cmd = ['git', 'cl', 'upload', '-f'] >- if rietveld_email: >- cmd.append('--email=%s' % rietveld_email) >- _RunCommand(cmd, extra_env={'EDITOR': 'true'}) >+def ChooseCQMode(skip_cq, cq_over, current_commit_pos, new_commit_pos): >+ if skip_cq: >+ return 0 >+ if (new_commit_pos - current_commit_pos) < cq_over: >+ return 1 >+ return 2 > > >-def _SendToCQ(dry_run, skip_cq): >- logging.info('Sending the CL to the CQ...') >- if not dry_run and not skip_cq: >- _RunCommand(['git', 'cl', 'set_commit']) >- logging.info('Sent the CL to the CQ.') >+def _UploadCL(commit_queue_mode): >+ """Upload the committed changes as a changelist to Gerrit. >+ >+ commit_queue_mode: >+ - 2: Submit to commit queue. >+ - 1: Run trybots but do not submit to CQ. >+ - 0: Skip CQ, upload only. 
>+ """ >+ cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks', '--send-mail'] >+ if commit_queue_mode >= 2: >+ logging.info('Sending the CL to the CQ...') >+ cmd.extend(['--use-commit-queue']) >+ elif commit_queue_mode >= 1: >+ logging.info('Starting CQ dry run...') >+ cmd.extend(['--cq-dry-run']) >+ extra_env = { >+ 'EDITOR': 'true', >+ 'SKIP_GCE_AUTH_FOR_GIT': '1', >+ } >+ stdout, stderr = _RunCommand(cmd, extra_env=extra_env) >+ logging.debug('Output from "git cl upload":\nstdout:\n%s\n\nstderr:\n%s', >+ stdout, stderr) > > > def main(): >@@ -420,10 +436,6 @@ def main(): > p.add_argument('-r', '--revision', > help=('Chromium Git revision to roll to. Defaults to the ' > 'Chromium HEAD revision if omitted.')) >- p.add_argument('-u', '--rietveld-email', >- help=('E-mail address to use for creating the CL at Rietveld' >- 'If omitted a previously cached one will be used or an ' >- 'error will be thrown during upload.')) > p.add_argument('--dry-run', action='store_true', default=False, > help=('Calculate changes and modify DEPS, but don\'t create ' > 'any local branch, commit, upload CL or send any ' >@@ -432,8 +444,12 @@ def main(): > default=False, > help=('Ignore if the current branch is not master or if there ' > 'are uncommitted changes (default: %(default)s).')) >- p.add_argument('--skip-cq', action='store_true', default=False, >- help='Skip sending the CL to the CQ (default: %(default)s)') >+ grp = p.add_mutually_exclusive_group() >+ grp.add_argument('--skip-cq', action='store_true', default=False, >+ help='Skip sending the CL to the CQ (default: %(default)s)') >+ grp.add_argument('--cq-over', type=int, default=1, >+ help=('Commit queue dry run if the revision difference ' >+ 'is below this number (default: %(default)s)')) > p.add_argument('-v', '--verbose', action='store_true', default=False, > help='Be extra verbose in printing of log messages.') > opts = p.parse_args() >@@ -478,8 +494,11 @@ def main(): > _CreateRollBranch(opts.dry_run) > 
UpdateDepsFile(deps_filename, current_cr_rev, new_cr_rev, changed_deps) > _LocalCommit(commit_msg, opts.dry_run) >- _UploadCL(opts.dry_run, opts.rietveld_email) >- _SendToCQ(opts.dry_run, opts.skip_cq) >+ commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over, >+ current_commit_pos, new_commit_pos) >+ logging.info('Uploading CL...') >+ if not opts.dry_run: >+ _UploadCL(commit_queue_mode) > return 0 > > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/valgrind/libyuv_tests.bat b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/valgrind/libyuv_tests.bat >index e37f09eb25f3e154863e1c10a6ef466ebd9dcfa1..5fceca67620cf85d2e0e47a05aa7261e76412500 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/valgrind/libyuv_tests.bat >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/tools_libyuv/valgrind/libyuv_tests.bat >@@ -1,79 +1,79 @@ >-@echo off >-:: Copyright (c) 2012 The LibYuv Project Authors. All rights reserved. >-:: >-:: Use of this source code is governed by a BSD-style license >-:: that can be found in the LICENSE file in the root of the source >-:: tree. An additional intellectual property rights grant can be found >-:: in the file PATENTS. All contributing project authors may >-:: be found in the AUTHORS file in the root of the source tree. >- >-:: This script is a copy of chrome_tests.bat with the following changes: >-:: - Invokes libyuv_tests.py instead of chrome_tests.py >-:: - Chromium's Valgrind scripts directory is added to the PYTHONPATH to make >-:: it possible to execute the Python scripts properly. 
>- >-:: TODO(timurrrr): batch files 'export' all the variables to the parent shell >-set THISDIR=%~dp0 >-set TOOL_NAME="unknown" >- >-:: Get the tool name and put it into TOOL_NAME {{{1 >-:: NB: SHIFT command doesn't modify %* >-:PARSE_ARGS_LOOP >- if %1 == () GOTO:TOOLNAME_NOT_FOUND >- if %1 == --tool GOTO:TOOLNAME_FOUND >- SHIFT >- goto :PARSE_ARGS_LOOP >- >-:TOOLNAME_NOT_FOUND >-echo "Please specify a tool (tsan or drmemory) by using --tool flag" >-exit /B 1 >- >-:TOOLNAME_FOUND >-SHIFT >-set TOOL_NAME=%1 >-:: }}} >-if "%TOOL_NAME%" == "drmemory" GOTO :SETUP_DRMEMORY >-if "%TOOL_NAME%" == "drmemory_light" GOTO :SETUP_DRMEMORY >-if "%TOOL_NAME%" == "drmemory_full" GOTO :SETUP_DRMEMORY >-if "%TOOL_NAME%" == "drmemory_pattern" GOTO :SETUP_DRMEMORY >-if "%TOOL_NAME%" == "tsan" GOTO :SETUP_TSAN >-echo "Unknown tool: `%TOOL_NAME%`! Only tsan and drmemory are supported." >-exit /B 1 >- >-:SETUP_DRMEMORY >-if NOT "%DRMEMORY_COMMAND%"=="" GOTO :RUN_TESTS >-:: Set up DRMEMORY_COMMAND to invoke Dr. Memory {{{1 >-set DRMEMORY_PATH=%THISDIR%..\..\third_party\drmemory >-set DRMEMORY_SFX=%DRMEMORY_PATH%\drmemory-windows-sfx.exe >-if EXIST %DRMEMORY_SFX% GOTO DRMEMORY_BINARY_OK >-echo "Can't find Dr. Memory executables." >-echo "See http://www.chromium.org/developers/how-tos/using-valgrind/dr-memory" >-echo "for the instructions on how to get them." >-exit /B 1 >- >-:DRMEMORY_BINARY_OK >-%DRMEMORY_SFX% -o%DRMEMORY_PATH%\unpacked -y >-set DRMEMORY_COMMAND=%DRMEMORY_PATH%\unpacked\bin\drmemory.exe >-:: }}} >-goto :RUN_TESTS >- >-:SETUP_TSAN >-:: Set up PIN_COMMAND to invoke TSan {{{1 >-set TSAN_PATH=%THISDIR%..\..\third_party\tsan >-set TSAN_SFX=%TSAN_PATH%\tsan-x86-windows-sfx.exe >-if EXIST %TSAN_SFX% GOTO TSAN_BINARY_OK >-echo "Can't find ThreadSanitizer executables." >-echo "See http://www.chromium.org/developers/how-tos/using-valgrind/threadsanitizer/threadsanitizer-on-windows" >-echo "for the instructions on how to get them." 
>-exit /B 1 >- >-:TSAN_BINARY_OK >-%TSAN_SFX% -o%TSAN_PATH%\unpacked -y >-set PIN_COMMAND=%TSAN_PATH%\unpacked\tsan-x86-windows\tsan.bat >-:: }}} >-goto :RUN_TESTS >- >-:RUN_TESTS >-set PYTHONPATH=%THISDIR%..\python\google;%THISDIR%..\valgrind >-set RUNNING_ON_VALGRIND=yes >-python %THISDIR%libyuv_tests.py %* >+@echo off >+:: Copyright (c) 2012 The LibYuv Project Authors. All rights reserved. >+:: >+:: Use of this source code is governed by a BSD-style license >+:: that can be found in the LICENSE file in the root of the source >+:: tree. An additional intellectual property rights grant can be found >+:: in the file PATENTS. All contributing project authors may >+:: be found in the AUTHORS file in the root of the source tree. >+ >+:: This script is a copy of chrome_tests.bat with the following changes: >+:: - Invokes libyuv_tests.py instead of chrome_tests.py >+:: - Chromium's Valgrind scripts directory is added to the PYTHONPATH to make >+:: it possible to execute the Python scripts properly. >+ >+:: TODO(timurrrr): batch files 'export' all the variables to the parent shell >+set THISDIR=%~dp0 >+set TOOL_NAME="unknown" >+ >+:: Get the tool name and put it into TOOL_NAME {{{1 >+:: NB: SHIFT command doesn't modify %* >+:PARSE_ARGS_LOOP >+ if %1 == () GOTO:TOOLNAME_NOT_FOUND >+ if %1 == --tool GOTO:TOOLNAME_FOUND >+ SHIFT >+ goto :PARSE_ARGS_LOOP >+ >+:TOOLNAME_NOT_FOUND >+echo "Please specify a tool (tsan or drmemory) by using --tool flag" >+exit /B 1 >+ >+:TOOLNAME_FOUND >+SHIFT >+set TOOL_NAME=%1 >+:: }}} >+if "%TOOL_NAME%" == "drmemory" GOTO :SETUP_DRMEMORY >+if "%TOOL_NAME%" == "drmemory_light" GOTO :SETUP_DRMEMORY >+if "%TOOL_NAME%" == "drmemory_full" GOTO :SETUP_DRMEMORY >+if "%TOOL_NAME%" == "drmemory_pattern" GOTO :SETUP_DRMEMORY >+if "%TOOL_NAME%" == "tsan" GOTO :SETUP_TSAN >+echo "Unknown tool: `%TOOL_NAME%`! Only tsan and drmemory are supported." 
>+exit /B 1 >+ >+:SETUP_DRMEMORY >+if NOT "%DRMEMORY_COMMAND%"=="" GOTO :RUN_TESTS >+:: Set up DRMEMORY_COMMAND to invoke Dr. Memory {{{1 >+set DRMEMORY_PATH=%THISDIR%..\..\third_party\drmemory >+set DRMEMORY_SFX=%DRMEMORY_PATH%\drmemory-windows-sfx.exe >+if EXIST %DRMEMORY_SFX% GOTO DRMEMORY_BINARY_OK >+echo "Can't find Dr. Memory executables." >+echo "See http://www.chromium.org/developers/how-tos/using-valgrind/dr-memory" >+echo "for the instructions on how to get them." >+exit /B 1 >+ >+:DRMEMORY_BINARY_OK >+%DRMEMORY_SFX% -o%DRMEMORY_PATH%\unpacked -y >+set DRMEMORY_COMMAND=%DRMEMORY_PATH%\unpacked\bin\drmemory.exe >+:: }}} >+goto :RUN_TESTS >+ >+:SETUP_TSAN >+:: Set up PIN_COMMAND to invoke TSan {{{1 >+set TSAN_PATH=%THISDIR%..\..\third_party\tsan >+set TSAN_SFX=%TSAN_PATH%\tsan-x86-windows-sfx.exe >+if EXIST %TSAN_SFX% GOTO TSAN_BINARY_OK >+echo "Can't find ThreadSanitizer executables." >+echo "See http://www.chromium.org/developers/how-tos/using-valgrind/threadsanitizer/threadsanitizer-on-windows" >+echo "for the instructions on how to get them." 
>+exit /B 1 >+ >+:TSAN_BINARY_OK >+%TSAN_SFX% -o%TSAN_PATH%\unpacked -y >+set PIN_COMMAND=%TSAN_PATH%\unpacked\tsan-x86-windows\tsan.bat >+:: }}} >+goto :RUN_TESTS >+ >+:RUN_TESTS >+set PYTHONPATH=%THISDIR%..\python\google;%THISDIR%..\valgrind >+set RUNNING_ON_VALGRIND=yes >+python %THISDIR%libyuv_tests.py %* >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/convert_test.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/convert_test.cc >index 39281ae08076d7d5ed2d209ab36998f269817fbd..d97b4fc723c740b0f7cd141c4d77b892c7d44b39 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/convert_test.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/convert_test.cc >@@ -281,6 +281,23 @@ TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) > TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) > TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) > >+// wrapper to keep API the same >+int I400ToNV21(const uint8_t* src_y, >+ int src_stride_y, >+ const uint8_t* /* src_u */, >+ int /* src_stride_u */, >+ const uint8_t* /* src_v */, >+ int /* src_stride_v */, >+ uint8_t* dst_y, >+ int dst_stride_y, >+ uint8_t* dst_vu, >+ int dst_stride_vu, >+ int width, >+ int height) { >+ return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, >+ dst_stride_vu, width, height); >+} >+ > #define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ > FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ > TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ >@@ -374,6 +391,9 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) > > TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2) > TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2) >+TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2) >+TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2) >+TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2) > > #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ > FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, W1280, N, NEG, OFF, \ >@@ -566,6 +586,8 @@ TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) > TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1) > TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1) > TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) >+TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1) >+TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1) > TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) > TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) > TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) >@@ -734,6 +756,8 @@ TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2) > TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2) > TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2) > TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2) >+TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2) >+TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2) > TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) > > #ifdef DO_THREE_PLANES >@@ -1022,15 +1046,9 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) > MaskCpuFlags(benchmark_cpu_info_); \ > FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_opt, kStrideB, kWidth, \ > kHeight); \ >- int max_diff = 0; \ > for (int i = 0; i < kStrideB * kHeightB; ++i) { \ >- int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \ >- static_cast<int>(dst_argb_opt[i])); \ >- if (abs_diff > max_diff) { \ >- max_diff = abs_diff; \ >- } \ >+ EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF); \ > } \ >- EXPECT_LE(max_diff, DIFF); \ > free_aligned_buffer_page_end(src_argb); \ > free_aligned_buffer_page_end(dst_argb_c); \ > free_aligned_buffer_page_end(dst_argb_opt); \ >@@ -1050,6 +1068,7 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) > TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ > HEIGHT_B, DIFF) > >+// TODO(fbarchard): make ARM version of C code that matches NEON. 
> TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) > TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) > TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) >@@ -1361,72 +1380,613 @@ TEST_F(LibYUVConvertTest, FuzzJpeg) { > } > } > >-TEST_F(LibYUVConvertTest, MJPGToI420) { >- const int kOff = 10; >- const int kMinJpeg = 64; >- const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg >- ? benchmark_width_ * benchmark_height_ >- : kMinJpeg; >- const int kSize = kImageSize + kOff; >- align_buffer_page_end(orig_pixels, kSize); >- align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_); >- align_buffer_page_end(dst_u_opt, SUBSAMPLE(benchmark_width_, 2) * >- SUBSAMPLE(benchmark_height_, 2)); >- align_buffer_page_end(dst_v_opt, SUBSAMPLE(benchmark_width_, 2) * >- SUBSAMPLE(benchmark_height_, 2)); >+// Test data created in GIMP. In export jpeg, disable thumbnails etc, >+// choose a subsampling, and use low quality (50) to keep size small. >+// Generated with xxd -i test.jpg >+// test 0 is J400 >+static const uint8_t kTest0Jpg[] = { >+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, >+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, >+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, >+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, >+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, >+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, >+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, >+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, >+ 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, >+ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, >+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, >+ 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 
0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, >+ 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, >+ 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, >+ 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, >+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, >+ 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, >+ 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, >+ 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, >+ 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, >+ 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, >+ 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, >+ 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, >+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, >+ 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, >+ 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, >+ 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, >+ 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, >+ 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, >+ 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, >+ 0xd9}; >+static const size_t kTest0JpgLen = 421; >+ >+// test 1 is J444 >+static const uint8_t kTest1Jpg[] = { >+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, >+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, >+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 
0x0d, 0x0e, 0x12, >+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, >+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, >+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, >+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, >+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, >+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, >+ 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, >+ 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, >+ 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, >+ 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, >+ 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, >+ 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, >+ 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, >+ 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, >+ 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, >+ 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, >+ 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, >+ 0xaf, 0xff, 0xc4, 0x00, 
0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, >+ 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, >+ 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, >+ 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, >+ 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, >+ 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, >+ 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, >+ 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, >+ 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, >+ 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, >+ 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, >+ 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, >+ 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, >+ 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, >+ 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, >+ 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, >+ 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, >+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, >+ 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, >+ 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, >+ 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 
>+ 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, >+ 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, >+ 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, >+ 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, >+ 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, >+ 0xd4, 0xff, 0xd9}; >+static const size_t kTest1JpgLen = 735; >+ >+// test 2 is J420 >+static const uint8_t kTest2Jpg[] = { >+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, >+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, >+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, >+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, >+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, >+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, >+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, >+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, >+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, >+ 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, >+ 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, >+ 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, >+ 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 
0x00, >+ 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, >+ 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, >+ 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, >+ 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, >+ 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, >+ 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, >+ 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, >+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, >+ 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, >+ 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, >+ 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, >+ 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, >+ 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, >+ 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, >+ 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, >+ 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, >+ 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, >+ 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, >+ 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 
0x17, 0x11, 0x01, 0x01, 0x01, 0x01, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, >+ 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, >+ 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, >+ 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, >+ 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, >+ 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, >+ 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, >+ 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, >+ 0xd9}; >+static const size_t kTest2JpgLen = 685; >+ >+// test 3 is J422 >+static const uint8_t kTest3Jpg[] = { >+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, >+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, >+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, >+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, >+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, >+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, >+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, >+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, >+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, >+ 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 
0x11, 0x01, 0xff, 0xc4, 0x00, >+ 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, >+ 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, >+ 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, >+ 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, >+ 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, >+ 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, >+ 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, >+ 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, >+ 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, >+ 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, >+ 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, >+ 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, >+ 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, >+ 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, >+ 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, >+ 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, >+ 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, >+ 0x01, 0xf3, 
0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, >+ 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, >+ 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, >+ 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, >+ 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, >+ 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, >+ 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, >+ 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, >+ 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, >+ 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, >+ 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, >+ 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, >+ 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, >+ 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, >+ 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, >+ 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; >+static const size_t kTest3JpgLen = 704; >+ >+// test 4 is J422 vertical - not supported >+static const uint8_t kTest4Jpg[] = { >+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, >+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, >+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, >+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, >+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, >+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, >+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, >+ 0x78, 0x5c, 0x65, 0x67, 0x63, 
0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, >+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, >+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, >+ 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, >+ 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, >+ 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff, >+ 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, >+ 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, >+ 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, >+ 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, >+ 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, >+ 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, >+ 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, >+ 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, >+ 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, >+ 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, >+ 
0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, >+ 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, >+ 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, >+ 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, >+ 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, >+ 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, >+ 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, >+ 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, >+ 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, >+ 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff, >+ 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, >+ 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, >+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, >+ 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, >+ 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff, >+ 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, >+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, >+ 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, >+ 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, >+ 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, >+ 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, >+ 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, >+ 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; >+static const size_t kTest4JpgLen = 701; >+ >+TEST_F(LibYUVConvertTest, TestMJPGSize) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest2Jpg, 
kTest2JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ printf("test jpeg size %d x %d\n", width, height); >+} > >- // EOI, SOI to make MJPG appear valid. >- memset(orig_pixels, 0, kSize); >- orig_pixels[0] = 0xff; >- orig_pixels[1] = 0xd8; // SOI. >- orig_pixels[kSize - kOff + 0] = 0xff; >- orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. >+TEST_F(LibYUVConvertTest, TestMJPGToI420) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ int half_width = (width + 1) / 2; >+ int half_height = (height + 1) / 2; >+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); >+ >+ align_buffer_page_end(dst_y, width * height); >+ align_buffer_page_end(dst_u, half_width * half_height); >+ align_buffer_page_end(dst_v, half_width * half_height); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, >+ dst_v, half_width, width, height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Test result matches known hash value. >+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); >+ uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); >+ uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); >+ EXPECT_EQ(dst_y_hash, 2682851208u); >+ EXPECT_EQ(dst_u_hash, 2501859930u); >+ EXPECT_EQ(dst_v_hash, 2126459123u); > >- for (int times = 0; times < benchmark_iterations_; ++times) { >- int ret = >- MJPGToI420(orig_pixels, kSize, dst_y_opt, benchmark_width_, dst_u_opt, >- SUBSAMPLE(benchmark_width_, 2), dst_v_opt, >- SUBSAMPLE(benchmark_width_, 2), benchmark_width_, >- benchmark_height_, benchmark_width_, benchmark_height_); >- // Expect failure because image is not really valid. 
>- EXPECT_EQ(1, ret); >- } >- >- free_aligned_buffer_page_end(dst_y_opt); >- free_aligned_buffer_page_end(dst_u_opt); >- free_aligned_buffer_page_end(dst_v_opt); >- free_aligned_buffer_page_end(orig_pixels); >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_u); >+ free_aligned_buffer_page_end(dst_v); > } > >-TEST_F(LibYUVConvertTest, MJPGToARGB) { >- const int kOff = 10; >- const int kMinJpeg = 64; >- const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg >- ? benchmark_width_ * benchmark_height_ >- : kMinJpeg; >- const int kSize = kImageSize + kOff; >- align_buffer_page_end(orig_pixels, kSize); >- align_buffer_page_end(dst_argb_opt, benchmark_width_ * benchmark_height_ * 4); >+TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); > >- // EOI, SOI to make MJPG appear valid. >- memset(orig_pixels, 0, kSize); >- orig_pixels[0] = 0xff; >- orig_pixels[1] = 0xd8; // SOI. >- orig_pixels[kSize - kOff + 0] = 0xff; >- orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. >+ int half_width = (width + 1) / 2; >+ int half_height = (height + 1) / 2; >+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); > >- for (int times = 0; times < benchmark_iterations_; ++times) { >- int ret = MJPGToARGB(orig_pixels, kSize, dst_argb_opt, benchmark_width_ * 4, >- benchmark_width_, benchmark_height_, benchmark_width_, >- benchmark_height_); >- // Expect failure because image is not really valid. 
>- EXPECT_EQ(1, ret); >+ // Convert to NV21 >+ align_buffer_page_end(dst_y, width * height); >+ align_buffer_page_end(dst_vu, half_width * half_height * 2); >+ >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, >+ half_width * 2, width, height, width, height); > } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); > >- free_aligned_buffer_page_end(dst_argb_opt); >- free_aligned_buffer_page_end(orig_pixels); >+ // Convert to I420 >+ align_buffer_page_end(dst2_y, width * height); >+ align_buffer_page_end(dst2_u, half_width * half_height); >+ align_buffer_page_end(dst2_v, half_width * half_height); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, >+ dst2_v, half_width, width, height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Convert I420 to NV21 >+ align_buffer_page_end(dst3_y, width * height); >+ align_buffer_page_end(dst3_vu, half_width * half_height * 2); >+ >+ I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, >+ width, dst3_vu, half_width * 2, width, height); >+ >+ for (int i = 0; i < width * height; ++i) { >+ EXPECT_EQ(dst_y[i], dst3_y[i]); >+ } >+ for (int i = 0; i < half_width * half_height * 2; ++i) { >+ EXPECT_EQ(dst_vu[i], dst3_vu[i]); >+ EXPECT_EQ(dst_vu[i], dst3_vu[i]); >+ } >+ >+ free_aligned_buffer_page_end(dst3_y); >+ free_aligned_buffer_page_end(dst3_vu); >+ >+ free_aligned_buffer_page_end(dst2_y); >+ free_aligned_buffer_page_end(dst2_u); >+ free_aligned_buffer_page_end(dst2_v); >+ >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_vu); > } > >+TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ int half_width = (width + 1) / 2; >+ int half_height = (height + 1) / 2; >+ int 
benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); >+ >+ align_buffer_page_end(dst_y, width * height); >+ align_buffer_page_end(dst_uv, half_width * half_height * 2); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, >+ half_width * 2, width, height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Test result matches known hash value. >+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); >+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); >+ EXPECT_EQ(dst_y_hash, 2682851208u); >+ EXPECT_EQ(dst_uv_hash, 1069662856u); >+ >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_uv); >+} >+ >+TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ int half_width = (width + 1) / 2; >+ int half_height = (height + 1) / 2; >+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); >+ >+ align_buffer_page_end(dst_y, width * height); >+ align_buffer_page_end(dst_uv, half_width * half_height * 2); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, >+ half_width * 2, width, height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Test result matches known hash value. 
>+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); >+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); >+ EXPECT_EQ(dst_y_hash, 2682851208u); >+ EXPECT_EQ(dst_uv_hash, 3543430771u); >+ >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_uv); >+} >+ >+TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ int half_width = (width + 1) / 2; >+ int half_height = (height + 1) / 2; >+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); >+ >+ align_buffer_page_end(dst_y, width * height); >+ align_buffer_page_end(dst_uv, half_width * half_height * 2); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, >+ half_width * 2, width, height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Test result matches known hash value. 
>+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); >+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); >+ EXPECT_EQ(dst_y_hash, 330644005u); >+ EXPECT_EQ(dst_uv_hash, 135214341u); >+ >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_uv); >+} >+ >+TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ int half_width = (width + 1) / 2; >+ int half_height = (height + 1) / 2; >+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); >+ >+ align_buffer_page_end(dst_y, width * height); >+ align_buffer_page_end(dst_uv, half_width * half_height * 2); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, >+ half_width * 2, width, height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Test result matches known hash value. >+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); >+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); >+ EXPECT_EQ(dst_y_hash, 2682851208u); >+ EXPECT_EQ(dst_uv_hash, 506143297u); >+ >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_uv); >+} >+ >+TEST_F(LibYUVConvertTest, TestMJPGToARGB) { >+ int width = 0; >+ int height = 0; >+ int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); >+ EXPECT_EQ(0, ret); >+ >+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * >+ benchmark_height_ / (width * height); >+ >+ align_buffer_page_end(dst_argb, width * height * 4); >+ for (int times = 0; times < benchmark_iterations; ++times) { >+ ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, >+ height, width, height); >+ } >+ // Expect sucesss >+ EXPECT_EQ(0, ret); >+ >+ // Test result matches known hash value. 
>+ uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); >+ EXPECT_EQ(dst_argb_hash, 2355976473u); >+ >+ free_aligned_buffer_page_end(dst_argb); >+} >+ >+static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { >+ MJpegDecoder mjpeg_decoder; >+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); >+ >+ int width = mjpeg_decoder.GetWidth(); >+ int height = mjpeg_decoder.GetHeight(); >+ >+ // YUV420 >+ if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && >+ mjpeg_decoder.GetNumComponents() == 3 && >+ mjpeg_decoder.GetVertSampFactor(0) == 2 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 2 && >+ mjpeg_decoder.GetVertSampFactor(1) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(1) == 1 && >+ mjpeg_decoder.GetVertSampFactor(2) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(2) == 1) { >+ printf("JPeg is J420, %dx%d %d bytes\n", width, height, >+ static_cast<int>(sample_size)); >+ // YUV422 >+ } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && >+ mjpeg_decoder.GetNumComponents() == 3 && >+ mjpeg_decoder.GetVertSampFactor(0) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 2 && >+ mjpeg_decoder.GetVertSampFactor(1) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(1) == 1 && >+ mjpeg_decoder.GetVertSampFactor(2) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(2) == 1) { >+ printf("JPeg is J422, %dx%d %d bytes\n", width, height, >+ static_cast<int>(sample_size)); >+ // YUV444 >+ } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && >+ mjpeg_decoder.GetNumComponents() == 3 && >+ mjpeg_decoder.GetVertSampFactor(0) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 1 && >+ mjpeg_decoder.GetVertSampFactor(1) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(1) == 1 && >+ mjpeg_decoder.GetVertSampFactor(2) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(2) == 1) { >+ printf("JPeg is J444, %dx%d %d bytes\n", width, height, >+ static_cast<int>(sample_size)); >+ // YUV400 >+ } else if 
(mjpeg_decoder.GetColorSpace() == >+ MJpegDecoder::kColorSpaceGrayscale && >+ mjpeg_decoder.GetNumComponents() == 1 && >+ mjpeg_decoder.GetVertSampFactor(0) == 1 && >+ mjpeg_decoder.GetHorizSampFactor(0) == 1) { >+ printf("JPeg is J400, %dx%d %d bytes\n", width, height, >+ static_cast<int>(sample_size)); >+ } else { >+ // Unknown colorspace. >+ printf("JPeg is Unknown colorspace.\n"); >+ } >+ mjpeg_decoder.UnloadFrame(); >+ return ret; >+} >+ >+TEST_F(LibYUVConvertTest, TestMJPGInfo) { >+ EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); >+ EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); >+ EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); >+ EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); >+ EXPECT_EQ(1, >+ ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported. >+} > #endif // HAVE_JPEG > > TEST_F(LibYUVConvertTest, NV12Crop) { >@@ -1509,6 +2069,78 @@ TEST_F(LibYUVConvertTest, NV12Crop) { > free_aligned_buffer_page_end(src_y); > } > >+TEST_F(LibYUVConvertTest, I420CropOddY) { >+ const int SUBSAMP_X = 2; >+ const int SUBSAMP_Y = 2; >+ const int kWidth = benchmark_width_; >+ const int kHeight = benchmark_height_; >+ const int crop_y = 1; >+ const int kDestWidth = benchmark_width_; >+ const int kDestHeight = benchmark_height_ - crop_y * 2; >+ const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); >+ const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); >+ const int sample_size = kWidth * kHeight + >+ kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + >+ kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); >+ align_buffer_page_end(src_y, sample_size); >+ uint8_t* src_u = src_y + kWidth * kHeight; >+ uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); >+ >+ align_buffer_page_end(dst_y, kDestWidth * kDestHeight); >+ align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * >+ SUBSAMPLE(kDestHeight, SUBSAMP_Y)); >+ align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * >+ SUBSAMPLE(kDestHeight, SUBSAMP_Y)); >+ >+ for (int i = 0; i < kHeight 
* kWidth; ++i) { >+ src_y[i] = (fastrand() & 0xff); >+ } >+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { >+ src_u[i] = (fastrand() & 0xff); >+ } >+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { >+ src_v[i] = (fastrand() & 0xff); >+ } >+ memset(dst_y, 1, kDestWidth * kDestHeight); >+ memset(dst_u, 2, >+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); >+ memset(dst_v, 3, >+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); >+ >+ MaskCpuFlags(benchmark_cpu_info_); >+ for (int i = 0; i < benchmark_iterations_; ++i) { >+ ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, >+ SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, >+ SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, >+ kDestWidth, kDestHeight, libyuv::kRotate0, >+ libyuv::FOURCC_I420); >+ } >+ >+ for (int i = 0; i < kDestHeight; ++i) { >+ for (int j = 0; j < kDestWidth; ++j) { >+ EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], >+ dst_y[i * kDestWidth + j]); >+ } >+ } >+ for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { >+ for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { >+ EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], >+ dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); >+ } >+ } >+ for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { >+ for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { >+ EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], >+ dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); >+ } >+ } >+ >+ free_aligned_buffer_page_end(dst_y); >+ free_aligned_buffer_page_end(dst_u); >+ free_aligned_buffer_page_end(dst_v); >+ free_aligned_buffer_page_end(src_y); >+} >+ > TEST_F(LibYUVConvertTest, TestYToARGB) { > uint8_t y[32]; > uint8_t expectedg[32]; >@@ -2458,4 +3090,28 @@ TEST_F(LibYUVConvertTest, TestH420ToAR30) { > free_aligned_buffer_page_end(ar30_pixels); > } > >+// Test RGB24 to ARGB and back to RGB24 >+TEST_F(LibYUVConvertTest, TestARGBToRGB24) { >+ 
const int kSize = 256; >+ align_buffer_page_end(orig_rgb24, kSize * 3); >+ align_buffer_page_end(argb_pixels, kSize * 4); >+ align_buffer_page_end(dest_rgb24, kSize * 3); >+ >+ // Test grey scale >+ for (int i = 0; i < kSize * 3; ++i) { >+ orig_rgb24[i] = i; >+ } >+ >+ RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1); >+ ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1); >+ >+ for (int i = 0; i < kSize * 3; ++i) { >+ EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]); >+ } >+ >+ free_aligned_buffer_page_end(orig_rgb24); >+ free_aligned_buffer_page_end(argb_pixels); >+ free_aligned_buffer_page_end(dest_rgb24); >+} >+ > } // namespace libyuv >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/cpu_test.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/cpu_test.cc >index a8fb4b4ac01f16fc7525c39a20c66478ea0fccaa..a7991d2bab10102a6c986a2fdbafd97ab8ca1125 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/cpu_test.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/cpu_test.cc >@@ -20,12 +20,12 @@ namespace libyuv { > > TEST_F(LibYUVBaseTest, TestCpuHas) { > int cpu_flags = TestCpuFlag(-1); >- printf("Cpu Flags %x\n", cpu_flags); >+ printf("Cpu Flags %d\n", cpu_flags); > #if defined(__arm__) || defined(__aarch64__) > int has_arm = TestCpuFlag(kCpuHasARM); >- printf("Has ARM %x\n", has_arm); >+ printf("Has ARM %d\n", has_arm); > int has_neon = TestCpuFlag(kCpuHasNEON); >- printf("Has NEON %x\n", has_neon); >+ printf("Has NEON %d\n", has_neon); > #endif > int has_x86 = TestCpuFlag(kCpuHasX86); > int has_sse2 = TestCpuFlag(kCpuHasSSE2); >@@ -44,29 +44,31 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { > int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); > int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); > int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); >- printf("Has X86 %x\n", has_x86); >- printf("Has SSE2 %x\n", has_sse2); >- printf("Has SSSE3 %x\n", has_ssse3); 
>- printf("Has SSE4.1 %x\n", has_sse41); >- printf("Has SSE4.2 %x\n", has_sse42); >- printf("Has AVX %x\n", has_avx); >- printf("Has AVX2 %x\n", has_avx2); >- printf("Has ERMS %x\n", has_erms); >- printf("Has FMA3 %x\n", has_fma3); >- printf("Has F16C %x\n", has_f16c); >- printf("Has GFNI %x\n", has_gfni); >- printf("Has AVX512BW %x\n", has_avx512bw); >- printf("Has AVX512VL %x\n", has_avx512vl); >- printf("Has AVX512VBMI %x\n", has_avx512vbmi); >- printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2); >- printf("Has AVX512VBITALG %x\n", has_avx512vbitalg); >- printf("Has AVX512VPOPCNTDQ %x\n", has_avx512vpopcntdq); >+ printf("Has X86 %d\n", has_x86); >+ printf("Has SSE2 %d\n", has_sse2); >+ printf("Has SSSE3 %d\n", has_ssse3); >+ printf("Has SSE41 %d\n", has_sse41); >+ printf("Has SSE42 %d\n", has_sse42); >+ printf("Has AVX %d\n", has_avx); >+ printf("Has AVX2 %d\n", has_avx2); >+ printf("Has ERMS %d\n", has_erms); >+ printf("Has FMA3 %d\n", has_fma3); >+ printf("Has F16C %d\n", has_f16c); >+ printf("Has GFNI %d\n", has_gfni); >+ printf("Has AVX512BW %d\n", has_avx512bw); >+ printf("Has AVX512VL %d\n", has_avx512vl); >+ printf("Has AVX512VBMI %d\n", has_avx512vbmi); >+ printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2); >+ printf("Has AVX512VBITALG %d\n", has_avx512vbitalg); >+ printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq); > > #if defined(__mips__) > int has_mips = TestCpuFlag(kCpuHasMIPS); >- printf("Has MIPS %x\n", has_mips); >+ printf("Has MIPS %d\n", has_mips); > int has_msa = TestCpuFlag(kCpuHasMSA); >- printf("Has MSA %x\n", has_msa); >+ printf("Has MSA %d\n", has_msa); >+ int has_mmi = TestCpuFlag(kCpuHasMMI); >+ printf("Has MMI %d\n", has_mmi); > #endif > } > >@@ -158,4 +160,27 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) { > #endif > } > >+TEST_F(LibYUVBaseTest, TestSetCpuFlags) { >+ // Reset any masked flags that may have been set so auto init is enabled. 
>+ MaskCpuFlags(0); >+ >+ int original_cpu_flags = TestCpuFlag(-1); >+ >+ // Test setting different CPU configurations. >+ int cpu_flags = kCpuHasARM | kCpuHasNEON | kCpuInitialized; >+ SetCpuFlags(cpu_flags); >+ EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); >+ >+ cpu_flags = kCpuHasX86 | kCpuInitialized; >+ SetCpuFlags(cpu_flags); >+ EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); >+ >+ // Test that setting 0 turns auto-init back on. >+ SetCpuFlags(0); >+ EXPECT_EQ(original_cpu_flags, TestCpuFlag(-1)); >+ >+ // Restore the CPU flag mask. >+ MaskCpuFlags(benchmark_cpu_info_); >+} >+ > } // namespace libyuv >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_argb_test.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_argb_test.cc >index a1be85b8d8ab49807ec4e9270a7f2d5156671ac7..94aef60e22632fc3a42863e97d511ed9240d4149 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_argb_test.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_argb_test.cc >@@ -303,10 +303,10 @@ TEST_FACTOR(3, 1, 3) > > TEST_SCALETO(ARGBScale, 1, 1) > TEST_SCALETO(ARGBScale, 320, 240) >-TEST_SCALETO(ARGBScale, 352, 288) > TEST_SCALETO(ARGBScale, 569, 480) > TEST_SCALETO(ARGBScale, 640, 360) > TEST_SCALETO(ARGBScale, 1280, 720) >+TEST_SCALETO(ARGBScale, 1920, 1080) > #undef TEST_SCALETO1 > #undef TEST_SCALETO > >@@ -319,12 +319,12 @@ int YUVToARGBScaleReference2(const uint8_t* src_y, > int src_stride_u, > const uint8_t* src_v, > int src_stride_v, >- uint32 /* src_fourcc */, >+ uint32_t /* src_fourcc */, > int src_width, > int src_height, > uint8_t* dst_argb, > int dst_stride_argb, >- uint32 /* dst_fourcc */, >+ uint32_t /* dst_fourcc */, > int dst_width, > int dst_height, > int clip_x, >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_test.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_test.cc >index 
08b6cffaa26b7863371e691df89f8be135c7470b..d97d54a8830fb2620c6f19c55dcd16c364b58122 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_test.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/scale_test.cc >@@ -336,10 +336,10 @@ TEST_FACTOR(3, 1, 3, 0) > > TEST_SCALETO(Scale, 1, 1) > TEST_SCALETO(Scale, 320, 240) >-TEST_SCALETO(Scale, 352, 288) > TEST_SCALETO(Scale, 569, 480) > TEST_SCALETO(Scale, 640, 360) > TEST_SCALETO(Scale, 1280, 720) >+TEST_SCALETO(Scale, 1920, 1080) > #undef TEST_SCALETO1 > #undef TEST_SCALETO > >@@ -437,6 +437,10 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, > ptrdiff_t src_stride, > uint16_t* dst, > int dst_width); >+extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, >+ ptrdiff_t src_stride, >+ uint16_t* dst, >+ int dst_width); > extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, > ptrdiff_t src_stride, > uint16_t* dst, >@@ -463,6 +467,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { > } else { > ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); > } >+#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) >+ int has_mmi = TestCpuFlag(kCpuHasMMI); >+ if (has_mmi) { >+ ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); >+ } else { >+ ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); >+ } > #else > ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); > #endif >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/juno.txt b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/juno.txt >index c275be7425f29568f0d18e7b956305d64527ae10..dd465272b8837bdc6588a533e66fbc9672d58de4 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/juno.txt >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/juno.txt >@@ -1,15 +1,15 @@ >-Processor : AArch64 Processor rev 0 (aarch64) 
>-processor : 0 >-processor : 1 >-processor : 2 >-processor : 3 >-processor : 4 >-processor : 5 >-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 >-CPU implementer : 0x41 >-CPU architecture: AArch64 >-CPU variant : 0x0 >-CPU part : 0xd07 >-CPU revision : 0 >- >-Hardware : Juno >+Processor : AArch64 Processor rev 0 (aarch64) >+processor : 0 >+processor : 1 >+processor : 2 >+processor : 3 >+processor : 4 >+processor : 5 >+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 >+CPU implementer : 0x41 >+CPU architecture: AArch64 >+CPU variant : 0x0 >+CPU part : 0xd07 >+CPU revision : 0 >+ >+Hardware : Juno >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test0.jpg b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test0.jpg >new file mode 100644 >index 0000000000000000000000000000000000000000..f4461a813f14ad79395cd844ae26fa8fddcb18a6 >GIT binary patch >literal 421 >zcmex=<NpH&0WUXCHwH#VMg|WcWcYuZ!I?pTn}?5wOMs7;Pe@QeSVBWeLQG7;P+3)0 >z!^+s!#?sir+`%Kp&%q_!)!ZT=H!wUtDLo_I&bP3#Af+NEH9h(NAqH*^1_1^IMn*w~ >z|3?_aLFO|uAOU6;MyCI_7&yS9jLvn-r-uX8NeeJAGcYkDsbgdkWMCE$5{9Z}WM#5G >zke^l-rrbW0S1sw=exN2f0R|>UW>zFk41$VGLPi3Hg^dfL`q=E5yf<qYU3&Xd)=oIE >zxEZKTMgZg_CT4`QfNoU`G)!##0JF?qF*VPyP2;D*9rfEQ89(1+TJToRBt!Y<F^||P >zQ>L%^a1RKW0u7WG02;^0f@~mzprK=8;Y7v8iyPr)3UC{|jMyjjw_|m;NlAnE)~l<} >jx=qu2vv*a6dyM;X1JAV`H4Y^P{~45JnYp)L`F|4th$m{S > >literal 0 >HcmV?d00001 > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test1.jpg b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test1.jpg >new file mode 100644 >index 0000000000000000000000000000000000000000..a0210e9d4dbaa6ff6d83d4c353d56f32dcf777a5 >GIT binary patch >literal 735 >zcmex=<NpH&0WUXCHwH#VMg|WcWcYuZ!I?pTn}?5wOMs7;Pe@QeSVBWeLQG7;P+3)0 >z!^+s!#?sir+`%Kp&%q_!)!ZT=H!wUtDLo_I&bP3#Af+NEH9Z+*2&15okc6m&zLb=H >zvXg~VGRffoAqGJX1_1^IW=25<CP7AKLB{__7{ozNWn@4C%q)ydKnXE)aRw$v=Kr@C 
>zc$gU(m;{&w7#J8G`qf^1IrB49TUzThP=~Yt12Y2?Gm=q^Oo9x|0z$$-Z5%*L7+IOD >z8}h@-!j#)*@~S0$+Yi(vDagpcjN$-B1_41PA+Q#pggxU&26flnhU<Z9rGV~6Rm;H0 >zBp~PrQOpEV{LMSwV{>M*X%Wb5IRTKHS&<yXAgIVBWF%l%*tihtCN_H}@68%Um)`!A >zwG$33ZU$<T5deh<&?dM>V1OwG8YVV=fO*7Tu{6(cipEcaJL<PrGJd|twBW6rNrv*z >zV;-?prc7V+f$G5F1Pl-c1_8CiD-hQMjYJL}21Y?eL&re4R|P^=><?()Ft__imRh#f >z9C@H72|-XaAY1|rb)X+0T0kBaD0pahv5)uR)~gfCoSR&Is_TK8<OP7vXJkS40E3{R >zV`AY%#m0*p;eHU1Hh3AaPwH>i>u!^h2Jfv`SD$s8ruSy=stWfQ_vHqjYddNjN(}xp >ND9bW)Z@=>YCIC;)v-|)6 > >literal 0 >HcmV?d00001 > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test2.jpg b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test2.jpg >new file mode 100644 >index 0000000000000000000000000000000000000000..816ca767b1a1a6f5a6f8124bdaee488ba1caf511 >GIT binary patch >literal 685 >zcmex=<NpH&0WUXCHwH#VMg|WcWcYuZ!I?pTn}?5wOMs7;Pe@QeSVBWeLQG7;P+3)0 >z!^+s!#?sir+`%Kp&%q_!)!ZT=H!wUtDLo_I&bP3#Af+NEH9Z+*2&15okc6m&zLb=H >zvXg~VGRffoAqGJX1_1^IW=16jCP7AKLB{__7$g`Nn3x%m0W&Kj6AMsA4Cn_&WC;dl >zMyCI_7<iZ&8JGl^1sE6@6`pG>=GN0a4^%BJz`%@Z5+jo!1G9jTFi;Bz&>%)uChLa$ >zu(B}a_L;nDN#FJZHHix{0(GGPMnOfe24+S^d&U#NAa@9&yMsZ{2&{$)q~_9bpc**= >z1|~*kR%DX|6`6#L1Plus7eXDtX3ylkS;Oek+n=&_!hyxjKy5MtAfuUp9)bZzK?cP@ >z!^Fl9Fjw0vmgX5w(fDa_NB#Cn#?SYd7QB@+$x!}z%p<nSl<8|eP#rix!2S>rj)(XP >z&BcriNWK!_TWD2=Lz$u=++zaa+#ChBf$HQ1fKFs&L3SjAprK=8;Y7v8iyPtY6p%J} >z8L?05Z`bQ?ladDStyfo{b(^O5X78#B_Zau(2A*p>Y8*-o{xc}cGIMXg^8Y3P^0b`M > >literal 0 >HcmV?d00001 > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test3.jpg b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test3.jpg >new file mode 100644 >index 0000000000000000000000000000000000000000..792d91dc8db88b7c16ff8eb00e22b5118ec9f3d3 >GIT binary patch >literal 704 >zcmex=<NpH&0WUXCHwH#VMg|WcWcYuZ!I?pTn}?5wOMs7;Pe@QeSVBWeLQG7;P+3)0 >z!^+s!#?sir+`%Kp&%q_!)!ZT=H!wUtDLo_I&bP3#Af+NEH9Z+*2&15okc6m&zLb=H 
>zvXg~VGRffoAqGJX1_1^IW=2H@CP7AKLB{__7{ozNWn@4C%q)ydAPGhWW)yJ-MkZ#4 >z|F;-;fWk}y%mNGyjLyCCi#DH<4(A7|mlj}PW&o-{07fQ324(>vVW1ukpcRa)Ox6wg >zVP#>;?K64RlD_Q+YLXNLYC*R}Ku{5^g_)7jo-y!TPbpBL1kfl{X9As*2vNrbQn&1; >ze#?swK$UU=3`~s7tO#c?FbFC#2^k3(7B((~I)%-i$$PVg(WSRPW$lCmi<^PkWCTFL >zfb1AXK?cP@!^Fl9F!$RlmgX5w(fDa_NB#Cn#?SYd7QB@+$x!}z%p<nSl<8|eP#rjI >zfZ@QvAfV@p!`Xt0hH&o*1PAO2IhgK`RVkyOB7*=}DaeNc5{)q|Ql}<@f=ylk=u}1) >zq>y7|5Hxg5ES#v=cyS}#y#mq(FC+Fz{q1_)ZBo+Uz4hwqvu@M$-t1jf;U43@+`w~f >UM~y>?!G8v2S!V9-SN`7w0A}!{tN;K2 > >literal 0 >HcmV?d00001 > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test4.jpg b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/testdata/test4.jpg >new file mode 100644 >index 0000000000000000000000000000000000000000..1ef41668facb24eadd6a7d07c9f58f57a4f2bf61 >GIT binary patch >literal 701 >zcmex=<NpH&0WUXCHwH#VMg|WcWcYuZ!I?pTn}?5wOMs7;Pe@QeSVBWeLQG7;P+3)0 >z!^+s!#?sir+`%Kp&%q_!)!ZT=H!wUtDLo_I&bP3#Af+NEH9Z+*2&15okc6m&zLb=H >zvXg~VGRffoAqGJX1_1^IW=0_fCP7AKLB{__7$g`Nn3x%m0ShZ56Ejdo4Cn_&WC;ca >zM&|#w7<iZ&8JGl^1%TX3GhQ<DFt{n+1S*#nU|>cyijhf>fmuLE7^sH>Xc8kUlXXLW >zSXr2I`%GT7q;LCyn#2V$ED>Z>1Z!YsWVC0Tv2m|E$dN#U(A>f(BnVN*1X8!nbkReQ >zLOB5jCPrpfWLFC+G6@+87#21zgt~*xp2>T&hS8<BKV|KN1B;u1+GGSkUS$G$3<ekl >z859Ez6B|Fk9B;2!nrApg<EOzL_1h~MKi^|o@K(+wL;2@1kJu_xrmy)xb>M&j1_A?v >zfXaHHsght<Bg_O!FbW1L8p6FN5Gi=dDPevlL@l!QAZIWNDjFt&)q=bzu;IFT;5J8~ >zQh5QOQyE#19m^nS=$KeIQL*vjM!0(gqzzt1?34Q2^}5@nq``aZ)zxR+rs=)eyQ;!H >a#(lYg=h}`MhZ2MT49c?1+}p4GzX<>te530C > >literal 0 >HcmV?d00001 > >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/unit_test.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/unit_test.cc >index 20aadb44e2faf809f938515b538f6868ed14a108..a1ae7ea3c7c27c9460f7f65c8d0b4f05b878b25a 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/unit_test.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/unit_test.cc >@@ -38,6 +38,99 @@ 
static const int32_t FLAGS_libyuv_flags = 0; > static const int32_t FLAGS_libyuv_cpu_info = 0; > #endif > >+// Test environment variable for disabling CPU features. Any non-zero value >+// to disable. Zero ignored to make it easy to set the variable on/off. >+#if !defined(__native_client__) && !defined(_M_ARM) >+static LIBYUV_BOOL TestEnv(const char* name) { >+ const char* var = getenv(name); >+ if (var) { >+ if (var[0] != '0') { >+ return LIBYUV_TRUE; >+ } >+ } >+ return LIBYUV_FALSE; >+} >+#else // nacl does not support getenv(). >+static LIBYUV_BOOL TestEnv(const char*) { >+ return LIBYUV_FALSE; >+} >+#endif >+ >+int TestCpuEnv(int cpu_info) { >+#if defined(__arm__) || defined(__aarch64__) >+ if (TestEnv("LIBYUV_DISABLE_NEON")) { >+ cpu_info &= ~libyuv::kCpuHasNEON; >+ } >+#endif >+#if defined(__mips__) && defined(__linux__) >+ if (TestEnv("LIBYUV_DISABLE_MSA")) { >+ cpu_info &= ~libyuv::kCpuHasMSA; >+ } >+ if (TestEnv("LIBYUV_DISABLE_MMI")) { >+ cpu_info &= ~libyuv::kCpuHasMMI; >+ } >+#endif >+#if !defined(__pnacl__) && !defined(__CLR_VER) && \ >+ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ >+ defined(_M_IX86)) >+ if (TestEnv("LIBYUV_DISABLE_X86")) { >+ cpu_info &= ~libyuv::kCpuHasX86; >+ } >+ if (TestEnv("LIBYUV_DISABLE_SSE2")) { >+ cpu_info &= ~libyuv::kCpuHasSSE2; >+ } >+ if (TestEnv("LIBYUV_DISABLE_SSSE3")) { >+ cpu_info &= ~libyuv::kCpuHasSSSE3; >+ } >+ if (TestEnv("LIBYUV_DISABLE_SSE41")) { >+ cpu_info &= ~libyuv::kCpuHasSSE41; >+ } >+ if (TestEnv("LIBYUV_DISABLE_SSE42")) { >+ cpu_info &= ~libyuv::kCpuHasSSE42; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX")) { >+ cpu_info &= ~libyuv::kCpuHasAVX; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX2")) { >+ cpu_info &= ~libyuv::kCpuHasAVX2; >+ } >+ if (TestEnv("LIBYUV_DISABLE_ERMS")) { >+ cpu_info &= ~libyuv::kCpuHasERMS; >+ } >+ if (TestEnv("LIBYUV_DISABLE_FMA3")) { >+ cpu_info &= ~libyuv::kCpuHasFMA3; >+ } >+ if (TestEnv("LIBYUV_DISABLE_F16C")) { >+ cpu_info &= ~libyuv::kCpuHasF16C; >+ } >+ if 
(TestEnv("LIBYUV_DISABLE_AVX512BW")) { >+ cpu_info &= ~libyuv::kCpuHasAVX512BW; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX512VL")) { >+ cpu_info &= ~libyuv::kCpuHasAVX512VL; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX512VBMI")) { >+ cpu_info &= ~libyuv::kCpuHasAVX512VBMI; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX512VBMI2")) { >+ cpu_info &= ~libyuv::kCpuHasAVX512VBMI2; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { >+ cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; >+ } >+ if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { >+ cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ; >+ } >+ if (TestEnv("LIBYUV_DISABLE_GFNI")) { >+ cpu_info &= ~libyuv::kCpuHasGFNI; >+ } >+#endif >+ if (TestEnv("LIBYUV_DISABLE_ASM")) { >+ cpu_info = libyuv::kCpuInitialized; >+ } >+ return cpu_info; >+} >+ > // For quicker unittests, default is 128 x 72. But when benchmarking, > // default to 720p. Allow size to specify. > // Set flags to -1 for benchmarking to avoid slower C code. >@@ -87,6 +180,8 @@ LibYUVConvertTest::LibYUVConvertTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >@@ -141,6 +236,8 @@ LibYUVColorTest::LibYUVColorTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >@@ -195,6 +292,8 @@ LibYUVScaleTest::LibYUVScaleTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > 
libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >@@ -249,6 +348,8 @@ LibYUVRotateTest::LibYUVRotateTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >@@ -303,6 +404,8 @@ LibYUVPlanarTest::LibYUVPlanarTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >@@ -357,6 +460,8 @@ LibYUVBaseTest::LibYUVBaseTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >@@ -411,6 +516,8 @@ LibYUVCompareTest::LibYUVCompareTest() > if (FLAGS_libyuv_cpu_info) { > benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; > } >+ disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); >+ benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); > libyuv::MaskCpuFlags(benchmark_cpu_info_); > benchmark_pixels_div1280_ = > static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/video_common_test.cc b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/video_common_test.cc >index 4d89586e76f225c04736d27964b49152130b104f..a84206a2adbf426f4d5da140a728821ed752a756 100644 >--- 
a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/video_common_test.cc >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/unit_test/video_common_test.cc >@@ -79,6 +79,7 @@ TEST_F(LibYUVBaseTest, TestFourCC) { > EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); > EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); > EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); >+ EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422)); > EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); > EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); > EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); >diff --git a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/util/cpuid.c b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/util/cpuid.c >index 59c65d60e0fe46d42c2fdb7554b1efc77f03315a..84c0602287bf5477dc53f7c4eb0a1e9d77ab1d74 100644 >--- a/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/util/cpuid.c >+++ b/Source/ThirdParty/libwebrtc/Source/third_party/libyuv/util/cpuid.c >@@ -71,6 +71,8 @@ int main(int argc, const char* argv[]) { > if (has_mips) { > int has_msa = TestCpuFlag(kCpuHasMSA); > printf("Has MSA %x\n", has_msa); >+ int has_mmi = TestCpuFlag(kCpuHasMMI); >+ printf("Has MMI %x\n", has_mmi); > } > if (has_x86) { > int has_sse2 = TestCpuFlag(kCpuHasSSE2);
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Formatted Diff
|
Diff
Attachments on
bug 192864
: 357696