Extract libyuv into a standalone module

Ali 2021-07-21 18:12:17 +02:00
parent 7161c34527
commit cb61f86ba2
186 changed files with 20998 additions and 9615 deletions

View File

@ -91,6 +91,7 @@ swift_library(
"//submodules/TextFormat:TextFormat",
"//submodules/Markdown:Markdown",
"//submodules/ChatTitleActivityNode:ChatTitleActivityNode",
"//third-party/libyuv:LibYuvBinding",
],
visibility = [
"//visibility:public",

View File

@ -6,6 +6,7 @@ import SwiftSignalKit
import AccountContext
import TelegramVoip
import AVFoundation
import LibYuvBinding
private func sampleBufferFromPixelBuffer(pixelBuffer: CVPixelBuffer) -> CMSampleBuffer? {
var maybeFormat: CMVideoFormatDescription?
@ -40,6 +41,68 @@ private func sampleBufferFromPixelBuffer(pixelBuffer: CVPixelBuffer) -> CMSample
return sampleBuffer
}
private func copyI420BufferToNV12Buffer(buffer: OngoingGroupCallContext.VideoFrameData.I420Buffer, pixelBuffer: CVPixelBuffer) -> Bool {
guard CVPixelBufferGetPixelFormatType(pixelBuffer) == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange else {
return false
}
guard CVPixelBufferGetWidthOfPlane(pixelBuffer, 0) == buffer.width else {
return false
}
guard CVPixelBufferGetHeightOfPlane(pixelBuffer, 0) == buffer.height else {
return false
}
let cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, [])
if cvRet != kCVReturnSuccess {
return false
}
defer {
CVPixelBufferUnlockBaseAddress(pixelBuffer, [])
}
guard let dstY = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0) else {
return false
}
let dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0)
guard let dstUV = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1) else {
return false
}
let dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1)
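// Nest withUnsafeBytes so all three source planes stay pinned while libyuv reads them.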
buffer.y.withUnsafeBytes { srcYBuffer in
guard let srcY = srcYBuffer.baseAddress else {
return
}
buffer.u.withUnsafeBytes { srcUBuffer in
guard let srcU = srcUBuffer.baseAddress else {
return
}
buffer.v.withUnsafeBytes { srcVBuffer in
guard let srcV = srcVBuffer.baseAddress else {
return
}
libyuv_I420ToNV12(
srcY.assumingMemoryBound(to: UInt8.self),
Int32(buffer.strideY),
srcU.assumingMemoryBound(to: UInt8.self),
Int32(buffer.strideU),
srcV.assumingMemoryBound(to: UInt8.self),
Int32(buffer.strideV),
dstY.assumingMemoryBound(to: UInt8.self),
Int32(dstStrideY),
dstUV.assumingMemoryBound(to: UInt8.self),
Int32(dstStrideUV),
Int32(buffer.width),
Int32(buffer.height)
)
}
}
}
return true
}
final class SampleBufferVideoRenderingView: UIView, VideoRenderingView {
static override var layerClass: AnyClass {
return AVSampleBufferDisplayLayer.self
@ -111,6 +174,28 @@ final class SampleBufferVideoRenderingView: UIView, VideoRenderingView {
if let sampleBuffer = sampleBufferFromPixelBuffer(pixelBuffer: buffer.pixelBuffer) {
self.sampleBufferLayer.enqueue(sampleBuffer)
}
case let .i420(buffer):
let ioSurfaceProperties = NSMutableDictionary()
let options = NSMutableDictionary()
options.setObject(ioSurfaceProperties, forKey: kCVPixelBufferIOSurfacePropertiesKey as NSString)
var pixelBuffer: CVPixelBuffer?
CVPixelBufferCreate(
kCFAllocatorDefault,
buffer.width,
buffer.height,
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
options,
&pixelBuffer
)
if let pixelBuffer = pixelBuffer {
if copyI420BufferToNV12Buffer(buffer: buffer, pixelBuffer: pixelBuffer) {
if let sampleBuffer = sampleBufferFromPixelBuffer(pixelBuffer: pixelBuffer) {
self.sampleBufferLayer.enqueue(sampleBuffer)
}
}
}
default:
break
}

View File

@ -35,11 +35,17 @@ class VideoRenderingContext {
func makeView(input: Signal<OngoingGroupCallContext.VideoFrameData, NoError>, blur: Bool) -> VideoRenderingView? {
#if targetEnvironment(simulator)
if blur {
return nil
}
return SampleBufferVideoRenderingView(input: input)
#else
if #available(iOS 13.0, *) {
return MetalVideoRenderingView(renderingContext: self.metalContext, input: input, blur: blur)
} else {
if blur {
return nil
}
return SampleBufferVideoRenderingView(input: input)
}
#endif

View File

@ -843,13 +843,20 @@ func contextMenuForChatPresentationInterfaceState(chatPresentationInterfaceState
if !hasAutoremove {
for media in message.media {
if media is TelegramMediaAction {
if let action = media as? TelegramMediaAction {
if let channel = message.peers[message.id.peerId] as? TelegramChannel {
if channel.flags.contains(.isCreator) || (channel.adminRights?.rights.contains(.canDeleteMessages) == true) {
} else {
isUnremovableAction = true
}
}
switch action.action {
case .historyScreenshot:
isUnremovableAction = true
default:
break
}
}
if let file = media as? TelegramMediaFile {
if file.isVideo {

View File

@ -44,7 +44,8 @@ objc_library(
"-Ithird-party/webrtc/webrtc/sdk/objc/components/renderer/metal",
"-Ithird-party/webrtc/webrtc/sdk/objc/components/renderer/opengl",
"-Ithird-party/webrtc/webrtc/sdk/objc/components/video_codec",
"-Ithird-party/webrtc/dependencies/third_party/libyuv/include",
"-Ithird-party/libyuv/third_party/libyuv/include",
"-Ithird-party/libyuv",
"-Ithird-party/webrtc/webrtc/sdk/objc/api/video_codec",
"-DWEBRTC_IOS",
"-DWEBRTC_MAC",
@ -65,6 +66,7 @@ objc_library(
"//third-party/opusfile:opusfile",
"//submodules/ffmpeg:ffmpeg",
"//third-party/rnnoise:rnnoise",
"//third-party/libyuv:libyuv",
],
sdk_frameworks = [
"Foundation",

third-party/libyuv/BUILD (vendored, new file, 142 lines)
View File

@ -0,0 +1,142 @@
config_setting(
name = "debug_build",
values = {
"compilation_mode": "dbg",
},
)
optimization_flags = select({
":debug_build": ["-O2", "-DNDEBUG"],
"//conditions:default": ["-DNDEBUG"],
})
common_flags = []
arm_specific_flags = [
"-DLIBYUV_NEON",
]
arm64_specific_flags = [
"-DLIBYUV_NEON",
]
x86_64_specific_flags = [
"-DHAVE_SSE2",
]
arch_specific_cflags = select({
"@build_bazel_rules_apple//apple:ios_armv7": common_flags + arm_specific_flags,
"@build_bazel_rules_apple//apple:ios_arm64": common_flags + arm64_specific_flags,
"//build-system:ios_sim_arm64": common_flags + arm64_specific_flags,
"@build_bazel_rules_apple//apple:ios_x86_64": common_flags + x86_64_specific_flags,
})
cc_library(
name = "libyuv",
srcs = [ "third_party/libyuv/" + path for path in [
# Headers
"include/libyuv.h",
"include/libyuv/basic_types.h",
"include/libyuv/compare.h",
"include/libyuv/compare_row.h",
"include/libyuv/convert.h",
"include/libyuv/convert_argb.h",
"include/libyuv/convert_from.h",
"include/libyuv/convert_from_argb.h",
"include/libyuv/cpu_id.h",
"include/libyuv/mjpeg_decoder.h",
"include/libyuv/planar_functions.h",
"include/libyuv/rotate.h",
"include/libyuv/rotate_argb.h",
"include/libyuv/rotate_row.h",
"include/libyuv/row.h",
"include/libyuv/scale.h",
"include/libyuv/scale_argb.h",
"include/libyuv/scale_row.h",
"include/libyuv/scale_uv.h",
"include/libyuv/version.h",
"include/libyuv/video_common.h",
# Source Files
"source/compare.cc",
"source/compare_common.cc",
"source/compare_gcc.cc",
"source/compare_win.cc",
"source/convert.cc",
"source/convert_argb.cc",
"source/convert_from.cc",
"source/convert_from_argb.cc",
"source/convert_jpeg.cc",
"source/convert_to_argb.cc",
"source/convert_to_i420.cc",
"source/cpu_id.cc",
"source/mjpeg_decoder.cc",
"source/mjpeg_validate.cc",
"source/planar_functions.cc",
"source/rotate.cc",
"source/rotate_any.cc",
"source/rotate_argb.cc",
"source/rotate_common.cc",
"source/rotate_gcc.cc",
"source/rotate_win.cc",
"source/row_any.cc",
"source/row_common.cc",
"source/row_gcc.cc",
"source/row_win.cc",
"source/scale.cc",
"source/scale_any.cc",
"source/scale_argb.cc",
"source/scale_common.cc",
"source/scale_gcc.cc",
"source/scale_uv.cc",
"source/scale_win.cc",
"source/video_common.cc",
# ARM Source Files
"source/compare_neon.cc",
"source/compare_neon64.cc",
"source/rotate_neon.cc",
"source/rotate_neon64.cc",
"source/row_neon.cc",
"source/row_neon64.cc",
"source/scale_neon.cc",
"source/scale_neon64.cc",
]],
copts = [
"-ffp-contract=fast",
"-Ithird-party/libyuv/third_party/libyuv/include",
] + arch_specific_cflags + optimization_flags,
visibility = ["//visibility:public"],
)
objc_library(
name = "LibYuvBinding",
enable_modules = True,
module_name = "LibYuvBinding",
srcs = glob([
"LibYuvBinding/Sources/**/*.m",
"LibYuvBinding/Sources/**/*.c",
"LibYuvBinding/Sources/**/*.h",
]),
hdrs = glob([
"LibYuvBinding/PublicHeaders/**/*.h",
]),
includes = [
"LibYuvBinding/PublicHeaders",
],
copts = [
"-Ithird-party/libyuv/third_party/libyuv/include",
],
deps = [
":libyuv",
],
sdk_frameworks = [
"Foundation",
],
visibility = [
"//visibility:public",
],
)

View File

@ -0,0 +1,21 @@
#ifndef LIBYUV_BINDING_H
#define LIBYUV_BINDING_H
#import <Foundation/Foundation.h>
bool libyuv_I420ToNV12(
const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height
);
#endif

View File

@ -0,0 +1,33 @@
#include <LibYuvBinding/LibYuvBinding.h>
#include "libyuv/convert_from.h"
bool libyuv_I420ToNV12(
const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height
) {
return I420ToNV12(
src_y,
src_stride_y,
src_u,
src_stride_u,
src_v,
src_stride_v,
dst_y,
dst_stride_y,
dst_uv,
dst_stride_uv,
width,
height
) == 0;
}
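For reference, a minimal C smoke test of the new binding; the frame size is arbitrary and the planes are assumed tightly packed, so the Y strides equal the width, the I420 chroma strides are width/2, and the interleaved NV12 UV stride is the full width:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <LibYuvBinding/LibYuvBinding.h>

int main(void) {
    const int w = 64, h = 48;
    // Allocation checks omitted for brevity in this sketch.
    uint8_t *src_y = calloc((size_t)w * h, 1);
    uint8_t *src_u = calloc((size_t)(w / 2) * (h / 2), 1);
    uint8_t *src_v = calloc((size_t)(w / 2) * (h / 2), 1);
    uint8_t *dst_y = malloc((size_t)w * h);
    uint8_t *dst_uv = malloc((size_t)w * (h / 2));  // interleaved U/V, half height
    bool ok = libyuv_I420ToNV12(src_y, w, src_u, w / 2, src_v, w / 2,
                                dst_y, w, dst_uv, w, w, h);
    free(src_y); free(src_u); free(src_v); free(dst_y); free(dst_uv);
    return ok ? 0 : 1;
}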

View File

@ -1,6 +1,32 @@
package {
default_applicable_licenses: ["external_libyuv_files_license"],
}
// Added automatically by a large-scale-change
//
// large-scale-change included anything that looked like it might be a license
// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
//
// Please consider removing redundant or irrelevant files from 'license_text:'.
// See: http://go/android-license-faq
license {
name: "external_libyuv_files_license",
visibility: [":__subpackages__"],
license_kinds: [
"SPDX-license-identifier-BSD",
],
license_text: [
"LICENSE",
"LICENSE_THIRD_PARTY",
"PATENTS",
],
}
cc_library {
name: "libyuv",
vendor_available: true,
product_available: true,
host_supported: true,
vndk: {
enabled: true,
},
@ -61,9 +87,21 @@ cc_library {
"-DHAVE_JPEG",
],
arch: {
arm: {
cflags: ["-mfpu=neon"],
},
},
shared_libs: ["libjpeg"],
export_include_dirs: ["include"],
apex_available: [
"//apex_available:platform",
"com.android.media.swcodec",
],
min_sdk_version: "29",
}
// compatibility static library until all uses of libyuv_static are replaced
@ -72,6 +110,11 @@ cc_library_static {
name: "libyuv_static",
vendor_available: true,
whole_static_libs: ["libyuv"],
apex_available: [
"//apex_available:platform",
"com.android.media.swcodec",
],
min_sdk_version: "29",
}
cc_test {
@ -108,19 +151,19 @@ cc_test {
}
cc_test {
name: "i444tonv12_eg",
name: "cpuid",
gtest: false,
srcs: [
"util/i444tonv12_eg.cc",
"util/cpuid.c",
],
static_libs: ["libyuv"],
}
cc_test {
name: "cpuid",
name: "i444tonv12_eg",
gtest: false,
srcs: [
"util/cpuid.c",
"util/i444tonv12_eg.cc",
],
static_libs: ["libyuv"],
}
@ -136,6 +179,15 @@ cc_test {
static_libs: ["libyuv"],
}
cc_test {
name: "yuvconstants",
gtest: false,
srcs: [
"util/yuvconstants.c",
],
static_libs: ["libyuv"],
}
cc_test {
name: "yuvconvert",
gtest: false,

View File

@ -10,8 +10,8 @@ import("//testing/test.gni")
import("libyuv.gni")
declare_args() {
# Set to false to disable building with gflags.
libyuv_use_gflags = true
# Set to false to disable building with absl flags.
libyuv_use_absl_flags = true
# When building a shared library using a target in WebRTC or
# Chromium projects that depends on libyuv, setting this flag
@ -40,6 +40,7 @@ group("default") {
":i444tonv12_eg",
":libyuv_unittest",
":psnr",
":yuvconstants",
":yuvconvert",
]
}
@ -293,9 +294,12 @@ if (libyuv_include_tests) {
]
defines = []
if (libyuv_use_gflags) {
defines += [ "LIBYUV_USE_GFLAGS" ]
deps += [ "//third_party/gflags" ]
if (libyuv_use_absl_flags) {
defines += [ "LIBYUV_USE_ABSL_FLAGS" ]
deps += [
"//third_party/abseil-cpp/absl/flags:flag",
"//third_party/abseil-cpp/absl/flags:parse",
]
}
configs += [ ":libyuv_unittest_warnings_config" ]
@ -357,6 +361,17 @@ if (libyuv_include_tests) {
}
}
executable("yuvconstants") {
sources = [
# sources
"util/yuvconstants.c",
]
deps = [ ":libyuv" ]
if (is_linux || is_chromeos) {
cflags = [ "-fexceptions" ]
}
}
executable("psnr") {
sources = [
# sources
@ -376,9 +391,7 @@ if (libyuv_include_tests) {
# sources
"util/i444tonv12_eg.cc",
]
deps = [
":libyuv",
]
deps = [ ":libyuv" ]
}
executable("cpuid") {

View File

@ -71,12 +71,6 @@ if(TEST)
if(NACL AND NACL_LIBC STREQUAL "newlib")
target_link_libraries(libyuv_unittest glibc-compat)
endif()
find_library(GFLAGS_LIBRARY gflags)
if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND")
target_link_libraries(libyuv_unittest gflags)
add_definitions(-DLIBYUV_USE_GFLAGS)
endif()
endif()

View File

@ -0,0 +1,3 @@
monorail {
component: "Internals>Images>Codecs"
}

View File

@ -8,5 +8,3 @@ per-file .gitignore=*
per-file AUTHORS=*
per-file DEPS=*
per-file PRESUBMIT.py=mbonadei@chromium.org
# COMPONENT: Internals>Images>Codecs

View File

@ -6,50 +6,27 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import os
def _RunPythonTests(input_api, output_api):
def join(*args):
return input_api.os_path.join(input_api.PresubmitLocalPath(), *args)
test_directories = [
root for root, _, files in os.walk(join('tools_libyuv'))
if any(f.endswith('_test.py') for f in files)
]
tests = []
for directory in test_directories:
tests.extend(
input_api.canned_checks.GetUnitTestsInDirectory(
input_api,
output_api,
directory,
whitelist=[r'.+_test\.py$']))
return input_api.RunTests(tests, parallel=True)
def _CommonChecks(input_api, output_api):
"""Checks common to both upload and commit."""
results = []
results.extend(input_api.canned_checks.RunPylint(input_api, output_api,
black_list=(r'^base[\\\/].*\.py$',
r'^build[\\\/].*\.py$',
r'^buildtools[\\\/].*\.py$',
r'^ios[\\\/].*\.py$',
r'^out.*[\\\/].*\.py$',
r'^testing[\\\/].*\.py$',
r'^third_party[\\\/].*\.py$',
r'^tools[\\\/].*\.py$',
# TODO(kjellander): should arguably be checked.
r'^tools_libyuv[\\\/]valgrind[\\\/].*\.py$',
r'^xcodebuild.*[\\\/].*\.py$',),
files_to_skip=(r'^base[\\\/].*\.py$',
r'^build[\\\/].*\.py$',
r'^buildtools[\\\/].*\.py$',
r'^ios[\\\/].*\.py$',
r'^out.*[\\\/].*\.py$',
r'^testing[\\\/].*\.py$',
r'^third_party[\\\/].*\.py$',
r'^tools[\\\/].*\.py$',
# TODO(kjellander): should arguably be checked.
r'^tools_libyuv[\\\/]valgrind[\\\/].*\.py$',
r'^xcodebuild.*[\\\/].*\.py$',),
disabled_warnings=['F0401', # Failed to import x
'E0611', # No package y in x
'W0232', # Class has no __init__ method
],
pylintrc='pylintrc'))
results.extend(_RunPythonTests(input_api, output_api))
return results

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1768
Version: 1789
License: BSD
License File: LICENSE

View File

@ -6,9 +6,6 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# Some non-Chromium builds don't use Chromium's third_party/binutils.
linux_use_bundled_binutils_override = true
# Variable that can be used to support multiple build scenarios, like having
# Chromium specific targets in a client project's GN file etc.
build_with_chromium = false
@ -50,6 +47,13 @@ declare_args() {
enable_base_tracing = false
use_perfetto_client_library = false
# Limits the defined //third_party/android_deps targets to only "buildCompile"
# and "buildCompileNoDeps" targets. This is useful for third-party
# repositories which do not use JUnit tests. For instance,
# limit_android_deps == true removes "gn gen" requirement for
# //third_party/robolectric .
limit_android_deps = false
# Allows googletest to pretty-print various absl types.
# Defined here rather than in gtest.gni to match chromium.
gtest_enable_absl_printers = true

View File

@ -1,6 +1,5 @@
# This file is used by git cl to get repository specific information.
# This file is used by `git cl` to get repository specific information.
CODE_REVIEW_SERVER: codereview.chromium.org
GERRIT_HOST: True
PROJECT: libyuv
TRY_ON_UPLOAD: False
VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/

View File

@ -239,6 +239,7 @@ If you get a compile error for atlthunk.lib on Windows, read http://www.chromium
ninja -C out/Debug libyuv_unittest
ninja -C out/Debug compare
ninja -C out/Debug yuvconvert
ninja -C out/Debug yuvconstants
ninja -C out/Debug psnr
ninja -C out/Debug cpuid

View File

@ -4,7 +4,9 @@ Formats (FOURCC) supported by libyuv are detailed here.
# Core Formats
There are 2 core formats supported by libyuv - I420 and ARGB. All YUV formats can be converted to/from I420. All RGB formats can be converted to/from ARGB.
There are 2 core formats supported by libyuv - I420 and ARGB.
All YUV formats can be converted to/from I420.
All RGB formats can be converted to/from ARGB.
Filtering functions such as scaling and planar functions work on I420 and/or ARGB.
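Since I420 and ARGB are the two hub formats, any pair of formats can be converted by pivoting through one of them. A minimal C sketch, assuming tightly packed planes and even dimensions; libyuv also ships direct paths (e.g. NV21ToARGB), so the pivot is only needed when no direct function exists:

#include <stdint.h>
#include "libyuv/convert.h"       // NV21ToI420
#include "libyuv/convert_argb.h"  // I420ToARGB

// NV21 -> ARGB via the I420 hub. The tmp_* planes are caller-provided
// scratch buffers sized for a width x height I420 frame.
static int nv21_to_argb(const uint8_t* src_y, const uint8_t* src_vu,
                        uint8_t* tmp_y, uint8_t* tmp_u, uint8_t* tmp_v,
                        uint8_t* dst_argb, int width, int height) {
  if (NV21ToI420(src_y, width, src_vu, width,
                 tmp_y, width, tmp_u, width / 2, tmp_v, width / 2,
                 width, height) != 0) {
    return -1;
  }
  return I420ToARGB(tmp_y, width, tmp_u, width / 2, tmp_v, width / 2,
                    dst_argb, width * 4, width, height);
}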
@ -52,12 +54,14 @@ The following is extracted from video_common.h as a complete list of formats sup
// 1 Secondary YUV format: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'), // deprecated.
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@ -109,6 +113,27 @@ The following is extracted from video_common.h as a complete list of formats sup
I444, NV24 and NV42 are full width, full height
I400 and J400 have no chroma channel.
# Color space
The YUV formats start with a letter that specifies the color space, e.g. I420:
I = BT.601 limited range
J = BT.601 full range (J = JPEG that uses this)
H = BT.709 limited range (H for HD)
F = BT.709 full range (F for Full range)
U = BT.2020 limited range (U for UHD)
V = BT.2020 full range
For YUV to RGB conversions, a matrix can be passed. See also convert_argb.h
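The letter only changes which conversion matrix is applied; the *Matrix variants of the converters take it as an explicit argument. A sketch (tightly packed planes) using the constants declared in convert_argb.h:

#include <stdint.h>
#include "libyuv/convert_argb.h"  // I420ToARGBMatrix, kYuv*Constants

// The same 4:2:0 planes decoded two ways: the matrix argument is the
// only difference between treating them as H420 (BT.709 limited) or
// J420 (BT.601 full range).
static void decode_both_ways(const uint8_t* y, const uint8_t* u,
                             const uint8_t* v, uint8_t* dst_h420,
                             uint8_t* dst_j420, int w, int h) {
  I420ToARGBMatrix(y, w, u, w / 2, v, w / 2, dst_h420, w * 4,
                   &kYuvH709Constants, w, h);
  I420ToARGBMatrix(y, w, u, w / 2, v, w / 2, dst_j420, w * 4,
                   &kYuvJPEGConstants, w, h);
}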
# HDR formats
Planar formats with 10 or 12 bits use the following fourcc:
I010, I012, P010, P012 are half width, half height
I210, I212, P210, P212 are half width, full height
I410, I412, P410, P412 are full width, full height
where
I is the color space (see above) and 3 planes: Y, U and V.
P is a biplanar format, similar to NV12 but 16 bits per channel, with the valid bits in the high bits. There is a Y plane and a UV plane.
0, 2 or 4 is the last digit of subsampling: 4:2:0, 4:2:2, or 4:4:4
10 or 12 is the bits per channel. The bits are in the low bits of a 16 bit channel.
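A worked example of the bit placement for the 10 bit case, in C for concreteness:

#include <stdint.h>

// A 10 bit sample v (0..1023) stored in a 16 bit channel:
//   I010/I210/I410 keep v in the low bits:  stored = v
//   P010/P210/P410 keep v in the high bits: stored = v << 6
static inline uint16_t p010_store(uint16_t v) { return (uint16_t)(v << 6); }
static inline uint16_t p010_load(uint16_t s) { return (uint16_t)(s >> 6); }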
# The ARGB FOURCC
There are 4 ARGB layouts - ARGB, BGRA, ABGR and RGBA. ARGB is most common by far, used for screen formats, and windows webcam drivers.
@ -157,6 +182,13 @@ The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
The 10 bit RGB values range from 0 to 1023.
XR30 is the same as AR30 but with no alpha channel.
# AB64 and AR64
AB64 is similar to ABGR, with 16 bits (2 bytes) per channel. Each channel stores an unsigned short.
In memory R is the lowest and A is the highest.
Each channel has values ranging from 0 to 65535.
AR64 is similar to ARGB.
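A per-pixel layout sketch; the structs are hypothetical helpers, and the 8 to 16 bit widening factor of 257 is an assumption (it maps 0 to 0 and 255 to 65535):

#include <stdint.h>

// AB64: R lowest in memory, A highest (per the doc above).
// AR64 is "similar to ARGB", so presumably B lowest, matching
// little endian ARGB byte order.
typedef struct { uint16_t r, g, b, a; } ab64_pixel;  // hypothetical
typedef struct { uint16_t b, g, r, a; } ar64_pixel;  // hypothetical

// Assumed widening: replicate the byte, i.e. c * 257.
static inline uint16_t widen_8_to_16(uint8_t c) { return (uint16_t)(c * 257); }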
# NV12 and NV21
NV12 is a biplanar format with a full sized Y plane followed by a single
@ -167,3 +199,10 @@ height chroma channel, and therefore is a 420 subsampling.
NV16 is 16 bits per pixel, with half width and full height. aka 422.
NV24 is 24 bits per pixel with full sized chroma channel. aka 444.
Most NV12 functions allow the destination Y pointer to be NULL.
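A sketch that uses the note above: passing NULL for the destination Y plane makes NV12ToI420 de-interleave only the chroma (strides assume tightly packed planes):

#include <stddef.h>
#include <stdint.h>
#include "libyuv/convert.h"  // NV12ToI420

static int split_nv12_chroma(const uint8_t* src_y, const uint8_t* src_uv,
                             uint8_t* dst_u, uint8_t* dst_v, int w, int h) {
  return NV12ToI420(src_y, w, src_uv, w,
                    NULL, 0,  // skip the Y plane, per the note above
                    dst_u, w / 2, dst_v, w / 2, w, h);
}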
# YUY2 and UYVY
YUY2 is a packed YUV format whose chroma is half width, full height (4:2:2).
YUY2 is YUYV in memory
UYVY is UYVY in memory
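For concreteness, the macropixel layout for two horizontally adjacent pixels (each pair shares one U and one V, i.e. 4:2:2):

#include <stdint.h>

typedef struct { uint8_t y0, u, y1, v; } yuy2_macropixel;  // YUY2: Y0 U0 Y1 V0
typedef struct { uint8_t u, y0, v, y1; } uyvy_macropixel;  // UYVY: U0 Y0 V0 Y1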

View File

@ -165,6 +165,7 @@ Running test with C code:
ninja -C out/Debug libyuv_unittest
ninja -C out/Debug compare
ninja -C out/Debug yuvconvert
ninja -C out/Debug yuvconstants
ninja -C out/Debug psnr
ninja -C out/Debug cpuid

View File

@ -22,7 +22,7 @@ sys.path.insert(0, os.path.join(checkout_root, 'build'))
sys.path.insert(0, os.path.join(checkout_root, 'tools', 'find_depot_tools'))
import vs_toolchain
import vs_toolchain # pylint: disable=wrong-import-position
if __name__ == '__main__':

View File

@ -55,20 +55,20 @@ extern "C" {
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
!defined(__clang__) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
#define HAS_SUMSQUAREERROR_AVX2
#endif
// The following are available for GCC and clangcl 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
// The following are available for GCC and clangcl:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_SSSE3
#endif
// The following are available for GCC and clangcl 64 bit:
// The following are available for GCC and clangcl:
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
(defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_AVX2
#endif

View File

@ -89,6 +89,23 @@ int I422ToI420(const uint8_t* src_y,
int width,
int height);
// Convert I422 to I444.
LIBYUV_API
int I422ToI444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
@ -122,6 +139,23 @@ int I420Copy(const uint8_t* src_y,
int width,
int height);
// Convert I420 to I444.
LIBYUV_API
int I420ToI444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy I010 to I010
#define I010ToI010 I010Copy
#define H010ToH010 I010Copy
@ -159,6 +193,229 @@ int I010ToI420(const uint16_t* src_y,
int width,
int height);
#define H210ToH422 I210ToI422
LIBYUV_API
int I210ToI422(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H012ToH420 I012ToI420
LIBYUV_API
int I012ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H212ToH422 I212ToI422
LIBYUV_API
int I212ToI422(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H412ToH444 I412ToI444
LIBYUV_API
int I412ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define I412ToI012 I410ToI010
#define H410ToH010 I410ToI010
#define H412ToH012 I410ToI010
LIBYUV_API
int I410ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
#define I212ToI012 I210ToI010
#define H210ToH010 I210ToI010
#define H212ToH012 I210ToI010
LIBYUV_API
int I210ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I010 to I410
LIBYUV_API
int I010ToI410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I012 to I412
#define I012ToI412 I010ToI410
// Convert I210 to I410
LIBYUV_API
int I210ToI410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I212 to I412
#define I212ToI412 I210ToI410
// Convert I010 to P010
LIBYUV_API
int I010ToP010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I210 to P210
LIBYUV_API
int I210ToP210(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I012 to P012
LIBYUV_API
int I012ToP012(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I212 to P212
LIBYUV_API
int I212ToP212(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8_t* src_y,
@ -215,6 +472,70 @@ int NV21ToI420(const uint8_t* src_y,
int width,
int height);
// Convert NV12 to NV24.
LIBYUV_API
int NV12ToNV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert NV16 to NV24.
LIBYUV_API
int NV16ToNV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert P012 to P412.
#define P012ToP412 P010ToP410
// Convert P016 to P416.
#define P016ToP416 P010ToP410
// Convert P210 to P410.
LIBYUV_API
int P210ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert P212 to P412.
#define P212ToP412 P210ToP410
// Convert P216 to P416.
#define P216ToP416 P210ToP410
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8_t* src_yuy2,
@ -372,6 +693,19 @@ int RAWToI420(const uint8_t* src_raw,
int width,
int height);
// RGB big endian (rgb in memory) to J420.
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,

View File

@ -21,16 +21,20 @@ extern "C" {
#endif
// Conversion matrix for YUV to RGB
LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // JPeg
LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // BT.601 full
LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYuvF709Constants; // BT.709 full
LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYuvV2020Constants; // BT.2020 full
// Conversion matrix for YVU to BGR
LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // JPeg
LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // BT.601 full
LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full
LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
// Macros for end swapped destination Matrix conversions.
// Swap UV and pass mirrored kYvuJPEGConstants matrix.
@ -38,7 +42,10 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
#define kYuvI601ConstantsVU kYvuI601Constants
#define kYuvJPEGConstantsVU kYvuJPEGConstants
#define kYuvH709ConstantsVU kYvuH709Constants
#define kYuvF709ConstantsVU kYvuF709Constants
#define kYuv2020ConstantsVU kYvu2020Constants
#define kYuvV2020ConstantsVU kYvuV2020Constants
#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
@ -47,8 +54,30 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
#define I010ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I010ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I422AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I444AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I010AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I210AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I410AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
// Alias.
#define ARGBToARGB ARGBCopy
@ -562,6 +591,70 @@ int I420AlphaToABGR(const uint8_t* src_y,
int height,
int attenuate);
// Convert I422 with Alpha to preattenuated ARGB.
LIBYUV_API
int I422AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int attenuate);
// Convert I422 with Alpha to preattenuated ABGR.
LIBYUV_API
int I422AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
int attenuate);
// Convert I444 with Alpha to preattenuated ARGB.
LIBYUV_API
int I444AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int attenuate);
// Convert I444 with Alpha to preattenuated ABGR.
LIBYUV_API
int I444AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8_t* src_y,
@ -713,19 +806,6 @@ int I010ToAR30(const uint16_t* src_y,
int width,
int height);
// Convert I010 to AB30.
LIBYUV_API
int I010ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert H010 to AR30.
LIBYUV_API
int H010ToAR30(const uint16_t* src_y,
@ -739,6 +819,19 @@ int H010ToAR30(const uint16_t* src_y,
int width,
int height);
// Convert I010 to AB30.
LIBYUV_API
int I010ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert H010 to AB30.
LIBYUV_API
int H010ToAB30(const uint16_t* src_y,
@ -972,6 +1065,42 @@ int AR30ToAB30(const uint8_t* src_ar30,
int width,
int height);
// Convert AR64 to ARGB.
LIBYUV_API
int AR64ToARGB(const uint16_t* src_ar64,
int src_stride_ar64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AB64 to ABGR.
#define AB64ToABGR AR64ToARGB
// Convert AB64 to ARGB.
LIBYUV_API
int AB64ToARGB(const uint16_t* src_ab64,
int src_stride_ab64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AR64 to ABGR.
#define AR64ToABGR AB64ToARGB
// Convert AR64 To AB64.
LIBYUV_API
int AR64ToAB64(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height);
// Convert AB64 To AR64.
#define AB64ToAR64 AR64ToAB64
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
@ -1284,6 +1413,19 @@ int I420ToAR30(const uint8_t* src_y,
int width,
int height);
// Convert I420 to AB30.
LIBYUV_API
int I420ToAB30(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert H420 to AR30.
LIBYUV_API
int H420ToAR30(const uint8_t* src_y,
@ -1297,6 +1439,19 @@ int H420ToAR30(const uint8_t* src_y,
int width,
int height);
// Convert H420 to AB30.
LIBYUV_API
int H420ToAB30(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert I420 to ARGB with matrix.
LIBYUV_API
int I420ToARGBMatrix(const uint8_t* src_y,
@ -1339,7 +1494,7 @@ int I444ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
// multiply 10 bit yuv into high bits to allow any number of bits.
// Convert 10 bit 420 YUV to ARGB with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
@ -1353,7 +1508,7 @@ int I010ToAR30Matrix(const uint16_t* src_y,
int width,
int height);
// multiply 10 bit yuv into high bits to allow any number of bits.
// Convert 10 bit 420 YUV to ARGB with matrix.
LIBYUV_API
int I210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
@ -1367,6 +1522,20 @@ int I210ToAR30Matrix(const uint16_t* src_y,
int width,
int height);
// Convert 10 bit 444 YUV to ARGB with matrix.
LIBYUV_API
int I410ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit YUV to ARGB with matrix.
LIBYUV_API
int I010ToARGBMatrix(const uint16_t* src_y,
@ -1381,6 +1550,34 @@ int I010ToARGBMatrix(const uint16_t* src_y,
int width,
int height);
// multiply 12 bit yuv into high bits to allow any number of bits.
LIBYUV_API
int I012ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 12 bit YUV to ARGB with matrix.
LIBYUV_API
int I012ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 422 YUV to ARGB with matrix.
LIBYUV_API
int I210ToARGBMatrix(const uint16_t* src_y,
@ -1395,6 +1592,87 @@ int I210ToARGBMatrix(const uint16_t* src_y,
int width,
int height);
// Convert 10 bit 444 YUV to ARGB with matrix.
LIBYUV_API
int I410ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to ARGB with matrix.
LIBYUV_API
int P010ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to ARGB with matrix.
LIBYUV_API
int P210ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to AR30 with matrix.
LIBYUV_API
int P010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to AR30 with matrix.
LIBYUV_API
int P210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// P012 and P010 use most significant bits so the conversion is the same.
// Convert P012 to ARGB with matrix.
#define P012ToARGBMatrix P010ToARGBMatrix
// Convert P012 to AR30 with matrix.
#define P012ToAR30Matrix P010ToAR30Matrix
// Convert P212 to ARGB with matrix.
#define P212ToARGBMatrix P210ToARGBMatrix
// Convert P212 to AR30 with matrix.
#define P212ToAR30Matrix P210ToAR30Matrix
// Convert P016 to ARGB with matrix.
#define P016ToARGBMatrix P010ToARGBMatrix
// Convert P016 to AR30 with matrix.
#define P016ToAR30Matrix P010ToAR30Matrix
// Convert P216 to ARGB with matrix.
#define P216ToARGBMatrix P210ToARGBMatrix
// Convert P216 to AR30 with matrix.
#define P216ToAR30Matrix P210ToAR30Matrix
// Convert I420 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I420AlphaToARGBMatrix(const uint8_t* src_y,
@ -1412,6 +1690,91 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
int height,
int attenuate);
// Convert I422 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I422AlphaToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I444 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I444AlphaToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I010 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I010AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I210 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I210AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I410 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I410AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert NV12 to ARGB with matrix.
LIBYUV_API
int NV12ToARGBMatrix(const uint8_t* src_y,

View File

@ -39,6 +39,24 @@ int I420ToI010(const uint8_t* src_y,
int width,
int height);
// Convert 8 bit YUV to 12 bit.
#define H420ToH012 I420ToI012
LIBYUV_API
int I420ToI012(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
LIBYUV_API
int I420ToI422(const uint8_t* src_y,
int src_stride_y,

View File

@ -153,6 +153,30 @@ int ARGBToI444(const uint8_t* src_argb,
int width,
int height);
// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height);
// Convert ABGR to AB64.
#define ABGRToAB64 ARGBToAR64
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height);
// Convert ABGR to AR64.
#define ABGRToAR64 ARGBToAB64
// Convert ARGB To I422.
LIBYUV_API
int ARGBToI422(const uint8_t* src_argb,

View File

@ -105,6 +105,50 @@ void MergeUVPlane(const uint8_t* src_u,
int width,
int height);
// Split interleaved msb UV plane into separate lsb U and V planes.
LIBYUV_API
void SplitUVPlane_16(const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
int depth);
// Merge separate lsb U and V planes into one interleaved msb UV plane.
LIBYUV_API
void MergeUVPlane_16(const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height,
int depth);
// Convert lsb plane to msb plane
LIBYUV_API
void ConvertToMSBPlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height,
int depth);
// Convert msb plane to lsb plane
LIBYUV_API
void ConvertToLSBPlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height,
int depth);
// Scale U and V to half width and height and merge into interleaved UV plane.
// width and height are source size, allowing odd sizes.
// Use for converting I444 or I422 to NV12.
@ -153,6 +197,92 @@ void MergeRGBPlane(const uint8_t* src_r,
int width,
int height);
// Split interleaved ARGB plane into separate R, G, B and A planes.
// dst_a can be NULL to discard alpha plane.
LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_r,
int dst_stride_r,
uint8_t* dst_g,
int dst_stride_g,
uint8_t* dst_b,
int dst_stride_b,
uint8_t* dst_a,
int dst_stride_a,
int width,
int height);
// Merge separate R, G, B and A planes into one interleaved ARGB plane.
// src_a can be NULL to fill opaque value to alpha.
LIBYUV_API
void MergeARGBPlane(const uint8_t* src_r,
int src_stride_r,
const uint8_t* src_g,
int src_stride_g,
const uint8_t* src_b,
int src_stride_b,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Merge separate 'depth' bit R, G and B planes stored in lsb
// into one interleaved XR30 plane.
// depth should be in range [10, 16]
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height,
int depth);
// Merge separate 'depth' bit R, G, B and A planes stored in lsb
// into one interleaved AR64 plane.
// src_a can be NULL to fill opaque value to alpha.
// depth should be in range [1, 16]
LIBYUV_API
void MergeAR64Plane(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
const uint16_t* src_a,
int src_stride_a,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height,
int depth);
// Merge separate 'depth' bit R, G, B and A planes stored in lsb
// into one interleaved ARGB plane.
// src_a can be NULL to fill opaque value to alpha.
// depth should be in range [8, 16]
LIBYUV_API
void MergeARGB16To8Plane(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int depth);
// Copy I400. Supports inverting.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
@ -201,14 +331,28 @@ int I444Copy(const uint8_t* src_y,
int height);
// Copy NV12. Supports inverting.
int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv,
int src_stride_uv, uint8_t* dst_y, int dst_stride_y,
uint8_t* dst_uv, int dst_stride_uv, int width, int height);
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Copy NV21. Supports inverting.
int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu,
int src_stride_vu, uint8_t* dst_y, int dst_stride_y,
uint8_t* dst_vu, int dst_stride_vu, int width, int height);
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert YUY2 to I422.
LIBYUV_API
@ -855,7 +999,7 @@ void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int width);
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.
// shuffler is 16 bytes.
LIBYUV_API
int ARGBShuffle(const uint8_t* src_bgra,
int src_stride_bgra,
@ -865,6 +1009,17 @@ int ARGBShuffle(const uint8_t* src_bgra,
int width,
int height);
// Shuffle AR64 channel order. e.g. AR64 to AB64.
// shuffler is 16 bytes.
LIBYUV_API
int AR64Shuffle(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ar64,
int dst_stride_ar64,
const uint8_t* shuffler,
int width,
int height);
// Sobel ARGB effect with planar output.
LIBYUV_API
int ARGBSobelToPlane(const uint8_t* src_argb,

View File

@ -32,8 +32,9 @@ extern "C" {
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// The following are available for Visual C 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
!defined(__clang__)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif

View File

@ -49,6 +49,18 @@ void ScalePlane_16(const uint16_t* src,
int dst_height,
enum FilterMode filtering);
// Sample is expected to be in the low 12 bits.
LIBYUV_API
void ScalePlane_12(const uint16_t* src,
int src_stride,
int src_width,
int src_height,
uint16_t* dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
@ -97,6 +109,25 @@ int I420Scale_16(const uint16_t* src_y,
int dst_height,
enum FilterMode filtering);
LIBYUV_API
int I420Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:4:4 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
@ -145,6 +176,25 @@ int I444Scale_16(const uint16_t* src_y,
int dst_height,
enum FilterMode filtering);
LIBYUV_API
int I444Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales an NV12 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is

View File

@ -74,18 +74,39 @@ extern "C" {
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
#define HAS_SCALEROWUP2LINEAR_SSE2
#define HAS_SCALEROWUP2LINEAR_SSSE3
#define HAS_SCALEROWUP2BILINEAR_SSE2
#define HAS_SCALEROWUP2BILINEAR_SSSE3
#define HAS_SCALEROWUP2LINEAR_12_SSSE3
#define HAS_SCALEROWUP2BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2LINEAR_16_SSE2
#define HAS_SCALEROWUP2BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2LINEAR_SSSE3
#define HAS_SCALEUVROWUP2BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2LINEAR_16_SSE2
#define HAS_SCALEUVROWUP2BILINEAR_16_SSE2
#endif
// The following are available for gcc/clang x86 platforms, but
// require clang 3.4 or gcc 4.7.
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && !defined(_MSC_VER) && \
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEUVROWDOWN2BOX_AVX2
#define HAS_SCALEROWUP2LINEAR_AVX2
#define HAS_SCALEROWUP2BILINEAR_AVX2
#define HAS_SCALEROWUP2LINEAR_12_AVX2
#define HAS_SCALEROWUP2BILINEAR_12_AVX2
#define HAS_SCALEROWUP2LINEAR_16_AVX2
#define HAS_SCALEROWUP2BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2LINEAR_AVX2
#define HAS_SCALEUVROWUP2BILINEAR_AVX2
#define HAS_SCALEUVROWUP2LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2BILINEAR_16_AVX2
#endif
// The following are available on all x86 platforms, but
@ -114,6 +135,16 @@ extern "C" {
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2LINEAR_NEON
#define HAS_SCALEROWUP2BILINEAR_NEON
#define HAS_SCALEROWUP2LINEAR_12_NEON
#define HAS_SCALEROWUP2BILINEAR_12_NEON
#define HAS_SCALEROWUP2LINEAR_16_NEON
#define HAS_SCALEROWUP2BILINEAR_16_NEON
#define HAS_SCALEUVROWUP2LINEAR_NEON
#define HAS_SCALEUVROWUP2BILINEAR_NEON
#define HAS_SCALEUVROWUP2LINEAR_16_NEON
#define HAS_SCALEUVROWUP2BILINEAR_16_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@ -279,6 +310,40 @@ void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* d,
int dst_width);
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleCols_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
@ -416,6 +481,40 @@ void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVCols_C(uint8_t* dst_uv,
const uint8_t* src_uv,
int dst_width,
@ -508,6 +607,120 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_SSSE3(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -1033,6 +1246,103 @@ void ScaleUVRowDownEvenBox_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation.
@ -1143,6 +1453,55 @@ void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Linear_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_Any_NEON(const uint8_t* src_ptr,
uint16_t* dst_ptr,

View File

@ -30,6 +30,19 @@ int UVScale(const uint8_t* src_uv,
int dst_height,
enum FilterMode filtering);
// Scale a 16 bit UV image.
// This function is currently incomplete; it can't handle all cases.
LIBYUV_API
int UVScale_16(const uint16_t* src_uv,
int src_stride_uv,
int src_width,
int src_height,
uint16_t* dst_uv,
int dst_stride_uv,
int dst_width,
int dst_height,
enum FilterMode filtering);
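// A minimal usage sketch for the declaration above, halving the interleaved
// chroma plane of a P010-style image. The sizes are illustrative, and the
// stride is assumed to count uint16_t elements per row (two per UV pixel),
// matching the 8 bit UVScale convention.
#include "libyuv/scale.h"     // for kFilterBilinear
#include "libyuv/scale_uv.h"

// Hypothetical helper, not part of libyuv: downscale a tightly packed
// 64x64 16 bit UV plane to 32x32.
static int HalveUVPlane16(const uint16_t* src_uv, uint16_t* dst_uv) {
  return UVScale_16(src_uv, 64 * 2, 64, 64,  // src, stride, width, height
                    dst_uv, 32 * 2, 32, 32,  // dst, stride, width, height
                    kFilterBilinear);  // see the incompleteness note above
}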
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1789
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -60,17 +60,19 @@ enum FourCC {
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
FOURCC_I010 = FOURCC('I', '0', '1', '0'), // bt.601 10 bit 420
FOURCC_I210 = FOURCC('I', '2', '1', '0'), // bt.601 10 bit 422
// 1 Secondary YUV format: row biplanar. deprecated.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@ -94,16 +96,23 @@ enum FourCC {
FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J400 =
FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_F420 = FOURCC('F', '4', '2', '0'), // bt.709 full, unofficial fourcc
FOURCC_F422 = FOURCC('F', '4', '2', '2'), // bt.709 full, unofficial fourcc
FOURCC_F444 = FOURCC('F', '4', '4', '4'), // bt.709 full, unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc
FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc
FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc
FOURCC_U420 = FOURCC('U', '4', '2', '0'), // bt.2020, unofficial fourcc
FOURCC_U422 = FOURCC('U', '4', '2', '2'), // bt.2020, unofficial fourcc
FOURCC_U444 = FOURCC('U', '4', '4', '4'), // bt.2020, unofficial fourcc
FOURCC_F010 = FOURCC('F', '0', '1', '0'), // bt.709 full range 10 bit 420
FOURCC_H010 = FOURCC('H', '0', '1', '0'), // bt.709 10 bit 420
FOURCC_U010 = FOURCC('U', '0', '1', '0'), // bt.2020 10 bit 420
FOURCC_F210 = FOURCC('F', '2', '1', '0'), // bt.709 full range 10 bit 422
FOURCC_H210 = FOURCC('H', '2', '1', '0'), // bt.709 10 bit 422
FOURCC_U210 = FOURCC('U', '2', '1', '0'), // bt.2020 10 bit 422
FOURCC_P010 = FOURCC('P', '0', '1', '0'),
FOURCC_P210 = FOURCC('P', '2', '1', '0'),
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
@ -156,6 +165,8 @@ enum FourCCBpp {
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_AR30 = 32,
FOURCC_BPP_AB30 = 32,
FOURCC_BPP_AR64 = 64,
FOURCC_BPP_AB64 = 64,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
@ -173,7 +184,12 @@ enum FourCCBpp {
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_H422 = 16,
FOURCC_BPP_I010 = 15,
FOURCC_BPP_I210 = 20,
FOURCC_BPP_H010 = 15,
FOURCC_BPP_H210 = 20,
FOURCC_BPP_P010 = 15,
FOURCC_BPP_P210 = 20,
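// The 10 bit entries count effective data bits per pixel:
// 4:2:0 is 10 + 10/4 + 10/4 = 15; 4:2:2 is 10 + 10/2 + 10/2 = 20.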
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,

View File

@ -66,7 +66,7 @@ LOCAL_OBJ_FILES := \
.c.o:
$(CC) -c $(CFLAGS) $*.c -o $*.o
all: libyuv.a i444tonv12_eg yuvconvert yuvconstants cpuid psnr
libyuv.a: $(LOCAL_OBJ_FILES)
$(AR) $(ARFLAGS) $@ $(LOCAL_OBJ_FILES)
@ -75,13 +75,17 @@ libyuv.a: $(LOCAL_OBJ_FILES)
yuvconvert: util/yuvconvert.cc libyuv.a
$(CXX) $(CXXFLAGS) -Iutil/ -o $@ util/yuvconvert.cc libyuv.a
# A C test utility that generates yuvconstants for yuv to rgb.
yuvconstants: util/yuvconstants.c libyuv.a
$(CXX) $(CXXFLAGS) -Iutil/ -lm -o $@ util/yuvconstants.c libyuv.a
# A standalone test utility
psnr: util/psnr.cc
$(CXX) $(CXXFLAGS) -Iutil/ -o $@ util/psnr.cc util/psnr_main.cc util/ssim.cc
# A simple conversion example.
i444tonv12_eg: util/i444tonv12_eg.cc libyuv.a
$(CXX) $(CXXFLAGS) -o $@ util/i444tonv12_eg.cc libyuv.a
# A C test utility that uses libyuv conversion from C.
# gcc 4.4 and older require -fno-exceptions to avoid link error on __gxx_personality_v0
@ -90,4 +94,4 @@ cpuid: util/cpuid.c libyuv.a
$(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a
clean:
/bin/rm -f source/*.o *.ii *.s libyuv.a i444tonv12_eg yuvconvert yuvconstants cpuid psnr

View File

@ -17,36 +17,6 @@ namespace libyuv {
extern "C" {
#endif
// HAKMEM method for Hamming distance.
uint32_t HammingDistance_C(const uint8_t* src_a,
const uint8_t* src_b,

View File

@ -19,8 +19,7 @@ extern "C" {
#endif
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#if defined(__x86_64__)
uint32_t HammingDistance_SSE42(const uint8_t* src_a,

View File

@ -22,8 +22,9 @@ namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
!defined(__clang__) && defined(_M_IX86)
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
const uint8_t* src_b,
@ -77,8 +78,7 @@ __declspec(naked) uint32_t
}
}
#ifdef HAS_SUMSQUAREERROR_AVX2
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable : 4752)
__declspec(naked) uint32_t
@ -118,7 +118,7 @@ __declspec(naked) uint32_t
ret
}
}
#endif // HAS_SUMSQUAREERROR_AVX2
uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
uvec32 kHashMul0 = {
@ -196,7 +196,7 @@ __declspec(naked) uint32_t
}
#ifdef HAS_HASHDJB2_AVX2
__declspec(naked) uint32_t
HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
__asm {
@ -231,7 +231,7 @@ __declspec(naked) uint32_t
ret
}
}
#endif // HAS_HASHDJB2_AVX2
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

View File

@ -15,7 +15,8 @@
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale.h" // For ScalePlane()
#include "libyuv/scale.h" // For ScalePlane()
#include "libyuv/scale_uv.h" // For UVScale()
#ifdef __cplusplus
namespace libyuv {
@ -48,7 +49,7 @@ static int I4xxToI420(const uint8_t* src_y,
const int dst_y_height = Abs(src_y_height);
const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
if (src_uv_width <= 0 || src_uv_height == 0) {
return -1;
}
if (dst_y) {
@ -148,6 +149,52 @@ int I010Copy(const uint16_t* src_y,
return 0;
}
static int Planar16bitTo8bit(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
int subsample_x,
int subsample_y,
int depth) {
int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
int scale = 1 << (24 - depth);
if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
uv_height = -uv_height;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (uv_height - 1) * src_stride_u;
src_v = src_v + (uv_height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
// Convert Y plane.
Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
height);
// Convert UV planes.
Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, scale, uv_width,
uv_height);
Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, scale, uv_width,
uv_height);
return 0;
}
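// A quick arithmetic check of the scale constant above, assuming
// Convert16To8Plane applies scale as a 16.16 fixed-point multiplier:
// 1 << (24 - depth) then reduces to a right shift by depth - 8 bits.
// Hypothetical sanity check, not part of libyuv.
#include <assert.h>
static void CheckDepthScale(void) {
  const int depth = 10;
  const int scale = 1 << (24 - depth);  // 16384
  const unsigned v = 1023;              // 10 bit maximum
  assert(((v * scale) >> 16) == (v >> (depth - 8)));  // a plain shift by 2
  assert(((v * scale) >> 16) == 255);                 // maps to the 8 bit max
}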
// Convert 10 bit YUV to 8 bit.
LIBYUV_API
int I010ToI420(const uint16_t* src_y,
@ -164,34 +211,295 @@ int I010ToI420(const uint16_t* src_y,
int dst_stride_v,
int width,
int height) {
return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, width, height, 1,
1, 10);
}
LIBYUV_API
int I210ToI422(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, width, height, 1,
0, 10);
}
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, width, height, 0,
0, 10);
}
LIBYUV_API
int I012ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, width, height, 1,
1, 12);
}
LIBYUV_API
int I212ToI422(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, width, height, 1,
0, 12);
}
LIBYUV_API
int I412ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, width, height, 0,
0, 12);
}
// Any Ix10 to I010 format with mirroring.
static int Ix10ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
int subsample_x,
int subsample_y) {
const int dst_y_width = Abs(width);
const int dst_y_height = Abs(height);
const int src_uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
dst_y_width, dst_y_height, kFilterBilinear);
}
ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
return 0;
}
LIBYUV_API
int I410ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Ix10ToI010(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
dst_v, dst_stride_v, width, height, 0, 0);
}
LIBYUV_API
int I210ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
return Ix10ToI010(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
dst_v, dst_stride_v, width, height, 1, 0);
}
// Any I[420]1[02] to P[420]1[02] format with mirroring.
static int IxxxToPxxx(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height,
int subsample_x,
int subsample_y,
int depth) {
const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
if (width <= 0 || height == 0) {
return -1;
}
// Convert Y plane.
ConvertToMSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height,
depth);
// Convert UV planes.
MergeUVPlane_16(src_u, src_stride_u, src_v, src_stride_v, dst_uv,
dst_stride_uv, uv_width, uv_height, depth);
return 0;
}
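// Unlike the planar conversions above, the P formats store their 10/12 bit
// samples in the most significant bits of each 16 bit word, so IxxxToPxxx
// shifts the planes with ConvertToMSBPlane_16 and interleaves the subsampled
// U and V planes into one UV plane with MergeUVPlane_16.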
LIBYUV_API
int I010ToP010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
width, height, 1, 1, 10);
}
LIBYUV_API
int I210ToP210(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
width, height, 1, 0, 10);
}
LIBYUV_API
int I012ToP012(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
width, height, 1, 1, 12);
}
LIBYUV_API
int I212ToP212(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
width, height, 1, 0, 12);
}
// 422 chroma is 1/2 width, 1x height
// 420 chroma is 1/2 width, 1/2 height
LIBYUV_API
@ -613,6 +921,104 @@ int NV21ToI420(const uint8_t* src_y,
width, height);
}
LIBYUV_API
int NV12ToNV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
Abs(height), kFilterBilinear);
return 0;
}
LIBYUV_API
int NV16ToNV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
return 0;
}
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
Abs(height), kFilterBilinear);
return 0;
}
LIBYUV_API
int P210ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
return 0;
}
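// In the four conversions above only the chroma plane changes size: a 420
// source has its UV plane scaled up in both dimensions, while a 422 source
// is already full height, which is why NV16ToNV24 and P210ToP410 pass
// height rather than SUBSAMPLE(height, 1, 1) to the UV scaler.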
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8_t* src_yuy2,
@ -962,6 +1368,18 @@ int ARGBToI420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@ -982,22 +1400,6 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
@ -1365,7 +1767,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
RGB24ToYRow = RGB24ToYRow_Any_NEON;
@ -1402,6 +1804,14 @@ int RGB24ToI420(const uint8_t* src_rgb24,
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
#if defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@ -1410,6 +1820,18 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@ -1554,6 +1976,14 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
#endif
#else
#if defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@ -1562,6 +1992,18 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
@ -1705,6 +2147,26 @@ int RAWToI420(const uint8_t* src_raw,
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else
#if defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@ -1780,6 +2242,178 @@ int RAWToI420(const uint8_t* src_raw,
return 0;
}
// TODO(fbarchard): Use Matrix version to implement I420 and J420.
// Convert RAW to J420.
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if (defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
#else
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToUVJRow = RAWToUVJRow_Any_NEON;
RAWToYJRow = RAWToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToYJRow = RAWToYJRow_NEON;
if (IS_ALIGNED(width, 16)) {
RAWToUVJRow = RAWToUVJRow_NEON;
}
}
}
// MMI and MSA versions do direct RAW to YUV.
#elif (defined(HAS_RAWTOYJROW_MMI) || defined(HAS_RAWTOYJROW_MSA))
#if defined(HAS_RAWTOYJROW_MMI) && defined(HAS_RAWTOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToUVJRow = RAWToUVJRow_Any_MMI;
RAWToYJRow = RAWToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RAWToYJRow = RAWToYJRow_MMI;
if (IS_ALIGNED(width, 16)) {
RAWToUVJRow = RAWToUVJRow_MMI;
}
}
}
#endif
#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RAWToUVJRow = RAWToUVJRow_Any_MSA;
RAWToYJRow = RAWToYJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_MSA;
RAWToUVJRow = RAWToUVJRow_MSA;
}
}
#endif
#else
#if defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
}
#endif
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#endif
{
#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
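// (width * 4 + 31) & ~31 rounds each ARGB row up to a multiple of 32
// bytes, e.g. width 100 gives (400 + 31) & ~31 = 416 rather than 400.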
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
RAWToUVJRow(src_raw, 0, dst_u, dst_v, width);
RAWToYJRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVJRow(row, 0, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
free_aligned_buffer_64(row);
#endif
}
return 0;
}
// Convert RGB565 to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,

View File

@ -30,6 +30,8 @@ static __inline int Abs(int v) {
}
// I420 to any I4xx YUV format with mirroring.
// TODO(fbarchard): Consider kFilterNone for Y, or CopyPlane
static int I420ToI4xx(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
@ -109,6 +111,50 @@ int I420ToI010(const uint8_t* src_y,
return 0;
}
// Convert 8 bit YUV to 12 bit.
LIBYUV_API
int I420ToI012(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
// Convert Y plane.
Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 4096, width,
height);
// Convert UV planes.
Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 4096, halfwidth,
halfheight);
Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 4096, halfwidth,
halfheight);
return 0;
}
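// A quick check of the 4096 constant above, under the assumption that
// Convert8To16Plane treats scale as an 8.8 fixed-point multiplier (so 1024
// would target 10 bits and 65536 a full 16 bits). Hypothetical, not part of
// libyuv: 4096 / 256 = 16 = 1 << 4, so each 8 bit sample is shifted up by
// 12 - 8 = 4 bits and 255 maps to 4080.
#include <assert.h>
static void CheckI012Scale(void) {
  assert(((255u * 4096u) >> 8) == (255u << 4));  // == 4080
}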
// 420 chroma is 1/2 width, 1/2 height
// 422 chroma is 1/2 width, 1x height
LIBYUV_API
@ -159,6 +205,102 @@ int I420ToI444(const uint8_t* src_y,
dst_uv_height);
}
// 420 chroma to 444 chroma, 10/12 bit version
LIBYUV_API
int I010ToI410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
if (width == 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
Abs(height), kFilterBilinear);
ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
Abs(height), kFilterBilinear);
return 0;
}
// 422 chroma to 444 chroma, 10/12 bit version
LIBYUV_API
int I210ToI410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
if (width == 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
return 0;
}
// 422 chroma is 1/2 width, 1x height
// 444 chroma is 1x width, 1x height
LIBYUV_API
int I422ToI444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
if (width == 0 || height == 0) {
return -1;
}
if (dst_y) {
ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
Abs(width), Abs(height), kFilterBilinear);
}
ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
return 0;
}
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
int I400Copy(const uint8_t* src_y,

View File

@ -2009,6 +2009,124 @@ int ARGBToJ422(const uint8_t* src_argb,
return 0;
}
// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height) {
int y;
void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAR64Row_C;
if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ar64 = 0;
}
#if defined(HAS_ARGBTOAR64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAR64Row = ARGBToAR64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAR64Row(src_argb, dst_ar64, width);
src_argb += src_stride_argb;
dst_ar64 += dst_stride_ar64;
}
return 0;
}
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height) {
int y;
void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAB64Row_C;
if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ab64 = 0;
}
#if defined(HAS_ARGBTOAB64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAB64Row = ARGBToAB64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAB64Row(src_argb, dst_ab64, width);
src_argb += src_stride_argb;
dst_ab64 += dst_stride_ab64;
}
return 0;
}
// Convert ARGB to J400.
LIBYUV_API
int ARGBToJ400(const uint8_t* src_argb,

View File

@ -89,18 +89,26 @@ int ConvertToI420(const uint8_t* sample,
switch (format) {
// Single plane formats
case FOURCC_YUY2: { // TODO(fbarchard): Find better odd crop fix.
uint8_t* u = (crop_x & 1) ? dst_v : dst_u;
uint8_t* v = (crop_x & 1) ? dst_u : dst_v;
int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u;
int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v;
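// YUY2 packs pixels as Y0 U Y1 V, so cropping at an odd x starts on Y1 and
// the chroma the converter reads is in V-then-U order; swapping the U and V
// destinations (and strides) compensates.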
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u,
stride_u, v, stride_v, crop_width, inv_crop_height);
break;
}
case FOURCC_UYVY: {
uint8_t* u = (crop_x & 1) ? dst_v : dst_u;
uint8_t* v = (crop_x & 1) ? dst_u : dst_v;
int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u;
int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v;
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u,
stride_u, v, stride_v, crop_width, inv_crop_height);
break;
}
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,

View File

@ -133,7 +133,7 @@ int GetXCR0() {
#pragma optimize("g", on)
#endif
// based on libvpx arm_cpudetect.c
// Based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512];

View File

@ -17,8 +17,7 @@ extern "C" {
#endif
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
#if defined(HAS_TRANSPOSEWX8_SSSE3)

View File

@ -16,8 +16,9 @@ namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
!defined(__clang__) && defined(_M_IX86)
__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
int src_stride,

View File

@ -30,6 +30,39 @@ extern "C" {
// Subsampled source needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
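// e.g. SS(5, 1) = 3: a 5 pixel row subsampled by 2 still needs 3 chroma
// samples, so the width is rounded up before the shift.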
// Any 4 planes to 1
#define ANY41(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 5]); \
memset(temp, 0, 64 * 4); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n); \
} \
memcpy(temp, y_buf + n, r); \
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 192, a_buf + n, r); \
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
SS(r, DUVSHIFT) * BPP); \
}
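// ANY41 above follows the pattern used throughout this file: run the SIMD
// kernel on the largest multiple of its vector width, stage the leftover
// pixels in zero padded scratch rows, run one more full width pass on the
// copies, and write back only the valid results. Below is a minimal single
// plane sketch of the same split with a plain C stand-in for the SIMD
// kernel; the AddOneRow_* names are hypothetical, not libyuv functions.
#include <stdint.h>
#include <string.h>

static void AddOneRow_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) dst[i] = (uint8_t)(src[i] + 1);
}

// Safe for any width; the "SIMD" kernel only ever sees multiples of 8.
static void AddOneRow_Any(const uint8_t* src, uint8_t* dst, int width) {
  uint8_t temp[8 * 2];
  memset(temp, 0, sizeof(temp));  /* pad the tail, as the macros do for msan */
  int r = width & 7;              /* leftover pixels */
  int n = width & ~7;             /* main span, a multiple of the vector width */
  if (n > 0) {
    AddOneRow_C(src, dst, n);     /* stand-in for e.g. an 8-wide SIMD kernel */
  }
  memcpy(temp, src + n, r);       /* stage the remainder */
  AddOneRow_C(temp, temp + 8, 8); /* one full-width pass over the copy */
  memcpy(dst + n, temp + 8, r);   /* write back only the r valid pixels */
}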
#ifdef HAS_MERGEARGBROW_SSE2
ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7)
#endif
#ifdef HAS_MERGEARGBROW_AVX2
ANY41(MergeARGBRow_Any_AVX2, MergeARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_MERGEARGBROW_NEON
ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
#endif
// Note: the odd width chroma replication below also applies to 444 formats,
// because the Arm implementation subsamples 444 to 422 internally.
// Any 4 planes to 1 with yuvconstants
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
@ -46,29 +79,166 @@ extern "C" {
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 192, a_buf + n, r); \
if (width & 1) { \
temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \
temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
} \
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I444ALPHATOARGBROW_SSSE3
ANY41C(I444AlphaToARGBRow_Any_SSSE3, I444AlphaToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_AVX2
ANY41C(I444AlphaToARGBRow_Any_AVX2, I444AlphaToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444ALPHATOARGBROW_NEON
ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_MSA
ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_MMI
ANY41C(I444AlphaToARGBRow_Any_MMI, I444AlphaToARGBRow_MMI, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MMI
ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C
// Any 4 planes to 1 plane of 8 bit with yuvconstants
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
int width) { \
SIMD_ALIGNED(T temp[16 * 4]); \
SIMD_ALIGNED(uint8_t out[64]); \
memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, y_buf + n, r * SBPP); \
memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
memcpy(temp + 48, a_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants, \
MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
ANY41CT(I210AlphaToARGBRow_Any_SSSE3,
I210AlphaToARGBRow_SSSE3,
1,
0,
uint16_t,
2,
4,
7)
#endif
#ifdef HAS_I210ALPHATOARGBROW_AVX2
ANY41CT(I210AlphaToARGBRow_Any_AVX2,
I210AlphaToARGBRow_AVX2,
1,
0,
uint16_t,
2,
4,
15)
#endif
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
ANY41CT(I410AlphaToARGBRow_Any_SSSE3,
I410AlphaToARGBRow_SSSE3,
0,
0,
uint16_t,
2,
4,
7)
#endif
#ifdef HAS_I410ALPHATOARGBROW_AVX2
ANY41CT(I410AlphaToARGBRow_Any_AVX2,
I410AlphaToARGBRow_AVX2,
0,
0,
uint16_t,
2,
4,
15)
#endif
#undef ANY41CT
// Any 4 planes to 1 plane with parameter
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
SIMD_ALIGNED(STYPE temp[16 * 4]); \
SIMD_ALIGNED(DTYPE out[64]); \
memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n); \
} \
memcpy(temp, r_buf + n, r * SBPP); \
memcpy(temp + 16, g_buf + n, r * SBPP); \
memcpy(temp + 32, b_buf + n, r * SBPP); \
memcpy(temp + 48, a_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \
memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \
}
#ifdef HAS_MERGEAR64ROW_AVX2
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif
#ifdef HAS_MERGEAR64ROW_NEON
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
ANY41PT(MergeARGB16To8Row_Any_AVX2,
MergeARGB16To8Row_AVX2,
uint16_t,
2,
uint8_t,
4,
15)
#endif
#ifdef HAS_MERGEARGB16TO8ROW_NEON
ANY41PT(MergeARGB16To8Row_Any_NEON,
MergeARGB16To8Row_NEON,
uint16_t,
2,
uint8_t,
4,
7)
#endif
#undef ANY41PT
// Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
@ -98,6 +268,15 @@ ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#ifdef HAS_MERGERGBROW_MMI
ANY31(MergeRGBRow_Any_MMI, MergeRGBRow_MMI, 0, 0, 3, 7)
#endif
#ifdef HAS_MERGEXRGBROW_SSE2
ANY31(MergeXRGBRow_Any_SSE2, MergeXRGBRow_SSE2, 0, 0, 4, 7)
#endif
#ifdef HAS_MERGEXRGBROW_AVX2
ANY31(MergeXRGBRow_Any_AVX2, MergeXRGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_MERGEXRGBROW_NEON
ANY31(MergeXRGBRow_Any_NEON, MergeXRGBRow_NEON, 0, 0, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
@ -165,6 +344,21 @@ ANY31(BlendPlaneRow_Any_MMI, BlendPlaneRow_MMI, 0, 0, 1, 7)
#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TORGBAROW_SSSE3
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOARGB4444ROW_SSSE3
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGB1555ROW_SSSE3
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB565ROW_SSSE3
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB24ROW_SSSE3
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TOAR30ROW_SSSE3
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
#endif
@ -173,12 +367,7 @@ ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
@ -262,11 +451,99 @@ ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_SSSE3
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_SSSE3
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_AVX2
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_AVX2
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOAR30ROW_SSSE3
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_SSSE3
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_AVX2
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I212TOAR30ROW_AVX2
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY31CT
// Any 3 planes to 1 plane with parameter
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
DTYPE* dst_ptr, int depth, int width) { \
SIMD_ALIGNED(STYPE temp[16 * 3]); \
SIMD_ALIGNED(DTYPE out[64]); \
memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n); \
} \
memcpy(temp, r_buf + n, r * SBPP); \
memcpy(temp + 16, g_buf + n, r * SBPP); \
memcpy(temp + 32, b_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \
memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \
}
#ifdef HAS_MERGEXR30ROW_AVX2
ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#endif
#ifdef HAS_MERGEXR30ROW_NEON
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
ANY31PT(MergeXR30Row_10_Any_NEON,
MergeXR30Row_10_NEON,
uint16_t,
2,
uint8_t,
4,
3)
#endif
#ifdef HAS_MERGEXR64ROW_AVX2
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif
#ifdef HAS_MERGEXR64ROW_NEON
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
ANY31PT(MergeXRGB16To8Row_Any_AVX2,
MergeXRGB16To8Row_AVX2,
uint16_t,
2,
uint8_t,
4,
15)
#endif
#ifdef HAS_MERGEXRGB16TO8ROW_NEON
ANY31PT(MergeXRGB16To8Row_Any_NEON,
MergeXRGB16To8Row_NEON,
uint16_t,
2,
uint8_t,
4,
7)
#endif
#undef ANY31PT
// Any 2 planes to 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
@ -481,6 +758,77 @@ ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 2 planes of 16 bit to 1 with yuvconstants
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(T temp[16 * 3]); \
SIMD_ALIGNED(uint8_t out[64]); \
memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, y_buf + n, r * SBPP); \
memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_P210TOAR30ROW_SSSE3
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_SSSE3
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_AVX2
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P210TOAR30ROW_AVX2
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_SSSE3
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_SSSE3
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_AVX2
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_AVX2
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY21CT
// Any 2 16 bit planes with parameter to 1
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
int width) { \
SIMD_ALIGNED(T temp[16 * 4]); \
memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_u, src_v, dst_uv, depth, n); \
} \
memcpy(temp, src_u + n, r * BPP); \
memcpy(temp + 16, src_v + n, r * BPP); \
ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1); \
memcpy(dst_uv + n * 2, temp + 32, r * BPP * 2); \
}
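// temp layout for the remainder path above (MASK = 15 case): elements
// [0..15] hold the U tail, [16..31] the V tail, and [32..63] receive the
// interleaved UV result, of which only r pairs are copied to dst_uv.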
#ifdef HAS_MERGEUVROW_16_AVX2
ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
#endif
#undef ANY21PT
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
@ -1009,6 +1357,72 @@ ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1)
#undef ANY11P
// Any 1 to 1 with type
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \
SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \
memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \
memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \
}
#ifdef HAS_ARGBTOAR64ROW_SSSE3
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif
#ifdef HAS_ARGBTOAB64ROW_SSSE3
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif
#ifdef HAS_AR64TOARGBROW_SSSE3
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif
#ifdef HAS_AB64TOARGBROW_SSSE3
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif
#ifdef HAS_ARGBTOAR64ROW_AVX2
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_ARGBTOAB64ROW_AVX2
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_AR64TOARGBROW_AVX2
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif
#ifdef HAS_AB64TOARGBROW_AVX2
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif
#ifdef HAS_ARGBTOAR64ROW_NEON
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_ARGBTOAB64ROW_NEON
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_AR64TOARGBROW_NEON
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif
#ifdef HAS_AB64TOARGBROW_NEON
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif
#undef ANY11T
// Any 1 to 1 with parameter and shorts. BPP measures in shorts.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
@ -1061,6 +1475,30 @@ ANY11C(Convert8To16Row_Any_AVX2,
uint16_t,
31)
#endif
#ifdef HAS_MULTIPLYROW_16_AVX2
ANY11C(MultiplyRow_16_Any_AVX2,
MultiplyRow_16_AVX2,
2,
2,
uint16_t,
uint16_t,
31)
#endif
#ifdef HAS_MULTIPLYROW_16_NEON
ANY11C(MultiplyRow_16_Any_NEON,
MultiplyRow_16_NEON,
2,
2,
uint16_t,
uint16_t,
15)
#endif
#ifdef HAS_DIVIDEROW_16_AVX2
ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31)
#endif
#ifdef HAS_DIVIDEROW_16_NEON
ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
#endif
#undef ANY11C
// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
@ -1151,38 +1589,38 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#undef ANY11C
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
} \
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \
int width, int source_y_fraction) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
} \
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
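// source_y_fraction is the vertical blend weight (0..255) passed through to
// the InterpolateRow kernels: 0 keeps the first source row, 128 averages the
// two rows, and in general each byte is roughly
// (src0 * (256 - f) + src1 * f + 128) >> 8.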
#ifdef HAS_INTERPOLATEROW_AVX2
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_MMI
ANY11T(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
ANY11I(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
#endif
#undef ANY11T
#undef ANY11I
// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
@ -1340,6 +1778,32 @@ ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15)
#endif
#undef ANY12
// Any 1 plane of 16 bit to 2 planes with parameter
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
SIMD_ALIGNED(T temp[16 * 4]); \
memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \
} \
memcpy(temp, src_uv + n * 2, r * BPP * 2); \
ANY_SIMD(temp, temp + 32, temp + 48, depth, MASK + 1); \
memcpy(dst_u + n, temp + 32, r * BPP); \
memcpy(dst_v + n, temp + 48, r * BPP); \
}
#ifdef HAS_SPLITUVROW_16_AVX2
ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15)
#endif
#ifdef HAS_SPLITUVROW_16_NEON
ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
#endif
#undef ANY12PT
// Any 1 to 3. Outputs RGB planes.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
@ -1367,21 +1831,66 @@ ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#ifdef HAS_SPLITRGBROW_MMI
ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3)
#endif
#ifdef HAS_SPLITXRGBROW_SSE2
ANY13(SplitXRGBRow_Any_SSE2, SplitXRGBRow_SSE2, 4, 7)
#endif
#ifdef HAS_SPLITXRGBROW_SSSE3
ANY13(SplitXRGBRow_Any_SSSE3, SplitXRGBRow_SSSE3, 4, 7)
#endif
#ifdef HAS_SPLITXRGBROW_AVX2
ANY13(SplitXRGBRow_Any_AVX2, SplitXRGBRow_AVX2, 4, 15)
#endif
#ifdef HAS_SPLITXRGBROW_NEON
ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
#endif
// Any 1 to 4. Outputs ARGB planes.
#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
uint8_t* dst_b, uint8_t* dst_a, int width) { \
SIMD_ALIGNED(uint8_t temp[16 * 8]); \
memset(temp, 0, 16 * 4); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \
} \
memcpy(temp, src_ptr + n * BPP, r * BPP); \
ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, temp + 16 * 7, \
MASK + 1); \
memcpy(dst_r + n, temp + 16 * 4, r); \
memcpy(dst_g + n, temp + 16 * 5, r); \
memcpy(dst_b + n, temp + 16 * 6, r); \
memcpy(dst_a + n, temp + 16 * 7, r); \
}
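// temp layout above (e.g. MASK = 15): bytes [0..63] stage the remainder ARGB
// pixels, processed as one full 16-pixel block, and bytes [64..127] hold the
// four 16-byte output lanes (R, G, B, A), from which only r bytes each are
// copied out.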
#ifdef HAS_SPLITARGBROW_SSE2
ANY14(SplitARGBRow_Any_SSE2, SplitARGBRow_SSE2, 4, 7)
#endif
#ifdef HAS_SPLITARGBROW_SSSE3
ANY14(SplitARGBRow_Any_SSSE3, SplitARGBRow_SSSE3, 4, 7)
#endif
#ifdef HAS_SPLITARGBROW_AVX2
ANY14(SplitARGBRow_Any_AVX2, SplitARGBRow_AVX2, 4, 15)
#endif
#ifdef HAS_SPLITARGBROW_NEON
ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
#endif
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 4]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
@ -1528,17 +2037,17 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu, \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \
int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n); \
ANY_SIMD(src_ptr, src_stride, dst_vu, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \

@ -781,7 +781,7 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
}
}
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void ARGBToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@ -792,10 +792,10 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
@ -822,18 +822,18 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void ARGBToUVRow_MSA(const uint8_t* src_argb0,
void ARGBToUVRow_MSA(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* src_argb0_next = src_argb0 + src_stride_argb;
const uint8_t* src_argb_next = src_argb + src_stride_argb;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
@ -847,14 +847,14 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
for (x = 0; x < width; x += 32) {
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb, 112);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
@ -875,14 +875,14 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
reg3 = __msa_hadd_u_h(vec5, vec5);
reg4 = __msa_hadd_u_h(vec0, vec0);
reg5 = __msa_hadd_u_h(vec1, vec1);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 112);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
@ -945,8 +945,8 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
ST_UB(dst0, dst_u);
ST_UB(dst1, dst_v);
src_argb0 += 128;
src_argb0_next += 128;
src_argb += 128;
src_argb_next += 128;
dst_u += 16;
dst_v += 16;
}
@ -1173,7 +1173,7 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb,
}
}
void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
void ARGBMultiplyRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -1184,7 +1184,7 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
v8i16 zero = {0};
for (x = 0; x < width; x += 4) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
@ -1206,13 +1206,13 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
ST_UB(dst0, dst_argb);
src_argb0 += 16;
src_argb += 16;
src_argb1 += 16;
dst_argb += 16;
}
}
void ARGBAddRow_MSA(const uint8_t* src_argb0,
void ARGBAddRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -1220,20 +1220,20 @@ void ARGBAddRow_MSA(const uint8_t* src_argb0,
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 8) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
dst0 = __msa_adds_u_b(src0, src2);
dst1 = __msa_adds_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_argb, 16);
src_argb0 += 32;
src_argb += 32;
src_argb1 += 32;
dst_argb += 32;
}
}
void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
void ARGBSubtractRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -1241,14 +1241,14 @@ void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 8) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
dst0 = __msa_subs_u_b(src0, src2);
dst1 = __msa_subs_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_argb, 16);
src_argb0 += 32;
src_argb += 32;
src_argb1 += 32;
dst_argb += 32;
}
@ -1794,7 +1794,7 @@ void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
}
}
void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
v8u16 vec0, vec1, vec2, vec3;
@ -1809,9 +1809,9 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
@ -1830,12 +1830,12 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
ST_UB(dst0, dst_y);
src_argb0 += 48;
src_argb += 48;
dst_y += 16;
}
}
void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
v8u16 vec0, vec1, vec2, vec3;
@ -1850,9 +1850,9 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
@ -1871,7 +1871,7 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
ST_UB(dst0, dst_y);
src_argb0 += 48;
src_argb += 48;
dst_y += 16;
}
}
@ -2037,14 +2037,14 @@ void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
}
}
void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
void RGB24ToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
int64_t res0, res1;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
@ -2147,14 +2147,14 @@ void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
}
}
void RAWToUVRow_MSA(const uint8_t* src_rgb0,
void RAWToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
int64_t res0, res1;
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
@ -2446,7 +2446,7 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx,
}
}
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void ARGBToYJRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D);
@ -2454,19 +2454,19 @@ void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200);
@ -2474,19 +2474,19 @@ void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142);
@ -2494,19 +2494,19 @@ void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900);
@ -2514,26 +2514,26 @@ void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
v8u16 src0, src1, src2, src3, src4, src5, src6, src7;
v8u16 vec0, vec1, vec2, vec3;
v8u16 dst0, dst1, dst2, dst3;
@ -2658,14 +2658,14 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
}
}
void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
const uint8_t unused = 0xf;
v8u16 src0, src1, src2, src3;
v16u8 dst0, dst1;
@ -2693,14 +2693,14 @@ void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
}
}
void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
void ABGRToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
const uint8_t unused = 0xf;
v8u16 src0, src1, src2, src3;
v16u8 dst0, dst1;
@ -2728,14 +2728,14 @@ void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
}
}
void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
void RGBAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
const uint8_t unused = 0xf;
v8u16 src0, src1, src2, src3;
v16u8 dst0, dst1;
@ -3109,7 +3109,7 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
}
}
void ARGBBlendRow_MSA(const uint8_t* src_argb0,
void ARGBBlendRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -3123,8 +3123,8 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
v16i8 zero = {0};
for (x = 0; x < width; x += 8) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
@ -3168,7 +3168,7 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
dst0 = __msa_bmnz_v(dst0, const_255, mask);
dst1 = __msa_bmnz_v(dst1, const_255, mask);
ST_UB2(dst0, dst1, dst_argb, 16);
src_argb0 += 32;
src_argb += 32;
src_argb1 += 32;
dst_argb += 32;
}

@ -1336,6 +1336,327 @@ void ScalePlaneBilinearUp(int src_width,
}
}
// Scale plane, horizontally up by 2 times.
// Uses linear filter horizontally, nearest vertically.
// This is an optimized version for scaling a plane up to 2 times its
// original width, using linear interpolation.
// This is used to scale U and V planes of I422 to I444.
void ScalePlaneUp2_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
ScaleRowUp2_Linear_Any_C;
int i;
int y;
int dy;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
}
}
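// Vertical stepping above is 16.16 fixed point: starting y at (1 << 15) - 1
// (just below 0.5) centers the nearest-row choice. For example, src_height 3
// to dst_height 5 gives dy = 0.5, so the selected source rows are
// 0, 0, 1, 1, 2.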
// Scale plane, up by 2 times.
// This is an optimized version for scaling a plane up to 2 times its
// original size, using bilinear interpolation.
// This is used to scale U and V planes of I420 to I444.
void ScalePlaneUp2_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_Any_C;
int x;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
}
#endif
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
// TODO(fbarchard): Test performance of writing one row of destination at a
// time.
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
}
}
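// Row schedule above: the first destination row (and the last one when
// dst_height is even) is produced with both strides 0, so the edge source row
// blends with itself and the vertical filter degenerates to a copy, while the
// horizontal linear filter still applies. Loop iteration x reads source rows
// x and x + 1 and writes destination rows 2x + 1 and 2x + 2.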
// Scale at most 14 bit plane, horizontally up by 2 times.
// This is an optimized version for scaling a plane up to 2 times its
// original width, using linear interpolation.
// stride is in count of uint16_t.
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
void ScalePlaneUp2_12_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
int dst_width) = ScaleRowUp2_Linear_16_Any_C;
int i;
int y;
int dy;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
}
}
// Scale at most 12 bit plane, up by 2 times.
// This is an optimized version for scaling a plane up to 2 times its
// original size, using bilinear interpolation.
// stride is in count of uint16_t.
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
void ScalePlaneUp2_12_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C;
int x;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
}
#endif
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
}
}
void ScalePlaneUp2_16_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
int dst_width) = ScaleRowUp2_Linear_16_Any_C;
int i;
int y;
int dy;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
}
}
void ScalePlaneUp2_16_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C;
int x;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
}
#endif
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
}
}
void ScalePlaneBilinearUp_16(int src_width,
int src_height,
int dst_width,
@ -1627,6 +1948,17 @@ void ScalePlane(const uint8_t* src,
dst_stride, src, dst);
return;
}
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
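// The two blocks above catch exact 2x upscales: (dst + 1) / 2 == src accepts
// dst dimensions of 2 * src and 2 * src - 1. For example, scaling a 320x180
// chroma plane to 640x360 with kFilterBilinear dispatches to
// ScalePlaneUp2_Bilinear.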
if (filtering && dst_height > src_height) {
ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
@ -1710,6 +2042,17 @@ void ScalePlane_16(const uint16_t* src,
dst_stride, src, dst);
return;
}
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if (filtering && dst_height > src_height) {
ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
@ -1724,6 +2067,43 @@ void ScalePlane_16(const uint16_t* src,
dst_stride, src, dst);
}
LIBYUV_API
void ScalePlane_12(const uint16_t* src,
int src_stride,
int src_width,
int src_height,
uint16_t* dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src_stride = -src_stride;
}
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
dst_width, dst_height, filtering);
}
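// ScalePlane_12 only needs to special-case the 2x paths, whose 12-bit kernels
// keep the 9:3:3:1 bilinear sums within 16 bits; every other case is
// bit-depth safe in ScalePlane_16 and is delegated to it unchanged.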
// Scale an I420 image.
// This function in turn calls a scaling function for each plane.
@ -1749,7 +2129,7 @@ int I420Scale(const uint8_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
@ -1786,7 +2166,7 @@ int I420Scale_16(const uint16_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
@ -1801,6 +2181,43 @@ int I420Scale_16(const uint16_t* src_y,
return 0;
}
LIBYUV_API
int I420Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
return 0;
}
// Scale an I444 image.
// This function in turn calls a scaling function for each plane.
@ -1822,7 +2239,7 @@ int I444Scale(const uint8_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
@ -1855,7 +2272,7 @@ int I444Scale_16(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
@ -1870,6 +2287,39 @@ int I444Scale_16(const uint16_t* src_y,
return 0;
}
LIBYUV_API
int I444Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
dst_width, dst_height, filtering);
ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
dst_width, dst_height, filtering);
return 0;
}
// Scale an NV12 image.
// This function in turn calls a scaling function for each plane.
@ -1891,7 +2341,7 @@ int NV12Scale(const uint8_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_uv || src_width == 0 || src_height == 0 ||
if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
dst_width <= 0 || dst_height <= 0) {
return -1;

@ -609,6 +609,417 @@ CANY(ScaleARGBFilterCols_Any_MSA,
#endif
#undef CANY
// Scale up horizontally 2 times using linear filter.
#define SUH2LANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
dst_ptr[0] = src_ptr[0]; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(src_ptr, dst_ptr + 1, n); \
} \
C(src_ptr + (n / 2), dst_ptr + n + 1, r); \
} \
dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \
}
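// Boundary example: for dst_width = 8 the wrapper writes dst[0] = src[0] and
// dst[7] = src[3] directly, while the SIMD/C kernels fill the six interior
// pixels dst[1..6] (work_width = (8 - 1) & ~1 = 6).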
// Even the C versions need to be wrapped, because boundary pixels have to
// be handled differently
SUH2LANY(ScaleRowUp2_Linear_Any_C,
ScaleRowUp2_Linear_C,
ScaleRowUp2_Linear_C,
0,
uint8_t)
SUH2LANY(ScaleRowUp2_Linear_16_Any_C,
ScaleRowUp2_Linear_16_C,
ScaleRowUp2_Linear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
ScaleRowUp2_Linear_SSE2,
ScaleRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
ScaleRowUp2_Linear_SSSE3,
ScaleRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
ScaleRowUp2_Linear_12_SSSE3,
ScaleRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
ScaleRowUp2_Linear_16_SSE2,
ScaleRowUp2_Linear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
ScaleRowUp2_Linear_AVX2,
ScaleRowUp2_Linear_C,
31,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
ScaleRowUp2_Linear_12_AVX2,
ScaleRowUp2_Linear_16_C,
31,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
ScaleRowUp2_Linear_16_AVX2,
ScaleRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_NEON
SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
ScaleRowUp2_Linear_NEON,
ScaleRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
ScaleRowUp2_Linear_12_NEON,
ScaleRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON,
ScaleRowUp2_Linear_16_NEON,
ScaleRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#undef SUH2LANY
// Scale up 2 times using bilinear filter.
// This function produces 2 rows at a time.
#define SU2BLANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \
ptrdiff_t dst_stride, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
const PTYPE* sa = src_ptr; \
const PTYPE* sb = src_ptr + src_stride; \
PTYPE* da = dst_ptr; \
PTYPE* db = dst_ptr + dst_stride; \
da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \
db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(sa, sb - sa, da + 1, db - da, n); \
} \
C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \
} \
da[dst_width - 1] = \
(3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \
db[dst_width - 1] = \
(sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \
}
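// At the image border the macro above skips the horizontal filter and applies
// only the vertical 3:1 blend, e.g. da[0] = (3 * sa[0] + sb[0] + 2) >> 2
// weights the nearer source row by 3/4 and the farther one by 1/4.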
SU2BLANY(ScaleRowUp2_Bilinear_Any_C,
ScaleRowUp2_Bilinear_C,
ScaleRowUp2_Bilinear_C,
0,
uint8_t)
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C,
ScaleRowUp2_Bilinear_16_C,
ScaleRowUp2_Bilinear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
ScaleRowUp2_Bilinear_SSE2,
ScaleRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
ScaleRowUp2_Bilinear_12_SSSE3,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2,
ScaleRowUp2_Bilinear_16_SSE2,
ScaleRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
ScaleRowUp2_Bilinear_SSSE3,
ScaleRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
ScaleRowUp2_Bilinear_AVX2,
ScaleRowUp2_Bilinear_C,
31,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
ScaleRowUp2_Bilinear_12_AVX2,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
ScaleRowUp2_Bilinear_16_AVX2,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_NEON
SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
ScaleRowUp2_Bilinear_NEON,
ScaleRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
ScaleRowUp2_Bilinear_12_NEON,
ScaleRowUp2_Bilinear_16_C,
15,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON,
ScaleRowUp2_Bilinear_16_NEON,
ScaleRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#undef SU2BLANY
// Scale a bi-planar (interleaved UV) plane up horizontally 2 times using
// linear filter.
#define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
dst_ptr[0] = src_ptr[0]; \
dst_ptr[1] = src_ptr[1]; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(src_ptr, dst_ptr + 2, n); \
} \
C(src_ptr + n, dst_ptr + 2 * n + 2, r); \
} \
dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; \
dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \
}
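// Same boundary scheme as SUH2LANY above, but src_ptr and dst_ptr hold
// interleaved U,V pairs: dst_width counts pairs, every index advances in
// steps of 2, and both channels of the edge pairs are copied verbatim.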
SBUH2LANY(ScaleUVRowUp2_Linear_Any_C,
ScaleUVRowUp2_Linear_C,
ScaleUVRowUp2_Linear_C,
0,
uint8_t)
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C,
ScaleUVRowUp2_Linear_16_C,
ScaleUVRowUp2_Linear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
ScaleUVRowUp2_Linear_SSSE3,
ScaleUVRowUp2_Linear_C,
7,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
ScaleUVRowUp2_Linear_AVX2,
ScaleUVRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE2,
ScaleUVRowUp2_Linear_16_SSE2,
ScaleUVRowUp2_Linear_16_C,
3,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
ScaleUVRowUp2_Linear_16_AVX2,
ScaleUVRowUp2_Linear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_NEON
SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
ScaleUVRowUp2_Linear_NEON,
ScaleUVRowUp2_Linear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON,
ScaleUVRowUp2_Linear_16_NEON,
ScaleUVRowUp2_Linear_16_C,
15,
uint16_t)
#endif
#undef SBUH2LANY
// Scale a bi-planar (interleaved UV) plane up 2 times using bilinear filter.
// This function produces 2 rows at a time.
#define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE) \
void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \
ptrdiff_t dst_stride, int dst_width) { \
int work_width = (dst_width - 1) & ~1; \
int r = work_width & MASK; \
int n = work_width & ~MASK; \
const PTYPE* sa = src_ptr; \
const PTYPE* sb = src_ptr + src_stride; \
PTYPE* da = dst_ptr; \
PTYPE* db = dst_ptr + dst_stride; \
da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \
db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \
da[1] = (3 * sa[1] + sb[1] + 2) >> 2; \
db[1] = (sa[1] + 3 * sb[1] + 2) >> 2; \
if (work_width > 0) { \
if (n != 0) { \
SIMD(sa, sb - sa, da + 2, db - da, n); \
} \
C(sa + n, sb - sa, da + 2 * n + 2, db - da, r); \
} \
da[2 * dst_width - 2] = (3 * sa[((dst_width + 1) & ~1) - 2] + \
sb[((dst_width + 1) & ~1) - 2] + 2) >> \
2; \
db[2 * dst_width - 2] = (sa[((dst_width + 1) & ~1) - 2] + \
3 * sb[((dst_width + 1) & ~1) - 2] + 2) >> \
2; \
da[2 * dst_width - 1] = (3 * sa[((dst_width + 1) & ~1) - 1] + \
sb[((dst_width + 1) & ~1) - 1] + 2) >> \
2; \
db[2 * dst_width - 1] = (sa[((dst_width + 1) & ~1) - 1] + \
3 * sb[((dst_width + 1) & ~1) - 1] + 2) >> \
2; \
}
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C,
ScaleUVRowUp2_Bilinear_C,
ScaleUVRowUp2_Bilinear_C,
0,
uint8_t)
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C,
ScaleUVRowUp2_Bilinear_16_C,
ScaleUVRowUp2_Bilinear_16_C,
0,
uint16_t)
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
ScaleUVRowUp2_Bilinear_SSSE3,
ScaleUVRowUp2_Bilinear_C,
7,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
ScaleUVRowUp2_Bilinear_AVX2,
ScaleUVRowUp2_Bilinear_C,
15,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE2,
ScaleUVRowUp2_Bilinear_16_SSE2,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
ScaleUVRowUp2_Bilinear_16_AVX2,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
ScaleUVRowUp2_Bilinear_NEON,
ScaleUVRowUp2_Bilinear_C,
7,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON,
ScaleUVRowUp2_Bilinear_16_NEON,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#undef SBU2BLANY
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

@ -400,6 +400,95 @@ void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
}
}
// Sample position: (O is src sample position, X is dst sample position)
//
// v dst_ptr at here v stop at here
// X O X X O X X O X X O X X O X
// ^ src_ptr at here
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
}
}
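// Worked example: adjacent source samples 10 and 30 yield the output pair
// (10 * 3 + 30 + 2) >> 2 = 15 and (10 + 30 * 3 + 2) >> 2 = 25, i.e. the two
// destination samples sit at the 1/4 and 3/4 positions between the sources.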
// Sample position: (O is src sample position, X is dst sample position)
//
// src_ptr at here
// X v X X X X X X X X X
// O O O O O
// X X X X X X X X X X
// ^ dst_ptr at here ^ stop at here
// X X X X X X X X X X
// O O O O O
// X X X X X X X X X X
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint8_t* s = src_ptr;
const uint8_t* t = src_ptr + src_stride;
uint8_t* d = dst_ptr;
uint8_t* e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[2 * x + 0] =
(s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
d[2 * x + 1] =
(s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 0] =
(s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 1] =
(s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
}
}
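// The 9/3/3/1 weights above are the outer product of the 1D (3, 1) / 4 linear
// kernel with itself; the weights sum to 16, so adding 8 before the shift
// rounds to nearest.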
// Only suitable for at most 14 bit range.
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
}
}
// Only suitable for at most 12 bit range.
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint16_t* s = src_ptr;
const uint16_t* t = src_ptr + src_stride;
uint16_t* d = dst_ptr;
uint16_t* e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[2 * x + 0] =
(s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
d[2 * x + 1] =
(s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 0] =
(s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
e[2 * x + 1] =
(s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
}
}
// Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
@ -1111,6 +1200,122 @@ void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
}
}
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[4 * x + 0] =
(src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
dst_ptr[4 * x + 1] =
(src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
dst_ptr[4 * x + 2] =
(src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
dst_ptr[4 * x + 3] =
(src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
}
}
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint8_t* s = src_ptr;
const uint8_t* t = src_ptr + src_stride;
uint8_t* d = dst_ptr;
uint8_t* e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 1 + 8) >>
4;
d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 1 + 8) >>
4;
d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
t[2 * x + 2] * 3 + 8) >>
4;
d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
t[2 * x + 2] * 3 + 8) >>
4;
e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 9 + 8) >>
4;
e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 9 + 8) >>
4;
}
}
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
dst_ptr[4 * x + 0] =
(src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
dst_ptr[4 * x + 1] =
(src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
dst_ptr[4 * x + 2] =
(src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
dst_ptr[4 * x + 3] =
(src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
}
}
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
const uint16_t* s = src_ptr;
const uint16_t* t = src_ptr + src_stride;
uint16_t* d = dst_ptr;
uint16_t* e = dst_ptr + dst_stride;
int src_width = dst_width >> 1;
int x;
assert((dst_width % 2 == 0) && (dst_width >= 0));
for (x = 0; x < src_width; ++x) {
d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 1 + 8) >>
4;
d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 1 + 8) >>
4;
d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
t[2 * x + 2] * 3 + 8) >>
4;
d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
t[2 * x + 2] * 3 + 8) >>
4;
e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
t[2 * x + 3] * 3 + 8) >>
4;
e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
t[2 * x + 2] * 9 + 8) >>
4;
e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
t[2 * x + 3] * 9 + 8) >>
4;
}
}
// Scales a single row of pixels using point sampling.
void ScaleUVCols_C(uint8_t* dst_uv,
const uint8_t* src_uv,
