Update webrtc and its dependencies to M108

This commit is contained in:
Ali 2022-11-20 22:59:27 +04:00
parent dc085a2fe9
commit b62aeeb6cc
740 changed files with 68238 additions and 14755 deletions

View File

@ -2,3 +2,5 @@
# Name or Organization <email address>
Google Inc.
Ivan Pavlotskiy <ivan.pavlotskiy@lgepartner.com>

View File

@ -17,7 +17,6 @@ license {
],
license_text: [
"LICENSE",
"LICENSE_THIRD_PARTY",
"PATENTS",
],
}
@ -35,7 +34,6 @@ cc_library {
"source/compare.cc",
"source/compare_common.cc",
"source/compare_gcc.cc",
"source/compare_mmi.cc",
"source/compare_msa.cc",
"source/compare_neon.cc",
"source/compare_neon64.cc",
@ -55,14 +53,12 @@ cc_library {
"source/rotate_argb.cc",
"source/rotate_common.cc",
"source/rotate_gcc.cc",
"source/rotate_mmi.cc",
"source/rotate_msa.cc",
"source/rotate_neon.cc",
"source/rotate_neon64.cc",
"source/row_any.cc",
"source/row_common.cc",
"source/row_gcc.cc",
"source/row_mmi.cc",
"source/row_msa.cc",
"source/row_neon.cc",
"source/row_neon64.cc",
@ -71,10 +67,10 @@ cc_library {
"source/scale_argb.cc",
"source/scale_common.cc",
"source/scale_gcc.cc",
"source/scale_mmi.cc",
"source/scale_msa.cc",
"source/scale_neon.cc",
"source/scale_neon64.cc",
"source/scale_rgb.cc",
"source/scale_uv.cc",
"source/video_common.cc",
],
@ -134,6 +130,7 @@ cc_test {
"unit_test/rotate_argb_test.cc",
"unit_test/rotate_test.cc",
"unit_test/scale_argb_test.cc",
"unit_test/scale_rgb_test.cc",
"unit_test/scale_test.cc",
"unit_test/scale_uv_test.cc",
"unit_test/unit_test.cc",

View File

@ -9,7 +9,6 @@ LOCAL_SRC_FILES := \
source/compare.cc \
source/compare_common.cc \
source/compare_gcc.cc \
source/compare_mmi.cc \
source/compare_msa.cc \
source/compare_neon.cc \
source/compare_neon64.cc \
@ -27,7 +26,6 @@ LOCAL_SRC_FILES := \
source/rotate_argb.cc \
source/rotate_common.cc \
source/rotate_gcc.cc \
source/rotate_mmi.cc \
source/rotate_msa.cc \
source/rotate_neon.cc \
source/rotate_neon64.cc \
@ -35,7 +33,6 @@ LOCAL_SRC_FILES := \
source/row_any.cc \
source/row_common.cc \
source/row_gcc.cc \
source/row_mmi.cc \
source/row_msa.cc \
source/row_neon.cc \
source/row_neon64.cc \
@ -45,10 +42,10 @@ LOCAL_SRC_FILES := \
source/scale_argb.cc \
source/scale_common.cc \
source/scale_gcc.cc \
source/scale_mmi.cc \
source/scale_msa.cc \
source/scale_neon.cc \
source/scale_neon64.cc \
source/scale_rgb.cc \
source/scale_uv.cc \
source/scale_win.cc \
source/video_common.cc
@ -101,6 +98,7 @@ LOCAL_SRC_FILES := \
unit_test/rotate_argb_test.cc \
unit_test/rotate_test.cc \
unit_test/scale_argb_test.cc \
unit_test/scale_rgb_test.cc \
unit_test/scale_test.cc \
unit_test/scale_uv_test.cc \
unit_test/unit_test.cc \

View File

@ -27,6 +27,10 @@ config("libyuv_config") {
if (is_android && current_cpu != "arm64") {
ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ]
}
if (!libyuv_use_neon) {
defines = [ "LIBYUV_DISABLE_NEON" ]
}
}
# This target is built when no specific target is specified on the command line.
@ -65,10 +69,6 @@ group("libyuv") {
deps += [ ":libyuv_msa" ]
}
if (libyuv_use_mmi) {
deps += [ ":libyuv_mmi" ]
}
if (!is_ios && !libyuv_disable_jpeg) {
# Make sure that clients of libyuv link with libjpeg. This can't go in
# libyuv_internal because in Windows x64 builds that will generate a clang
@ -98,6 +98,7 @@ static_library("libyuv_internal") {
"include/libyuv/row.h",
"include/libyuv/scale.h",
"include/libyuv/scale_argb.h",
"include/libyuv/scale_rgb.h",
"include/libyuv/scale_row.h",
"include/libyuv/scale_uv.h",
"include/libyuv/version.h",
@ -134,6 +135,7 @@ static_library("libyuv_internal") {
"source/scale_argb.cc",
"source/scale_common.cc",
"source/scale_gcc.cc",
"source/scale_rgb.cc",
"source/scale_uv.cc",
"source/scale_win.cc",
"source/video_common.cc",
@ -174,9 +176,6 @@ static_library("libyuv_internal") {
"-ffp-contract=fast", # Enable fma vectorization for NEON.
]
}
if (!libyuv_use_mmi) {
defines += [ "LIBYUV_DISABLE_MMI" ]
}
}
if (libyuv_use_neon) {
@ -230,22 +229,6 @@ if (libyuv_use_msa) {
}
}
if (libyuv_use_mmi) {
static_library("libyuv_mmi") {
sources = [
# MMI Source Files
"source/compare_mmi.cc",
"source/rotate_mmi.cc",
"source/row_mmi.cc",
"source/scale_mmi.cc",
]
deps = [ ":libyuv_internal" ]
public_configs = [ ":libyuv_config" ]
}
}
if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") {
if (!is_win) {
@ -281,6 +264,7 @@ if (libyuv_include_tests) {
"unit_test/rotate_argb_test.cc",
"unit_test/rotate_test.cc",
"unit_test/scale_argb_test.cc",
"unit_test/scale_rgb_test.cc",
"unit_test/scale_test.cc",
"unit_test/scale_uv_test.cc",
"unit_test/unit_test.cc",

View File

@ -8,7 +8,7 @@ SET ( YUV_VER_MAJOR 0 )
SET ( YUV_VER_MINOR 0 )
SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} )
SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} )
MESSAGE ( "Building ver.: ${YUV_VERSION}" )
MESSAGE ( VERBOSE "Building ver.: ${YUV_VERSION}" )
# is this a 32-bit or 64-bit build?
IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
@ -45,7 +45,7 @@ ELSE ()
SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" )
ENDIF ()
ENDIF ()
MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" )
MESSAGE ( VERBOSE "Packaging for: ${YUV_SYSTEM_NAME}" )
# define all the variables needed by CPack to create .deb and .rpm packages
SET ( CPACK_PACKAGE_VENDOR "Frank Barchard" )

View File

@ -3,7 +3,7 @@
# Run with -DTEST=ON to build unit tests
PROJECT ( YUV C CXX ) # "C" is required even for C++ projects
CMAKE_MINIMUM_REQUIRED( VERSION 2.8 )
CMAKE_MINIMUM_REQUIRED( VERSION 2.8.12 )
OPTION( TEST "Built unit tests" OFF )
SET ( ly_base_dir ${PROJECT_SOURCE_DIR} )
@ -22,6 +22,10 @@ LIST ( SORT ly_unittest_sources )
INCLUDE_DIRECTORIES( BEFORE ${ly_inc_dir} )
if(MSVC)
ADD_DEFINITIONS ( -D_CRT_SECURE_NO_WARNINGS )
endif()
# this creates the static library (.a)
ADD_LIBRARY ( ${ly_lib_static} STATIC ${ly_source_files} )
@ -29,13 +33,19 @@ ADD_LIBRARY ( ${ly_lib_static} STATIC ${ly_source_files} )
ADD_LIBRARY ( ${ly_lib_shared} SHARED ${ly_source_files} )
SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES OUTPUT_NAME "${ly_lib_name}" )
SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES PREFIX "lib" )
if(WIN32)
SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" )
endif()
# this creates the conversion tool
ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc )
TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} )
# this creates the yuvconstants tool
ADD_EXECUTABLE ( yuvconstants ${ly_base_dir}/util/yuvconstants.c )
TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} )
INCLUDE ( FindJPEG )
find_package ( JPEG )
if (JPEG_FOUND)
include_directories( ${JPEG_INCLUDE_DIR} )
target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
@ -71,6 +81,12 @@ if(TEST)
if(NACL AND NACL_LIBC STREQUAL "newlib")
target_link_libraries(libyuv_unittest glibc-compat)
endif()
find_library(GFLAGS_LIBRARY gflags)
if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND")
target_link_libraries(libyuv_unittest gflags)
add_definitions(-DLIBYUV_USE_GFLAGS)
endif()
endif()

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,11 @@
mbonadei@chromium.org
fbarchard@chromium.org
magjed@chromium.org
pbos@chromium.org
wtc@google.com
jansson@google.com
per-file *.gn=mbonadei@chromium.org
per-file *.gn=mbonadei@chromium.org,jansson@google.com
per-file .gitignore=*
per-file AUTHORS=*
per-file DEPS=*
per-file PRESUBMIT.py=mbonadei@chromium.org
per-file PRESUBMIT.py=mbonadei@chromium.org,jansson@google.com

View File

@ -6,6 +6,8 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# Runs PRESUBMIT.py in py3 mode by git cl presubmit.
USE_PYTHON3 = True
def _CommonChecks(input_api, output_api):
"""Checks common to both upload and commit."""
@ -26,7 +28,8 @@ def _CommonChecks(input_api, output_api):
'E0611', # No package y in x
'W0232', # Class has no __init__ method
],
pylintrc='pylintrc'))
pylintrc='pylintrc',
version='2.7'))
return results

View File

@ -1,8 +1,9 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1789
Version: 1850
License: BSD
License File: LICENSE
Description:
libyuv is an open source project that includes YUV conversion and scaling functionality.

View File

@ -0,0 +1,17 @@
# Copyright 2022 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# Use default values for PartitionAlloc as standalone library from
# base/allocator/partition_allocator/build_overrides/partition_alloc.gni
use_partition_alloc_as_malloc_default = false
use_allocator_shim_default = false
enable_backup_ref_ptr_support_default = false
enable_mte_checked_ptr_support_default = false
put_ref_count_in_previous_slot_default = false
enable_backup_ref_ptr_slow_checks_default = false
enable_dangling_raw_ptr_checks_default = false

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
#!/usr/bin/env vpython3
# Copyright 2017 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
@ -18,8 +19,8 @@ landing that change, this script cleans up any old symlinks, avoiding annoying
manual cleanup needed in order to complete gclient sync.
"""
import argparse
import logging
import optparse
import os
import shelve
import subprocess
@ -32,14 +33,14 @@ LINKS_DB = 'links'
# Version management to make future upgrades/downgrades easier to support.
SCHEMA_VERSION = 1
class WebRTCLinkSetup(object):
class WebRTCLinkSetup():
def __init__(self, links_db, dry_run=False):
self._dry_run = dry_run
self._links_db = links_db
def CleanupLinks(self):
logging.debug('CleanupLinks')
for source, link_path in self._links_db.iteritems():
for source, link_path in self._links_db.items():
if source == 'SCHEMA_VERSION':
continue
if os.path.islink(link_path) or sys.platform.startswith('win'):
@ -71,15 +72,15 @@ def _initialize_database(filename):
def main():
parser = optparse.OptionParser()
parser.add_option('-d', '--dry-run', action='store_true', default=False,
help='Print what would be done, but don\'t perform any '
'operations. This will automatically set logging to '
'verbose.')
parser.add_option('-v', '--verbose', action='store_const',
const=logging.DEBUG, default=logging.INFO,
help='Print verbose output for debugging.')
options, _ = parser.parse_args()
p = argparse.ArgumentParser()
p.add_argument('-d', '--dry-run', action='store_true', default=False,
help='Print what would be done, but don\'t perform any '
'operations. This will automatically set logging to '
'verbose.')
p.add_argument('-v', '--verbose', action='store_const',
const=logging.DEBUG, default=logging.INFO,
help='Print verbose output for debugging.')
options = p.parse_args()
if options.dry_run:
options.verbose = logging.DEBUG

View File

@ -22,6 +22,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_F16C
LIBYUV_DISABLE_AVX512BW
LIBYUV_DISABLE_AVX512VL
LIBYUV_DISABLE_AVX512VNNI
LIBYUV_DISABLE_AVX512VBMI
LIBYUV_DISABLE_AVX512VBMI2
LIBYUV_DISABLE_AVX512VBITALG
@ -34,7 +35,10 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
## MIPS CPUs
LIBYUV_DISABLE_MSA
LIBYUV_DISABLE_MMI
## LOONGARCH CPUs
LIBYUV_DISABLE_LSX
LIBYUV_DISABLE_LASX
# Test Width/Height/Repeat

View File

@ -180,8 +180,8 @@ Running test with C code:
mips
gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false use_sysroot=false use_gold=false"
gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false use_sysroot=false use_gold=false"
gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" is_component_build=false use_sysroot=false use_gold=false"
gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" is_component_build=false use_sysroot=false use_gold=false"
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python
#
#!/usr/bin/env vpython3
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license

View File

@ -84,11 +84,6 @@ extern "C" {
#define HAS_SUMSQUAREERROR_MSA
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_HAMMINGDISTANCE_MMI
#define HAS_SUMSQUAREERROR_MMI
#endif
uint32_t HammingDistance_C(const uint8_t* src_a,
const uint8_t* src_b,
int count);
@ -107,9 +102,6 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a,
uint32_t HammingDistance_MSA(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_C(const uint8_t* src_a,
const uint8_t* src_b,
int count);
@ -125,9 +117,6 @@ uint32_t SumSquareError_NEON(const uint8_t* src_a,
uint32_t SumSquareError_MSA(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);

View File

@ -106,6 +106,62 @@ int I422ToI444(const uint8_t* src_y,
int width,
int height);
// Convert I422 to I210.
LIBYUV_API
int I422ToI210(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert MM21 to NV12.
LIBYUV_API
int MM21ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert MM21 to I420.
LIBYUV_API
int MM21ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert MM21 to YUY2
LIBYUV_API
int MM21ToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
@ -193,6 +249,23 @@ int I010ToI420(const uint16_t* src_y,
int width,
int height);
#define H210ToH420 I210ToI420
LIBYUV_API
int I210ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H210ToH422 I210ToI422
LIBYUV_API
int I210ToI422(const uint16_t* src_y,

View File

@ -14,6 +14,7 @@
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For enum RotationMode.
#include "libyuv/scale.h" // For enum FilterMode.
#ifdef __cplusplus
namespace libyuv {
@ -403,6 +404,32 @@ int U444ToABGR(const uint8_t* src_y,
int width,
int height);
// Convert I444 to RGB24.
LIBYUV_API
int I444ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert I444 to RAW.
LIBYUV_API
int I444ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert I010 to ARGB.
LIBYUV_API
int I010ToARGB(const uint16_t* src_y,
@ -1311,6 +1338,32 @@ int J420ToRAW(const uint8_t* src_y,
int width,
int height);
// Convert I422 to RGB24.
LIBYUV_API
int I422ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert I422 to RAW.
LIBYUV_API
int I422ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
LIBYUV_API
int I420ToRGB565(const uint8_t* src_y,
int src_stride_y,
@ -1494,6 +1547,20 @@ int I444ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
// Convert I444 to RGB24 with matrix.
LIBYUV_API
int I444ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 420 YUV to ARGB with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
@ -1864,7 +1931,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
int width,
int height);
// Convert I422 to RGBA with matrix.
// Convert I420 to RGBA with matrix.
LIBYUV_API
int I420ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y,
@ -1892,6 +1959,20 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
int width,
int height);
// Convert I422 to RGB24 with matrix.
LIBYUV_API
int I422ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to RGB565 with specified color matrix.
LIBYUV_API
int I420ToRGB565Matrix(const uint8_t* src_y,
@ -1906,6 +1987,20 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
int width,
int height);
// Convert I422 to RGB565 with specified color matrix.
LIBYUV_API
int I422ToRGB565Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to AR30 with matrix.
LIBYUV_API
int I420ToAR30Matrix(const uint8_t* src_y,
@ -1930,6 +2025,250 @@ int I400ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
// Convert I420 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I420ToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I422 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I422ToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I422 to RGB24 with matrix and UV filter mode.
LIBYUV_API
int I422ToRGB24MatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I420 to RGB24 with matrix and UV filter mode.
LIBYUV_API
int I420ToRGB24MatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I010 to AR30 with matrix and UV filter mode.
LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I210 to AR30 with matrix and UV filter mode.
LIBYUV_API
int I210ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I010 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I010ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I210 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I210ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I420 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I420AlphaToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert I422 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I422AlphaToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert I010 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I010AlphaToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert I210 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I210AlphaToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert P010 to ARGB with matrix and UV filter mode.
LIBYUV_API
int P010ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert P210 to ARGB with matrix and UV filter mode.
LIBYUV_API
int P210ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert P010 to AR30 with matrix and UV filter mode.
LIBYUV_API
int P010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert P210 to AR30 with matrix and UV filter mode.
LIBYUV_API
int P210ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "sample_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.

View File

@ -209,10 +209,10 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
@ -222,10 +222,10 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
@ -238,6 +238,41 @@ int ARGBToJ400(const uint8_t* src_argb,
int width,
int height);
// Convert ABGR to J420. (JPeg full range I420).
LIBYUV_API
int ABGRToJ420(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ABGR to J422.
LIBYUV_API
int ABGRToJ422(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ABGR to J400. (JPeg full range).
LIBYUV_API
int ABGRToJ400(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// Convert RGBA to J400. (JPeg full range).
LIBYUV_API
int RGBAToJ400(const uint8_t* src_rgba,
@ -327,6 +362,17 @@ int ARGBToUYVY(const uint8_t* src_argb,
int width,
int height);
// RAW to JNV21 full range NV21
LIBYUV_API
int RAWToJNV21(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -40,15 +40,20 @@ static const int kCpuHasF16C = 0x2000;
static const int kCpuHasGFNI = 0x4000;
static const int kCpuHasAVX512BW = 0x8000;
static const int kCpuHasAVX512VL = 0x10000;
static const int kCpuHasAVX512VBMI = 0x20000;
static const int kCpuHasAVX512VBMI2 = 0x40000;
static const int kCpuHasAVX512VBITALG = 0x80000;
static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
static const int kCpuHasAVX512VNNI = 0x20000;
static const int kCpuHasAVX512VBMI = 0x40000;
static const int kCpuHasAVX512VBMI2 = 0x80000;
static const int kCpuHasAVX512VBITALG = 0x100000;
static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x200000;
static const int kCpuHasMSA = 0x400000;
static const int kCpuHasMMI = 0x800000;
static const int kCpuHasMIPS = 0x400000;
static const int kCpuHasMSA = 0x800000;
// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x2000000;
static const int kCpuHasLSX = 0x4000000;
static const int kCpuHasLASX = 0x8000000;
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.

File diff suppressed because it is too large Load Diff

View File

@ -81,25 +81,35 @@
})
#endif // !(__mips == 64)
#else // !(__mips_isa_rev >= 6)
#define LW(psrc) \
({ \
const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
uint32_t val_m; \
asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
: [val_m] "=r"(val_m) \
: [psrc_lw_m] "m"(*psrc_lw_m)); \
val_m; \
#define LW(psrc) \
({ \
uint8_t* psrc_lw_m = (uint8_t*)(psrc); \
uint32_t val_lw_m; \
\
__asm__ volatile( \
"lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
"lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
\
: [val_lw_m] "=&r"(val_lw_m) \
: [psrc_lw_m] "r"(psrc_lw_m)); \
\
val_lw_m; \
})
#if (__mips == 64)
#define LD(psrc) \
({ \
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint64_t val_m = 0; \
asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
: [val_m] "=r"(val_m) \
: [psrc_ld_m] "m"(*psrc_ld_m)); \
val_m; \
#define LD(psrc) \
({ \
uint8_t* psrc_ld_m = (uint8_t*)(psrc); \
uint64_t val_ld_m = 0; \
\
__asm__ volatile( \
"ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
"ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
\
: [val_ld_m] "=&r"(val_ld_m) \
: [psrc_ld_m] "r"(psrc_ld_m)); \
\
val_ld_m; \
})
#else // !(__mips == 64)
#define LD(psrc) \

View File

@ -83,6 +83,50 @@ void SetPlane(uint8_t* dst_y,
int height,
uint32_t value);
// Convert a plane of tiles of 16 x H to linear.
LIBYUV_API
int DetilePlane(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height,
int tile_height);
// Convert a plane of 16 bit tiles of 16 x H to linear.
LIBYUV_API
int DetilePlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height,
int tile_height);
// Convert a UV plane of tiles of 16 x H into linear U and V planes.
LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
int tile_height);
// Convert a Y and UV plane of tiles into interlaced YUY2.
LIBYUV_API
void DetileToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height,
int tile_height);
// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,
@ -330,7 +374,26 @@ int I444Copy(const uint8_t* src_y,
int width,
int height);
// Copy I210 to I210.
#define I210ToI210 I210Copy
LIBYUV_API
int I210Copy(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy NV12. Supports inverting.
LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@ -343,6 +406,7 @@ int NV12Copy(const uint8_t* src_y,
int height);
// Copy NV21. Supports inverting.
LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
@ -421,6 +485,14 @@ int YUY2ToY(const uint8_t* src_yuy2,
int width,
int height);
LIBYUV_API
int UYVYToY(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
int I420ToI400(const uint8_t* src_y,
@ -943,6 +1015,21 @@ int InterpolatePlane(const uint8_t* src0,
int height,
int interpolation);
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane_16(const uint16_t* src0,
int src_stride0, // measured in 16 bit pixels
const uint16_t* src1,
int src_stride1,
uint16_t* dst,
int dst_stride,
int width,
int height,
int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API

View File

@ -49,6 +49,24 @@ int I420Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
// Rotate I422 frame.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I444 frame.
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
@ -83,6 +101,26 @@ int NV12ToI420Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
// Convert Android420 to I420 with rotation.
// "rotation" can be 0, 90, 180 or 270.
LIBYUV_API
int Android420ToI420Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode rotation);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8_t* src,
@ -119,38 +157,50 @@ void RotatePlane270(const uint8_t* src,
int height);
// Rotations for when U and V are interleaved.
// These functions take one input pointer and
// These functions take one UV input pointer and
// split the data into two buffers while
// rotating them. Deprecated.
// rotating them.
// width and height expected to be half size for NV12.
LIBYUV_API
void RotateUV90(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
int SplitRotateUV(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
LIBYUV_API
void RotateUV180(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
void SplitRotateUV90(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
LIBYUV_API
void RotateUV270(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
void SplitRotateUV180(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
LIBYUV_API
void SplitRotateUV270(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
@ -165,14 +215,14 @@ void TransposePlane(const uint8_t* src,
int height);
LIBYUV_API
void TransposeUV(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
void SplitTransposeUV(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
#ifdef __cplusplus
} // extern "C"

View File

@ -61,9 +61,9 @@ extern "C" {
#define HAS_TRANSPOSEUVWX16_MSA
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_TRANSPOSEWX8_MMI
#define HAS_TRANSPOSEUVWX8_MMI
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#define HAS_TRANSPOSEWX16_LSX
#define HAS_TRANSPOSEUVWX16_LSX
#endif
void TransposeWxH_C(const uint8_t* src,
@ -93,11 +93,6 @@ void TransposeWx8_SSSE3(const uint8_t* src,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
@ -108,6 +103,11 @@ void TransposeWx16_MSA(const uint8_t* src,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Any_NEON(const uint8_t* src,
int src_stride,
@ -119,11 +119,6 @@ void TransposeWx8_Any_SSSE3(const uint8_t* src,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Any_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
@ -134,6 +129,11 @@ void TransposeWx16_Any_MSA(const uint8_t* src,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_Any_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeUVWxH_C(const uint8_t* src,
int src_stride,
@ -172,13 +172,6 @@ void TransposeUVWx8_NEON(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
@ -186,6 +179,13 @@ void TransposeUVWx16_MSA(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_Any_SSE2(const uint8_t* src,
int src_stride,
@ -201,13 +201,6 @@ void TransposeUVWx8_Any_NEON(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_Any_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
@ -215,6 +208,13 @@ void TransposeUVWx16_Any_MSA(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
#ifdef __cplusplus
} // extern "C"

File diff suppressed because it is too large Load Diff

View File

@ -195,6 +195,72 @@ int I444Scale_12(const uint16_t* src_y,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:2 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce ever better
// quality image, at further expense of speed.
// Returns 0 if successful.
LIBYUV_API
int I422Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
LIBYUV_API
int I422Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
LIBYUV_API
int I422Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales an NV12 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is

View File

@ -0,0 +1,42 @@
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_RGB_H_
#define INCLUDE_LIBYUV_SCALE_RGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// RGB can be RAW, RGB24 or YUV24
// RGB scales 24 bit images by converting a row at a time to ARGB
// and using ARGB row functions to scale, then convert to RGB.
// TODO(fbarchard): Allow input/output formats to be specified.
LIBYUV_API
int RGBScale(const uint8_t* src_rgb,
int src_stride_rgb,
int src_width,
int src_height,
uint8_t* dst_rgb,
int dst_stride_rgb,
int dst_width,
int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_UV_H_

View File

@ -76,18 +76,18 @@ extern "C" {
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
#define HAS_SCALEROWUP2LINEAR_SSE2
#define HAS_SCALEROWUP2LINEAR_SSSE3
#define HAS_SCALEROWUP2BILINEAR_SSE2
#define HAS_SCALEROWUP2BILINEAR_SSSE3
#define HAS_SCALEROWUP2LINEAR_12_SSSE3
#define HAS_SCALEROWUP2BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2LINEAR_16_SSE2
#define HAS_SCALEROWUP2BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2LINEAR_SSSE3
#define HAS_SCALEUVROWUP2BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2LINEAR_16_SSE2
#define HAS_SCALEUVROWUP2BILINEAR_16_SSE2
#define HAS_SCALEROWUP2_LINEAR_SSE2
#define HAS_SCALEROWUP2_LINEAR_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_SSE2
#define HAS_SCALEROWUP2_BILINEAR_SSSE3
#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2_LINEAR_16_SSE2
#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
#endif
// The following are available for gcc/clang x86 platforms, but
@ -97,16 +97,16 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEUVROWDOWN2BOX_AVX2
#define HAS_SCALEROWUP2LINEAR_AVX2
#define HAS_SCALEROWUP2BILINEAR_AVX2
#define HAS_SCALEROWUP2LINEAR_12_AVX2
#define HAS_SCALEROWUP2BILINEAR_12_AVX2
#define HAS_SCALEROWUP2LINEAR_16_AVX2
#define HAS_SCALEROWUP2BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2LINEAR_AVX2
#define HAS_SCALEUVROWUP2BILINEAR_AVX2
#define HAS_SCALEUVROWUP2LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2BILINEAR_16_AVX2
#define HAS_SCALEROWUP2_LINEAR_AVX2
#define HAS_SCALEROWUP2_BILINEAR_AVX2
#define HAS_SCALEROWUP2_LINEAR_12_AVX2
#define HAS_SCALEROWUP2_BILINEAR_12_AVX2
#define HAS_SCALEROWUP2_LINEAR_16_AVX2
#define HAS_SCALEROWUP2_BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
#endif
// The following are available on all x86 platforms, but
@ -135,16 +135,16 @@ extern "C" {
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2LINEAR_NEON
#define HAS_SCALEROWUP2BILINEAR_NEON
#define HAS_SCALEROWUP2LINEAR_12_NEON
#define HAS_SCALEROWUP2BILINEAR_12_NEON
#define HAS_SCALEROWUP2LINEAR_16_NEON
#define HAS_SCALEROWUP2BILINEAR_16_NEON
#define HAS_SCALEUVROWUP2LINEAR_NEON
#define HAS_SCALEUVROWUP2BILINEAR_NEON
#define HAS_SCALEUVROWUP2LINEAR_16_NEON
#define HAS_SCALEUVROWUP2BILINEAR_16_NEON
#define HAS_SCALEROWUP2_LINEAR_NEON
#define HAS_SCALEROWUP2_BILINEAR_NEON
#define HAS_SCALEROWUP2_LINEAR_12_NEON
#define HAS_SCALEROWUP2_BILINEAR_12_NEON
#define HAS_SCALEROWUP2_LINEAR_16_NEON
#define HAS_SCALEROWUP2_BILINEAR_16_NEON
#define HAS_SCALEUVROWUP2_LINEAR_NEON
#define HAS_SCALEUVROWUP2_BILINEAR_NEON
#define HAS_SCALEUVROWUP2_LINEAR_16_NEON
#define HAS_SCALEUVROWUP2_BILINEAR_16_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@ -160,22 +160,17 @@ extern "C" {
#define HAS_SCALEROWDOWN4_MSA
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_FIXEDDIV1_MIPS
#define HAS_FIXEDDIV_MIPS
#define HAS_SCALEADDROW_16_MMI
#define HAS_SCALEADDROW_MMI
#define HAS_SCALEARGBCOLS_MMI
#define HAS_SCALEARGBCOLSUP2_MMI
#define HAS_SCALEARGBROWDOWN2_MMI
#define HAS_SCALEARGBROWDOWNEVEN_MMI
#define HAS_SCALECOLS_16_MMI
#define HAS_SCALECOLS_MMI
#define HAS_SCALEROWDOWN2_16_MMI
#define HAS_SCALEROWDOWN2_MMI
#define HAS_SCALEROWDOWN4_16_MMI
#define HAS_SCALEROWDOWN4_MMI
#define HAS_SCALEROWDOWN34_MMI
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#define HAS_SCALEARGBROWDOWN2_LSX
#define HAS_SCALEARGBROWDOWNEVEN_LSX
#define HAS_SCALEROWDOWN2_LSX
#define HAS_SCALEROWDOWN4_LSX
#define HAS_SCALEROWDOWN38_LSX
#define HAS_SCALEFILTERCOLS_LSX
#define HAS_SCALEADDROW_LSX
#define HAS_SCALEARGBCOLS_LSX
#define HAS_SCALEARGBFILTERCOLS_LSX
#define HAS_SCALEROWDOWN34_LSX
#endif
// Scale ARGB vertically with bilinear interpolation.
@ -205,6 +200,20 @@ void ScalePlaneVertical_16(int src_height,
int wpp,
enum FilterMode filtering);
void ScalePlaneVertical_16To8(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_argb,
uint8_t* dst_argb,
int x,
int y,
int dy,
int wpp,
int scale,
enum FilterMode filtering);
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@ -683,11 +692,11 @@ void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t* src_ptr,
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
@ -871,16 +880,6 @@ void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// ARGB Row functions
void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
@ -919,15 +918,15 @@ void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2_MMI(const uint8_t* src_argb,
void ScaleARGBRowDown2_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb,
void ScaleARGBRowDown2Linear_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb,
void ScaleARGBRowDown2Box_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
@ -967,15 +966,15 @@ void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2_Any_MMI(const uint8_t* src_ptr,
void ScaleARGBRowDown2_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Linear_Any_MMI(const uint8_t* src_ptr,
void ScaleARGBRowDown2Linear_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Box_Any_MMI(const uint8_t* src_ptr,
void ScaleARGBRowDown2Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
@ -1009,12 +1008,12 @@ void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb,
void ScaleARGBRowDownEven_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb,
void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
@ -1049,12 +1048,12 @@ void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEven_Any_MMI(const uint8_t* src_ptr,
void ScaleARGBRowDownEven_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEvenBox_Any_MMI(const uint8_t* src_ptr,
void ScaleARGBRowDownEvenBox_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
@ -1101,18 +1100,6 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Linear_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -1153,18 +1140,6 @@ void ScaleUVRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Linear_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Box_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEven_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
@ -1195,16 +1170,6 @@ void ScaleUVRowDownEvenBox_MSA(const uint8_t* src_ptr,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEvenBox_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
@ -1235,16 +1200,6 @@ void ScaleUVRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEven_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEvenBox_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
@ -1294,22 +1249,22 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_SSE41(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
@ -1561,10 +1516,6 @@ void ScaleRowDown34_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
@ -1618,10 +1569,6 @@ void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -1631,93 +1578,129 @@ void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2_MMI(const uint8_t* src_ptr,
void ScaleRowDown2_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr,
void ScaleRowDown2Linear_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_MMI(const uint8_t* src_ptr,
void ScaleRowDown2Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_Odd_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_MMI(const uint8_t* src_ptr,
void ScaleRowDown4_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown4Box_MMI(const uint8_t* src_ptr,
void ScaleRowDown4Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_16_MMI(const uint16_t* src_ptr,
uint32_t* dst_ptr,
int src_width);
void ScaleColsUp2_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleColsUp2_16_MMI(uint16_t* dst_ptr,
const uint16_t* src_ptr,
void ScaleRowDown38_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_2_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleAddRow_LSX(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleFilterCols_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleRowDown2_Any_MMI(const uint8_t* src_ptr,
void ScaleARGBFilterCols_LSX(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols_LSX(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleRowDown34_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_0_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
void ScaleRowDown34_1_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
void ScaleRowDown2_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_MMI(const uint8_t* src_ptr,
void ScaleRowDown2Linear_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_MMI(const uint8_t* src_ptr,
void ScaleRowDown2Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_MMI(const uint8_t* src_ptr,
void ScaleRowDown4_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_MMI(const uint8_t* src_ptr,
void ScaleRowDown4Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleAddRow_Any_MMI(const uint8_t* src_ptr,
void ScaleRowDown38_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleAddRow_Any_LSX(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
void ScaleFilterCols_Any_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_Any_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleRowDown34_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1789
#define LIBYUV_VERSION 1850
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -0,0 +1,3 @@
fbarchard@chromium.org
mbonadei@chromium.org
jansson@google.com

View File

@ -0,0 +1,13 @@
# Copyright 2018 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
USE_PYTHON3 = True
def CheckChangeOnUpload(input_api, output_api):
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
def CheckChangeOnCommit(input_api, output_api):
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)

View File

@ -0,0 +1,2 @@
This folder contains libyuv project-wide configurations
for chrome-infra services.

View File

@ -0,0 +1,6 @@
# This file is used by gcl and git-cl to get repository specific information.
CODE_REVIEW_SERVER: codereview.chromium.org
PROJECT: libyuv
GERRIT_HOST: True
VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/

View File

@ -0,0 +1,143 @@
# Auto-generated by lucicfg.
# Do not modify manually.
#
# For the schema of this file, see Config message:
# https://luci-config.appspot.com/schemas/projects:commit-queue.cfg
cq_status_host: "chromium-cq-status.appspot.com"
submit_options {
max_burst: 4
burst_delay {
seconds: 480
}
}
config_groups {
name: "config"
gerrit {
url: "https://chromium-review.googlesource.com"
projects {
name: "libyuv/libyuv"
ref_regexp: "refs/heads/infra/config"
}
}
verifiers {
gerrit_cq_ability {
committer_list: "project-libyuv-committers"
dry_run_access_list: "project-libyuv-tryjob-access"
}
tryjob {
builders {
name: "libyuv/try/presubmit"
}
retry_config {
single_quota: 1
global_quota: 2
failure_weight: 1
transient_failure_weight: 1
timeout_weight: 2
}
}
}
}
config_groups {
name: "master"
gerrit {
url: "https://chromium-review.googlesource.com"
projects {
name: "libyuv/libyuv"
ref_regexp: "refs/heads/main"
ref_regexp: "refs/heads/master"
}
}
verifiers {
gerrit_cq_ability {
committer_list: "project-libyuv-committers"
dry_run_access_list: "project-libyuv-tryjob-access"
}
tryjob {
builders {
name: "libyuv/try/android"
experiment_percentage: 100
}
builders {
name: "libyuv/try/android_arm64"
experiment_percentage: 100
}
builders {
name: "libyuv/try/android_rel"
experiment_percentage: 100
}
builders {
name: "libyuv/try/android_x64"
}
builders {
name: "libyuv/try/android_x86"
}
builders {
name: "libyuv/try/ios_arm64"
}
builders {
name: "libyuv/try/ios_arm64_rel"
}
builders {
name: "libyuv/try/linux"
}
builders {
name: "libyuv/try/linux_asan"
}
builders {
name: "libyuv/try/linux_gcc"
experiment_percentage: 100
}
builders {
name: "libyuv/try/linux_msan"
}
builders {
name: "libyuv/try/linux_rel"
}
builders {
name: "libyuv/try/linux_tsan2"
}
builders {
name: "libyuv/try/linux_ubsan"
}
builders {
name: "libyuv/try/linux_ubsan_vptr"
}
builders {
name: "libyuv/try/mac"
}
builders {
name: "libyuv/try/mac_asan"
}
builders {
name: "libyuv/try/mac_rel"
}
builders {
name: "libyuv/try/win"
}
builders {
name: "libyuv/try/win_clang"
}
builders {
name: "libyuv/try/win_clang_rel"
}
builders {
name: "libyuv/try/win_rel"
}
builders {
name: "libyuv/try/win_x64_clang_rel"
}
builders {
name: "libyuv/try/win_x64_rel"
}
retry_config {
single_quota: 1
global_quota: 2
failure_weight: 1
transient_failure_weight: 1
timeout_weight: 2
}
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
# Auto-generated by lucicfg.
# Do not modify manually.
#
# For the schema of this file, see ProjectConfig message:
# https://luci-config.appspot.com/schemas/projects:luci-logdog.cfg
reader_auth_groups: "all"
writer_auth_groups: "luci-logdog-chromium-writers"
archive_gs_bucket: "chromium-luci-logdog"

View File

@ -0,0 +1,246 @@
# Auto-generated by lucicfg.
# Do not modify manually.
#
# For the schema of this file, see Project message:
# https://luci-config.appspot.com/schemas/projects:luci-milo.cfg
consoles {
id: "main"
name: "libyuv Main Console"
repo_url: "https://chromium.googlesource.com/libyuv/libyuv"
refs: "regexp:refs/heads/main"
manifest_name: "REVISION"
builders {
name: "buildbucket/luci.libyuv.ci/Android ARM64 Debug"
category: "Android|Builder"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android Debug"
category: "Android|Builder"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android Release"
category: "Android|Builder"
short_name: "rel"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android32 x86 Debug"
category: "Android|Builder|x86"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android64 x64 Debug"
category: "Android|Builder|x64"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android Tester ARM32 Debug (Nexus 5X)"
category: "Android|Tester|ARM 32"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android Tester ARM32 Release (Nexus 5X)"
category: "Android|Tester|ARM 32"
short_name: "rel"
}
builders {
name: "buildbucket/luci.libyuv.ci/Android Tester ARM64 Debug (Nexus 5X)"
category: "Android|Tester|ARM 64"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux Asan"
category: "Linux"
short_name: "asan"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux MSan"
category: "Linux"
short_name: "msan"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux Tsan v2"
category: "Linux"
short_name: "tsan"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux UBSan"
category: "Linux|UBSan"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux UBSan vptr"
category: "Linux|UBSan"
short_name: "vptr"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux32 Debug"
category: "Linux|32"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux32 Release"
category: "Linux|32"
short_name: "rel"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux64 Debug"
category: "Linux|64"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Linux64 Release"
category: "Linux|64"
short_name: "rel"
}
builders {
name: "buildbucket/luci.libyuv.ci/Mac Asan"
category: "Mac"
short_name: "asan"
}
builders {
name: "buildbucket/luci.libyuv.ci/Mac64 Debug"
category: "Mac"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Mac64 Release"
category: "Mac"
short_name: "rel"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win32 Debug"
category: "Win|32|Debug"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win32 Debug (Clang)"
category: "Win|32|Debug"
short_name: "clg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win32 Release"
category: "Win|32|Release"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win32 Release (Clang)"
category: "Win|32|Release"
short_name: "clg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win64 Debug"
category: "Win|64|Debug"
short_name: "clg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win64 Debug (Clang)"
category: "Win|64|Debug"
short_name: "clg"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win64 Release"
category: "Win|64|Release"
}
builders {
name: "buildbucket/luci.libyuv.ci/Win64 Release (Clang)"
category: "Win|64|Release"
short_name: "clg"
}
builders {
name: "buildbucket/luci.libyuv.ci/iOS ARM64 Debug"
category: "iOS|ARM64"
short_name: "dbg"
}
builders {
name: "buildbucket/luci.libyuv.ci/iOS ARM64 Release"
category: "iOS|ARM64"
short_name: "rel"
}
include_experimental_builds: true
}
consoles {
id: "cron"
name: "Cron"
builders {
name: "buildbucket/luci.libyuv.cron/DEPS Autoroller"
}
builder_view_only: true
}
consoles {
id: "try"
name: "libyuv Try Builders"
builders {
name: "buildbucket/luci.libyuv.try/android"
}
builders {
name: "buildbucket/luci.libyuv.try/android_arm64"
}
builders {
name: "buildbucket/luci.libyuv.try/android_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/android_x64"
}
builders {
name: "buildbucket/luci.libyuv.try/android_x86"
}
builders {
name: "buildbucket/luci.libyuv.try/ios_arm64"
}
builders {
name: "buildbucket/luci.libyuv.try/ios_arm64_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/linux"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_asan"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_gcc"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_msan"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_tsan2"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_ubsan"
}
builders {
name: "buildbucket/luci.libyuv.try/linux_ubsan_vptr"
}
builders {
name: "buildbucket/luci.libyuv.try/mac"
}
builders {
name: "buildbucket/luci.libyuv.try/mac_asan"
}
builders {
name: "buildbucket/luci.libyuv.try/mac_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/win"
}
builders {
name: "buildbucket/luci.libyuv.try/win_clang"
}
builders {
name: "buildbucket/luci.libyuv.try/win_clang_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/win_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/win_x64_clang_rel"
}
builders {
name: "buildbucket/luci.libyuv.try/win_x64_rel"
}
builder_view_only: true
}
logo_url: "https://storage.googleapis.com/chrome-infra-public/logo/libyuv-logo.png"

View File

@ -0,0 +1,385 @@
# Auto-generated by lucicfg.
# Do not modify manually.
#
# For the schema of this file, see ProjectConfig message:
# https://luci-config.appspot.com/schemas/projects:luci-scheduler.cfg
job {
id: "Android ARM64 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android ARM64 Debug"
}
}
job {
id: "Android Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android Debug"
}
}
job {
id: "Android Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android Release"
}
}
job {
id: "Android Tester ARM32 Debug (Nexus 5X)"
realm: "ci"
acls {
role: TRIGGERER
granted_to: "libyuv-ci-builder@chops-service-accounts.iam.gserviceaccount.com"
}
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android Tester ARM32 Debug (Nexus 5X)"
}
}
job {
id: "Android Tester ARM32 Release (Nexus 5X)"
realm: "ci"
acls {
role: TRIGGERER
granted_to: "libyuv-ci-builder@chops-service-accounts.iam.gserviceaccount.com"
}
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android Tester ARM32 Release (Nexus 5X)"
}
}
job {
id: "Android Tester ARM64 Debug (Nexus 5X)"
realm: "ci"
acls {
role: TRIGGERER
granted_to: "libyuv-ci-builder@chops-service-accounts.iam.gserviceaccount.com"
}
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android Tester ARM64 Debug (Nexus 5X)"
}
}
job {
id: "Android32 x86 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android32 x86 Debug"
}
}
job {
id: "Android64 x64 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Android64 x64 Debug"
}
}
job {
id: "DEPS Autoroller"
realm: "cron"
schedule: "0 14 * * *"
acl_sets: "cron"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "cron"
builder: "DEPS Autoroller"
}
}
job {
id: "Linux Asan"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux Asan"
}
}
job {
id: "Linux MSan"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux MSan"
}
}
job {
id: "Linux Tsan v2"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux Tsan v2"
}
}
job {
id: "Linux UBSan"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux UBSan"
}
}
job {
id: "Linux UBSan vptr"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux UBSan vptr"
}
}
job {
id: "Linux32 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux32 Debug"
}
}
job {
id: "Linux32 Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux32 Release"
}
}
job {
id: "Linux64 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux64 Debug"
}
}
job {
id: "Linux64 Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Linux64 Release"
}
}
job {
id: "Mac Asan"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Mac Asan"
}
}
job {
id: "Mac64 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Mac64 Debug"
}
}
job {
id: "Mac64 Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Mac64 Release"
}
}
job {
id: "Win32 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win32 Debug"
}
}
job {
id: "Win32 Debug (Clang)"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win32 Debug (Clang)"
}
}
job {
id: "Win32 Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win32 Release"
}
}
job {
id: "Win32 Release (Clang)"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win32 Release (Clang)"
}
}
job {
id: "Win64 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win64 Debug"
}
}
job {
id: "Win64 Debug (Clang)"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win64 Debug (Clang)"
}
}
job {
id: "Win64 Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win64 Release"
}
}
job {
id: "Win64 Release (Clang)"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "Win64 Release (Clang)"
}
}
job {
id: "iOS ARM64 Debug"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "iOS ARM64 Debug"
}
}
job {
id: "iOS ARM64 Release"
realm: "ci"
acl_sets: "ci"
buildbucket {
server: "cr-buildbucket.appspot.com"
bucket: "ci"
builder: "iOS ARM64 Release"
}
}
trigger {
id: "master-gitiles-trigger"
realm: "ci"
acl_sets: "ci"
triggers: "Android ARM64 Debug"
triggers: "Android Debug"
triggers: "Android Release"
triggers: "Android32 x86 Debug"
triggers: "Android64 x64 Debug"
triggers: "Linux Asan"
triggers: "Linux MSan"
triggers: "Linux Tsan v2"
triggers: "Linux UBSan"
triggers: "Linux UBSan vptr"
triggers: "Linux32 Debug"
triggers: "Linux32 Release"
triggers: "Linux64 Debug"
triggers: "Linux64 Release"
triggers: "Mac Asan"
triggers: "Mac64 Debug"
triggers: "Mac64 Release"
triggers: "Win32 Debug"
triggers: "Win32 Debug (Clang)"
triggers: "Win32 Release"
triggers: "Win32 Release (Clang)"
triggers: "Win64 Debug"
triggers: "Win64 Debug (Clang)"
triggers: "Win64 Release"
triggers: "Win64 Release (Clang)"
triggers: "iOS ARM64 Debug"
triggers: "iOS ARM64 Release"
gitiles {
repo: "https://chromium.googlesource.com/libyuv/libyuv"
refs: "regexp:refs/heads/main"
}
}
acl_sets {
name: "ci"
acls {
role: OWNER
granted_to: "group:project-libyuv-admins"
}
acls {
granted_to: "group:all"
}
}
acl_sets {
name: "cron"
acls {
role: OWNER
granted_to: "group:project-libyuv-admins"
}
acls {
granted_to: "group:all"
}
}

View File

@ -0,0 +1,377 @@
#!/usr/bin/env lucicfg
# https://chromium.googlesource.com/infra/luci/luci-go/+/master/lucicfg/doc/

"""LUCI project configuration for libyuv CQ and CI."""

lucicfg.check_version("1.30.9")

# Canonical git mirror and the Gerrit host where reviews happen.
LIBYUV_GIT = "https://chromium.googlesource.com/libyuv/libyuv"
LIBYUV_GERRIT = "https://chromium-review.googlesource.com/libyuv/libyuv"

# Goma backend configurations, used as the "$build/goma" input property.
GOMA_BACKEND_RBE_PROD = {
    "server_host": "goma.chromium.org",
    "use_luci_auth": True,
}

GOMA_BACKEND_RBE_ATS_PROD = {
    "server_host": "goma.chromium.org",
    "use_luci_auth": True,
    "enable_ats": True,
}

# Disable ATS on Windows CQ/try.
GOMA_BACKEND_RBE_NO_ATS_PROD = {
    "server_host": "goma.chromium.org",
    "use_luci_auth": True,
    "enable_ats": False,
}

# Reclient ("$build/reclient" input property) configurations: CI builders use
# the trusted RBE instance, try builders the untrusted one.
RECLIENT_CI = {
    "instance": "rbe-webrtc-trusted",
    "metrics_project": "chromium-reclient-metrics",
}

RECLIENT_CQ = {
    "instance": "rbe-webrtc-untrusted",
    "metrics_project": "chromium-reclient-metrics",
}

# Use LUCI Scheduler BBv2 names and add Scheduler realms configs.
lucicfg.enable_experiment("crbug.com/1182002")

luci.builder.defaults.experiments.set(
    {
        "luci.recipes.use_python3": 100,
    },
)

# Only the files listed in tracked_files are emitted/updated by lucicfg.
lucicfg.config(
    lint_checks = ["default"],
    config_dir = ".",
    tracked_files = [
        "commit-queue.cfg",
        "cr-buildbucket.cfg",
        "luci-logdog.cfg",
        "luci-milo.cfg",
        "luci-scheduler.cfg",
        "project.cfg",
        "realms.cfg",
    ],
)

# Generates project.cfg
luci.project(
    name = "libyuv",
    buildbucket = "cr-buildbucket.appspot.com",
    logdog = "luci-logdog.appspot.com",
    milo = "luci-milo.appspot.com",
    notify = "luci-notify.appspot.com",
    scheduler = "luci-scheduler.appspot.com",
    swarming = "chromium-swarm.appspot.com",
    acls = [
        acl.entry(acl.PROJECT_CONFIGS_READER, groups = ["all"]),
        acl.entry(acl.LOGDOG_READER, groups = ["all"]),
        acl.entry(acl.LOGDOG_WRITER, groups = ["luci-logdog-chromium-writers"]),
        acl.entry(acl.SCHEDULER_READER, groups = ["all"]),
        acl.entry(acl.SCHEDULER_OWNER, groups = ["project-libyuv-admins"]),
        acl.entry(acl.BUILDBUCKET_READER, groups = ["all"]),
        acl.entry(acl.BUILDBUCKET_OWNER, groups = ["project-libyuv-admins"]),
    ],
    bindings = [
        luci.binding(
            roles = "role/swarming.taskTriggerer",  # for LED tasks.
            groups = "project-libyuv-admins",
        ),
        luci.binding(
            roles = "role/configs.validator",
            users = "libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com",
        ),
    ],
)

# Generates luci-logdog.cfg
luci.logdog(
    gs_bucket = "chromium-luci-logdog",
)

# Generates luci-scheduler.cfg
# Poller that watches the libyuv repo; CI builders are triggered off it below.
luci.gitiles_poller(
    name = "master-gitiles-trigger",
    bucket = "ci",
    repo = LIBYUV_GIT,
)

# Generates luci-milo.cfg
luci.milo(
    logo = "https://storage.googleapis.com/chrome-infra-public/logo/libyuv-logo.png",
)
def libyuv_ci_view(name, category, short_name):
    """Adds an entry for builder |name| to the "main" CI console."""
    entry_args = {
        "console_view": "main",
        "builder": name,
        "category": category,
        "short_name": short_name,
    }
    return luci.console_view_entry(**entry_args)
def libyuv_try_view(name):
    """Lists builder |name| on the "try" list view."""
    return luci.list_view_entry(builder = name, list_view = "try")
# Milo views: the main CI console, the cron list view, and the try list view.
luci.console_view(
    name = "main",
    title = "libyuv Main Console",
    include_experimental_builds = True,
    repo = LIBYUV_GIT,
)

luci.list_view(
    name = "cron",
    title = "Cron",
    entries = ["DEPS Autoroller"],
)

luci.list_view(
    name = "try",
    title = "libyuv Try Builders",
)
# Generates commit-queue.cfg
def libyuv_try_job_verifier(name, cq_group, experiment_percentage):
    """Registers builder |name| as a tryjob verifier in |cq_group|."""
    verifier_args = {
        "builder": name,
        "cq_group": cq_group,
        "experiment_percentage": experiment_percentage,
    }
    return luci.cq_tryjob_verifier(**verifier_args)
luci.cq(
    status_host = "chromium-cq-status.appspot.com",
    submit_max_burst = 4,
    submit_burst_delay = 8 * time.minute,
)

# CQ group for the main development branches.
luci.cq_group(
    name = "master",
    watch = [
        cq.refset(
            repo = LIBYUV_GERRIT,
            refs = ["refs/heads/main", "refs/heads/master"],
        ),
    ],
    acls = [
        acl.entry(acl.CQ_COMMITTER, groups = ["project-libyuv-committers"]),
        acl.entry(acl.CQ_DRY_RUNNER, groups = ["project-libyuv-tryjob-access"]),
    ],
    retry_config = cq.RETRY_ALL_FAILURES,
    cancel_stale_tryjobs = True,
)

# CQ group for changes to this LUCI configuration itself (infra/config branch).
luci.cq_group(
    name = "config",
    watch = [
        cq.refset(
            repo = LIBYUV_GERRIT,
            refs = ["refs/heads/infra/config"],
        ),
    ],
    acls = [
        acl.entry(acl.CQ_COMMITTER, groups = ["project-libyuv-committers"]),
        acl.entry(acl.CQ_DRY_RUNNER, groups = ["project-libyuv-tryjob-access"]),
    ],
    retry_config = cq.RETRY_ALL_FAILURES,
    cancel_stale_tryjobs = True,
)
# Generates cr-buildbucket.cfg
# Three buckets: "ci" (post-submit), "try" (CQ/manual tryjobs), "cron".
luci.bucket(
    name = "ci",
)

luci.bucket(
    name = "try",
    acls = [
        acl.entry(acl.BUILDBUCKET_TRIGGERER, groups = [
            "project-libyuv-tryjob-access",
            "service-account-cq",
        ]),
    ],
)

luci.bucket(
    name = "cron",
)
def get_os_dimensions(os):
    """Returns the Swarming bot dimensions for builders on |os|.

    A fresh dict is returned on every call, so callers may mutate the
    result without affecting later calls. Unknown OS values yield {}.
    """
    dims_for = {
        "android": {"device_type": "bullhead"},
        "ios": {"os": "Mac-10.15", "cpu": "x86-64"},
        "mac": {"os": "Mac-10.15", "cpu": "x86-64"},
        "win": {"os": "Windows-10", "cores": "8", "cpu": "x86-64"},
        "linux": {"os": "Ubuntu-18.04", "cores": "8", "cpu": "x86-64"},
    }
    return dict(dims_for.get(os, {}))
def get_os_properties(os, try_builder = False):
    """Returns the "$build/goma" input properties for builders on |os|.

    Windows try builders get the no-ATS backend (see
    GOMA_BACKEND_RBE_NO_ATS_PROD); Windows CI and Linux use ATS.
    Unknown OS values yield {}.
    """
    if os in ("android", "ios", "mac"):
        goma = GOMA_BACKEND_RBE_PROD
    elif os == "win":
        goma = GOMA_BACKEND_RBE_NO_ATS_PROD if try_builder else GOMA_BACKEND_RBE_ATS_PROD
    elif os == "linux":
        goma = GOMA_BACKEND_RBE_ATS_PROD
    else:
        return {}
    return {"$build/goma": goma}
def libyuv_ci_builder(name, dimensions, properties, triggered_by):
    """Defines a builder in the "ci" bucket running the libyuv recipe."""
    recipe = luci.recipe(
        name = "libyuv/libyuv",
        cipd_package = "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build",
        use_python3 = True,
    )
    return luci.builder(
        name = name,
        bucket = "ci",
        dimensions = dimensions,
        properties = properties,
        triggered_by = triggered_by,
        service_account = "libyuv-ci-builder@chops-service-accounts.iam.gserviceaccount.com",
        swarming_tags = ["vpython:native-python-wrapper"],
        execution_timeout = 180 * time.minute,
        build_numbers = True,
        executable = recipe,
    )
def libyuv_try_builder(name, dimensions, properties, recipe_name = "libyuv/libyuv"):
    """Defines a builder in the "try" bucket running |recipe_name|."""
    recipe = luci.recipe(
        name = recipe_name,
        cipd_package = "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build",
        use_python3 = True,
    )
    return luci.builder(
        name = name,
        bucket = "try",
        dimensions = dimensions,
        properties = properties,
        service_account = "libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com",
        swarming_tags = ["vpython:native-python-wrapper"],
        execution_timeout = 180 * time.minute,
        build_numbers = True,
        executable = recipe,
    )
def ci_builder(name, os, category, short_name = None):
    """Declares a CI builder together with its main-console entry.

    Builders with os == "android" (device testers) are triggered by the
    "Android Debug" compile builder; everything else is triggered directly
    by the gitiles poller.
    """
    dimensions = dict(get_os_dimensions(os))
    dimensions["pool"] = "luci.flex.ci"
    properties = dict(get_os_properties(os))
    properties.update({
        "$build/reclient": RECLIENT_CI,
        "builder_group": "client.libyuv",
    })
    if os == "android":
        triggered_by = ["Android Debug"]
    else:
        triggered_by = ["master-gitiles-trigger"]
    libyuv_ci_view(name, category, short_name)
    return libyuv_ci_builder(name, dimensions, properties, triggered_by)
def try_builder(name, os, experiment_percentage = None):
    """Declares a try builder and hooks it into the CQ.

    The special "presubmit" builder runs the run_presubmit recipe and
    verifies the "config" CQ group; every other builder verifies the
    "master" CQ group and is listed on the "try" view.
    """
    dimensions = dict(get_os_dimensions(os))
    dimensions["pool"] = "luci.flex.try"
    properties = dict(get_os_properties(os, try_builder = True))
    properties["$build/reclient"] = RECLIENT_CQ
    properties["builder_group"] = "tryserver.libyuv"
    if name != "presubmit":
        libyuv_try_job_verifier(name, "master", experiment_percentage)
        libyuv_try_view(name)
        return libyuv_try_builder(name, dimensions, properties)
    properties["repo_name"] = "libyuv"
    properties["runhooks"] = True
    libyuv_try_job_verifier(name, "config", experiment_percentage)
    return libyuv_try_builder(name, dimensions, properties, "run_presubmit")
# Stand-alone cron builder that rolls DEPS; not part of CI or CQ.
luci.builder(
    name = "DEPS Autoroller",
    bucket = "cron",
    service_account = "libyuv-ci-autoroll-builder@chops-service-accounts.iam.gserviceaccount.com",
    dimensions = {
        "pool": "luci.webrtc.cron",
        "os": "Linux",
        "cpu": "x86-64",
    },
    swarming_tags = ["vpython:native-python-wrapper"],
    execution_timeout = 120 * time.minute,
    build_numbers = True,
    schedule = "0 14 * * *",  # Once a day at 14:00 (cron syntax) -- NOT every 2 hours.
    executable = luci.recipe(
        name = "libyuv/roll_deps",
        cipd_package = "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build",
        use_python3 = True,
    ),
)
# CI builders shown on the main console.
ci_builder("Android ARM64 Debug", "linux", "Android|Builder", "dbg")
ci_builder("Android Debug", "linux", "Android|Builder", "dbg")
ci_builder("Android Release", "linux", "Android|Builder", "rel")
ci_builder("Android32 x86 Debug", "linux", "Android|Builder|x86", "dbg")
ci_builder("Android64 x64 Debug", "linux", "Android|Builder|x64", "dbg")
ci_builder("Android Tester ARM32 Debug (Nexus 5X)", "android", "Android|Tester|ARM 32", "dbg")
ci_builder("Android Tester ARM32 Release (Nexus 5X)", "android", "Android|Tester|ARM 32", "rel")
ci_builder("Android Tester ARM64 Debug (Nexus 5X)", "android", "Android|Tester|ARM 64", "dbg")
ci_builder("Linux Asan", "linux", "Linux", "asan")
ci_builder("Linux MSan", "linux", "Linux", "msan")
ci_builder("Linux Tsan v2", "linux", "Linux", "tsan")
ci_builder("Linux UBSan", "linux", "Linux|UBSan")
ci_builder("Linux UBSan vptr", "linux", "Linux|UBSan", "vptr")
ci_builder("Linux32 Debug", "linux", "Linux|32", "dbg")
ci_builder("Linux32 Release", "linux", "Linux|32", "rel")
ci_builder("Linux64 Debug", "linux", "Linux|64", "dbg")
ci_builder("Linux64 Release", "linux", "Linux|64", "rel")
ci_builder("Mac Asan", "mac", "Mac", "asan")
ci_builder("Mac64 Debug", "mac", "Mac", "dbg")
ci_builder("Mac64 Release", "mac", "Mac", "rel")
ci_builder("Win32 Debug", "win", "Win|32|Debug")
ci_builder("Win32 Debug (Clang)", "win", "Win|32|Debug", "clg")
ci_builder("Win32 Release", "win", "Win|32|Release")
ci_builder("Win32 Release (Clang)", "win", "Win|32|Release", "clg")
# NOTE(review): "Win64 Debug" gets short_name "clg" even though it is not the
# Clang builder -- unlike "Win32 Debug" / "Win64 Release", which get none.
# Looks like a copy-paste slip; confirm the intended console short names.
ci_builder("Win64 Debug", "win", "Win|64|Debug", "clg")
ci_builder("Win64 Debug (Clang)", "win", "Win|64|Debug", "clg")
ci_builder("Win64 Release", "win", "Win|64|Release")
ci_builder("Win64 Release (Clang)", "win", "Win|64|Release", "clg")
ci_builder("iOS ARM64 Debug", "ios", "iOS|ARM64", "dbg")
ci_builder("iOS ARM64 Release", "ios", "iOS|ARM64", "rel")

# Try builders verifying the CQ.
# TODO(crbug.com/1242847): make this not experimental.
try_builder("android", "android", experiment_percentage = 100)
try_builder("android_arm64", "android", experiment_percentage = 100)
try_builder("android_rel", "android", experiment_percentage = 100)
try_builder("android_x64", "linux")
try_builder("android_x86", "linux")
try_builder("ios_arm64", "ios")
try_builder("ios_arm64_rel", "ios")
try_builder("linux", "linux")
try_builder("linux_asan", "linux")
try_builder("linux_gcc", "linux", experiment_percentage = 100)
try_builder("linux_msan", "linux")
try_builder("linux_rel", "linux")
try_builder("linux_tsan2", "linux")
try_builder("linux_ubsan", "linux")
try_builder("linux_ubsan_vptr", "linux")
try_builder("mac", "mac")
try_builder("mac_asan", "mac")
try_builder("mac_rel", "mac")
try_builder("win", "win")
try_builder("win_clang", "win")
try_builder("win_clang_rel", "win")
try_builder("win_rel", "win")
try_builder("win_x64_clang_rel", "win")
try_builder("win_x64_rel", "win")
try_builder("presubmit", "linux")

View File

@ -0,0 +1,15 @@
# Auto-generated by lucicfg.
# Do not modify manually.
#
# For the schema of this file, see ProjectCfg message:
# https://luci-config.appspot.com/schemas/projects:project.cfg
name: "libyuv"
access: "group:all"
lucicfg {
version: "1.32.1"
package_dir: "."
config_dir: "."
entry_point: "main.star"
experiments: "crbug.com/1182002"
}

View File

@ -0,0 +1,83 @@
# Auto-generated by lucicfg.
# Do not modify manually.
#
# For the schema of this file, see RealmsCfg message:
# https://luci-config.appspot.com/schemas/projects:realms.cfg
realms {
name: "@root"
bindings {
role: "role/buildbucket.owner"
principals: "group:project-libyuv-admins"
}
bindings {
role: "role/buildbucket.reader"
principals: "group:all"
}
bindings {
role: "role/configs.reader"
principals: "group:all"
}
bindings {
role: "role/configs.validator"
principals: "user:libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com"
}
bindings {
role: "role/logdog.reader"
principals: "group:all"
}
bindings {
role: "role/logdog.writer"
principals: "group:luci-logdog-chromium-writers"
}
bindings {
role: "role/scheduler.owner"
principals: "group:project-libyuv-admins"
}
bindings {
role: "role/scheduler.reader"
principals: "group:all"
}
bindings {
role: "role/swarming.taskTriggerer"
principals: "group:project-libyuv-admins"
}
}
realms {
name: "ci"
bindings {
role: "role/buildbucket.builderServiceAccount"
principals: "user:libyuv-ci-builder@chops-service-accounts.iam.gserviceaccount.com"
}
bindings {
role: "role/scheduler.triggerer"
principals: "user:libyuv-ci-builder@chops-service-accounts.iam.gserviceaccount.com"
conditions {
restrict {
attribute: "scheduler.job.name"
values: "Android Tester ARM32 Debug (Nexus 5X)"
values: "Android Tester ARM32 Release (Nexus 5X)"
values: "Android Tester ARM64 Debug (Nexus 5X)"
}
}
}
}
realms {
name: "cron"
bindings {
role: "role/buildbucket.builderServiceAccount"
principals: "user:libyuv-ci-autoroll-builder@chops-service-accounts.iam.gserviceaccount.com"
}
}
realms {
name: "try"
bindings {
role: "role/buildbucket.builderServiceAccount"
principals: "user:libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com"
}
bindings {
role: "role/buildbucket.triggerer"
principals: "group:project-libyuv-tryjob-access"
principals: "group:service-account-cq"
}
}

View File

@ -6,9 +6,9 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("//build_overrides/build.gni")
import("//build/config/arm.gni")
import("//build/config/mips.gni")
import("//build_overrides/build.gni")
declare_args() {
libyuv_include_tests = !build_with_chromium

View File

@ -0,0 +1,162 @@
# Copyright 2011 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'includes': [
'libyuv.gypi',
],
# Make sure that if we are being compiled to an xcodeproj, nothing tries to
# include a .pch.
'xcode_settings': {
'GCC_PREFIX_HEADER': '',
'GCC_PRECOMPILE_PREFIX_HEADER': 'NO',
},
'variables': {
'use_system_libjpeg%': 0,
# Can be enabled if your jpeg has GYP support.
'libyuv_disable_jpeg%': 1,
# 'chromium_code' treats libyuv as internal and increases warning level.
'chromium_code': 1,
# clang compiler default variable usable by other apps that include libyuv.
'clang%': 0,
# Link-Time Optimizations.
'use_lto%': 0,
'mips_msa%': 0, # Default to msa off.
'build_neon': 0,
'build_msa': 0,
'conditions': [
['(target_arch == "armv7" or target_arch == "armv7s" or \
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
and (arm_neon == 1 or arm_neon_optional == 1)', {
'build_neon': 1,
}],
['(target_arch == "mipsel" or target_arch == "mips64el")\
and (mips_msa == 1)',
{
'build_msa': 1,
}],
],
},
'targets': [
{
'target_name': 'libyuv',
# Change type to 'shared_library' to build .so or .dll files.
'type': 'static_library',
'variables': {
'optimize': 'max', # enable O2 and ltcg.
},
# Allows libyuv.a redistributable library without external dependencies.
'standalone_static_library': 1,
'conditions': [
# Disable -Wunused-parameter
['clang == 1', {
'cflags': [
'-Wno-unused-parameter',
],
}],
['build_neon != 0', {
'defines': [
'LIBYUV_NEON',
],
'cflags!': [
'-mfpu=vfp',
'-mfpu=vfpv3',
'-mfpu=vfpv3-d16',
# '-mthumb', # arm32 not thumb
],
'conditions': [
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
['clang == 0 and use_lto == 1', {
'cflags!': [
'-flto',
'-ffat-lto-objects',
],
}],
# arm64 does not need -mfpu=neon option as neon is not optional
['target_arch != "arm64"', {
'cflags': [
'-mfpu=neon',
# '-marm', # arm32 not thumb
],
}],
],
}],
['build_msa != 0', {
'defines': [
'LIBYUV_MSA',
],
}],
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
],
'conditions': [
# Caveat system jpeg support may not support motion jpeg
[ 'use_system_libjpeg == 1', {
'dependencies': [
'<(DEPTH)/third_party/libjpeg/libjpeg.gyp:libjpeg',
],
}, {
'dependencies': [
'<(DEPTH)/third_party/libjpeg_turbo/libjpeg.gyp:libjpeg',
],
}],
[ 'use_system_libjpeg == 1', {
'link_settings': {
'libraries': [
'-ljpeg',
],
}
}],
],
}],
], #conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
# 'LIBYUV_DISABLE_DSPR2',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
# TODO(fbarchard): Make these into gyp defines.
],
'include_dirs': [
'include',
'.',
],
'direct_dependent_settings': {
'include_dirs': [
'include',
'.',
],
'conditions': [
['OS == "android" and target_arch == "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker64',
],
}],
['OS == "android" and target_arch != "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker',
],
}],
], #conditions
},
'sources': [
'<@(libyuv_sources)',
],
},
], # targets.
}
# Local Variables:
# tab-width:2
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=2 shiftwidth=2:

View File

@ -0,0 +1,85 @@
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'variables': {
'libyuv_sources': [
# includes.
'include/libyuv.h',
'include/libyuv/basic_types.h',
'include/libyuv/compare.h',
'include/libyuv/convert.h',
'include/libyuv/convert_argb.h',
'include/libyuv/convert_from.h',
'include/libyuv/convert_from_argb.h',
'include/libyuv/cpu_id.h',
'include/libyuv/macros_msa.h',
'include/libyuv/mjpeg_decoder.h',
'include/libyuv/planar_functions.h',
'include/libyuv/rotate.h',
'include/libyuv/rotate_argb.h',
'include/libyuv/rotate_row.h',
'include/libyuv/row.h',
'include/libyuv/scale.h',
'include/libyuv/scale_argb.h',
'include/libyuv/scale_rgb.h',
'include/libyuv/scale_row.h',
'include/libyuv/scale_uv.h',
'include/libyuv/version.h',
'include/libyuv/video_common.h',
# sources.
'source/compare.cc',
'source/compare_common.cc',
'source/compare_gcc.cc',
'source/compare_msa.cc',
'source/compare_neon.cc',
'source/compare_neon64.cc',
'source/compare_win.cc',
'source/convert.cc',
'source/convert_argb.cc',
'source/convert_from.cc',
'source/convert_from_argb.cc',
'source/convert_jpeg.cc',
'source/convert_to_argb.cc',
'source/convert_to_i420.cc',
'source/cpu_id.cc',
'source/mjpeg_decoder.cc',
'source/mjpeg_validate.cc',
'source/planar_functions.cc',
'source/rotate.cc',
'source/rotate_any.cc',
'source/rotate_argb.cc',
'source/rotate_common.cc',
'source/rotate_gcc.cc',
'source/rotate_msa.cc',
'source/rotate_neon.cc',
'source/rotate_neon64.cc',
'source/rotate_win.cc',
'source/row_any.cc',
'source/row_common.cc',
'source/row_gcc.cc',
'source/row_msa.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
'source/row_win.cc',
'source/scale.cc',
'source/scale_any.cc',
'source/scale_argb.cc',
'source/scale_common.cc',
'source/scale_gcc.cc',
'source/scale_msa.cc',
'source/scale_neon.cc',
'source/scale_neon64.cc',
'source/scale_rgb.cc',
'source/scale_uv.cc',
'source/scale_win.cc',
'source/video_common.cc',
],
}
}

View File

@ -13,7 +13,6 @@ LOCAL_OBJ_FILES := \
source/compare.o \
source/compare_common.o \
source/compare_gcc.o \
source/compare_mmi.o \
source/compare_msa.o \
source/compare_neon.o \
source/compare_neon64.o \
@ -34,7 +33,6 @@ LOCAL_OBJ_FILES := \
source/rotate_argb.o \
source/rotate_common.o \
source/rotate_gcc.o \
source/rotate_mmi.o \
source/rotate_msa.o \
source/rotate_neon.o \
source/rotate_neon64.o \
@ -42,7 +40,6 @@ LOCAL_OBJ_FILES := \
source/row_any.o \
source/row_common.o \
source/row_gcc.o \
source/row_mmi.o \
source/row_msa.o \
source/row_neon.o \
source/row_neon64.o \
@ -52,10 +49,10 @@ LOCAL_OBJ_FILES := \
source/scale_argb.o \
source/scale_common.o \
source/scale_gcc.o \
source/scale_mmi.o \
source/scale_msa.o \
source/scale_neon.o \
source/scale_neon64.o \
source/scale_rgb.o \
source/scale_uv.o \
source/scale_win.o \
source/video_common.o

View File

@ -149,11 +149,6 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a,
HammingDistance = HammingDistance_AVX2;
}
#endif
#if defined(HAS_HAMMINGDISTANCE_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
HammingDistance = HammingDistance_MMI;
}
#endif
#if defined(HAS_HAMMINGDISTANCE_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
HammingDistance = HammingDistance_MSA;
@ -211,11 +206,6 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a,
SumSquareError = SumSquareError_AVX2;
}
#endif
#if defined(HAS_SUMSQUAREERROR_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SumSquareError = SumSquareError_MMI;
}
#endif
#if defined(HAS_SUMSQUAREERROR_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
SumSquareError = SumSquareError_MSA;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -85,7 +85,8 @@ int I420ToI010(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0) {
return -1;
}
// Negative height means invert the image.
@ -129,7 +130,8 @@ int I420ToI012(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0) {
return -1;
}
// Negative height means invert the image.
@ -436,14 +438,6 @@ int I420ToYUY2(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToYUY2Row = I422ToYUY2Row_MMI;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
@ -452,6 +446,14 @@ int I420ToYUY2(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
if (IS_ALIGNED(width, 32)) {
I422ToYUY2Row = I422ToYUY2Row_LASX;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
@ -523,14 +525,6 @@ int I422ToUYVY(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
@ -539,6 +533,14 @@ int I422ToUYVY(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
I422ToUYVYRow = I422ToUYVYRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
@ -598,14 +600,6 @@ int I420ToUYVY(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
@ -614,6 +608,14 @@ int I420ToUYVY(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
I422ToUYVYRow = I422ToUYVYRow_LASX;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
@ -645,8 +647,7 @@ int I420ToNV12(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 ||
height == 0) {
if (!src_y || !src_u || !src_v || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@ -772,7 +773,8 @@ int ConvertFromI420(const uint8_t* y,
height);
break;
case FOURCC_NV12: {
uint8_t* dst_uv = dst_sample + width * height;
int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
uint8_t* dst_uv = dst_sample + dst_y_stride * height;
r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width, dst_uv,
dst_sample_stride ? dst_sample_stride : width, width,
@ -780,7 +782,8 @@ int ConvertFromI420(const uint8_t* y,
break;
}
case FOURCC_NV21: {
uint8_t* dst_vu = dst_sample + width * height;
int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
uint8_t* dst_vu = dst_sample + dst_y_stride * height;
r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width, dst_vu,
dst_sample_stride ? dst_sample_stride : width, width,

File diff suppressed because it is too large Load Diff

View File

@ -20,7 +20,7 @@
#endif
// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h>
#include <stdio.h> // For fopen()
#include <string.h>
#ifdef __cplusplus
@ -108,14 +108,14 @@ void CpuId(int eax, int ecx, int* cpu_info) {
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && (_MSC_VER < 1900)
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int GetXCR0() {
static int GetXCR0() {
int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT
@ -129,7 +129,7 @@ int GetXCR0() {
#define GetXCR0() 0
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && (_MSC_VER < 1900)
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
@ -174,18 +174,12 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
// Workaround early kernel without mmi in ASEs line.
if (strstr(cpuinfo_line, "Loongson-3")) {
flag |= kCpuHasMMI;
} else if (strstr(cpuinfo_line, "Loongson-2K")) {
flag |= kCpuHasMMI | kCpuHasMSA;
// Workaround early kernel without MSA in ASEs line.
if (strstr(cpuinfo_line, "Loongson-2K")) {
flag |= kCpuHasMSA;
}
}
if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
if (strstr(cpuinfo_line, "loongson-mmi") &&
strstr(cpuinfo_line, "loongson-ext")) {
flag |= kCpuHasMMI;
}
if (strstr(cpuinfo_line, "msa")) {
flag |= kCpuHasMSA;
}
@ -197,6 +191,27 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
return flag;
}
// TODO(fbarchard): Consider read_loongarch_ir().
#define LOONGARCH_CFG2 0x2
#define LOONGARCH_CFG2_LSX (1 << 6)
#define LOONGARCH_CFG2_LASX (1 << 7)
#if defined(__loongarch__)
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
int flag = 0x0;
uint32_t cfg2 = 0;
__asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2));
if (cfg2 & LOONGARCH_CFG2_LSX)
flag |= kCpuHasLSX;
if (cfg2 & LOONGARCH_CFG2_LASX)
flag |= kCpuHasLASX;
return flag;
}
#endif
static SAFEBUFFERS int GetCpuFlags(void) {
int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
@ -229,6 +244,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
@ -239,6 +255,10 @@ static SAFEBUFFERS int GetCpuFlags(void) {
cpu_info = MipsCpuCaps("/proc/cpuinfo");
cpu_info |= kCpuHasMIPS;
#endif
#if defined(__loongarch__) && defined(__linux__)
cpu_info = LoongarchCpuCaps();
cpu_info |= kCpuHasLOONGARCH;
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.

View File

@ -417,10 +417,6 @@ void init_source(j_decompress_ptr cinfo) {
boolean fill_input_buffer(j_decompress_ptr cinfo) {
BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
if (buf_vec->pos >= buf_vec->len) {
// Don't assert-fail when fuzzing.
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
assert(0 && "No more data");
#endif
// ERROR: No more data
return FALSE;
}

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ void TransposePlane(const uint8_t* src,
int width,
int height) {
int i = height;
#if defined(HAS_TRANSPOSEWX16_MSA)
#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx16_C;
#else
@ -37,17 +37,12 @@ void TransposePlane(const uint8_t* src,
int dst_stride, int width) = TransposeWx8_C;
#endif
#if defined(HAS_TRANSPOSEWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_MSA;
}
}
#else
#if defined(HAS_TRANSPOSEWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeWx8_NEON;
TransposeWx8 = TransposeWx8_Any_NEON;
if (IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_NEON;
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_SSSE3)
@ -58,11 +53,6 @@ void TransposePlane(const uint8_t* src,
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
TransposeWx8 = TransposeWx8_MMI;
}
#endif
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
@ -71,9 +61,24 @@ void TransposePlane(const uint8_t* src,
}
}
#endif
#endif /* defined(HAS_TRANSPOSEWX16_MSA) */
#if defined(HAS_TRANSPOSEWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_MSA;
}
}
#endif
#if defined(HAS_TRANSPOSEWX16_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
TransposeWx16 = TransposeWx16_Any_LSX;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_LSX;
}
}
#endif
#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
// Work across the source in 16x16 tiles
while (i >= 16) {
TransposeWx16(src, src_stride, dst, dst_stride, width);
@ -165,14 +170,6 @@ void RotatePlane180(const uint8_t* src,
}
}
#endif
#if defined(HAS_MIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MirrorRow = MirrorRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
MirrorRow = MirrorRow_MMI;
}
}
#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
@ -181,6 +178,14 @@ void RotatePlane180(const uint8_t* src,
}
}
#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
if (IS_ALIGNED(width, 64)) {
MirrorRow = MirrorRow_LASX;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
@ -201,11 +206,6 @@ void RotatePlane180(const uint8_t* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
CopyRow = IS_ALIGNED(width, 8) ? CopyRow_MMI : CopyRow_Any_MMI;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
@ -221,19 +221,23 @@ void RotatePlane180(const uint8_t* src,
}
LIBYUV_API
void TransposeUV(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
void SplitTransposeUV(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
int i = height;
#if defined(HAS_TRANSPOSEUVWX16_MSA)
void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a,
int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
int width) = TransposeUVWx16_C;
#elif defined(HAS_TRANSPOSEUVWX16_LSX)
void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a,
int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
int width) = TransposeUVWx16_C;
#else
void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a,
int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
@ -247,6 +251,13 @@ void TransposeUV(const uint8_t* src,
TransposeUVWx16 = TransposeUVWx16_MSA;
}
}
#elif defined(HAS_TRANSPOSEUVWX16_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
TransposeUVWx16 = TransposeUVWx16_Any_LSX;
if (IS_ALIGNED(width, 8)) {
TransposeUVWx16 = TransposeUVWx16_LSX;
}
}
#else
#if defined(HAS_TRANSPOSEUVWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
@ -261,14 +272,6 @@ void TransposeUV(const uint8_t* src,
}
}
#endif
#if defined(HAS_TRANSPOSEUVWX8_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
TransposeUVWx8 = TransposeUVWx8_Any_MMI;
if (IS_ALIGNED(width, 4)) {
TransposeUVWx8 = TransposeUVWx8_MMI;
}
}
#endif
#endif /* defined(HAS_TRANSPOSEUVWX16_MSA) */
#if defined(HAS_TRANSPOSEUVWX16_MSA)
@ -281,6 +284,16 @@ void TransposeUV(const uint8_t* src,
dst_b += 16; // Move over 8 columns.
i -= 16;
}
#elif defined(HAS_TRANSPOSEUVWX16_LSX)
// Work through the source in 8x8 tiles.
while (i >= 16) {
TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
width);
src += 16 * src_stride; // Go down 16 rows.
dst_a += 16; // Move over 8 columns.
dst_b += 16; // Move over 8 columns.
i -= 16;
}
#else
// Work through the source in 8x8 tiles.
while (i >= 8) {
@ -300,49 +313,49 @@ void TransposeUV(const uint8_t* src,
}
LIBYUV_API
void RotateUV90(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
void SplitRotateUV90(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
height);
SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
width, height);
}
LIBYUV_API
void RotateUV270(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
void SplitRotateUV270(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
dst_a += dst_stride_a * (width - 1);
dst_b += dst_stride_b * (width - 1);
dst_stride_a = -dst_stride_a;
dst_stride_b = -dst_stride_b;
TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
height);
SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
width, height);
}
// Rotate 180 is a horizontal and vertical flip.
LIBYUV_API
void RotateUV180(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
void SplitRotateUV180(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
int i;
void (*MirrorSplitUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
int width) = MirrorSplitUVRow_C;
@ -356,16 +369,16 @@ void RotateUV180(const uint8_t* src,
MirrorSplitUVRow = MirrorSplitUVRow_SSSE3;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 8)) {
MirrorSplitUVRow = MirrorSplitUVRow_MMI;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) {
MirrorSplitUVRow = MirrorSplitUVRow_MSA;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 32)) {
MirrorSplitUVRow = MirrorSplitUVRow_LSX;
}
#endif
dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1);
@ -378,6 +391,52 @@ void RotateUV180(const uint8_t* src,
}
}
// Deinterleave an NV12/NV21-style UV plane into separate U and V planes
// while applying the requested rotation.
// width and height are the chroma (half-size) dimensions for NV12.
// Returns 0 on success, -1 on bad arguments or unsupported rotation.
LIBYUV_API
int SplitRotateUV(const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int width,
                  int height,
                  enum RotationMode mode) {
  // Reject null planes and degenerate sizes up front.
  if (!src_uv || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }

  // A negative height flips the image vertically: start at the last row
  // and walk the source upwards with a negated stride.
  if (height < 0) {
    height = -height;
    src_uv += (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }

  if (mode == kRotate0) {
    // No rotation: a plain split is enough.
    SplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                 dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate90) {
    SplitRotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                    dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate270) {
    SplitRotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate180) {
    SplitRotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, width, height);
    return 0;
  }
  // Unknown rotation mode.
  return -1;
}
LIBYUV_API
int RotatePlane(const uint8_t* src,
int src_stride,
@ -435,8 +494,8 @@ int I420Rotate(const uint8_t* src_y,
enum RotationMode mode) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
if ((!src_y && dst_y) || !src_u || !src_v || width <= 0 || height == 0 ||
!dst_y || !dst_u || !dst_v) {
return -1;
}
@ -485,6 +544,80 @@ int I420Rotate(const uint8_t* src_y,
return -1;
}
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    // Recompute halfheight from the now-positive height; the value computed
    // above from a negative height is negative and unusable as a plane size.
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  switch (mode) {
    case kRotate0:
      // Straight copy: for 4:2:2 the luma is width x height and each chroma
      // plane is halfwidth x height.
      CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
      CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
      return 0;
    case kRotate90:
      // Rotating 4:2:2 by 90 degrees turns horizontal chroma subsampling
      // into vertical subsampling, so each chroma plane is first rotated
      // into the (larger) destination Y plane as temporary storage and
      // then rescaled to halfheight x width. The luma plane is rotated
      // last so the final Y data overwrites the scratch contents.
      // Use the caller-provided destination strides, and the same filter
      // for both chroma planes so U and V are resampled identically.
      RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height);
      ScalePlane(dst_y, height, height, halfwidth, dst_u, dst_stride_u,
                 halfheight, width, kFilterBilinear);
      RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height);
      ScalePlane(dst_y, height, height, halfwidth, dst_v, dst_stride_v,
                 halfheight, width, kFilterBilinear);
      RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      return 0;
    case kRotate270:
      // Same scheme as kRotate90, opposite rotation direction.
      RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height);
      ScalePlane(dst_y, height, height, halfwidth, dst_u, dst_stride_u,
                 halfheight, width, kFilterBilinear);
      RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height);
      ScalePlane(dst_y, height, height, halfwidth, dst_v, dst_stride_v,
                 halfheight, width, kFilterBilinear);
      RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      return 0;
    case kRotate180:
      // 180 degrees keeps the plane geometry; mirror each plane in place.
      RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                     height);
      RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                     height);
      return 0;
    default:
      break;
  }
  return -1;
}
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
int src_stride_y,
@ -500,7 +633,7 @@ int I444Rotate(const uint8_t* src_y,
int dst_stride_v,
int width,
int height,
enum libyuv::RotationMode mode) {
enum RotationMode mode) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
return -1;
@ -518,23 +651,23 @@ int I444Rotate(const uint8_t* src_y,
}
switch (mode) {
case libyuv::kRotate0:
case kRotate0:
// copy frame
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate90:
case kRotate90:
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate270:
case kRotate270:
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate180:
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
@ -584,18 +717,18 @@ int NV12ToI420Rotate(const uint8_t* src_y,
width, height);
case kRotate90:
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
dst_stride_v, halfwidth, halfheight);
SplitRotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
dst_stride_v, halfwidth, halfheight);
return 0;
case kRotate270:
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
dst_stride_v, halfwidth, halfheight);
SplitRotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
dst_stride_v, halfwidth, halfheight);
return 0;
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
dst_stride_v, halfwidth, halfheight);
SplitRotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
dst_stride_v, halfwidth, halfheight);
return 0;
default:
break;
@ -603,6 +736,98 @@ int NV12ToI420Rotate(const uint8_t* src_y,
return -1;
}
// Copy every src_pixel_stride_uv-th byte from src_u into the tightly
// packed dst_u row. Used to de-interleave one chroma channel when the
// source pixel stride is neither 1 (planar) nor a recognized NV12/NV21
// layout.
static void SplitPixels(const uint8_t* src_u,
                        int src_pixel_stride_uv,
                        uint8_t* dst_u,
                        int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_u[i * src_pixel_stride_uv];
  }
}
// Convert Android420 to I420 with Rotate.
// Android420 is a Y plane plus U/V planes that carry an extra per-pixel
// stride; pixel stride 1 is plain I420, pixel stride 2 with adjacent U/V
// pointers is NV12 or NV21 (see the vu_off checks below). Other layouts
// are only handled for rotation 0.
// Returns 0 on success, -1 on bad arguments or an unsupported combination.
LIBYUV_API
int Android420ToI420Rotate(const uint8_t* src_y,
                           int src_stride_y,
                           const uint8_t* src_u,
                           int src_stride_u,
                           const uint8_t* src_v,
                           int src_stride_v,
                           int src_pixel_stride_uv,
                           uint8_t* dst_y,
                           int dst_stride_y,
                           uint8_t* dst_u,
                           int dst_stride_u,
                           uint8_t* dst_v,
                           int dst_stride_v,
                           int width,
                           int height,
                           enum RotationMode rotation) {
  int y;
  // Signed distance between the V and U base pointers. A value of +1 or -1
  // (with pixel stride 2 and equal row strides) means U and V alias one
  // interleaved NV12/NV21 buffer.
  const ptrdiff_t vu_off = src_v - src_u;
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  // dst_y may be NULL to skip the luma plane; src_y is only required when
  // luma is actually written.
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image: point each source plane at its
  // last row and negate the strides. halfheight must be recomputed from the
  // positive height before it is used for the chroma row offsets.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (dst_y) {
    RotatePlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height,
                rotation);
  }
  // Copy UV planes - I420 (pixel stride 1: chroma already planar).
  if (src_pixel_stride_uv == 1) {
    RotatePlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight,
                rotation);
    RotatePlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight,
                rotation);
    return 0;
  }
  // Split UV planes - NV21 (V first in memory; note dst_v/dst_u order is
  // swapped so the first interleaved channel lands in dst_v).
  if (src_pixel_stride_uv == 2 && vu_off == -1 &&
      src_stride_u == src_stride_v) {
    SplitRotateUV(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u,
                  halfwidth, halfheight, rotation);
    return 0;
  }
  // Split UV planes - NV12 (U first in memory).
  if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) {
    SplitRotateUV(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v,
                  halfwidth, halfheight, rotation);
    return 0;
  }
  // General pixel stride: de-interleave row by row. Only rotation 0 is
  // supported for this layout.
  if (rotation == 0) {
    for (y = 0; y < halfheight; ++y) {
      SplitPixels(src_u, src_pixel_stride_uv, dst_u, halfwidth);
      SplitPixels(src_v, src_pixel_stride_uv, dst_v, halfwidth);
      src_u += src_stride_u;
      src_v += src_stride_v;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    return 0;
  }
  // unsupported type and/or rotation.
  return -1;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -35,15 +35,15 @@ TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_MMI
TANY(TransposeWx8_Any_MMI, TransposeWx8_MMI, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#ifdef HAS_TRANSPOSEWX16_MSA
TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
#endif
#ifdef HAS_TRANSPOSEWX16_LSX
TANY(TransposeWx16_Any_LSX, TransposeWx16_LSX, 15)
#endif
#undef TANY
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
@ -65,12 +65,12 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_MMI
TUVANY(TransposeUVWx8_Any_MMI, TransposeUVWx8_MMI, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_MSA
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_LSX
TUVANY(TransposeUVWx16_Any_LSX, TransposeUVWx16_LSX, 7)
#endif
#undef TUVANY
#ifdef __cplusplus

View File

@ -8,11 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */
@ -52,14 +53,6 @@ static int ARGBTranspose(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MMI;
if (IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_MMI;
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA;
@ -68,6 +61,14 @@ static int ARGBTranspose(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_LSX;
if (IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_LSX;
}
}
#endif
for (i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
@ -147,14 +148,6 @@ static int ARGBRotate180(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBMirrorRow = ARGBMirrorRow_MMI;
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
@ -163,6 +156,14 @@ static int ARGBRotate180(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
if (IS_ALIGNED(width, 16)) {
ARGBMirrorRow = ARGBMirrorRow_LASX;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;

View File

@ -0,0 +1,243 @@
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Copyright (c) 2022 Loongson Technology Corporation Limited
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate_row.h"
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#include "libyuv/loongson_intrinsics.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Interleave helpers used by the transpose kernels below. Each ILVLH_x
// macro interleaves two input pairs at element size x (byte, halfword,
// word, doubleword): out0/out1 get the low/high interleave of (in0, in1)
// and out2/out3 the low/high interleave of (in2, in3).
#define ILVLH_B(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    DUP2_ARG2(__lsx_vilvl_b, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_b, in1, in0, in3, in2, out1, out3); \
  }

#define ILVLH_H(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    DUP2_ARG2(__lsx_vilvl_h, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_h, in1, in0, in3, in2, out1, out3); \
  }

#define ILVLH_W(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    DUP2_ARG2(__lsx_vilvl_w, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_w, in1, in0, in3, in2, out1, out3); \
  }

#define ILVLH_D(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    DUP2_ARG2(__lsx_vilvl_d, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_d, in1, in0, in3, in2, out1, out3); \
  }

// Store four vectors at consecutive rows of _dst and advance _dst by four
// rows (_strideN are precomputed multiples of _stride).
#define LSX_ST_4(_dst0, _dst1, _dst2, _dst3, _dst, _stride, _stride2, \
                 _stride3, _stride4)                                  \
  {                                                                   \
    __lsx_vst(_dst0, _dst, 0);                                        \
    __lsx_vstx(_dst1, _dst, _stride);                                 \
    __lsx_vstx(_dst2, _dst, _stride2);                                \
    __lsx_vstx(_dst3, _dst, _stride3);                                \
    _dst += _stride4;                                                 \
  }

// Store two vectors at consecutive rows of _dst and advance _dst by two
// rows.
#define LSX_ST_2(_dst0, _dst1, _dst, _stride, _stride2) \
  {                                                     \
    __lsx_vst(_dst0, _dst, 0);                          \
    __lsx_vstx(_dst1, _dst, _stride);                   \
    _dst += _stride2;                                   \
  }
// Transpose a Wx16 tile by treating it as two stacked Wx8 tiles: source
// rows 0-7 become output columns 0-7 and rows 8-15 become columns 8-15.
void TransposeWx16_C(const uint8_t* src,
                     int src_stride,
                     uint8_t* dst,
                     int dst_stride,
                     int width) {
  const uint8_t* src_lower = src + 8 * src_stride;
  uint8_t* dst_right = dst + 8;
  TransposeWx8_C(src, src_stride, dst, dst_stride, width);
  TransposeWx8_C(src_lower, src_stride, dst_right, dst_stride, width);
}
// Transpose a Wx16 interleaved-UV tile as two stacked Wx8 tiles; the
// lower 8 source rows fill output columns 8-15 of both destination
// planes.
void TransposeUVWx16_C(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst_a,
                       int dst_stride_a,
                       uint8_t* dst_b,
                       int dst_stride_b,
                       int width) {
  const uint8_t* src_lower = src + 8 * src_stride;
  TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
                   width);
  TransposeUVWx8_C(src_lower, src_stride, dst_a + 8, dst_stride_a, dst_b + 8,
                   dst_stride_b, width);
}
// LSX transpose of a (width x 16) byte tile.
// Each outer iteration consumes a 16x16 sub-tile: 16 rows of 16 bytes are
// loaded, transposed via byte/halfword/word/doubleword interleaves, and
// written as 16 output rows. width is assumed to be a multiple of 16 (the
// _Any_ wrapper handles remainders).
void TransposeWx16_LSX(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width) {
  int x;
  int len = width / 16;  // Number of 16-column tiles to process.
  uint8_t* s;
  // Precomputed row offsets for the indexed load/store intrinsics.
  int src_stride2 = src_stride << 1;
  int src_stride3 = src_stride + src_stride2;
  int src_stride4 = src_stride2 << 1;
  int dst_stride2 = dst_stride << 1;
  int dst_stride3 = dst_stride + dst_stride2;
  int dst_stride4 = dst_stride2 << 1;
  __m128i src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;

  for (x = 0; x < len; x++) {
    s = (uint8_t*)src;
    // Rows 0-3: interleave bytes then halfwords into reg0..reg3.
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    // Rows 4-7, then merge with rows 0-3 at word granularity (res0..res7
    // now hold the transposed top 8 rows).
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    ILVLH_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
    ILVLH_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
    // Rows 8-11 into reg0..reg3 (registers reused for the lower half).
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    // Rows 12-15 into reg4..reg7.
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    // Combine upper and lower halves at word/doubleword granularity and
    // store the 16 transposed output rows, four at a time.
    res8 = __lsx_vilvl_w(reg4, reg0);
    res9 = __lsx_vilvh_w(reg4, reg0);
    ILVLH_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    res8 = __lsx_vilvl_w(reg5, reg1);
    res9 = __lsx_vilvh_w(reg5, reg1);
    ILVLH_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    res8 = __lsx_vilvl_w(reg6, reg2);
    res9 = __lsx_vilvh_w(reg6, reg2);
    ILVLH_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    res8 = __lsx_vilvl_w(reg7, reg3);
    res9 = __lsx_vilvh_w(reg7, reg3);
    ILVLH_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    // Advance to the next 16 source columns.
    src += 16;
  }
}
// LSX transpose of a (width x 16) interleaved-UV tile, splitting the two
// channels into dst_a and dst_b (same contract as TransposeUVWx16_C).
// width is in UV pixel pairs; each outer iteration consumes 8 pairs
// (16 bytes) across 16 rows, so width is assumed to be a multiple of 8
// (the _Any_ wrapper handles remainders).
void TransposeUVWx16_LSX(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width) {
  int x;
  int len = width / 8;  // Number of 8-pair (16-byte) tiles to process.
  uint8_t* s;
  // Precomputed row offsets for the indexed load/store intrinsics.
  int src_stride2 = src_stride << 1;
  int src_stride3 = src_stride + src_stride2;
  int src_stride4 = src_stride2 << 1;
  int dst_stride_a2 = dst_stride_a << 1;
  int dst_stride_b2 = dst_stride_b << 1;
  __m128i src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;

  for (x = 0; x < len; x++) {
    s = (uint8_t*)src;
    // Rows 0-3: interleave bytes then halfwords into reg0..reg3.
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    // Rows 4-7, then merge with rows 0-3 at word granularity.
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    ILVLH_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
    ILVLH_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
    // Rows 8-11 into reg0..reg3 (registers reused for the lower half).
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    // Rows 12-15 into reg4..reg7.
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    // Combine halves; after the doubleword interleave, dst0/dst2 carry
    // one channel (stored to dst_a) and dst1/dst3 the other (dst_b),
    // two rows per store group.
    res8 = __lsx_vilvl_w(reg4, reg0);
    res9 = __lsx_vilvh_w(reg4, reg0);
    ILVLH_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    res8 = __lsx_vilvl_w(reg5, reg1);
    res9 = __lsx_vilvh_w(reg5, reg1);
    ILVLH_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    res8 = __lsx_vilvl_w(reg6, reg2);
    res9 = __lsx_vilvh_w(reg6, reg2);
    ILVLH_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    res8 = __lsx_vilvl_w(reg7, reg3);
    res9 = __lsx_vilvh_w(reg7, reg3);
    ILVLH_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    // Advance to the next 16 source bytes (8 UV pairs).
    src += 16;
  }
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,6 @@
#include "libyuv/row.h"
#include <assert.h>
#include <stdio.h>
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h"
@ -22,21 +21,31 @@ namespace libyuv {
extern "C" {
#endif
// This macro control YUV to RGB using unsigned math to extend range of
// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA
// Macros to enable unlimited data for each colorspace
// LIBYUV_UNLIMITED_BT601
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020
// The following macro from row_win makes the C code match the row_win code,
// which is 7 bit fixed point for ARGBToI420:
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
!defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
defined(_MSC_VER) && !defined(__clang__) && \
(defined(_M_IX86) || defined(_M_X64))
#define LIBYUV_RGB7 1
#endif
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86)
#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#define LIBYUV_ATTENUATE_DUP 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
@ -337,8 +346,8 @@ void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
*(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11);
*(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11);
dst_rgb += 4;
src_argb += 8;
}
@ -362,8 +371,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 3;
uint8_t r1 = src_argb[6] >> 3;
uint8_t a1 = src_argb[7] >> 7;
*(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
(b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
*(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
*(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15);
dst_rgb += 4;
src_argb += 8;
}
@ -387,8 +396,8 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 4;
uint8_t r1 = src_argb[6] >> 4;
uint8_t a1 = src_argb[7] >> 4;
*(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
(b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
*(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
*(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12);
dst_rgb += 4;
src_argb += 8;
}
@ -430,10 +439,14 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_ar64[0] = src_argb[0] * 0x0101;
dst_ar64[1] = src_argb[1] * 0x0101;
dst_ar64[2] = src_argb[2] * 0x0101;
dst_ar64[3] = src_argb[3] * 0x0101;
uint16_t b = src_argb[0] * 0x0101;
uint16_t g = src_argb[1] * 0x0101;
uint16_t r = src_argb[2] * 0x0101;
uint16_t a = src_argb[3] * 0x0101;
dst_ar64[0] = b;
dst_ar64[1] = g;
dst_ar64[2] = r;
dst_ar64[3] = a;
dst_ar64 += 4;
src_argb += 4;
}
@ -442,10 +455,14 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_ab64[0] = src_argb[2] * 0x0101;
dst_ab64[1] = src_argb[1] * 0x0101;
dst_ab64[2] = src_argb[0] * 0x0101;
dst_ab64[3] = src_argb[3] * 0x0101;
uint16_t b = src_argb[0] * 0x0101;
uint16_t g = src_argb[1] * 0x0101;
uint16_t r = src_argb[2] * 0x0101;
uint16_t a = src_argb[3] * 0x0101;
dst_ab64[0] = r;
dst_ab64[1] = g;
dst_ab64[2] = b;
dst_ab64[3] = a;
dst_ab64 += 4;
src_argb += 4;
}
@ -454,10 +471,14 @@ void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_argb[0] = src_ar64[0] >> 8;
dst_argb[1] = src_ar64[1] >> 8;
dst_argb[2] = src_ar64[2] >> 8;
dst_argb[3] = src_ar64[3] >> 8;
uint8_t b = src_ar64[0] >> 8;
uint8_t g = src_ar64[1] >> 8;
uint8_t r = src_ar64[2] >> 8;
uint8_t a = src_ar64[3] >> 8;
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = a;
dst_argb += 4;
src_ar64 += 4;
}
@ -466,10 +487,14 @@ void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_argb[0] = src_ab64[2] >> 8;
dst_argb[1] = src_ab64[1] >> 8;
dst_argb[2] = src_ab64[0] >> 8;
dst_argb[3] = src_ab64[3] >> 8;
uint8_t r = src_ab64[0] >> 8;
uint8_t g = src_ab64[1] >> 8;
uint8_t b = src_ab64[2] >> 8;
uint8_t a = src_ab64[3] >> 8;
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = a;
dst_argb += 4;
src_ab64 += 4;
}
@ -522,6 +547,7 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
@ -530,7 +556,7 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
}
#else
// TODO(fbarchard): Add rounding to SIMD and use this
// TODO(fbarchard): Add rounding to x86 SIMD and use this
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
@ -539,6 +565,7 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
}
#endif
// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
@ -551,7 +578,6 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
// ARGBToY_C and ARGBToUV_C
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
@ -772,6 +798,7 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
@ -1457,7 +1484,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
#ifdef LIBYUV_UNLIMITED_DATA
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
@ -1511,7 +1538,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
#ifdef LIBYUV_UNLIMITED_DATA
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
@ -1565,7 +1592,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
// KR = 0.2627; KB = 0.0593
// U and V contributions to R,G,B.
#ifdef LIBYUV_UNLIMITED_DATA
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
@ -1644,8 +1671,8 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
#define CALC_RGB16 \
int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
int8_t ui = u; \
int8_t vi = v; \
int8_t ui = (int8_t)u; \
int8_t vi = (int8_t)v; \
ui -= 0x80; \
vi -= 0x80; \
int b16 = y1 + (ui * ub); \
@ -1696,7 +1723,7 @@ static __inline void YuvPixel10_16(uint16_t y,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
uint32_t y32 = y << 6;
uint32_t y32 = (y << 6) | (y >> 4);
u = clamp255(u >> 2);
v = clamp255(v >> 2);
CALC_RGB16;
@ -1715,7 +1742,7 @@ static __inline void YuvPixel12_16(int16_t y,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
uint32_t y32 = y << 4;
uint32_t y32 = (y << 4) | (y >> 8);
u = clamp255(u >> 4);
v = clamp255(v >> 4);
CALC_RGB16;
@ -1836,6 +1863,23 @@ void I444ToARGBRow_C(const uint8_t* src_y,
}
}
void I444ToRGB24Row_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
src_y += 1;
src_u += 1;
src_v += 1;
rgb_buf += 3; // Advance 1 pixel.
}
}
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
@ -2273,8 +2317,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
b1 = b1 >> 4;
g1 = g1 >> 4;
r1 = r1 >> 4;
*(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
(g1 << 20) | (r1 << 24) | 0xf000f000;
*(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
*(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000;
src_y += 2;
src_u += 1;
src_v += 1;
@ -2311,8 +2355,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 3;
r1 = r1 >> 3;
*(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
(g1 << 21) | (r1 << 26) | 0x80008000;
*(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
*(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
src_y += 2;
src_u += 1;
src_v += 1;
@ -2349,8 +2393,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
*(uint32_t*)(dst_rgb565) =
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
*(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); // for ubsan
*(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
src_y += 2;
src_u += 1;
src_v += 1;
@ -2476,8 +2520,8 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
*(uint32_t*)(dst_rgb565) =
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
*(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);
*(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
src_y += 2;
src_uv += 2;
dst_rgb565 += 4; // Advance 2 pixels.
@ -2689,6 +2733,74 @@ void MergeUVRow_C(const uint8_t* src_u,
}
}
void DetileRow_C(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width) {
int x;
for (x = 0; x < width - 15; x += 16) {
memcpy(dst, src, 16);
dst += 16;
src += src_tile_stride;
}
if (width & 15) {
memcpy(dst, src, width & 15);
}
}
void DetileRow_16_C(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
int width) {
int x;
for (x = 0; x < width - 15; x += 16) {
memcpy(dst, src, 16 * sizeof(uint16_t));
dst += 16;
src += src_tile_stride;
}
if (width & 15) {
memcpy(dst, src, (width & 15) * sizeof(uint16_t));
}
}
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
for (x = 0; x < width - 15; x += 16) {
SplitUVRow_C(src_uv, dst_u, dst_v, 8);
dst_u += 8;
dst_v += 8;
src_uv += src_tile_stride;
}
if (width & 15) {
SplitUVRow_C(src_uv, dst_u, dst_v, ((width & 15) + 1) / 2);
}
}
void DetileToYUY2_C(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
for (int x = 0; x < width - 15; x += 16) {
for (int i = 0; i < 8; i++) {
dst_yuy2[0] = src_y[0];
dst_yuy2[1] = src_uv[0];
dst_yuy2[2] = src_y[1];
dst_yuy2[3] = src_uv[1];
dst_yuy2 += 4;
src_y += 2;
src_uv += 2;
}
src_y += src_y_tile_stride - 16;
src_uv += src_uv_tile_stride - 16;
}
}
void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,
@ -2936,6 +3048,9 @@ void DivideRow_16_C(const uint16_t* src_y,
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
void Convert16To8Row_C(const uint16_t* src_y,
uint8_t* dst_y,
int scale,
@ -2945,7 +3060,7 @@ void Convert16To8Row_C(const uint16_t* src_y,
assert(scale <= 32768);
for (x = 0; x < width; ++x) {
dst_y[x] = clamp255((src_y[x] * scale) >> 16);
dst_y[x] = C16TO8(src_y[x], scale);
}
}
@ -2998,6 +3113,21 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2,
}
}
// Filter 2 rows of YUY2 UV's (422) into UV (NV12).
void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_uv,
int width) {
// Output a row of UV values, filtering 2 rows of YUY2.
int x;
for (x = 0; x < width; x += 2) {
dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
src_yuy2 += 4;
dst_uv += 2;
}
}
// Copy row of YUY2 UV's (422) into U and V (422).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
@ -3151,11 +3281,11 @@ void BlendPlaneRow_C(const uint8_t* src0,
}
#undef UBLEND
#if defined(__aarch64__) || defined(__arm__)
#define ATTENUATE(f, a) (f * a + 128) >> 8
#else
#if LIBYUV_ATTENUATE_DUP
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
#else
#define ATTENUATE(f, a) (f * a + 128) >> 8
#endif
// Multiply source RGB by alpha and store to destination.
@ -3242,6 +3372,14 @@ const uint32_t fixed_invtbl8[256] = {
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
#if LIBYUV_UNATTENUATE_DUP
// This code mimics the Intel SIMD version for better testability.
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
#else
#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
#endif
// mimics the Intel SIMD code for exactness.
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
@ -3252,13 +3390,11 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb,
uint32_t r = src_argb[2];
const uint32_t a = src_argb[3];
const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
b = (b * ia) >> 8;
g = (g * ia) >> 8;
r = (r * ia) >> 8;
// Clamping should not be necessary but is free in assembly.
dst_argb[0] = clamp255(b);
dst_argb[1] = clamp255(g);
dst_argb[2] = clamp255(r);
dst_argb[0] = UNATTENUATE(b, ia);
dst_argb[1] = UNATTENUATE(g, ia);
dst_argb[2] = UNATTENUATE(r, ia);
dst_argb[3] = a;
src_argb += 4;
dst_argb += 4;
@ -3289,8 +3425,11 @@ void CumulativeSumToAverageRow_C(const int32_t* tl,
int area,
uint8_t* dst,
int count) {
float ooa = 1.0f / area;
float ooa;
int i;
assert(area != 0);
ooa = 1.0f / area;
for (i = 0; i < count; ++i) {
dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
@ -3346,6 +3485,17 @@ static void HalfRow_16_C(const uint16_t* src_uv,
}
}
static void HalfRow_16To8_C(const uint16_t* src_uv,
ptrdiff_t src_uv_stride,
uint8_t* dst_uv,
int scale,
int width) {
int x;
for (x = 0; x < width; ++x) {
dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale);
}
}
// C version 2x2 -> 2x1.
void InterpolateRow_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
@ -3356,6 +3506,9 @@ void InterpolateRow_C(uint8_t* dst_ptr,
int y0_fraction = 256 - y1_fraction;
const uint8_t* src_ptr1 = src_ptr + src_stride;
int x;
assert(source_y_fraction >= 0);
assert(source_y_fraction < 256);
if (y1_fraction == 0) {
memcpy(dst_ptr, src_ptr, width);
return;
@ -3364,21 +3517,16 @@ void InterpolateRow_C(uint8_t* dst_ptr,
HalfRow_C(src_ptr, src_stride, dst_ptr, width);
return;
}
for (x = 0; x < width - 1; x += 2) {
dst_ptr[0] =
(src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
dst_ptr[1] =
(src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
src_ptr += 2;
src_ptr1 += 2;
dst_ptr += 2;
}
if (width & 1) {
for (x = 0; x < width; ++x) {
dst_ptr[0] =
(src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
++src_ptr;
++src_ptr1;
++dst_ptr;
}
}
// C version 2x2 -> 2x1.
void InterpolateRow_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr,
ptrdiff_t src_stride,
@ -3388,23 +3536,62 @@ void InterpolateRow_16_C(uint16_t* dst_ptr,
int y0_fraction = 256 - y1_fraction;
const uint16_t* src_ptr1 = src_ptr + src_stride;
int x;
if (source_y_fraction == 0) {
assert(source_y_fraction >= 0);
assert(source_y_fraction < 256);
if (y1_fraction == 0) {
memcpy(dst_ptr, src_ptr, width * 2);
return;
}
if (source_y_fraction == 128) {
if (y1_fraction == 128) {
HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
return;
}
for (x = 0; x < width - 1; x += 2) {
dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
src_ptr += 2;
src_ptr1 += 2;
dst_ptr += 2;
for (x = 0; x < width; ++x) {
dst_ptr[0] =
(src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
++src_ptr;
++src_ptr1;
++dst_ptr;
}
if (width & 1) {
dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
}
// C version 2x2 16 bit-> 2x1 8 bit.
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
void InterpolateRow_16To8_C(uint8_t* dst_ptr,
const uint16_t* src_ptr,
ptrdiff_t src_stride,
int scale,
int width,
int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
const uint16_t* src_ptr1 = src_ptr + src_stride;
int x;
assert(source_y_fraction >= 0);
assert(source_y_fraction < 256);
if (source_y_fraction == 0) {
Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
return;
}
if (source_y_fraction == 128) {
HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
return;
}
for (x = 0; x < width; ++x) {
dst_ptr[0] = C16TO8(
(src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
scale);
src_ptr += 1;
src_ptr1 += 1;
dst_ptr += 1;
}
}
@ -3921,6 +4108,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_I444TORGB24ROW_AVX2)
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
src_y += twidth;
src_u += twidth;
src_v += twidth;
dst_rgb24 += twidth * 3;
width -= twidth;
}
}
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
@ -4009,6 +4222,26 @@ void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
}
#endif // HAS_RAWTOYJROW_SSSE3
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
const uint16_t* src_ptr,
ptrdiff_t src_stride,
int scale,
int width,
int source_y_fraction) {
// Row buffer for intermediate 16 bit pixels.
SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
src_ptr += twidth;
dst_ptr += twidth;
width -= twidth;
}
}
#endif // HAS_INTERPOLATEROW_16TO8_AVX2
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
float fsum = 0.f;
int i;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,6 @@
#include "libyuv/row.h"
#include <stdio.h>
#ifdef __cplusplus
namespace libyuv {
extern "C" {
@ -21,6 +19,9 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
// d8-d15, r4-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are
// reserved.
// q0: Y uint16x8_t
// d2: U uint8x8_t
// d3: V uint8x8_t
@ -155,6 +156,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void I444ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP
"1: \n" READYUV444 YUVTORGB
RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
"bgt 1b \n"
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_rgb24] "+r"(dst_rgb24), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS);
}
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -575,6 +599,127 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
);
}
// Reads 16 byte Y's from tile and writes out 16 Y's.
// MM21 Y tiles are 16x32 so src_tile_stride = 512 bytes
// MM21 UV tiles are 8x16 so src_tile_stride = 256 bytes
// width measured in bytes so 8 UV = 16.
void DetileRow_NEON(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width) {
asm volatile(
"1: \n"
"vld1.8 {q0}, [%0], %3 \n" // load 16 bytes
"subs %2, %2, #16 \n" // 16 processed per loop
"pld [%0, #1792] \n"
"vst1.8 {q0}, [%1]! \n" // store 16 bytes
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"(src_tile_stride) // %3
: "cc", "memory", "q0" // Clobber List
);
}
// Reads 16 byte Y's of 16 bits from tile and writes out 16 Y's.
void DetileRow_16_NEON(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
int width) {
asm volatile(
"1: \n"
"vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels
"subs %2, %2, #16 \n" // 16 processed per loop
"pld [%0, #3584] \n"
"vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"(src_tile_stride * 2) // %3
: "cc", "memory", "q0", "q1" // Clobber List
);
}
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
"vld2.8 {d0, d1}, [%0], %4 \n"
"subs %3, %3, #16 \n"
"pld [%0, #1792] \n"
"vst1.8 {d0}, [%1]! \n"
"vst1.8 {d1}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
: "r"(src_tile_stride) // %4
: "cc", "memory", "d0", "d1" // Clobber List
);
}
#if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
"vld1.8 {q0}, [%0], %4 \n" // Load 16 Y
"pld [%0, #1792] \n"
"vld1.8 {q1}, [%1], %5 \n" // Load 8 UV
"pld [%1, #1792] \n"
"subs %3, %3, #16 \n"
"vst2.8 {q0, q1}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
"+r"(width) // %3
: "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber list
);
}
#else
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
"vld1.8 {q0}, [%0], %4 \n" // Load 16 Y
"vld1.8 {q1}, [%1], %5 \n" // Load 8 UV
"subs %3, %3, #16 \n"
"pld [%0, #1792] \n"
"vzip.8 q0, q1 \n"
"pld [%1, #1792] \n"
"vst1.8 {q0, q1}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
"+r"(width) // %3
: "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber list
);
}
#endif
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@ -1304,16 +1449,17 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
int width) {
asm volatile(
"1: \n"
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of
// RGB24.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n"
"subs %2, %2, #16 \n" // 16 processed per loop.
"vst3.8 {d0, d2, d4}, [%1]! \n" // store 16 RGB24 pixels.
"vst3.8 {d1, d3, d5}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb24), // %1
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
}
@ -1457,6 +1603,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
);
}
void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
int stride_yuy2,
uint8_t* dst_uv,
int width) {
asm volatile(
"add %1, %0, %1 \n" // stride + src_yuy2
"1: \n"
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
"vld2.8 {q2, q3}, [%1]! \n" // load next row YUY2.
"vrhadd.u8 q4, q1, q3 \n" // average rows of UV
"vst1.8 {q4}, [%2]! \n" // store 8 UV.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(stride_yuy2), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7" // Clobber List
);
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
@ -1598,29 +1767,6 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
: "cc", "memory", "q0", "q1", "q2", "q3");
}
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #25 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #129 \n" // G * 0.5078 coefficient
"vmov.u8 d26, #66 \n" // R * 0.2578 coefficient
"vmov.u8 d27, #16 \n" // Add 16 constant
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q2, d0, d24 \n" // B
"vmlal.u8 q2, d1, d25 \n" // G
"vmlal.u8 q2, d2, d26 \n" // R
"vqrshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d27 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
uint8_t* dst_a,
int width) {
@ -1639,48 +1785,6 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
);
}
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d25, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d26, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q2, d0, d24 \n" // B
"vmlal.u8 q2, d1, d25 \n" // G
"vmlal.u8 q2, d2, d26 \n" // R
"vqrshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d25, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d26, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 RGBA pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q2, d1, d24 \n" // B
"vmlal.u8 q2, d2, d25 \n" // G
"vmlal.u8 q2, d3, d26 \n" // R
"vqrshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}
// 8x1 pixels.
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
@ -1700,15 +1804,13 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
"vmull.u8 q2, d0, d24 \n" // B
"vmlsl.u8 q2, d1, d25 \n" // G
"vmlsl.u8 q2, d2, d26 \n" // R
"vadd.u16 q2, q2, q15 \n" // +128 -> unsigned
"vmull.u8 q3, d2, d24 \n" // R
"vmlsl.u8 q3, d1, d28 \n" // G
"vmlsl.u8 q3, d0, d27 \n" // B
"vadd.u16 q3, q3, q15 \n" // +128 -> unsigned
"vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U
"vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V
"vaddhn.u16 d0, q2, q15 \n" // +128 -> unsigned
"vaddhn.u16 d1, q3, q15 \n" // +128 -> unsigned
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
@ -1728,13 +1830,11 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
"vmul.s16 q8, " #QB ", q10 \n" /* B */ \
"vmls.s16 q8, " #QG ", q11 \n" /* G */ \
"vmls.s16 q8, " #QR ", q12 \n" /* R */ \
"vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
"vmul.s16 q9, " #QR ", q10 \n" /* R */ \
"vmls.s16 q9, " #QG ", q14 \n" /* G */ \
"vmls.s16 q9, " #QB ", q13 \n" /* B */ \
"vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
"vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
"vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
"vaddhn.u16 d0, q8, q15 \n" /* +128 -> unsigned */ \
"vaddhn.u16 d1, q9, q15 \n" /* +128 -> unsigned */
// clang-format on
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
@ -1783,7 +1883,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
// TODO(fbarchard): Subsample match C code.
// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -1829,6 +1929,143 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_uj,
uint8_t* dst_vj,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
"vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
"vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #1 \n" // 2x average
"vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q2, q1, q0)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_abgr), // %0
"+r"(src_stride_abgr), // %1
"+r"(dst_uj), // %2
"+r"(dst_vj), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
// TODO(fbarchard): Subsample match C code.
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_rgb24
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
"1: \n"
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
"vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
"vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #1 \n" // 2x average
"vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q0, q1, q2)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(src_stride_rgb24), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
// TODO(fbarchard): Subsample match C code.
void RAWToUVJRow_NEON(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_raw
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
"1: \n"
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
"vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
"vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #1 \n" // 2x average
"vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q2, q1, q0)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(src_stride_raw), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@ -2319,9 +2556,6 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
10, 9, 8, 11, 14, 13, 12, 15};
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {
@ -2342,11 +2576,15 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb,
: "cc", "memory", "q0", "q1", "q2", "q3");
}
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
10, 9, 8, 11, 14, 13, 12, 15};
void ARGBToAB64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width) {
asm volatile(
"vld1.8 q4, %3 \n" // shuffler
"vld1.8 {q4}, [%3] \n" // shuffler
"1: \n"
"vld1.8 {q0}, [%0]! \n"
"vld1.8 {q2}, [%0]! \n"
@ -2360,10 +2598,10 @@ void ARGBToAB64Row_NEON(const uint8_t* src_argb,
"vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
"vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "r"(&kShuffleARGBToABGR) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}
@ -2397,7 +2635,8 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
"vld1.8 d8, %3 \n" // shuffler
"vld1.8 {d8}, [%3] \n" // shuffler
"1: \n"
"vld1.16 {q0}, [%0]! \n"
"vld1.16 {q1}, [%0]! \n"
@ -2411,168 +2650,186 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
"vst1.8 {q0}, [%1]! \n" // store 4 pixels
"vst1.8 {q2}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleAB64ToARGB) // %3
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(&kShuffleAB64ToARGB) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
uint16_t pad;
};
// RGB to JPeg coefficients
// B * 0.1140 coefficient = 29
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
128,
0};
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
// G * 0.5078 coefficient = 129
// R * 0.2578 coefficient = 66
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
0x1080,
0};
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
0x1080,
0};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct RgbConstants* rgbconstants) {
asm volatile(
"vmov.u8 d6, #25 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #129 \n" // G * 0.5078 coefficient
"vmov.u8 d4, #66 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
"vld1.8 {d0}, [%3] \n" // load rgbconstants
"vdup.u8 d20, d0[0] \n"
"vdup.u8 d21, d0[1] \n"
"vdup.u8 d22, d0[2] \n"
"vdup.u16 q12, d0[2] \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d1, d4 \n" // R
"vmlal.u8 q8, d2, d5 \n" // G
"vmlal.u8 q8, d3, d6 \n" // B
"vqrshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB
"vld4.8 {d1, d3, d5, d7}, [%0]! \n"
"subs %2, %2, #16 \n" // 16 processed per loop.
"vmull.u8 q8, d0, d20 \n" // B
"vmull.u8 q9, d1, d20 \n"
"vmlal.u8 q8, d2, d21 \n" // G
"vmlal.u8 q9, d3, d21 \n"
"vmlal.u8 q8, d4, d22 \n" // R
"vmlal.u8 q9, d5, d22 \n"
"vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y
"vaddhn.u16 d1, q9, q12 \n"
"vst1.8 {d0, d1}, [%1]! \n" // store 16 pixels Y.
"bgt 1b \n"
: "+r"(src_bgra), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(rgbconstants) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22",
"q12");
}
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants);
}
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}
void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_y,
int width,
const struct RgbConstants* rgbconstants) {
asm volatile(
"vmov.u8 d6, #25 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #129 \n" // G * 0.5078 coefficient
"vmov.u8 d4, #66 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
"vld1.8 {d0}, [%3] \n" // load rgbconstants
"vdup.u8 d20, d0[0] \n"
"vdup.u8 d21, d0[1] \n"
"vdup.u8 d22, d0[2] \n"
"vdup.u16 q12, d0[2] \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d0, d4 \n" // R
"vmlal.u8 q8, d1, d5 \n" // G
"vmlal.u8 q8, d2, d6 \n" // B
"vqrshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of RGBA
"vld4.8 {d1, d3, d5, d7}, [%0]! \n"
"subs %2, %2, #16 \n" // 16 processed per loop.
"vmull.u8 q8, d2, d20 \n" // B
"vmull.u8 q9, d3, d20 \n"
"vmlal.u8 q8, d4, d21 \n" // G
"vmlal.u8 q9, d5, d21 \n"
"vmlal.u8 q8, d6, d22 \n" // R
"vmlal.u8 q9, d7, d22 \n"
"vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y
"vaddhn.u16 d1, q9, q12 \n"
"vst1.8 {d0, d1}, [%1]! \n" // store 16 pixels Y.
"bgt 1b \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(rgbconstants) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22",
"q12");
}
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #25 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #129 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #66 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d1, d4 \n" // B
"vmlal.u8 q8, d2, d5 \n" // G
"vmlal.u8 q8, d3, d6 \n" // R
"vqrshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
RGBAToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgb24I601Constants);
}
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #25 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #129 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #66 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d0, d4 \n" // B
"vmlal.u8 q8, d1, d5 \n" // G
"vmlal.u8 q8, d2, d6 \n" // R
"vqrshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
RGBAToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
RGBAToYMatrixRow_NEON(src_bgra, dst_y, width, &kRawI601Constants);
}
void RGBToYMatrixRow_NEON(const uint8_t* src_rgb,
uint8_t* dst_y,
int width,
const struct RgbConstants* rgbconstants) {
asm volatile(
"vmov.u8 d6, #25 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #129 \n" // G * 0.5078 coefficient
"vmov.u8 d4, #66 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
"vld1.8 {d0}, [%3] \n" // load rgbconstants
"vdup.u8 d20, d0[0] \n"
"vdup.u8 d21, d0[1] \n"
"vdup.u8 d22, d0[2] \n"
"vdup.u16 q12, d0[2] \n"
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d0, d4 \n" // B
"vmlal.u8 q8, d1, d5 \n" // G
"vmlal.u8 q8, d2, d6 \n" // R
"vqrshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"vld3.8 {d2, d4, d6}, [%0]! \n" // load 16 pixels of
// RGB24.
"vld3.8 {d3, d5, d7}, [%0]! \n"
"subs %2, %2, #16 \n" // 16 processed per loop.
"vmull.u8 q8, d2, d20 \n" // B
"vmull.u8 q9, d3, d20 \n"
"vmlal.u8 q8, d4, d21 \n" // G
"vmlal.u8 q9, d5, d21 \n"
"vmlal.u8 q8, d6, d22 \n" // R
"vmlal.u8 q9, d7, d22 \n"
"vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y
"vaddhn.u16 d1, q9, q12 \n"
"vst1.8 {d0, d1}, [%1]! \n" // store 16 pixels Y.
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
: "+r"(src_rgb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(rgbconstants) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22",
"q12");
}
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d4, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d5, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d6, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q4, d0, d4 \n" // B
"vmlal.u8 q4, d1, d5 \n" // G
"vmlal.u8 q4, d2, d6 \n" // R
"vqrshrn.u16 d0, q4, #8 \n" // 16 bit to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q4");
RGBToYMatrixRow_NEON(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d6, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d5, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d4, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q4, d0, d4 \n" // R
"vmlal.u8 q4, d1, d5 \n" // G
"vmlal.u8 q4, d2, d6 \n" // B
"vqrshrn.u16 d0, q4, #8 \n" // 16 bit to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q4");
RGBToYMatrixRow_NEON(src_raw, dst_yj, width, &kRawJPEGConstants);
}
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
RGBToYMatrixRow_NEON(src_rgb24, dst_y, width, &kRgb24I601Constants);
}
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
RGBToYMatrixRow_NEON(src_raw, dst_y, width, &kRawI601Constants);
}
// Bilinear filter 16x2 -> 16x1
@ -2634,6 +2891,66 @@ void InterpolateRow_NEON(uint8_t* dst_ptr,
: "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14");
}
// Bilinear filter 8x2 -> 8x1
void InterpolateRow_16_NEON(uint16_t* dst_ptr,
const uint16_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
const uint16_t* src_ptr1 = src_ptr + src_stride;
asm volatile(
"cmp %4, #0 \n"
"beq 100f \n"
"cmp %4, #128 \n"
"beq 50f \n"
"vdup.16 d17, %4 \n"
"vdup.16 d16, %5 \n"
// General purpose row blend.
"1: \n"
"vld1.16 {q0}, [%1]! \n"
"vld1.16 {q1}, [%2]! \n"
"subs %3, %3, #8 \n"
"vmull.u16 q2, d0, d16 \n"
"vmull.u16 q3, d1, d16 \n"
"vmlal.u16 q2, d2, d17 \n"
"vmlal.u16 q3, d3, d17 \n"
"vrshrn.u32 d0, q2, #8 \n"
"vrshrn.u32 d1, q3, #8 \n"
"vst1.16 {q0}, [%0]! \n"
"bgt 1b \n"
"b 99f \n"
// Blend 50 / 50.
"50: \n"
"vld1.16 {q0}, [%1]! \n"
"vld1.16 {q1}, [%2]! \n"
"subs %3, %3, #8 \n"
"vrhadd.u16 q0, q1 \n"
"vst1.16 {q0}, [%0]! \n"
"bgt 50b \n"
"b 99f \n"
// Blend 100 / 0 - Copy row unchanged.
"100: \n"
"vld1.16 {q0}, [%1]! \n"
"subs %3, %3, #8 \n"
"vst1.16 {q0}, [%0]! \n"
"bgt 100b \n"
"99: \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(src_ptr1), // %2
"+r"(dst_width) // %3
: "r"(y1_fraction), // %4
"r"(y0_fraction) // %5
: "cc", "memory", "q0", "q1", "q2", "q3", "q8");
}
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
void ARGBBlendRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
@ -3518,7 +3835,7 @@ void MultiplyRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
"vdup.16 q2, %2 \n"
"vdup.16 q2, %3 \n"
"1: \n"
"vld1.16 {q0}, [%0]! \n"
"vld1.16 {q1}, [%0]! \n"
@ -3526,13 +3843,12 @@ void MultiplyRow_16_NEON(const uint16_t* src_y,
"vmul.u16 q1, q1, q2 \n"
"vst1.16 {q0}, [%1]! \n"
"vst1.16 {q1}, [%1]! \n"
"subs %3, %3, #16 \n" // 16 src pixels per loop
"subs %2, %2, #16 \n" // 16 src pixels per loop
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(scale), // %2
"+r"(width) // %3
:
"+r"(width) // %2
: "r"(scale) // %3
: "cc", "memory", "q0", "q1", "q2");
}
@ -3541,7 +3857,7 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
"vdup.16 q0, %2 \n"
"vdup.16 q0, %3 \n"
"1: \n"
"vld1.16 {q1}, [%0]! \n"
"vld1.16 {q2}, [%0]! \n"
@ -3559,16 +3875,44 @@ void DivideRow_16_NEON(const uint16_t* src_y,
"vmovn.u32 d5, q2 \n"
"vst1.16 {q1}, [%1]! \n"
"vst1.16 {q2}, [%1]! \n"
"subs %3, %3, #16 \n" // 16 src pixels per loop
"subs %2, %2, #16 \n" // 16 src pixels per loop
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(scale), // %2
"+r"(width) // %3
:
"+r"(width) // %2
: "r"(scale) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits = shr 1
// 16384 = 10 bits = shr 2
// 4096 = 12 bits = shr 4
// 256 = 16 bits = shr 8
void Convert16To8Row_NEON(const uint16_t* src_y,
uint8_t* dst_y,
int scale,
int width) {
int shift = 15 - __builtin_clz((int32_t)scale); // Negative shl is shr
asm volatile(
"vdup.16 q2, %3 \n"
"1: \n"
"vld1.16 {q0}, [%0]! \n"
"vld1.16 {q1}, [%0]! \n"
"vshl.u16 q0, q0, q2 \n" // shr = q2 is negative
"vshl.u16 q1, q1, q2 \n"
"vqmovn.u16 d0, q0 \n"
"vqmovn.u16 d1, q1 \n"
"subs %2, %2, #16 \n" // 16 src pixels per loop
"vst1.8 {q0}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(shift) // %3
: "cc", "memory", "q0", "q1", "q2");
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..
#ifdef __cplusplus

File diff suppressed because it is too large Load Diff

View File

@ -75,7 +75,7 @@ extern "C" {
// Convert 8 pixels: 8 UV and 8 Y.
#define YUVTORGB(yuvconstants) \
xmm3 = _mm_sub_epi8(xmm3, _mm_set1_epi8(0x80)); \
xmm3 = _mm_sub_epi8(xmm3, _mm_set1_epi8((char)0x80)); \
xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \
xmm4 = _mm_add_epi16(xmm4, *(__m128i*)yuvconstants->kYBiasToRgb); \
xmm0 = _mm_maddubs_epi16(*(__m128i*)yuvconstants->kUVToB, xmm3); \
@ -2789,6 +2789,44 @@ __declspec(naked) void I422ToRGB24Row_SSSE3(
}
}
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 RGB24 (24 bytes).
__declspec(naked) void I444ToRGB24Row_SSSE3(
const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
push ebx
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
READYUV444
YUVTORGB(ebx)
STORERGB24
sub ecx, 8
jg convertloop
pop ebx
pop edi
pop esi
ret
}
}
// 8 pixels
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
__declspec(naked) void I422ToRGB565Row_SSSE3(

View File

@ -29,6 +29,7 @@ static __inline int Abs(int v) {
}
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
@ -50,7 +51,7 @@ static void ScalePlaneDown2(int src_width,
? ScaleRowDown2_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_C
: ScaleRowDown2Box_C);
int row_stride = src_stride << 1;
int row_stride = src_stride * 2;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -104,21 +105,6 @@ static void ScalePlaneDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI
: ScaleRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI
: (filtering == kFilterLinear
? ScaleRowDown2Linear_MMI
: ScaleRowDown2Box_MMI);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleRowDown2 =
@ -134,6 +120,21 @@ static void ScalePlaneDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_Any_LSX
: (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_LSX
: ScaleRowDown2Box_Any_LSX);
if (IS_ALIGNED(dst_width, 32)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_LSX
: (filtering == kFilterLinear
? ScaleRowDown2Linear_LSX
: ScaleRowDown2Box_LSX);
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -162,7 +163,7 @@ static void ScalePlaneDown2_16(int src_width,
? ScaleRowDown2_16_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
: ScaleRowDown2Box_16_C);
int row_stride = src_stride << 1;
int row_stride = src_stride * 2;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -185,14 +186,6 @@ static void ScalePlaneDown2_16(int src_width,
: ScaleRowDown2Box_16_SSE2);
}
#endif
#if defined(HAS_SCALEROWDOWN2_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_MMI
: (filtering == kFilterLinear
? ScaleRowDown2Linear_16_MMI
: ScaleRowDown2Box_16_MMI);
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -222,7 +215,7 @@ static void ScalePlaneDown4(int src_width,
void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
int row_stride = src_stride << 2;
int row_stride = src_stride * 4;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -256,15 +249,6 @@ static void ScalePlaneDown4(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI;
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleRowDown4 =
@ -274,6 +258,15 @@ static void ScalePlaneDown4(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_Any_LSX : ScaleRowDown4_Any_LSX;
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_LSX : ScaleRowDown4_LSX;
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -298,7 +291,7 @@ static void ScalePlaneDown4_16(int src_width,
void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
int row_stride = src_stride << 2;
int row_stride = src_stride * 4;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -317,11 +310,6 @@ static void ScalePlaneDown4_16(int src_width,
filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
}
#endif
#if defined(HAS_SCALEROWDOWN4_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -379,18 +367,6 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_MMI;
ScaleRowDown34_1 = ScaleRowDown34_Any_MMI;
if (dst_width % 24 == 0) {
ScaleRowDown34_0 = ScaleRowDown34_MMI;
ScaleRowDown34_1 = ScaleRowDown34_MMI;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
if (!filtering) {
@ -411,6 +387,26 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_LSX;
ScaleRowDown34_1 = ScaleRowDown34_Any_LSX;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_LSX;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_LSX;
}
if (dst_width % 48 == 0) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_LSX;
ScaleRowDown34_1 = ScaleRowDown34_LSX;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_LSX;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_LSX;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
@ -626,6 +622,26 @@ static void ScalePlaneDown38(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_Any_LSX;
ScaleRowDown38_2 = ScaleRowDown38_Any_LSX;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_LSX;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_LSX;
}
if (dst_width % 12 == 0) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_LSX;
ScaleRowDown38_2 = ScaleRowDown38_LSX;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_LSX;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_LSX;
}
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
@ -891,14 +907,6 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
#if defined(HAS_SCALEADDROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleAddRow = ScaleAddRow_Any_MMI;
if (IS_ALIGNED(src_width, 8)) {
ScaleAddRow = ScaleAddRow_MMI;
}
}
#endif
#if defined(HAS_SCALEADDROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleAddRow = ScaleAddRow_Any_MSA;
@ -907,11 +915,19 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
#if defined(HAS_SCALEADDROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleAddRow = ScaleAddRow_Any_LSX;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_LSX;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint8_t* src = src_ptr + iy * src_stride;
const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
y += dy;
if (y > max_y) {
y = max_y;
@ -962,15 +978,10 @@ static void ScalePlaneBox_16(int src_width,
}
#endif
#if defined(HAS_SCALEADDROW_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) {
ScaleAddRow = ScaleAddRow_16_MMI;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint16_t* src = src_ptr + iy * src_stride;
const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
y += dy;
if (y > max_y) {
y = max_y;
@ -1043,14 +1054,6 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -1059,6 +1062,14 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
@ -1080,6 +1091,14 @@ void ScalePlaneBilinearDown(int src_width,
ScaleFilterCols = ScaleFilterCols_MSA;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_LSX)
if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_LSX;
if (IS_ALIGNED(dst_width, 16)) {
ScaleFilterCols = ScaleFilterCols_LSX;
}
}
#endif
if (y > max_y) {
y = max_y;
@ -1087,7 +1106,7 @@ void ScalePlaneBilinearDown(int src_width,
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_ptr + yi * src_stride;
const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
@ -1136,7 +1155,7 @@ void ScalePlaneBilinearDown_16(int src_width,
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
@ -1144,7 +1163,7 @@ void ScalePlaneBilinearDown_16(int src_width,
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
InterpolateRow = InterpolateRow_16_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
@ -1152,7 +1171,7 @@ void ScalePlaneBilinearDown_16(int src_width,
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
InterpolateRow = InterpolateRow_16_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
@ -1160,7 +1179,7 @@ void ScalePlaneBilinearDown_16(int src_width,
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
InterpolateRow = InterpolateRow_16_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
@ -1178,7 +1197,7 @@ void ScalePlaneBilinearDown_16(int src_width,
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint16_t* src = src_ptr + yi * src_stride;
const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
@ -1270,6 +1289,14 @@ void ScalePlaneBilinearUp(int src_width,
ScaleFilterCols = ScaleFilterCols_MSA;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_LSX)
if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_LSX;
if (IS_ALIGNED(dst_width, 16)) {
ScaleFilterCols = ScaleFilterCols_LSX;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C;
@ -1277,11 +1304,6 @@ void ScalePlaneBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_MMI;
}
#endif
}
@ -1290,14 +1312,14 @@ void ScalePlaneBilinearUp(int src_width,
}
{
int yi = y >> 16;
const uint8_t* src = src_ptr + yi * src_stride;
const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
const int row_size = (dst_width + 31) & ~31;
align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row;
int rowstride = kRowSize;
int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1305,7 +1327,9 @@ void ScalePlaneBilinearUp(int src_width,
src += src_stride;
}
ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
if (src_height > 2) {
src += src_stride;
}
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
@ -1313,14 +1337,16 @@ void ScalePlaneBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_ptr + yi * src_stride;
src = src_ptr + yi * (int64_t)src_stride;
}
if (yi != lasty) {
ScaleFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
if ((y + 65536) < max_y) {
src += src_stride;
}
}
}
if (filtering == kFilterLinear) {
@ -1358,38 +1384,38 @@ void ScalePlaneUp2_Linear(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_NEON
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
@ -1417,25 +1443,25 @@ void ScalePlaneUp2_Bilinear(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_NEON
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
}
@ -1477,32 +1503,32 @@ void ScalePlaneUp2_12_Linear(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
@ -1531,19 +1557,19 @@ void ScalePlaneUp2_12_Bilinear(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
}
@ -1578,32 +1604,32 @@ void ScalePlaneUp2_16_Linear(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
}
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
@ -1627,19 +1653,19 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
}
@ -1685,7 +1711,7 @@ void ScalePlaneBilinearUp_16(int src_width,
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
@ -1693,7 +1719,7 @@ void ScalePlaneBilinearUp_16(int src_width,
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
InterpolateRow = InterpolateRow_16_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
@ -1701,7 +1727,7 @@ void ScalePlaneBilinearUp_16(int src_width,
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
InterpolateRow = InterpolateRow_16_Any_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
@ -1709,7 +1735,7 @@ void ScalePlaneBilinearUp_16(int src_width,
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
InterpolateRow = InterpolateRow_16_Any_NEON;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
@ -1730,27 +1756,21 @@ void ScalePlaneBilinearUp_16(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_MMI;
}
#endif
}
if (y > max_y) {
y = max_y;
}
{
int yi = y >> 16;
const uint16_t* src = src_ptr + yi * src_stride;
const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4);
const int row_size = (dst_width + 31) & ~31;
align_buffer_64(row, row_size * 4);
uint16_t* rowptr = (uint16_t*)row;
int rowstride = kRowSize;
int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1758,7 +1778,9 @@ void ScalePlaneBilinearUp_16(int src_width,
src += src_stride;
}
ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
if (src_height > 2) {
src += src_stride;
}
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
@ -1766,14 +1788,16 @@ void ScalePlaneBilinearUp_16(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_ptr + yi * src_stride;
src = src_ptr + yi * (int64_t)src_stride;
}
if (yi != lasty) {
ScaleFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
if ((y + 65536) < max_y) {
src += src_stride;
}
}
}
if (filtering == kFilterLinear) {
@ -1820,16 +1844,12 @@ static void ScalePlaneSimple(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_MMI;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}
@ -1861,16 +1881,12 @@ static void ScalePlaneSimple_16(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_MMI;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}
@ -1878,7 +1894,6 @@ static void ScalePlaneSimple_16(int src_width,
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
void ScalePlane(const uint8_t* src,
int src_stride,
@ -1896,10 +1911,9 @@ void ScalePlane(const uint8_t* src,
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
// Use specialized scales to improve performance for common resolutions.
// For example, all the 1/2 scalings will use ScalePlaneDown2()
if (dst_width == src_width && dst_height == src_height) {
@ -1908,10 +1922,19 @@ void ScalePlane(const uint8_t* src,
return;
}
if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
int dy = 0;
int y = 0;
// When scaling down, use the center 2 rows to filter.
// When scaling up, last row of destination uses the last 2 source rows.
if (dst_height <= src_height) {
dy = FixedDiv(src_height, dst_height);
y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter.
} else if (src_height > 1 && dst_height > 1) {
dy = FixedDiv1(src_height, dst_height);
}
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, 0, dy, 1, filtering);
dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
return;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {
@ -1990,10 +2013,9 @@ void ScalePlane_16(const uint16_t* src,
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
// Use specialized scales to improve performance for common resolutions.
// For example, all the 1/2 scalings will use ScalePlaneDown2()
if (dst_width == src_width && dst_height == src_height) {
@ -2002,10 +2024,22 @@ void ScalePlane_16(const uint16_t* src,
return;
}
if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
int dy = 0;
int y = 0;
// When scaling down, use the center 2 rows to filter.
// When scaling up, last row of destination uses the last 2 source rows.
if (dst_height <= src_height) {
dy = FixedDiv(src_height, dst_height);
y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter.
// When scaling up, ensure the last row of destination uses the last
// source. Avoid divide by zero for dst_height but will do no scaling
// later.
} else if (src_height > 1 && dst_height > 1) {
dy = FixedDiv1(src_height, dst_height);
}
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, 0, dy, 1, filtering);
dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
return;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {
@ -2084,7 +2118,7 @@ void ScalePlane_12(const uint16_t* src,
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
@ -2129,6 +2163,7 @@ int I420Scale(const uint8_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
@ -2166,6 +2201,7 @@ int I420Scale_16(const uint16_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
@ -2203,6 +2239,7 @@ int I420Scale_12(const uint16_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
@ -2320,6 +2357,117 @@ int I444Scale_12(const uint16_t* src_y,
return 0;
}
// Scale an I422 image.
// This function in turn calls a scaling function for each plane.
LIBYUV_API
int I422Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
dst_stride_u, dst_halfwidth, dst_height, filtering);
ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
dst_stride_v, dst_halfwidth, dst_height, filtering);
return 0;
}
LIBYUV_API
int I422Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
dst_stride_u, dst_halfwidth, dst_height, filtering);
ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
dst_stride_v, dst_halfwidth, dst_height, filtering);
return 0;
}
LIBYUV_API
int I422Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
dst_stride_u, dst_halfwidth, dst_height, filtering);
ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
dst_stride_v, dst_halfwidth, dst_height, filtering);
return 0;
}
// Scale an NV12 image.
// This function in turn calls a scaling function for each plane.
@ -2341,6 +2489,7 @@ int NV12Scale(const uint8_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
dst_width <= 0 || dst_height <= 0) {

View File

@ -76,7 +76,7 @@ SDANY(ScaleUVRowDown2Box_Any_SSSE3,
ScaleUVRowDown2Box_C,
2,
2,
4)
3)
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_AVX2
SDANY(ScaleUVRowDown2Box_Any_AVX2,
@ -84,7 +84,7 @@ SDANY(ScaleUVRowDown2Box_Any_AVX2,
ScaleUVRowDown2Box_C,
2,
2,
8)
7)
#endif
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
@ -134,7 +134,7 @@ SDANY(ScaleUVRowDown2Box_Any_NEON,
ScaleUVRowDown2Box_C,
2,
2,
8)
7)
#endif
#ifdef HAS_SCALEROWDOWN2_MSA
@ -152,26 +152,20 @@ SDANY(ScaleRowDown2Box_Any_MSA,
1,
31)
#endif
#ifdef HAS_SCALEROWDOWN2_MMI
SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7)
SDANY(ScaleRowDown2Linear_Any_MMI,
ScaleRowDown2Linear_MMI,
#ifdef HAS_SCALEROWDOWN2_LSX
SDANY(ScaleRowDown2_Any_LSX, ScaleRowDown2_LSX, ScaleRowDown2_C, 2, 1, 31)
SDANY(ScaleRowDown2Linear_Any_LSX,
ScaleRowDown2Linear_LSX,
ScaleRowDown2Linear_C,
2,
1,
7)
SDANY(ScaleRowDown2Box_Any_MMI,
ScaleRowDown2Box_MMI,
31)
SDANY(ScaleRowDown2Box_Any_LSX,
ScaleRowDown2Box_LSX,
ScaleRowDown2Box_C,
2,
1,
7)
SDODD(ScaleRowDown2Box_Odd_MMI,
ScaleRowDown2Box_MMI,
ScaleRowDown2Box_Odd_C,
2,
1,
7)
31)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
@ -209,14 +203,14 @@ SDANY(ScaleRowDown4Box_Any_MSA,
1,
15)
#endif
#ifdef HAS_SCALEROWDOWN4_MMI
SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_MMI,
ScaleRowDown4Box_MMI,
#ifdef HAS_SCALEROWDOWN4_LSX
SDANY(ScaleRowDown4_Any_LSX, ScaleRowDown4_LSX, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_LSX,
ScaleRowDown4Box_LSX,
ScaleRowDown4Box_C,
4,
1,
7)
15)
#endif
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3,
@ -278,13 +272,25 @@ SDANY(ScaleRowDown34_1_Box_Any_MSA,
1,
47)
#endif
#ifdef HAS_SCALEROWDOWN34_MMI
SDANY(ScaleRowDown34_Any_MMI,
ScaleRowDown34_MMI,
#ifdef HAS_SCALEROWDOWN34_LSX
SDANY(ScaleRowDown34_Any_LSX,
ScaleRowDown34_LSX,
ScaleRowDown34_C,
4 / 3,
1,
23)
47)
SDANY(ScaleRowDown34_0_Box_Any_LSX,
ScaleRowDown34_0_Box_LSX,
ScaleRowDown34_0_Box_C,
4 / 3,
1,
47)
SDANY(ScaleRowDown34_1_Box_Any_LSX,
ScaleRowDown34_1_Box_LSX,
ScaleRowDown34_1_Box_C,
4 / 3,
1,
47)
#endif
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3,
@ -346,6 +352,26 @@ SDANY(ScaleRowDown38_2_Box_Any_MSA,
1,
11)
#endif
#ifdef HAS_SCALEROWDOWN38_LSX
SDANY(ScaleRowDown38_Any_LSX,
ScaleRowDown38_LSX,
ScaleRowDown38_C,
8 / 3,
1,
11)
SDANY(ScaleRowDown38_3_Box_Any_LSX,
ScaleRowDown38_3_Box_LSX,
ScaleRowDown38_3_Box_C,
8 / 3,
1,
11)
SDANY(ScaleRowDown38_2_Box_Any_LSX,
ScaleRowDown38_2_Box_LSX,
ScaleRowDown38_2_Box_C,
8 / 3,
1,
11)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_SSE2
SDANY(ScaleARGBRowDown2_Any_SSE2,
@ -407,25 +433,25 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA,
4,
3)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_MMI
SDANY(ScaleARGBRowDown2_Any_MMI,
ScaleARGBRowDown2_MMI,
#ifdef HAS_SCALEARGBROWDOWN2_LSX
SDANY(ScaleARGBRowDown2_Any_LSX,
ScaleARGBRowDown2_LSX,
ScaleARGBRowDown2_C,
2,
4,
1)
SDANY(ScaleARGBRowDown2Linear_Any_MMI,
ScaleARGBRowDown2Linear_MMI,
3)
SDANY(ScaleARGBRowDown2Linear_Any_LSX,
ScaleARGBRowDown2Linear_LSX,
ScaleARGBRowDown2Linear_C,
2,
4,
1)
SDANY(ScaleARGBRowDown2Box_Any_MMI,
ScaleARGBRowDown2Box_MMI,
3)
SDANY(ScaleARGBRowDown2Box_Any_LSX,
ScaleARGBRowDown2Box_LSX,
ScaleARGBRowDown2Box_C,
2,
4,
1)
3)
#endif
#undef SDANY
@ -478,17 +504,17 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
4,
3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_MMI
SDAANY(ScaleARGBRowDownEven_Any_MMI,
ScaleARGBRowDownEven_MMI,
#ifdef HAS_SCALEARGBROWDOWNEVEN_LSX
SDAANY(ScaleARGBRowDownEven_Any_LSX,
ScaleARGBRowDownEven_LSX,
ScaleARGBRowDownEven_C,
4,
1)
SDAANY(ScaleARGBRowDownEvenBox_Any_MMI,
ScaleARGBRowDownEvenBox_MMI,
3)
SDAANY(ScaleARGBRowDownEvenBox_Any_LSX,
ScaleARGBRowDownEvenBox_LSX,
ScaleARGBRowDownEvenBox_C,
4,
1)
3)
#endif
#ifdef HAS_SCALEUVROWDOWNEVEN_NEON
SDAANY(ScaleUVRowDownEven_Any_NEON,
@ -530,8 +556,8 @@ SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
#ifdef HAS_SCALEADDROW_MSA
SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_MMI
SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7)
#ifdef HAS_SCALEADDROW_LSX
SAROW(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, 1, 2, 15)
#endif
#undef SAANY
@ -559,8 +585,8 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_MMI
SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7)
#ifdef HAS_SCALEADDROW_LSX
SAANY(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, ScaleAddRow_C, 15)
#endif
#undef SAANY
@ -584,14 +610,17 @@ CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
#ifdef HAS_SCALEFILTERCOLS_MSA
CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
#endif
#ifdef HAS_SCALEFILTERCOLS_LSX
CANY(ScaleFilterCols_Any_LSX, ScaleFilterCols_LSX, ScaleFilterCols_C, 1, 15)
#endif
#ifdef HAS_SCALEARGBCOLS_NEON
CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
#endif
#ifdef HAS_SCALEARGBCOLS_MSA
CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBCOLS_MMI
CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0)
#ifdef HAS_SCALEARGBCOLS_LSX
CANY(ScaleARGBCols_Any_LSX, ScaleARGBCols_LSX, ScaleARGBCols_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_NEON
CANY(ScaleARGBFilterCols_Any_NEON,
@ -607,6 +636,13 @@ CANY(ScaleARGBFilterCols_Any_MSA,
4,
7)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_LSX
CANY(ScaleARGBFilterCols_Any_LSX,
ScaleARGBFilterCols_LSX,
ScaleARGBFilterCols_C,
4,
7)
#endif
#undef CANY
// Scale up horizontally 2 times using linear filter.
@ -622,7 +658,7 @@ CANY(ScaleARGBFilterCols_Any_MSA,
} \
C(src_ptr + (n / 2), dst_ptr + n + 1, r); \
} \
dst_ptr[dst_width - 1] = src_ptr[(dst_width / 2) - 1]; \
dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \
}
// Even the C versions need to be wrapped, because boundary pixels have to
@ -640,7 +676,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_C,
0,
uint16_t)
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
ScaleRowUp2_Linear_SSE2,
ScaleRowUp2_Linear_C,
@ -648,7 +684,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
ScaleRowUp2_Linear_SSSE3,
ScaleRowUp2_Linear_C,
@ -656,7 +692,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
ScaleRowUp2_Linear_12_SSSE3,
ScaleRowUp2_Linear_16_C,
@ -664,7 +700,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
ScaleRowUp2_Linear_16_SSE2,
ScaleRowUp2_Linear_16_C,
@ -672,7 +708,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
ScaleRowUp2_Linear_AVX2,
ScaleRowUp2_Linear_C,
@ -680,7 +716,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
ScaleRowUp2_Linear_12_AVX2,
ScaleRowUp2_Linear_16_C,
@ -688,7 +724,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
ScaleRowUp2_Linear_16_AVX2,
ScaleRowUp2_Linear_16_C,
@ -696,7 +732,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_NEON
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
ScaleRowUp2_Linear_NEON,
ScaleRowUp2_Linear_C,
@ -704,7 +740,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
ScaleRowUp2_Linear_12_NEON,
ScaleRowUp2_Linear_16_C,
@ -712,7 +748,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON,
ScaleRowUp2_Linear_16_NEON,
ScaleRowUp2_Linear_16_C,
@ -760,7 +796,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C,
0,
uint16_t)
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
ScaleRowUp2_Bilinear_SSE2,
ScaleRowUp2_Bilinear_C,
@ -768,7 +804,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
ScaleRowUp2_Bilinear_12_SSSE3,
ScaleRowUp2_Bilinear_16_C,
@ -776,15 +812,15 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2,
ScaleRowUp2_Bilinear_16_SSE2,
ScaleRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
ScaleRowUp2_Bilinear_SSSE3,
ScaleRowUp2_Bilinear_C,
@ -792,7 +828,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
ScaleRowUp2_Bilinear_AVX2,
ScaleRowUp2_Bilinear_C,
@ -800,7 +836,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
ScaleRowUp2_Bilinear_12_AVX2,
ScaleRowUp2_Bilinear_16_C,
@ -808,7 +844,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
ScaleRowUp2_Bilinear_16_AVX2,
ScaleRowUp2_Bilinear_16_C,
@ -816,7 +852,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_NEON
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
ScaleRowUp2_Bilinear_NEON,
ScaleRowUp2_Bilinear_C,
@ -824,7 +860,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
uint8_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
ScaleRowUp2_Bilinear_12_NEON,
ScaleRowUp2_Bilinear_16_C,
@ -832,7 +868,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
uint16_t)
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON,
ScaleRowUp2_Bilinear_16_NEON,
ScaleRowUp2_Bilinear_16_C,
@ -872,7 +908,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C,
0,
uint16_t)
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
ScaleUVRowUp2_Linear_SSSE3,
ScaleUVRowUp2_Linear_C,
@ -880,7 +916,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
ScaleUVRowUp2_Linear_AVX2,
ScaleUVRowUp2_Linear_C,
@ -888,15 +924,15 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE2,
ScaleUVRowUp2_Linear_16_SSE2,
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
ScaleUVRowUp2_Linear_16_SSE41,
ScaleUVRowUp2_Linear_16_C,
3,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
ScaleUVRowUp2_Linear_16_AVX2,
ScaleUVRowUp2_Linear_16_C,
@ -904,7 +940,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_NEON
#ifdef HAS_SCALEUVROWUP2_LINEAR_NEON
SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
ScaleUVRowUp2_Linear_NEON,
ScaleUVRowUp2_Linear_C,
@ -912,7 +948,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON,
ScaleUVRowUp2_Linear_16_NEON,
ScaleUVRowUp2_Linear_16_C,
@ -970,7 +1006,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C,
0,
uint16_t)
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
ScaleUVRowUp2_Bilinear_SSSE3,
ScaleUVRowUp2_Bilinear_C,
@ -978,7 +1014,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
ScaleUVRowUp2_Bilinear_AVX2,
ScaleUVRowUp2_Bilinear_C,
@ -986,15 +1022,15 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE2,
ScaleUVRowUp2_Bilinear_16_SSE2,
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
ScaleUVRowUp2_Bilinear_16_SSE41,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
ScaleUVRowUp2_Bilinear_16_AVX2,
ScaleUVRowUp2_Bilinear_16_C,
@ -1002,7 +1038,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
uint16_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON
#ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
ScaleUVRowUp2_Bilinear_NEON,
ScaleUVRowUp2_Bilinear_C,
@ -1010,7 +1046,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
uint8_t)
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON,
ScaleUVRowUp2_Bilinear_16_NEON,
ScaleUVRowUp2_Bilinear_16_C,

View File

@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@ -95,22 +95,6 @@ static void ScaleARGBDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI
: ScaleARGBRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 2)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_MMI
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI
: ScaleARGBRowDown2Box_MMI);
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleARGBRowDown2 =
@ -127,6 +111,22 @@ static void ScaleARGBDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_Any_LSX
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_LSX
: ScaleARGBRowDown2Box_Any_LSX);
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_LSX
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_LSX
: ScaleARGBRowDown2Box_LSX);
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -155,14 +155,14 @@ static void ScaleARGBDown4Box(int src_width,
int dy) {
int j;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
const int row_size = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@ -187,9 +187,9 @@ static void ScaleARGBDown4Box(int src_width,
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size,
dst_width * 2);
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
ScaleARGBRowDown2(row, row_size, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * src_stride;
int row_stride = (dy >> 16) * (int64_t)src_stride;
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@ -243,16 +243,6 @@ static void ScaleARGBDownEven(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI
: ScaleARGBRowDownEven_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
ScaleARGBRowDownEven =
filtering ? ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI;
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
@ -263,6 +253,16 @@ static void ScaleARGBDownEven(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_LSX
: ScaleARGBRowDownEven_Any_LSX;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven =
filtering ? ScaleARGBRowDownEvenBox_LSX : ScaleARGBRowDownEven_LSX;
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -340,6 +340,14 @@ static void ScaleARGBBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
@ -360,6 +368,14 @@ static void ScaleARGBBilinearDown(int src_width,
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_LSX;
}
}
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
@ -372,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_argb + yi * src_stride;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@ -436,14 +452,6 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -451,6 +459,14 @@ static void ScaleARGBBilinearUp(int src_width,
InterpolateRow = InterpolateRow_MSA;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
if (src_width >= 32768) {
ScaleARGBFilterCols =
@ -477,6 +493,14 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_LSX)
if (filtering && TestCpuFlag(kCpuHasLSX)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_LSX;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
@ -490,14 +514,6 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBFilterCols = ScaleARGBCols_MMI;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
@ -505,6 +521,14 @@ static void ScaleARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_LSX)
if (!filtering && TestCpuFlag(kCpuHasLSX)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_LSX;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBCols_LSX;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
@ -512,11 +536,6 @@ static void ScaleARGBBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
}
#endif
}
@ -526,14 +545,14 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_argb + yi * src_stride;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
const int row_size = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row;
int rowstride = kRowSize;
int rowstride = row_size;
int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -541,7 +560,9 @@ static void ScaleARGBBilinearUp(int src_width,
src += src_stride;
}
ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
if (src_height > 2) {
src += src_stride;
}
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
@ -549,14 +570,16 @@ static void ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * src_stride;
src = src_argb + yi * (int64_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
if ((y + 65536) < max_y) {
src += src_stride;
}
}
}
if (filtering == kFilterLinear) {
@ -611,6 +634,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
(kCpuHasAVX512BW | kCpuHasAVX512VL)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
if (IS_ALIGNED(src_width, 32)) {
I422ToARGBRow = I422ToARGBRow_AVX512BW;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
@ -619,14 +651,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGBRow = I422ToARGBRow_Any_MMI;
if (IS_ALIGNED(src_width, 4)) {
I422ToARGBRow = I422ToARGBRow_MMI;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@ -635,6 +659,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
if (IS_ALIGNED(src_width, 32)) {
I422ToARGBRow = I422ToARGBRow_LASX;
}
}
#endif
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
@ -671,6 +703,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
@ -700,6 +740,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_LSX)
if (filtering && TestCpuFlag(kCpuHasLSX)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_LSX;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
@ -713,14 +761,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBFilterCols = ScaleARGBCols_MMI;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
@ -728,6 +768,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_LSX)
if (!filtering && TestCpuFlag(kCpuHasLSX)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_LSX;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBCols_LSX;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
@ -735,11 +783,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
}
#endif
}
@ -750,19 +793,19 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
const uint8_t* src_row_y = src_y + yi * src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
const int row_size = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
// Allocate 1 row of ARGB for source conversion.
align_buffer_64(argb_row, src_width * 4);
uint8_t* rowptr = row;
int rowstride = kRowSize;
int rowstride = row_size;
int lasty = yi;
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
@ -790,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
src_row_y = src_y + yi * src_stride_y;
src_row_u = src_u + uv_yi * src_stride_u;
src_row_v = src_v + uv_yi * src_stride_v;
src_row_y = src_y + yi * (int64_t)src_stride_y;
src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
@ -857,14 +900,6 @@ static void ScaleARGBSimple(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBCols = ScaleARGBCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBCols = ScaleARGBCols_MMI;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleARGBCols = ScaleARGBCols_Any_MSA;
@ -872,6 +907,14 @@ static void ScaleARGBSimple(int src_width,
ScaleARGBCols = ScaleARGBCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ScaleARGBCols = ScaleARGBCols_Any_LSX;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBCols = ScaleARGBCols_LSX;
}
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
@ -879,17 +922,12 @@ static void ScaleARGBSimple(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBCols = ScaleARGBColsUp2_MMI;
}
#endif
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
dx);
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
}
@ -924,7 +962,7 @@ static void ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -939,7 +977,7 @@ static void ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * src_stride;
src += (clipf >> 16) * (int64_t)src_stride;
dst += clip_y * dst_stride;
}
@ -973,8 +1011,8 @@ static void ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
dst, dst_stride, clip_width, clip_height);
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
}
@ -983,7 +1021,7 @@ static void ScaleARGB(const uint8_t* src,
if (dx == 0x10000 && (x & 0xffff) == 0) {
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst, x, y, dy, 4, filtering);
dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering);
return;
}
if (filtering && dy < 65536) {

View File

@ -766,18 +766,18 @@ void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
(src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
(65536 / 9) >>
(65536u / 9u) >>
16;
dst_ptr[1] =
(src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
(65536 / 9) >>
(65536u / 9u) >>
16;
dst_ptr[2] =
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
(65536 / 6) >>
(65536u / 6u) >>
16;
src_ptr += 8;
dst_ptr += 3;
@ -820,15 +820,15 @@ void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
src_ptr[stride + 1] + src_ptr[stride + 2]) *
(65536 / 6) >>
(65536u / 6u) >>
16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
src_ptr[stride + 4] + src_ptr[stride + 5]) *
(65536 / 6) >>
(65536u / 6u) >>
16;
dst_ptr[2] =
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
(65536 / 4) >>
(65536u / 4u) >>
16;
src_ptr += 8;
dst_ptr += 3;
@ -1465,7 +1465,7 @@ void ScalePlaneVertical(int src_height,
int x,
int y,
int dy,
int bpp,
int bpp, // bytes per pixel. 4 for ARGB.
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
@ -1503,14 +1503,6 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width_bytes, 8)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -1518,6 +1510,14 @@ void ScalePlaneVertical(int src_height,
InterpolateRow = InterpolateRow_MSA;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int yi;
@ -1533,6 +1533,7 @@ void ScalePlaneVertical(int src_height,
y += dy;
}
}
void ScalePlaneVertical_16(int src_height,
int dst_width,
int dst_height,
@ -1543,7 +1544,7 @@ void ScalePlaneVertical_16(int src_height,
int x,
int y,
int dy,
int wpp,
int wpp, /* words per pixel. normally 1 */
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
@ -1559,32 +1560,32 @@ void ScalePlaneVertical_16(int src_height,
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(dst_width_words, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;
if (IS_ALIGNED(dst_width_words, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_16_Any_AVX2;
if (IS_ALIGNED(dst_width_words, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_Any_NEON;
if (IS_ALIGNED(dst_width_words, 8)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
@ -1604,6 +1605,70 @@ void ScalePlaneVertical_16(int src_height,
}
}
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
void ScalePlaneVertical_16To8(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_argb,
uint8_t* dst_argb,
int x,
int y,
int dy,
int wpp, /* words per pixel. normally 1 */
int scale,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
// TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int scale, int dst_width,
int source_y_fraction) = InterpolateRow_16To8_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
int j;
assert(wpp >= 1 && wpp <= 2);
assert(src_height != 0);
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
if (y > max_y) {
y = max_y;
}
yi = y >> 16;
yf = filtering ? ((y >> 8) & 255) : 0;
InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride,
scale, dst_width_words, yf);
dst_argb += dst_stride;
y += dy;
}
}
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@ -1653,7 +1718,7 @@ int FixedDiv_C(int num, int div) {
return (int)(((int64_t)(num) << 16) / div);
}
// Divide num by div and return as 16.16 fixed point result.
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
}
@ -1696,14 +1761,14 @@ void ScaleSlope(int src_width,
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
} else if (dst_width > 1) {
} else if (src_width > 1 && dst_width > 1) {
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}
if (dst_height <= src_height) {
*dy = FixedDiv(src_height, dst_height);
*y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
} else if (dst_height > 1) {
} else if (src_height > 1 && dst_height > 1) {
*dy = FixedDiv1(src_height, dst_height);
*y = 0;
}
@ -1712,7 +1777,7 @@ void ScaleSlope(int src_width,
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
} else if (dst_width > 1) {
} else if (src_width > 1 && dst_width > 1) {
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}

View File

@ -779,7 +779,7 @@ static const uvec8 kLinearShuffleFar = {2, 3, 0, 1, 6, 7, 4, 5,
static const uvec8 kLinearMadd31 = {3, 1, 1, 3, 3, 1, 1, 3,
3, 1, 1, 3, 3, 1, 1, 3};
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
@ -833,7 +833,7 @@ void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -949,7 +949,7 @@ void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
@ -999,7 +999,7 @@ void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
@ -1094,11 +1094,12 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)), // %4
"m"(kLinearShuffleFar) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
@ -1149,7 +1150,7 @@ void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
@ -1242,7 +1243,7 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
"pshufd $0b11011000,%%xmm4,%%xmm4 \n"
"movdqu %%xmm4,(%1) \n" // store above
"packssdw %%xmm2,%%xmm5 \n"
"pshufd $0b11011000,%%xmm4,%%xmm4 \n"
"pshufd $0b11011000,%%xmm5,%%xmm5 \n"
"movdqu %%xmm5,(%1,%4,2) \n" // store below
"lea 0x8(%0),%0 \n"
@ -1254,11 +1255,12 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
@ -1283,9 +1285,8 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
"paddw %%xmm4,%%xmm2 \n" // 3*near+far+2 (hi)
"psrlw $2,%%xmm0 \n" // 3/4*near+1/4*far (lo)
"psrlw $2,%%xmm2 \n" // 3/4*near+1/4*far (hi)
"vpackuswb %%xmm2,%%xmm0,%%xmm0 \n"
"vmovdqu %%xmm0,(%1) \n"
"packuswb %%xmm2,%%xmm0 \n"
"movdqu %%xmm0,(%1) \n"
"lea 0x8(%0),%0 \n"
"lea 0x10(%1),%1 \n" // 8 sample to 16 sample
"sub $0x10,%2 \n"
@ -1294,11 +1295,11 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "m"(kLinearMadd31) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -1385,7 +1386,7 @@ void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
@ -1427,7 +1428,7 @@ void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -1511,7 +1512,7 @@ void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
@ -1561,7 +1562,7 @@ void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
@ -1625,7 +1626,7 @@ void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
@ -1673,7 +1674,7 @@ void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
@ -2326,13 +2327,18 @@ int FixedDiv1_X86(int num, int div) {
return num;
}
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) || \
defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
// Shuffle table for splitting UV into upper and lower part of register.
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u,
6u, 14u, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80};
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
@ -2418,7 +2424,7 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
static const uvec8 kUVLinearMadd31 = {3, 1, 3, 1, 1, 3, 1, 3,
3, 1, 3, 1, 1, 3, 1, 3};
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
@ -2457,7 +2463,7 @@ void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -2542,7 +2548,7 @@ void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
@ -2584,7 +2590,7 @@ void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@ -2666,10 +2672,10 @@ void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
asm volatile(
"pxor %%xmm5,%%xmm5 \n"
"pcmpeqd %%xmm4,%%xmm4 \n"
@ -2716,12 +2722,12 @@ void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
void ScaleUVRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width) {
asm volatile(
"pxor %%xmm7,%%xmm7 \n"
"pcmpeqd %%xmm6,%%xmm6 \n"
@ -2809,7 +2815,7 @@ void ScaleUVRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width) {
@ -2856,7 +2862,7 @@ void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
}
#endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,

View File

@ -0,0 +1,739 @@
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Copyright (c) 2022 Loongson Technology Corporation Limited
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include "libyuv/scale_row.h"
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#include "libyuv/loongson_intrinsics.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#define LOAD_DATA(_src, _in, _out) \
{ \
int _tmp1, _tmp2, _tmp3, _tmp4; \
DUP4_ARG2(__lsx_vpickve2gr_w, _in, 0, _in, 1, _in, 2, _in, 3, _tmp1, \
_tmp2, _tmp3, _tmp4); \
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp1], 0); \
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp2], 1); \
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp3], 2); \
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp4], 3); \
}
void ScaleARGBRowDown2_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width) {
int x;
int len = dst_width / 4;
(void)src_stride;
__m128i src0, src1, dst0;
for (x = 0; x < len; x++) {
DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
dst0 = __lsx_vpickod_w(src1, src0);
__lsx_vst(dst0, dst_argb, 0);
src_argb += 32;
dst_argb += 16;
}
}
void ScaleARGBRowDown2Linear_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width) {
int x;
int len = dst_width / 4;
(void)src_stride;
__m128i src0, src1, tmp0, tmp1, dst0;
for (x = 0; x < len; x++) {
DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
tmp0 = __lsx_vpickev_w(src1, src0);
tmp1 = __lsx_vpickod_w(src1, src0);
dst0 = __lsx_vavgr_bu(tmp1, tmp0);
__lsx_vst(dst0, dst_argb, 0);
src_argb += 32;
dst_argb += 16;
}
}
// 2x2 box-filter ARGB downsample: each destination pixel is the rounded
// per-channel average of a 2x2 source block spanning this row and the
// next (src_argb + src_stride). Emits 4 destination pixels (reads 8
// source pixels from each of the two rows) per iteration; caller
// guarantees dst_width is a multiple of 4.
void ScaleARGBRowDown2Box_LSX(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              uint8_t* dst_argb,
                              int dst_width) {
  int x;
  int len = dst_width / 4;
  const uint8_t* s = src_argb;               // top row
  const uint8_t* t = src_argb + src_stride;  // bottom row
  __m128i src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3, dst0;
  __m128i reg0, reg1, reg2, reg3;
  // Byte shuffle (indices 0,4,1,5,2,6,3,7,...) that interleaves the same
  // channel of two horizontally adjacent pixels into adjacent bytes, so a
  // horizontal byte-pair add sums each 1x2 pixel pair per channel.
  __m128i shuff = {0x0703060205010400, 0x0F0B0E0A0D090C08};
  for (x = 0; x < len; x++) {
    // Load 8 ARGB pixels from each of the two rows.
    DUP2_ARG2(__lsx_vld, s, 0, s, 16, src0, src1);
    DUP2_ARG2(__lsx_vld, t, 0, t, 16, src2, src3);
    // Pair up same-channel bytes of horizontally adjacent pixels.
    DUP4_ARG3(__lsx_vshuf_b, src0, src0, shuff, src1, src1, shuff, src2, src2,
              shuff, src3, src3, shuff, tmp0, tmp1, tmp2, tmp3);
    // Widening horizontal add: 16-bit per-channel sums of each 1x2 pair.
    DUP4_ARG2(__lsx_vhaddw_hu_bu, tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, tmp3,
              tmp3, reg0, reg1, reg2, reg3);
    // Add top-row and bottom-row pair sums (saturating add; the sum of
    // four bytes cannot exceed 16 bits, so saturation never triggers).
    DUP2_ARG2(__lsx_vsadd_hu, reg0, reg2, reg1, reg3, reg0, reg1);
    // Shift right by 2 with rounding (divide 2x2 sum by 4) and narrow
    // back to bytes.
    dst0 = __lsx_vsrarni_b_h(reg1, reg0, 2);
    __lsx_vst(dst0, dst_argb, 0);
    s += 32;
    t += 32;
    dst_argb += 16;
  }
}
// Point-sample an ARGB row: copy every src_stepx-th pixel to dst_argb,
// four output pixels per iteration.
void ScaleARGBRowDownEven_LSX(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              int32_t src_stepx,
                              uint8_t* dst_argb,
                              int dst_width) {
  int i = dst_width / 4;
  const int32_t step_bytes = src_stepx << 2;  // pixel step -> byte step
  __m128i px0, px1, px2, px3;
  (void)src_stride;

  while (i--) {
    // Gather four 32-bit pixels, each src_stepx pixels apart.
    px0 = __lsx_vldrepl_w(src_argb, 0);
    src_argb += step_bytes;
    px1 = __lsx_vldrepl_w(src_argb, 0);
    src_argb += step_bytes;
    px2 = __lsx_vldrepl_w(src_argb, 0);
    src_argb += step_bytes;
    px3 = __lsx_vldrepl_w(src_argb, 0);
    src_argb += step_bytes;
    // Store them contiguously.
    __lsx_vstelm_w(px0, dst_argb, 0, 0);
    __lsx_vstelm_w(px1, dst_argb, 4, 0);
    __lsx_vstelm_w(px2, dst_argb, 8, 0);
    __lsx_vstelm_w(px3, dst_argb, 12, 0);
    dst_argb += 16;
  }
}
// Sample every src_stepx-th ARGB pixel with a 2x2 box filter: each output
// pixel is the rounded average of the sampled pixel, its right neighbor,
// and the same two pixels in the row below.
void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 int src_stepx,
                                 uint8_t* dst_argb,
                                 int dst_width) {
  int x;
  int len = dst_width / 4;
  int32_t stepx = src_stepx * 4;  // pixel step in bytes
  const uint8_t* next_argb = src_argb + src_stride;
  __m128i src0, src1, src2, src3;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, dst0;
  for (x = 0; x < len; x++) {
    // Each vldrepl_d fetches one horizontal pixel pair (8 bytes);
    // four pairs per row, from the current and the next row.
    tmp0 = __lsx_vldrepl_d(src_argb, 0);
    src_argb += stepx;
    tmp1 = __lsx_vldrepl_d(src_argb, 0);
    src_argb += stepx;
    tmp2 = __lsx_vldrepl_d(src_argb, 0);
    src_argb += stepx;
    tmp3 = __lsx_vldrepl_d(src_argb, 0);
    src_argb += stepx;
    tmp4 = __lsx_vldrepl_d(next_argb, 0);
    next_argb += stepx;
    tmp5 = __lsx_vldrepl_d(next_argb, 0);
    next_argb += stepx;
    tmp6 = __lsx_vldrepl_d(next_argb, 0);
    next_argb += stepx;
    tmp7 = __lsx_vldrepl_d(next_argb, 0);
    next_argb += stepx;
    // Pack the pairs, then sum matching channels of the 2x2 block via
    // widening even/odd adds; finally /4 with rounding.
    DUP4_ARG2(__lsx_vilvl_d, tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, tmp7, tmp6,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vaddwev_h_bu, src0, src2, src1, src3, tmp0, tmp2);
    DUP2_ARG2(__lsx_vaddwod_h_bu, src0, src2, src1, src3, tmp1, tmp3);
    DUP2_ARG2(__lsx_vpackev_w, tmp1, tmp0, tmp3, tmp2, reg0, reg1);
    DUP2_ARG2(__lsx_vpackod_w, tmp1, tmp0, tmp3, tmp2, tmp4, tmp5);
    DUP2_ARG2(__lsx_vadd_h, reg0, tmp4, reg1, tmp5, reg0, reg1);
    dst0 = __lsx_vsrarni_b_h(reg1, reg0, 2);
    // Restore channel ordering disturbed by the even/odd packing.
    dst0 = __lsx_vshuf4i_b(dst0, 0xD8);
    __lsx_vst(dst0, dst_argb, 0);
    dst_argb += 16;
  }
}
// Halve a planar row by keeping every second byte (no filtering).
// Consumes 64 source bytes per iteration, producing 32.
void ScaleRowDown2_LSX(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst,
                       int dst_width) {
  int i;
  __m128i in0, in1, in2, in3, out0, out1;
  (void)src_stride;

  for (i = dst_width / 32; i > 0; i--) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              in0, in1, in2, in3);
    // Keep the odd-indexed bytes of each pair.
    DUP2_ARG2(__lsx_vpickod_b, in1, in0, in3, in2, out0, out1);
    __lsx_vst(out0, dst, 0);
    __lsx_vst(out1, dst, 16);
    src_ptr += 64;
    dst += 32;
  }
}
// Halve a planar row with linear filtering: each output byte is the
// rounded average of one adjacent source byte pair.
void ScaleRowDown2Linear_LSX(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint8_t* dst,
                             int dst_width) {
  int i;
  __m128i in0, in1, in2, in3;
  __m128i ev0, ev1, od0, od1, out0, out1;
  (void)src_stride;

  for (i = dst_width / 32; i > 0; i--) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              in0, in1, in2, in3);
    // Separate even/odd bytes of each pair, then round-average them.
    DUP2_ARG2(__lsx_vpickev_b, in1, in0, in3, in2, ev0, ev1);
    DUP2_ARG2(__lsx_vpickod_b, in1, in0, in3, in2, od0, od1);
    DUP2_ARG2(__lsx_vavgr_bu, ev0, od0, ev1, od1, out0, out1);
    __lsx_vst(out0, dst, 0);
    __lsx_vst(out1, dst, 16);
    src_ptr += 64;
    dst += 32;
  }
}
// Halve a planar row with a 2x2 box filter: each output byte is the
// rounded average of 2 adjacent source bytes and the 2 bytes directly
// below them in the next row.
void ScaleRowDown2Box_LSX(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst,
                          int dst_width) {
  int x;
  int len = dst_width / 32;
  const uint8_t* src_nex = src_ptr + src_stride;  // row below
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i dst0, dst1;
  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, src_nex, 0, src_nex, 16, src_nex, 32, src_nex, 48,
              src4, src5, src6, src7);
    // Widening adds combine the two rows: even lanes and odd lanes
    // separately, yielding per-column 16-bit vertical sums.
    DUP4_ARG2(__lsx_vaddwev_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp0, tmp2, tmp4, tmp6);
    DUP4_ARG2(__lsx_vaddwod_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp1, tmp3, tmp5, tmp7);
    // Add the horizontal neighbor, then /4 with rounding and narrow.
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
              tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG3(__lsx_vsrarni_b_h, tmp1, tmp0, 2, tmp3, tmp2, 2, dst0, dst1);
    __lsx_vst(dst0, dst, 0);
    __lsx_vst(dst1, dst, 16);
    src_ptr += 64;
    src_nex += 64;
    dst += 32;
  }
}
// Quarter a planar row by point sampling: two pick passes keep every
// 4th source byte (64 source bytes -> 16 destination bytes).
void ScaleRowDown4_LSX(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst,
                       int dst_width) {
  int i;
  __m128i in0, in1, in2, in3, half0, half1, out;
  (void)src_stride;

  for (i = dst_width / 16; i > 0; i--) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              in0, in1, in2, in3);
    // First keep even bytes (every 2nd), then odd bytes of that result.
    DUP2_ARG2(__lsx_vpickev_b, in1, in0, in3, in2, half0, half1);
    out = __lsx_vpickod_b(half1, half0);
    __lsx_vst(out, dst, 0);
    src_ptr += 64;
    dst += 16;
  }
}
// Quarter a planar row with a 4x4 box filter: each output byte is the
// rounded average of a 4x4 block across this row and the next three.
void ScaleRowDown4Box_LSX(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst,
                          int dst_width) {
  int x;
  int len = dst_width / 16;
  // The three rows below the current one.
  const uint8_t* ptr1 = src_ptr + src_stride;
  const uint8_t* ptr2 = ptr1 + src_stride;
  const uint8_t* ptr3 = ptr2 + src_stride;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, dst0;
  for (x = 0; x < len; x++) {
    // Rows 0+1: widening even/odd adds give 16-bit vertical sums.
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, ptr1, 0, ptr1, 16, ptr1, 32, ptr1, 48, src4, src5,
              src6, src7);
    DUP4_ARG2(__lsx_vaddwev_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp0, tmp2, tmp4, tmp6);
    DUP4_ARG2(__lsx_vaddwod_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp1, tmp3, tmp5, tmp7);
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
              reg0, reg1, reg2, reg3);
    // Rows 2+3: same reduction.
    DUP4_ARG2(__lsx_vld, ptr2, 0, ptr2, 16, ptr2, 32, ptr2, 48, src0, src1,
              src2, src3);
    DUP4_ARG2(__lsx_vld, ptr3, 0, ptr3, 16, ptr3, 32, ptr3, 48, src4, src5,
              src6, src7);
    DUP4_ARG2(__lsx_vaddwev_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp0, tmp2, tmp4, tmp6);
    DUP4_ARG2(__lsx_vaddwod_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp1, tmp3, tmp5, tmp7);
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
              reg4, reg5, reg6, reg7);
    // Combine all four rows, finish the horizontal 4-wide sum, then
    // /16 with rounding (srarni by 4) and narrow back to bytes.
    DUP4_ARG2(__lsx_vadd_h, reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
              reg0, reg1, reg2, reg3);
    DUP4_ARG2(__lsx_vhaddw_wu_hu, reg0, reg0, reg1, reg1, reg2, reg2, reg3,
              reg3, reg0, reg1, reg2, reg3);
    DUP2_ARG3(__lsx_vsrarni_h_w, reg1, reg0, 4, reg3, reg2, 4, tmp0, tmp1);
    dst0 = __lsx_vpickev_b(tmp1, tmp0);
    __lsx_vst(dst0, dst, 0);
    src_ptr += 64;
    ptr1 += 64;
    ptr2 += 64;
    ptr3 += 64;
    dst += 16;
  }
}
// Scale a row to 3/8 width by point sampling: a byte gather selects
// 12 of every 32 source bytes. dst_width must be a multiple of 3.
void ScaleRowDown38_LSX(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  int i;
  __m128i in0, in1, picked;
  // Gather pattern: 12 sample positions spread over 32 input bytes.
  __m128i shuff = {0x13100E0B08060300, 0x000000001E1B1816};
  (void)src_stride;
  assert(dst_width % 3 == 0);

  for (i = dst_width / 12; i > 0; i--) {
    DUP2_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, in0, in1);
    picked = __lsx_vshuf_b(in1, in0, shuff);
    // Store 8 bytes then the remaining 4.
    __lsx_vstelm_d(picked, dst, 0, 0);
    __lsx_vstelm_w(picked, dst, 8, 2);
    src_ptr += 32;
    dst += 12;
  }
}
// Scale to 3/8 width while box-filtering two rows: each output byte is
// the mean of a (mostly 3-wide) column group spanning this row and the
// row below. dst_width must be a positive multiple of 3.
void ScaleRowDown38_2_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              int dst_width) {
  int x, len;
  const uint8_t* src_nex = src_ptr + src_stride;  // row below
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3;
  // Gather that assembles the 12 output bytes from the two partial results.
  __m128i shuff = {0x0A08160604120200, 0x000000001E0E0C1A};
  // 0x2AAA ~= 65536/6: vmuh by it divides a six-sample sum by 6.
  __m128i const_0x2AAA = __lsx_vreplgr2vr_h(0x2AAA);
  // Scales the four-sample (2x2) sums; the shuffle above then picks the
  // byte that corresponds to sum/4.
  __m128i const_0x4000 = __lsx_vreplgr2vr_w(0x4000);
  assert((dst_width % 3 == 0) && (dst_width > 0));
  len = dst_width / 12;
  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_nex, 0, src_nex, 16, src0,
              src1, src2, src3);
    // Vertical sums of the two rows (even and odd columns separately).
    DUP2_ARG2(__lsx_vaddwev_h_bu, src0, src2, src1, src3, tmp0, tmp2);
    DUP2_ARG2(__lsx_vaddwod_h_bu, src0, src2, src1, src3, tmp1, tmp3);
    DUP2_ARG2(__lsx_vpickev_h, tmp2, tmp0, tmp3, tmp1, reg0, reg1);
    DUP2_ARG2(__lsx_vpackod_h, tmp1, tmp0, tmp3, tmp2, reg2, reg3);
    tmp4 = __lsx_vpickev_w(reg3, reg2);
    // Three-column groups: sum of 6 samples, then /6 via 0x2AAA.
    tmp5 = __lsx_vadd_h(reg0, reg1);
    tmp6 = __lsx_vadd_h(tmp5, tmp4);
    tmp7 = __lsx_vmuh_h(tmp6, const_0x2AAA);
    // Trailing two-column group: sum of 4 samples, scaled by 0x4000.
    tmp0 = __lsx_vpickod_w(reg3, reg2);
    tmp1 = __lsx_vhaddw_wu_hu(tmp0, tmp0);
    tmp2 = __lsx_vmul_w(tmp1, const_0x4000);
    dst0 = __lsx_vshuf_b(tmp2, tmp7, shuff);
    __lsx_vstelm_d(dst0, dst_ptr, 0, 0);
    __lsx_vstelm_w(dst0, dst_ptr, 8, 2);
    src_ptr += 32;
    src_nex += 32;
    dst_ptr += 12;
  }
}
// Scale to 3/8 width while box-filtering three rows: each output byte is
// the mean of a (mostly 3-wide) column group spanning this row and the
// two rows below. dst_width must be a positive multiple of 3.
void ScaleRowDown38_3_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              int dst_width) {
  int x, len;
  const uint8_t* ptr1 = src_ptr + src_stride;  // second row
  const uint8_t* ptr2 = ptr1 + src_stride;     // third row
  __m128i src0, src1, src2, src3, src4, src5;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, dst0;
  __m128i zero = __lsx_vldi(0);
  // Gather that assembles the 12 output bytes from the two partial results.
  __m128i shuff = {0x0A08160604120200, 0x000000001E0E0C1A};
  // 0x1C71 ~= 65536/9: vmuh by it divides a nine-sample (3x3) sum by 9.
  __m128i const_0x1C71 = __lsx_vreplgr2vr_h(0x1C71);
  // 0x2AAA ~= 65536/6: scales the six-sample (2x3) sums for the last group.
  __m128i const_0x2AAA = __lsx_vreplgr2vr_w(0x2AAA);
  assert((dst_width % 3 == 0) && (dst_width > 0));
  len = dst_width / 12;
  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, ptr1, 0, ptr1, 16, src0, src1,
              src2, src3);
    DUP2_ARG2(__lsx_vld, ptr2, 0, ptr2, 16, src4, src5);
    // Vertical sums of rows 0+1, then zero-extend row 2 and add it.
    DUP2_ARG2(__lsx_vaddwev_h_bu, src0, src2, src1, src3, tmp0, tmp2);
    DUP2_ARG2(__lsx_vaddwod_h_bu, src0, src2, src1, src3, tmp1, tmp3);
    DUP2_ARG2(__lsx_vpackev_b, zero, src4, zero, src5, tmp4, tmp6);
    DUP2_ARG2(__lsx_vpackod_b, zero, src4, zero, src5, tmp5, tmp7);
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp4, tmp1, tmp5, tmp2, tmp6, tmp3, tmp7,
              tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG2(__lsx_vpickev_h, tmp2, tmp0, tmp3, tmp1, reg0, reg1);
    DUP2_ARG2(__lsx_vpackod_h, tmp1, tmp0, tmp3, tmp2, reg2, reg3);
    tmp4 = __lsx_vpickev_w(reg3, reg2);
    // Three-column groups: nine-sample sum, divided by 9 via 0x1C71.
    tmp5 = __lsx_vadd_h(reg0, reg1);
    tmp6 = __lsx_vadd_h(tmp5, tmp4);
    tmp7 = __lsx_vmuh_h(tmp6, const_0x1C71);
    // Trailing two-column group: six-sample sum, scaled by 0x2AAA.
    tmp0 = __lsx_vpickod_w(reg3, reg2);
    tmp1 = __lsx_vhaddw_wu_hu(tmp0, tmp0);
    tmp2 = __lsx_vmul_w(tmp1, const_0x2AAA);
    dst0 = __lsx_vshuf_b(tmp2, tmp7, shuff);
    __lsx_vstelm_d(dst0, dst_ptr, 0, 0);
    __lsx_vstelm_w(dst0, dst_ptr, 8, 2);
    src_ptr += 32;
    ptr1 += 32;
    ptr2 += 32;
    dst_ptr += 12;
  }
}
// Accumulate a row of bytes into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i], 16 elements per iteration.
void ScaleAddRow_LSX(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int i;
  __m128i bytes, lo, hi, acc_lo, acc_hi;
  __m128i zero = __lsx_vldi(0);
  assert(src_width > 0);

  for (i = src_width / 16; i > 0; i--) {
    bytes = __lsx_vld(src_ptr, 0);
    DUP2_ARG2(__lsx_vld, dst_ptr, 0, dst_ptr, 16, acc_lo, acc_hi);
    // Zero-extend the low/high byte halves to 16-bit lanes.
    lo = __lsx_vilvl_b(zero, bytes);
    hi = __lsx_vilvh_b(zero, bytes);
    DUP2_ARG2(__lsx_vadd_h, acc_lo, lo, acc_hi, hi, acc_lo, acc_hi);
    __lsx_vst(acc_lo, dst_ptr, 0);
    __lsx_vst(acc_hi, dst_ptr, 16);
    src_ptr += 16;
    dst_ptr += 16;
  }
}
// Horizontal bilinear scaling of a byte row. The source position is
// tracked in 16.16 fixed point, starting at x and advancing by dx per
// output pixel: dst[i] = src[xi] + ((src[xi+1] - src[xi]) * frac >> 7)
// with rounding, where frac is the top 7 bits of the fraction.
void ScaleFilterCols_LSX(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x,
                         int dx) {
  int j;
  int len = dst_width / 16;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i vec0, vec1, dst0;
  __m128i vec_x = __lsx_vreplgr2vr_w(x);
  __m128i vec_dx = __lsx_vreplgr2vr_w(dx);
  __m128i const1 = __lsx_vreplgr2vr_w(0xFFFF);  // fraction mask
  __m128i const2 = __lsx_vreplgr2vr_w(0x40);    // rounding bias for >> 7
  __m128i const_tmp = {0x0000000100000000, 0x0000000300000002};
  // vec_x holds {x, x+dx, x+2dx, x+3dx}; vec1 advances all lanes by 4*dx.
  vec0 = __lsx_vmul_w(vec_dx, const_tmp);
  vec1 = __lsx_vslli_w(vec_dx, 2);
  vec_x = __lsx_vadd_w(vec_x, vec0);
  for (j = 0; j < len; j++) {
    // Split each of the 16 positions into integer index (tmpN) and
    // 16-bit fraction (tmpN+4), four positions at a time.
    tmp0 = __lsx_vsrai_w(vec_x, 16);
    tmp4 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp1 = __lsx_vsrai_w(vec_x, 16);
    tmp5 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp2 = __lsx_vsrai_w(vec_x, 16);
    tmp6 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp3 = __lsx_vsrai_w(vec_x, 16);
    tmp7 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    // Reduce the 16-bit fraction to a 7-bit blend weight.
    DUP4_ARG2(__lsx_vsrai_w, tmp4, 9, tmp5, 9, tmp6, 9, tmp7, 9, tmp4, tmp5,
              tmp6, tmp7);
    // Gather src[xi] and src[xi + 1] for all 16 positions.
    LOAD_DATA(src_ptr, tmp0, reg0);
    LOAD_DATA(src_ptr, tmp1, reg1);
    LOAD_DATA(src_ptr, tmp2, reg2);
    LOAD_DATA(src_ptr, tmp3, reg3);
    DUP4_ARG2(__lsx_vaddi_wu, tmp0, 1, tmp1, 1, tmp2, 1, tmp3, 1, tmp0, tmp1,
              tmp2, tmp3);
    LOAD_DATA(src_ptr, tmp0, reg4);
    LOAD_DATA(src_ptr, tmp1, reg5);
    LOAD_DATA(src_ptr, tmp2, reg6);
    LOAD_DATA(src_ptr, tmp3, reg7);
    // Linear interpolation: a + (((b - a) * weight + 64) >> 7).
    DUP4_ARG2(__lsx_vsub_w, reg4, reg0, reg5, reg1, reg6, reg2, reg7, reg3,
              reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vmul_w, reg4, tmp4, reg5, tmp5, reg6, tmp6, reg7, tmp7,
              reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vadd_w, reg4, const2, reg5, const2, reg6, const2, reg7,
              const2, reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vsrai_w, reg4, 7, reg5, 7, reg6, 7, reg7, 7, reg4, reg5,
              reg6, reg7);
    DUP4_ARG2(__lsx_vadd_w, reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
              reg0, reg1, reg2, reg3);
    // Narrow the 16 results back to bytes and store.
    DUP2_ARG2(__lsx_vpickev_h, reg1, reg0, reg3, reg2, tmp0, tmp1);
    dst0 = __lsx_vpickev_b(tmp1, tmp0);
    __lsx_vst(dst0, dst_ptr, 0);
    dst_ptr += 16;
  }
}
// Point-sample ARGB pixels at 16.16 fixed-point positions x, x+dx,
// x+2dx, ...: dst[i] = src[(x + i*dx) >> 16], four pixels per iteration.
void ScaleARGBCols_LSX(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x,
                       int dx) {
  const uint32_t* src = (const uint32_t*)src_argb;
  uint32_t* dst = (uint32_t*)dst_argb;
  int i;
  __m128i lane_offs, step4, idx, px;
  __m128i vec_x = __lsx_vreplgr2vr_w(x);
  __m128i vec_dx = __lsx_vreplgr2vr_w(dx);
  __m128i lane_mul = {0x0000000100000000, 0x0000000300000002};

  // vec_x holds {x, x+dx, x+2dx, x+3dx}; step4 advances every lane by 4*dx.
  lane_offs = __lsx_vmul_w(vec_dx, lane_mul);
  step4 = __lsx_vslli_w(vec_dx, 2);
  vec_x = __lsx_vadd_w(vec_x, lane_offs);
  for (i = dst_width / 4; i > 0; i--) {
    idx = __lsx_vsrai_w(vec_x, 16);  // integer part of the position
    vec_x = __lsx_vadd_w(vec_x, step4);
    LOAD_DATA(src, idx, px);
    __lsx_vst(px, dst, 0);
    dst += 4;
  }
}
// Horizontal bilinear scaling of ARGB pixels. Positions advance in 16.16
// fixed point by dx; each output pixel blends src[xi] and src[xi+1] with
// a 7-bit weight taken from the position's fraction.
void ScaleARGBFilterCols_LSX(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x,
                             int dx) {
  const uint32_t* src = (const uint32_t*)src_argb;
  int j;
  int len = dst_width / 8;
  __m128i src0, src1, src2, src3;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i vec0, vec1, dst0, dst1;
  __m128i vec_x = __lsx_vreplgr2vr_w(x);
  __m128i vec_dx = __lsx_vreplgr2vr_w(dx);
  __m128i const_tmp = {0x0000000100000000, 0x0000000300000002};
  __m128i const_7f = __lsx_vldi(0x7F);  // 7-bit weight mask
  // vec_x holds {x, x+dx, x+2dx, x+3dx}; vec1 advances all lanes by 4*dx.
  vec0 = __lsx_vmul_w(vec_dx, const_tmp);
  vec1 = __lsx_vslli_w(vec_dx, 2);
  vec_x = __lsx_vadd_w(vec_x, vec0);
  for (j = 0; j < len; j++) {
    // Integer indices (tmp0/tmp1) and 7-bit weights (reg0/reg1) for
    // eight output pixels.
    tmp0 = __lsx_vsrai_w(vec_x, 16);
    reg0 = __lsx_vsrai_w(vec_x, 9);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp1 = __lsx_vsrai_w(vec_x, 16);
    reg1 = __lsx_vsrai_w(vec_x, 9);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    // Broadcast each weight to all 4 channel bytes; XOR with 0x7F forms
    // the complementary weight (127 - w) for the left pixel.
    DUP2_ARG2(__lsx_vand_v, reg0, const_7f, reg1, const_7f, reg0, reg1);
    DUP2_ARG2(__lsx_vshuf4i_b, reg0, 0, reg1, 0, reg0, reg1);
    DUP2_ARG2(__lsx_vxor_v, reg0, const_7f, reg1, const_7f, reg2, reg3);
    DUP2_ARG2(__lsx_vilvl_b, reg0, reg2, reg1, reg3, reg4, reg6);
    DUP2_ARG2(__lsx_vilvh_b, reg0, reg2, reg1, reg3, reg5, reg7);
    // Gather src[xi] and src[xi + 1].
    LOAD_DATA(src, tmp0, src0);
    LOAD_DATA(src, tmp1, src1);
    DUP2_ARG2(__lsx_vaddi_wu, tmp0, 1, tmp1, 1, tmp0, tmp1);
    LOAD_DATA(src, tmp0, src2);
    LOAD_DATA(src, tmp1, src3);
    // Interleave pixel pairs with weight pairs, blend with a widening
    // dot product, then >> 7 with narrowing to bytes.
    DUP2_ARG2(__lsx_vilvl_b, src2, src0, src3, src1, tmp4, tmp6);
    DUP2_ARG2(__lsx_vilvh_b, src2, src0, src3, src1, tmp5, tmp7);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp4, reg4, tmp5, reg5, tmp6, reg6, tmp7, reg7,
              tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG3(__lsx_vsrani_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, dst0, dst1);
    __lsx_vst(dst0, dst_argb, 0);
    __lsx_vst(dst1, dst_argb, 16);
    dst_argb += 32;
  }
}
// Scale a row to 3/4 width by point sampling: byte gathers keep 3 of
// every 4 source bytes (64 source bytes -> 48 destination bytes).
void ScaleRowDown34_LSX(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  int i;
  __m128i in0, in1, in2, in3;
  __m128i out0, out1, out2;
  // Gather tables spanning adjacent register pairs of the 64-byte group.
  __m128i shuff0 = {0x0908070504030100, 0x141311100F0D0C0B};
  __m128i shuff1 = {0x0F0D0C0B09080705, 0x1918171514131110};
  __m128i shuff2 = {0x141311100F0D0C0B, 0x1F1D1C1B19181715};
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));

  for (i = 0; i < dst_width; i += 48) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              in0, in1, in2, in3);
    DUP2_ARG3(__lsx_vshuf_b, in1, in0, shuff0, in2, in1, shuff1, out0, out1);
    out2 = __lsx_vshuf_b(in3, in2, shuff2);
    __lsx_vst(out0, dst, 0);
    __lsx_vst(out1, dst, 16);
    __lsx_vst(out2, dst, 32);
    src_ptr += 64;
    dst += 48;
  }
}
// Scale to 3/4 width with filtering, blending two rows with 3:1 weights
// (the final add of 2*row0_result + row0_result ... + row1_result, then
// /4 with rounding, favors the current row 3x over the next).
void ScaleRowDown34_0_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* d,
                              int dst_width) {
  const uint8_t* src_nex = src_ptr + src_stride;  // row below
  int x;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
  __m128i tmp10, tmp11, dst0, dst1, dst2;
  // Per-byte horizontal filter taps (1/3 weights in rotating phase).
  __m128i const0 = {0x0103030101010103, 0x0101010303010101};
  __m128i const1 = {0x0301010101030301, 0x0103030101010103};
  __m128i const2 = {0x0101010303010101, 0x0301010101030301};
  // Expand every 4-byte group into 8 bytes (duplicating the middle taps)
  // so the dot product can apply the filter.
  __m128i shuff0 = {0x0504030202010100, 0x0A09090807060605};
  __m128i shuff1 = {0x0F0E0E0D0D0C0B0A, 0x1514131212111110};
  __m128i shuff2 = {0x0A09090807060605, 0x0F0E0E0D0D0C0B0A};
  // Per-lane rounding shifts matching the rotating tap phase.
  __m128i shift0 = {0x0002000200010002, 0x0001000200020001};
  __m128i shift1 = {0x0002000100020002, 0x0002000200010002};
  __m128i shift2 = {0x0001000200020001, 0x0002000100020002};
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 48) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, src_nex, 0, src_nex, 16, src_nex, 32, src_nex, 48,
              src4, src5, src6, src7);
    // Expand both rows into overlapping byte pairs.
    DUP4_ARG3(__lsx_vshuf_b, src0, src0, shuff0, src1, src0, shuff1, src1, src1,
              shuff2, src2, src2, shuff0, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG3(__lsx_vshuf_b, src3, src2, shuff1, src3, src3, shuff2, src4, src4,
              shuff0, src5, src4, shuff1, tmp4, tmp5, tmp6, tmp7);
    DUP4_ARG3(__lsx_vshuf_b, src5, src5, shuff2, src6, src6, shuff0, src7, src6,
              shuff1, src7, src7, shuff2, tmp8, tmp9, tmp10, tmp11);
    // Horizontal filter: widening dot products with the tap vectors.
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp0, const0, tmp1, const1, tmp2, const2, tmp3,
              const0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp4, const1, tmp5, const2, tmp6, const0, tmp7,
              const1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp8, const2, tmp9, const0, tmp10, const1, tmp11,
              const2, tmp0, tmp1, tmp2, tmp3);
    // Normalize each lane by its phase-dependent rounding shift.
    DUP4_ARG2(__lsx_vsrar_h, src0, shift0, src1, shift1, src2, shift2, src3,
              shift0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsrar_h, src4, shift1, src5, shift2, src6, shift0, src7,
              shift1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsrar_h, tmp0, shift2, tmp1, shift0, tmp2, shift1, tmp3,
              shift2, tmp0, tmp1, tmp2, tmp3);
    // Vertical blend: 3 * current-row result + next-row result, then /4.
    DUP4_ARG2(__lsx_vslli_h, src0, 1, src1, 1, src2, 1, src3, 1, tmp5, tmp6,
              tmp7, tmp8);
    DUP2_ARG2(__lsx_vslli_h, src4, 1, src5, 1, tmp9, tmp10);
    DUP4_ARG2(__lsx_vadd_h, src0, tmp5, src1, tmp6, src2, tmp7, src3, tmp8,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vadd_h, src4, tmp9, src5, tmp10, src4, src5);
    DUP4_ARG2(__lsx_vadd_h, src0, src6, src1, src7, src2, tmp0, src3, tmp1,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vadd_h, src4, tmp2, src5, tmp3, src4, src5);
    DUP2_ARG3(__lsx_vsrarni_b_h, src1, src0, 2, src3, src2, 2, dst0, dst1);
    dst2 = __lsx_vsrarni_b_h(src5, src4, 2);
    __lsx_vst(dst0, d, 0);
    __lsx_vst(dst1, d, 16);
    __lsx_vst(dst2, d, 32);
    src_ptr += 64;
    src_nex += 64;
    d += 48;
  }
}
// Scale to 3/4 width with filtering, blending two rows with equal 1:1
// weights (final add of the two rows' filtered results, then /2 with
// rounding). Same horizontal filter as ScaleRowDown34_0_Box_LSX.
void ScaleRowDown34_1_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* d,
                              int dst_width) {
  const uint8_t* src_nex = src_ptr + src_stride;  // row below
  int x;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
  __m128i tmp10, tmp11, dst0, dst1, dst2;
  // Per-byte horizontal filter taps (rotating phase).
  __m128i const0 = {0x0103030101010103, 0x0101010303010101};
  __m128i const1 = {0x0301010101030301, 0x0103030101010103};
  __m128i const2 = {0x0101010303010101, 0x0301010101030301};
  // Expand every 4-byte group into 8 overlapping bytes for the dot product.
  __m128i shuff0 = {0x0504030202010100, 0x0A09090807060605};
  __m128i shuff1 = {0x0F0E0E0D0D0C0B0A, 0x1514131212111110};
  __m128i shuff2 = {0x0A09090807060605, 0x0F0E0E0D0D0C0B0A};
  // Per-lane rounding shifts matching the tap phase.
  __m128i shift0 = {0x0002000200010002, 0x0001000200020001};
  __m128i shift1 = {0x0002000100020002, 0x0002000200010002};
  __m128i shift2 = {0x0001000200020001, 0x0002000100020002};
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 48) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, src_nex, 0, src_nex, 16, src_nex, 32, src_nex, 48,
              src4, src5, src6, src7);
    // Expand both rows into overlapping byte pairs.
    DUP4_ARG3(__lsx_vshuf_b, src0, src0, shuff0, src1, src0, shuff1, src1, src1,
              shuff2, src2, src2, shuff0, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG3(__lsx_vshuf_b, src3, src2, shuff1, src3, src3, shuff2, src4, src4,
              shuff0, src5, src4, shuff1, tmp4, tmp5, tmp6, tmp7);
    DUP4_ARG3(__lsx_vshuf_b, src5, src5, shuff2, src6, src6, shuff0, src7, src6,
              shuff1, src7, src7, shuff2, tmp8, tmp9, tmp10, tmp11);
    // Horizontal filter via widening dot products.
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp0, const0, tmp1, const1, tmp2, const2, tmp3,
              const0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp4, const1, tmp5, const2, tmp6, const0, tmp7,
              const1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp8, const2, tmp9, const0, tmp10, const1, tmp11,
              const2, tmp0, tmp1, tmp2, tmp3);
    // Normalize each lane by its phase-dependent rounding shift.
    DUP4_ARG2(__lsx_vsrar_h, src0, shift0, src1, shift1, src2, shift2, src3,
              shift0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsrar_h, src4, shift1, src5, shift2, src6, shift0, src7,
              shift1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsrar_h, tmp0, shift2, tmp1, shift0, tmp2, shift1, tmp3,
              shift2, tmp0, tmp1, tmp2, tmp3);
    // Vertical blend: current row + next row, then /2 with rounding.
    DUP4_ARG2(__lsx_vadd_h, src0, src6, src1, src7, src2, tmp0, src3, tmp1,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vadd_h, src4, tmp2, src5, tmp3, src4, src5);
    DUP2_ARG3(__lsx_vsrarni_b_h, src1, src0, 1, src3, src2, 1, dst0, dst1);
    dst2 = __lsx_vsrarni_b_h(src5, src4, 1);
    __lsx_vst(dst0, d, 0);
    __lsx_vst(dst1, d, 16);
    __lsx_vst(dst2, d, 32);
    src_ptr += 64;
    src_nex += 64;
    d += 48;
  }
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)

View File

@ -0,0 +1,66 @@
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h" /* For FilterMode */
#include <assert.h>
#include <string.h>
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_rgb.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Scale a 24 bit (RGB24) image.
// Converts to ARGB as an intermediate step: RGB24 -> ARGB, scale in
// ARGB space, ARGB -> RGB24.
// Returns 0 on success; nonzero on invalid arguments, oversized frames,
// allocation failure, or a failing conversion/scale step.
LIBYUV_API
int RGBScale(const uint8_t* src_rgb,
             int src_stride_rgb,
             int src_width,
             int src_height,
             uint8_t* dst_rgb,
             int dst_stride_rgb,
             int dst_width,
             int dst_height,
             enum FilterMode filtering) {
  int r;
  int64_t src_px;
  int64_t dst_px;
  uint8_t* src_argb;
  uint8_t* dst_argb;
  if (!src_rgb || !dst_rgb || src_width <= 0 || src_height <= 0 ||
      dst_width <= 0 || dst_height <= 0) {
    return 1;
  }
  // Size the scratch buffers in 64-bit arithmetic. The previous int
  // expression src_width * src_height * 4 overflowed (undefined
  // behavior, undersized allocation) for frames near 23170x23170.
  src_px = (int64_t)src_width * src_height;
  dst_px = (int64_t)dst_width * dst_height;
  // 0x1FFFFFFF pixels * 4 bytes == INT_MAX; larger totals previously
  // overflowed signed int, so reject them explicitly.
  if (src_px + dst_px > (int64_t)0x1FFFFFFF) {
    return 1;
  }
  // One allocation: ARGB copy of the source followed by the scaled ARGB.
  src_argb = (uint8_t*)malloc((size_t)((src_px + dst_px) * 4));
  if (!src_argb) {
    return 1;
  }
  dst_argb = src_argb + src_px * 4;
  r = RGB24ToARGB(src_rgb, src_stride_rgb, src_argb, src_width * 4, src_width,
                  src_height);
  if (!r) {
    r = ARGBScale(src_argb, src_width * 4, src_width, src_height, dst_argb,
                  dst_width * 4, dst_width, dst_height, filtering);
    if (!r) {
      r = ARGBToRGB24(dst_argb, dst_width * 4, dst_rgb, dst_stride_rgb,
                      dst_width, dst_height);
    }
  }
  free(src_argb);
  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
} else {
src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@ -147,22 +147,6 @@ static void ScaleUVDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEUVROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleUVRowDown2 =
filtering == kFilterNone
? ScaleUVRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MMI
: ScaleUVRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 2)) {
ScaleUVRowDown2 =
filtering == kFilterNone
? ScaleUVRowDown2_MMI
: (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MMI
: ScaleUVRowDown2Box_MMI);
}
}
#endif
#if defined(HAS_SCALEUVROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleUVRowDown2 =
@ -209,14 +193,14 @@ static void ScaleUVDown4Box(int src_width,
int dy) {
int j;
// Allocate 2 rows of UV.
const int kRowSize = (dst_width * 2 * 2 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
const int row_size = (dst_width * 2 * 2 + 15) & ~15;
align_buffer_64(row, row_size * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
@ -250,9 +234,9 @@ static void ScaleUVDown4Box(int src_width,
for (j = 0; j < dst_height; ++j) {
ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize,
ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size,
dst_width * 2);
ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width);
ScaleUVRowDown2(row, row_size, dst_uv, dst_width);
src_uv += row_stride;
dst_uv += dst_stride;
}
@ -279,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * src_stride;
int row_stride = (dy >> 16) * (int64_t)src_stride;
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@ -287,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@ -316,16 +300,6 @@ static void ScaleUVDownEven(int src_width,
}
}
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleUVRowDownEven =
filtering ? ScaleUVRowDownEvenBox_Any_MMI : ScaleUVRowDownEven_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
ScaleUVRowDownEven =
filtering ? ScaleUVRowDownEvenBox_MMI : ScaleUVRowDownEven_MMI;
}
}
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleUVRowDownEven =
@ -415,6 +389,14 @@ static void ScaleUVBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
@ -447,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_uv + yi * src_stride;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@ -513,14 +495,6 @@ static void ScaleUVBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -528,6 +502,14 @@ static void ScaleUVBilinearUp(int src_width,
InterpolateRow = InterpolateRow_MSA;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
InterpolateRow = InterpolateRow_Any_LSX;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_LSX;
}
}
#endif
if (src_width >= 32768) {
ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C;
@ -566,14 +548,6 @@ static void ScaleUVBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleUVFilterCols = ScaleUVCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleUVFilterCols = ScaleUVCols_MMI;
}
}
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleUVFilterCols = ScaleUVCols_Any_MSA;
@ -588,11 +562,6 @@ static void ScaleUVBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
ScaleUVFilterCols = ScaleUVColsUp2_SSSE3;
}
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleUVFilterCols = ScaleUVColsUp2_MMI;
}
#endif
}
@ -602,14 +571,14 @@ static void ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_uv + yi * src_stride;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
// Allocate 2 rows of UV.
const int kRowSize = (dst_width * 2 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
const int row_size = (dst_width * 2 + 15) & ~15;
align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row;
int rowstride = kRowSize;
int rowstride = row_size;
int lasty = yi;
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -617,7 +586,9 @@ static void ScaleUVBilinearUp(int src_width,
src += src_stride;
}
ScaleUVFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
if (src_height > 2) {
src += src_stride;
}
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
@ -625,14 +596,16 @@ static void ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_uv + yi * src_stride;
src = src_uv + yi * (int64_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
if ((y + 65536) < max_y) {
src += src_stride;
}
}
}
if (filtering == kFilterLinear) {
@ -690,12 +663,13 @@ void ScaleUVLinearUp2(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -777,9 +751,9 @@ void ScaleUVLinearUp2_16(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2;
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
@ -796,12 +770,13 @@ void ScaleUVLinearUp2_16(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -829,9 +804,9 @@ void ScaleUVBilinearUp2_16(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2;
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
@ -896,14 +871,6 @@ static void ScaleUVSimple(int src_width,
}
}
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleUVCols = ScaleUVCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleUVCols = ScaleUVCols_MMI;
}
}
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleUVCols = ScaleUVCols_Any_MSA;
@ -918,16 +885,12 @@ static void ScaleUVSimple(int src_width,
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
ScaleUVCols = ScaleUVColsUp2_SSSE3;
}
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleUVCols = ScaleUVColsUp2_MMI;
}
#endif
}
for (j = 0; j < dst_height; ++j) {
ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
}
@ -935,43 +898,43 @@ static void ScaleUVSimple(int src_width,
// Copy UV with optional flipping
#if HAS_UVCOPY
static int UVCopy(const uint8_t* src_UV,
static int UVCopy(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_UV,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (!src_UV || !dst_UV || width <= 0 || height == 0) {
if (!src_uv || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_UV = src_UV + (height - 1) * src_stride_uv;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
CopyPlane(src_UV, src_stride_uv, dst_UV, dst_stride_uv, width * 2, height);
CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, width * 2, height);
return 0;
}
static int UVCopy_16(const uint16_t* src_UV,
static int UVCopy_16(const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_UV,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (!src_UV || !dst_UV || width <= 0 || height == 0) {
if (!src_uv || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_UV = src_UV + (height - 1) * src_stride_uv;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
CopyPlane_16(src_UV, src_stride_uv, dst_UV, dst_stride_uv, width * 2, height);
CopyPlane_16(src_uv, src_stride_uv, dst_uv, dst_stride_uv, width * 2, height);
return 0;
}
#endif // HAS_UVCOPY
@ -1005,7 +968,7 @@ static void ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -1020,7 +983,7 @@ static void ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * src_stride;
src += (clipf >> 16) * (int64_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1061,8 +1024,8 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
UVCopy(src + (y >> 16) * src_stride + (x >> 16) * 2, src_stride, dst,
dst_stride, clip_width, clip_height);
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
#endif
@ -1073,7 +1036,7 @@ static void ScaleUV(const uint8_t* src,
if (dx == 0x10000 && (x & 0xffff) == 0) {
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst, x, y, dy, 4, filtering);
dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering);
return;
}
if (filtering && (dst_width + 1) / 2 == src_width) {
@ -1155,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src_uv = src_uv + (src_height - 1) * src_stride_uv;
src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@ -1163,12 +1126,13 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
UVCopy_16(src_uv + ((src_height - 1) / 2) * src_stride_uv, src_stride_uv,
dst_uv, dst_stride_uv, dst_width, dst_height);
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
UVCopy_16(src_uv + src_stride_uv * ((dy - 1) / 2), src_stride_uv * dy,
dst_uv, dst_stride_uv, dst_width, dst_height);
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
dst_height);
}
return 0;

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
#!/usr/bin/env vpython3
# Copyright 2017 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
@ -11,7 +12,6 @@
# https://webrtc.googlesource.com/src/+/master/tools_webrtc/autoroller/roll_deps.py
# customized for libyuv.
"""Script to automatically roll dependencies in the libyuv DEPS file."""
import argparse
@ -22,7 +22,7 @@ import os
import re
import subprocess
import sys
import urllib2
import urllib.request
# Skip these dependencies (list without solution name prefix).
@ -78,7 +78,7 @@ def ParseDepsDict(deps_content):
def ParseLocalDepsFile(filename):
with open(filename, 'rb') as f:
deps_content = f.read()
deps_content = f.read().decode('utf-8')
return ParseDepsDict(deps_content)
@ -98,7 +98,7 @@ def ParseCommitPosition(commit_message):
def _RunCommand(command, working_dir=None, ignore_exit_code=False,
extra_env=None):
extra_env=None, input_data=None):
"""Runs a command and returns the output from that command.
If the command fails (exit code != 0), the function will exit the process.
@ -113,12 +113,14 @@ def _RunCommand(command, working_dir=None, ignore_exit_code=False,
assert all(isinstance(value, str) for value in extra_env.values())
logging.debug('extra env: %s', extra_env)
env.update(extra_env)
p = subprocess.Popen(command, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, env=env,
cwd=working_dir, universal_newlines=True)
std_output = p.stdout.read()
err_output = p.stderr.read()
p.wait()
p = subprocess.Popen(command,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=env,
cwd=working_dir,
universal_newlines=True)
std_output, err_output = p.communicate(input_data)
p.stdout.close()
p.stderr.close()
if not ignore_exit_code and p.returncode != 0:
@ -154,7 +156,7 @@ def _ReadGitilesContent(url):
# Download and decode BASE64 content until
# https://code.google.com/p/gitiles/issues/detail?id=7 is fixed.
base64_content = ReadUrlContent(url + '?format=TEXT')
return base64.b64decode(base64_content[0])
return base64.b64decode(base64_content[0]).decode('utf-8')
def ReadRemoteCrFile(path_below_src, revision):
@ -170,7 +172,7 @@ def ReadRemoteCrCommit(revision):
def ReadUrlContent(url):
"""Connect to a remote host and read the contents. Returns a list of lines."""
conn = urllib2.urlopen(url)
conn = urllib.request.urlopen(url)
try:
return conn.readlines()
except IOError as e:
@ -193,7 +195,7 @@ def GetMatchingDepsEntries(depsentry_dict, dir_path):
A list of DepsEntry objects.
"""
result = []
for path, depsentry in depsentry_dict.iteritems():
for path, depsentry in depsentry_dict.items():
if path == dir_path:
result.append(depsentry)
else:
@ -203,26 +205,24 @@ def GetMatchingDepsEntries(depsentry_dict, dir_path):
result.append(depsentry)
return result
def BuildDepsentryDict(deps_dict):
"""Builds a dict of paths to DepsEntry objects from a raw parsed deps dict."""
"""Builds a dict of paths to DepsEntry objects from a raw deps dict."""
result = {}
def AddDepsEntries(deps_subdict):
for path, deps_url_spec in deps_subdict.iteritems():
# The deps url is either an URL and a condition, or just the URL.
for path, deps_url_spec in deps_subdict.items():
if isinstance(deps_url_spec, dict):
if deps_url_spec.get('dep_type') == 'cipd':
continue
deps_url = deps_url_spec['url']
else:
deps_url = deps_url_spec
if not result.has_key(path):
if not path in result:
url, revision = deps_url.split('@') if deps_url else (None, None)
result[path] = DepsEntry(path, url, revision)
AddDepsEntries(deps_dict['deps'])
for deps_os in ['win', 'mac', 'unix', 'android', 'ios', 'unix']:
for deps_os in ['win', 'mac', 'linux', 'android', 'ios', 'unix']:
AddDepsEntries(deps_dict.get('deps_os', {}).get(deps_os, {}))
return result
@ -245,7 +245,7 @@ def CalculateChangedDeps(libyuv_deps, new_cr_deps):
result = []
libyuv_entries = BuildDepsentryDict(libyuv_deps)
new_cr_entries = BuildDepsentryDict(new_cr_deps)
for path, libyuv_deps_entry in libyuv_entries.iteritems():
for path, libyuv_deps_entry in libyuv_entries.items():
if path in DONT_AUTOROLL_THESE:
continue
cr_deps_entry = new_cr_entries.get(path)
@ -277,7 +277,7 @@ def CalculateChangedClang(new_cr_rev):
return match.group(1)
raise RollError('Could not parse Clang revision from:\n' + '\n'.join(' ' + l for l in lines))
with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'rb') as f:
with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'r') as f:
current_lines = f.readlines()
current_rev = GetClangRev(current_lines)
@ -335,10 +335,10 @@ def UpdateDepsFile(deps_filename, old_cr_revision, new_cr_revision,
# Update the chromium_revision variable.
with open(deps_filename, 'rb') as deps_file:
deps_content = deps_file.read()
deps_content = deps_file.read().decode('utf-8')
deps_content = deps_content.replace(old_cr_revision, new_cr_revision)
with open(deps_filename, 'wb') as deps_file:
deps_file.write(deps_content)
deps_file.write(deps_content.encode('utf-8'))
# Update each individual DEPS entry.
for dep in changed_deps:
@ -415,13 +415,14 @@ def _UploadCL(commit_queue_mode):
- 1: Run trybots but do not submit to CQ.
- 0: Skip CQ, upload only.
"""
cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks', '--send-mail']
cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks']
if commit_queue_mode >= 2:
logging.info('Sending the CL to the CQ...')
cmd.extend(['--use-commit-queue'])
cmd.extend(['-o', 'label=Bot-Commit+1'])
cmd.extend(['-o', 'label=Commit-Queue+2'])
elif commit_queue_mode >= 1:
logging.info('Starting CQ dry run...')
cmd.extend(['--cq-dry-run'])
cmd.extend(['-o', 'label=Commit-Queue+1'])
extra_env = {
'EDITOR': 'true',
'SKIP_GCE_AUTH_FOR_GIT': '1',

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
#!/usr/bin/env vpython3
# Copyright 2017 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
@ -14,15 +15,13 @@ import sys
import tempfile
import unittest
import roll_deps
from roll_deps import CalculateChangedDeps, GetMatchingDepsEntries, \
ParseDepsDict, ParseLocalDepsFile, UpdateDepsFile
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PARENT_DIR = os.path.join(SCRIPT_DIR, os.pardir)
sys.path.append(PARENT_DIR)
import roll_deps # pylint: disable=wrong-import-position
from roll_deps import CalculateChangedDeps, GetMatchingDepsEntries, \
ParseDepsDict, ParseLocalDepsFile, \
UpdateDepsFile # pylint: disable=wrong-import-position
TEST_DATA_VARS = {
'chromium_git': 'https://chromium.googlesource.com',
@ -46,7 +45,7 @@ class TestError(Exception):
pass
class FakeCmd(object):
class FakeCmd():
def __init__(self):
self.expectations = []
@ -86,43 +85,43 @@ class TestRollChromiumRevision(unittest.TestCase):
def testVarLookup(self):
local_scope = {'foo': 'wrong', 'vars': {'foo': 'bar'}}
lookup = roll_deps.VarLookup(local_scope)
self.assertEquals(lookup('foo'), 'bar')
self.assertEqual(lookup('foo'), 'bar')
def testUpdateDepsFile(self):
new_rev = 'aaaaabbbbbcccccdddddeeeeefffff0000011111'
current_rev = TEST_DATA_VARS['chromium_revision']
UpdateDepsFile(self._libyuv_depsfile, current_rev, new_rev, [])
with open(self._libyuv_depsfile) as deps_file:
with open(self._libyuv_depsfile, 'r') as deps_file:
deps_contents = deps_file.read()
self.assertTrue(new_rev in deps_contents,
'Failed to find %s in\n%s' % (new_rev, deps_contents))
def testParseDepsDict(self):
with open(self._libyuv_depsfile) as deps_file:
with open(self._libyuv_depsfile, 'r') as deps_file:
deps_contents = deps_file.read()
local_scope = ParseDepsDict(deps_contents)
vars_dict = local_scope['vars']
def assertVar(variable_name):
self.assertEquals(vars_dict[variable_name], TEST_DATA_VARS[variable_name])
self.assertEqual(vars_dict[variable_name], TEST_DATA_VARS[variable_name])
assertVar('chromium_git')
assertVar('chromium_revision')
self.assertEquals(len(local_scope['deps']), 3)
self.assertEqual(len(local_scope['deps']), 3)
def testGetMatchingDepsEntriesReturnsPathInSimpleCase(self):
entries = GetMatchingDepsEntries(DEPS_ENTRIES, 'src/testing/gtest')
self.assertEquals(len(entries), 1)
self.assertEquals(entries[0], DEPS_ENTRIES['src/testing/gtest'])
self.assertEqual(len(entries), 1)
self.assertEqual(entries[0], DEPS_ENTRIES['src/testing/gtest'])
def testGetMatchingDepsEntriesHandlesSimilarStartingPaths(self):
entries = GetMatchingDepsEntries(DEPS_ENTRIES, 'src/testing')
self.assertEquals(len(entries), 2)
self.assertEqual(len(entries), 2)
def testGetMatchingDepsEntriesHandlesTwoPathsWithIdenticalFirstParts(self):
entries = GetMatchingDepsEntries(DEPS_ENTRIES, 'src/build')
self.assertEquals(len(entries), 1)
self.assertEquals(entries[0], DEPS_ENTRIES['src/build'])
self.assertEqual(len(entries), 1)
self.assertEqual(entries[0], DEPS_ENTRIES['src/build'])
def testCalculateChangedDeps(self):
_SetupGitLsRemoteCall(self.fake,
@ -130,14 +129,14 @@ class TestRollChromiumRevision(unittest.TestCase):
libyuv_deps = ParseLocalDepsFile(self._libyuv_depsfile)
new_cr_deps = ParseLocalDepsFile(self._new_cr_depsfile)
changed_deps = CalculateChangedDeps(libyuv_deps, new_cr_deps)
self.assertEquals(len(changed_deps), 2)
self.assertEquals(changed_deps[0].path, 'src/build')
self.assertEquals(changed_deps[0].current_rev, BUILD_OLD_REV)
self.assertEquals(changed_deps[0].new_rev, BUILD_NEW_REV)
self.assertEqual(len(changed_deps), 2)
self.assertEqual(changed_deps[0].path, 'src/build')
self.assertEqual(changed_deps[0].current_rev, BUILD_OLD_REV)
self.assertEqual(changed_deps[0].new_rev, BUILD_NEW_REV)
self.assertEquals(changed_deps[1].path, 'src/buildtools')
self.assertEquals(changed_deps[1].current_rev, BUILDTOOLS_OLD_REV)
self.assertEquals(changed_deps[1].new_rev, BUILDTOOLS_NEW_REV)
self.assertEqual(changed_deps[1].path, 'src/buildtools')
self.assertEqual(changed_deps[1].current_rev, BUILDTOOLS_OLD_REV)
self.assertEqual(changed_deps[1].new_rev, BUILDTOOLS_NEW_REV)
def _SetupGitLsRemoteCall(cmd_fake, url, revision):

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# Copyright 2016 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
@ -25,8 +26,8 @@ def print_landmines():
# dependency problems, fix the dependency problems instead of adding a
# landmine.
# See the Chromium version in src/build/get_landmines.py for usage examples.
print 'Clobber to remove GYP artifacts after switching bots to GN.'
print 'Another try to remove GYP artifacts after switching bots to GN.'
print('Clobber to remove GYP artifacts after switching bots to GN.')
print('Another try to remove GYP artifacts after switching bots to GN.')
def main():

View File

@ -22,7 +22,8 @@ namespace libyuv {
// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB.
// Port to Visual C and other CPUs
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__))
#define ERROR_FULL 5
#define ERROR_J420 4
#else
@ -579,28 +580,28 @@ TEST_F(LibYUVColorTest, TestGreyYUV) {
static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
int i;
printf("hist");
printf("hist ");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
printf("\t%8d", i - 128);
printf(" %8d", i - 128);
}
}
printf("\nred");
printf("\nred ");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
printf("\t%8d", rh[i]);
printf(" %8d", rh[i]);
}
}
printf("\ngreen");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
printf("\t%8d", gh[i]);
printf(" %8d", gh[i]);
}
}
printf("\nblue");
printf("\nblue ");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
printf("\t%8d", bh[i]);
printf(" %8d", bh[i]);
}
}
printf("\n");
@ -608,10 +609,10 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
// Step by 5 on inner loop goes from 0 to 255 inclusive.
// Set to 1 for better converage. 3, 5 or 17 for faster testing.
#ifdef ENABLE_SLOW_TESTS
#define FASTSTEP 1
#else
#ifdef DISABLE_SLOW_TESTS
#define FASTSTEP 5
#else
#define FASTSTEP 1
#endif
// BT.601 limited range.

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_gfni = TestCpuFlag(kCpuHasGFNI);
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
@ -57,6 +58,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has GFNI %d\n", has_gfni);
printf("Has AVX512BW %d\n", has_avx512bw);
printf("Has AVX512VL %d\n", has_avx512vl);
printf("Has AVX512VNNI %d\n", has_avx512vnni);
printf("Has AVX512VBMI %d\n", has_avx512vbmi);
printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2);
printf("Has AVX512VBITALG %d\n", has_avx512vbitalg);
@ -67,8 +69,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has MIPS %d\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %d\n", has_msa);
int has_mmi = TestCpuFlag(kCpuHasMMI);
printf("Has MMI %d\n", has_mmi);
#endif
#if defined(__loongarch__)
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
printf("Has LOONGARCH %d\n", has_loongarch);
int has_lsx = TestCpuFlag(kCpuHasLSX);
printf("Has LSX %d\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
printf("Has LASX %d\n", has_lasx);
#endif
}
@ -149,6 +158,9 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef _MIPS_ARCH_LOONGSON3A
printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A);
#endif
#ifdef __loongarch__
printf("__loongarch__ %d\n", __loongarch__);
#endif
#ifdef _WIN32
printf("_WIN32 %d\n", _WIN32);
#endif
@ -239,17 +251,13 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) {
#endif
}
TEST_F(LibYUVBaseTest, TestLinuxMipsMsaMmi) {
TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
if (FileExists("../../unit_test/testdata/mips.txt")) {
printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n");
EXPECT_EQ(0, MipsCpuCaps("../../unit_test/testdata/mips.txt"));
EXPECT_EQ(kCpuHasMMI,
MipsCpuCaps("../../unit_test/testdata/mips_loongson3.txt"));
EXPECT_EQ(kCpuHasMMI,
MipsCpuCaps("../../unit_test/testdata/mips_loongson_mmi.txt"));
EXPECT_EQ(kCpuHasMSA, MipsCpuCaps("../../unit_test/testdata/mips_msa.txt"));
EXPECT_EQ(kCpuHasMMI | kCpuHasMSA,
EXPECT_EQ(kCpuHasMSA,
MipsCpuCaps("../../unit_test/testdata/mips_loongson2k.txt"));
} else {
printf("WARNING: unable to load \"../../unit_test/testdata/mips.txt\"\n");

View File

@ -29,6 +29,12 @@
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#endif
#if defined(LIBYUV_BIT_EXACT)
#define EXPECTED_ATTENUATE_DIFF 0
#else
#define EXPECTED_ATTENUATE_DIFF 2
#endif
namespace libyuv {
TEST_F(LibYUVPlanarTest, TestAttenuate) {
@ -100,9 +106,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
free_aligned_buffer_page_end(atten2_pixels);
@ -158,28 +164,29 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
int max_diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
static int TestUnattenuateI(int width,
@ -231,28 +238,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
@ -1073,6 +1080,87 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
}
}
TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
SIMD_ALIGNED(uint16_t orig_pixels_0[1280]);
SIMD_ALIGNED(uint16_t orig_pixels_1[1280]);
SIMD_ALIGNED(uint16_t interpolate_pixels[1280]);
memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
orig_pixels_0[0] = 16u;
orig_pixels_0[1] = 32u;
orig_pixels_0[2] = 64u;
orig_pixels_0[3] = 128u;
orig_pixels_0[4] = 0u;
orig_pixels_0[5] = 0u;
orig_pixels_0[6] = 0u;
orig_pixels_0[7] = 255u;
orig_pixels_0[8] = 0u;
orig_pixels_0[9] = 0u;
orig_pixels_0[10] = 0u;
orig_pixels_0[11] = 0u;
orig_pixels_0[12] = 0u;
orig_pixels_0[13] = 0u;
orig_pixels_0[14] = 0u;
orig_pixels_0[15] = 0u;
orig_pixels_1[0] = 0u;
orig_pixels_1[1] = 0u;
orig_pixels_1[2] = 0u;
orig_pixels_1[3] = 0u;
orig_pixels_1[4] = 0u;
orig_pixels_1[5] = 0u;
orig_pixels_1[6] = 0u;
orig_pixels_1[7] = 0u;
orig_pixels_1[8] = 0u;
orig_pixels_1[9] = 0u;
orig_pixels_1[10] = 0u;
orig_pixels_1[11] = 0u;
orig_pixels_1[12] = 255u;
orig_pixels_1[13] = 255u;
orig_pixels_1[14] = 255u;
orig_pixels_1[15] = 255u;
InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
&interpolate_pixels[0], 0, 16, 1, 128);
EXPECT_EQ(8u, interpolate_pixels[0]);
EXPECT_EQ(16u, interpolate_pixels[1]);
EXPECT_EQ(32u, interpolate_pixels[2]);
EXPECT_EQ(64u, interpolate_pixels[3]);
EXPECT_EQ(0u, interpolate_pixels[4]);
EXPECT_EQ(0u, interpolate_pixels[5]);
EXPECT_EQ(0u, interpolate_pixels[6]);
EXPECT_EQ(128u, interpolate_pixels[7]);
EXPECT_EQ(0u, interpolate_pixels[8]);
EXPECT_EQ(0u, interpolate_pixels[9]);
EXPECT_EQ(0u, interpolate_pixels[10]);
EXPECT_EQ(0u, interpolate_pixels[11]);
EXPECT_EQ(128u, interpolate_pixels[12]);
EXPECT_EQ(128u, interpolate_pixels[13]);
EXPECT_EQ(128u, interpolate_pixels[14]);
EXPECT_EQ(128u, interpolate_pixels[15]);
InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
&interpolate_pixels[0], 0, 16, 1, 0);
EXPECT_EQ(16u, interpolate_pixels[0]);
EXPECT_EQ(32u, interpolate_pixels[1]);
EXPECT_EQ(64u, interpolate_pixels[2]);
EXPECT_EQ(128u, interpolate_pixels[3]);
InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
&interpolate_pixels[0], 0, 16, 1, 192);
EXPECT_EQ(4u, interpolate_pixels[0]);
EXPECT_EQ(8u, interpolate_pixels[1]);
EXPECT_EQ(16u, interpolate_pixels[2]);
EXPECT_EQ(32u, interpolate_pixels[3]);
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
&interpolate_pixels[0], 0, 1280, 1, 123);
}
}
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
@ -1477,6 +1565,251 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) {
EXPECT_EQ(0, err);
}
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
int i;
int y_plane_size = benchmark_width_ * benchmark_height_;
align_buffer_page_end(orig_y, y_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
MemRandomize(orig_y, y_plane_size);
memset(dst_c, 1, y_plane_size);
memset(dst_opt, 2, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (i = 0; i < benchmark_iterations_; i++) {
CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_,
benchmark_width_, benchmark_height_);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (i = 0; i < benchmark_iterations_; i++) {
CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_,
benchmark_width_, benchmark_height_);
}
for (i = 0; i < y_plane_size; ++i) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
free_aligned_buffer_page_end(orig_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
}
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
// Test to verify copying a rect with a zero height or width does
// not touch destination memory.
uint8_t src = 42;
uint8_t dst = 0;
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
CopyPlane(&src, 0, &dst, 0, 0, 0);
EXPECT_EQ(src, 42);
EXPECT_EQ(dst, 0);
CopyPlane(&src, 1, &dst, 1, 1, 0);
EXPECT_EQ(src, 42);
EXPECT_EQ(dst, 0);
CopyPlane(&src, 1, &dst, 1, 0, 1);
EXPECT_EQ(src, 42);
EXPECT_EQ(dst, 0);
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
CopyPlane(&src, 0, &dst, 0, 0, 0);
EXPECT_EQ(src, 42);
EXPECT_EQ(dst, 0);
CopyPlane(&src, 1, &dst, 1, 1, 0);
EXPECT_EQ(src, 42);
EXPECT_EQ(dst, 0);
CopyPlane(&src, 1, &dst, 1, 0, 1);
EXPECT_EQ(src, 42);
EXPECT_EQ(dst, 0);
}
TEST_F(LibYUVPlanarTest, TestDetilePlane) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
int tile_width = (benchmark_width_ + 15) & ~15;
int tile_height = (benchmark_height_ + 15) & ~15;
int tile_plane_size = tile_width * tile_height;
int y_plane_size = benchmark_width_ * benchmark_height_;
align_buffer_page_end(tile_y, tile_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
MemRandomize(tile_y, tile_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
for (i = 0; i < y_plane_size; ++i) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
free_aligned_buffer_page_end(tile_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
}
TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
int tile_width = (benchmark_width_ + 15) & ~15;
int tile_height = (benchmark_height_ + 15) & ~15;
int tile_plane_size = tile_width * tile_height * 2;
int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
align_buffer_page_end(tile_y, tile_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
MemRandomize(tile_y, tile_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c,
benchmark_width_, benchmark_width_, benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt,
benchmark_width_, benchmark_width_, benchmark_height_, 16);
}
for (i = 0; i < y_plane_size; ++i) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
free_aligned_buffer_page_end(tile_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
}
// Compares DetileSplitUV to 2 step Detile + SplitUV
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
int tile_width = (benchmark_width_ + 15) & ~15;
int tile_height = (benchmark_height_ + 15) & ~15;
int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
align_buffer_page_end(tile_uv, tile_plane_size);
align_buffer_page_end(detiled_uv, tile_plane_size);
align_buffer_page_end(dst_u_two_stage, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_two_stage, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
MemRandomize(tile_uv, tile_plane_size);
memset(detiled_uv, 0, tile_plane_size);
memset(dst_u_two_stage, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_two_stage, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Benchmark 2 step conversion for comparison.
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
benchmark_height_);
}
for (i = 0; i < uv_plane_size; ++i) {
EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
}
free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(detiled_uv);
free_aligned_buffer_page_end(dst_u_two_stage);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_two_stage);
free_aligned_buffer_page_end(dst_v_opt);
}
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
int tile_width = (benchmark_width_ + 15) & ~15;
int tile_height = (benchmark_height_ + 15) & ~15;
int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
align_buffer_page_end(tile_uv, tile_plane_size);
align_buffer_page_end(dst_u_c, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_c, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
MemRandomize(tile_uv, tile_plane_size);
memset(dst_u_c, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_c, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
for (i = 0; i < uv_plane_size; ++i) {
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);
free_aligned_buffer_page_end(dst_v_opt);
}
static int TestMultiply(int width,
int height,
int benchmark_iterations,
@ -1966,7 +2299,7 @@ static int TestBlur(int width,
return max_diff;
}
#if defined(ENABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
#define DISABLED_ARM(name) name
#else
#define DISABLED_ARM(name) DISABLED_##name
@ -3131,13 +3464,13 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
1) \
2) \
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \
TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \
0) \
TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
1) \
2) \
TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
@ -3190,7 +3523,7 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
1) \
2) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
@ -3216,19 +3549,19 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels);
reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 16, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16,
kPixels);
} else {
MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16,
kPixels);
}
}
@ -3315,6 +3648,64 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
TEST_F(LibYUVPlanarTest, YUY2ToY) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels);
MemRandomize(src_pixels_y, kPixels * 2);
memset(dst_pixels_y_opt, 0, kPixels);
memset(dst_pixels_y_c, 1, kPixels);
MaskCpuFlags(disable_cpu_flags_);
YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_,
benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt,
benchmark_width_, benchmark_width_, benchmark_height_);
}
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
}
free_aligned_buffer_page_end(src_pixels_y);
free_aligned_buffer_page_end(dst_pixels_y_opt);
free_aligned_buffer_page_end(dst_pixels_y_c);
}
TEST_F(LibYUVPlanarTest, UYVYToY) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels);
MemRandomize(src_pixels_y, kPixels * 2);
memset(dst_pixels_y_opt, 0, kPixels);
memset(dst_pixels_y_c, 1, kPixels);
MaskCpuFlags(disable_cpu_flags_);
UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_,
benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt,
benchmark_width_, benchmark_width_, benchmark_height_);
}
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
}
free_aligned_buffer_page_end(src_pixels_y);
free_aligned_buffer_page_end(dst_pixels_y_opt);
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
@ -3361,6 +3752,35 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif // HAS_CONVERT16TO8ROW_AVX2
#ifdef HAS_UYVYTOYROW_NEON
TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
// NEON does multiple of 16, so round count up
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels);
MemRandomize(src_pixels_y, kPixels * 2);
memset(dst_pixels_y_opt, 0, kPixels);
memset(dst_pixels_y_c, 1, kPixels);
UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels);
for (int i = 0; i < benchmark_iterations_; ++i) {
UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels);
}
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
}
free_aligned_buffer_page_end(src_pixels_y);
free_aligned_buffer_page_end(dst_pixels_y_opt);
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif // HAS_UYVYTOYROW_NEON
#endif // ENABLE_ROW_TESTS
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {

View File

@ -16,6 +16,8 @@
namespace libyuv {
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
static void I420TestRotate(int src_width,
int src_height,
int dst_width,
@ -135,6 +137,94 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
benchmark_cpu_info_);
}
static void I422TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i422_y_size = src_width * Abs(src_height);
int src_i422_uv_size = ((src_width + 1) / 2) * Abs(src_height);
int src_i422_size = src_i422_y_size + src_i422_uv_size * 2;
align_buffer_page_end(src_i422, src_i422_size);
for (int i = 0; i < src_i422_size; ++i) {
src_i422[i] = fastrand() & 0xff;
}
int dst_i422_y_size = dst_width * dst_height;
int dst_i422_uv_size = ((dst_width + 1) / 2) * dst_height;
int dst_i422_size = dst_i422_y_size + dst_i422_uv_size * 2;
align_buffer_page_end(dst_i422_c, dst_i422_size);
align_buffer_page_end(dst_i422_opt, dst_i422_size);
memset(dst_i422_c, 2, dst_i422_size);
memset(dst_i422_opt, 3, dst_i422_size);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I422Rotate(src_i422, src_width, src_i422 + src_i422_y_size,
(src_width + 1) / 2, src_i422 + src_i422_y_size + src_i422_uv_size,
(src_width + 1) / 2, dst_i422_c, dst_width,
dst_i422_c + dst_i422_y_size, (dst_width + 1) / 2,
dst_i422_c + dst_i422_y_size + dst_i422_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I422Rotate(
src_i422, src_width, src_i422 + src_i422_y_size, (src_width + 1) / 2,
src_i422 + src_i422_y_size + src_i422_uv_size, (src_width + 1) / 2,
dst_i422_opt, dst_width, dst_i422_opt + dst_i422_y_size,
(dst_width + 1) / 2, dst_i422_opt + dst_i422_y_size + dst_i422_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i422_size; ++i) {
EXPECT_EQ(dst_i422_c[i], dst_i422_opt[i]);
}
free_aligned_buffer_page_end(dst_i422_c);
free_aligned_buffer_page_end(dst_i422_opt);
free_aligned_buffer_page_end(src_i422);
}
TEST_F(LibYUVRotateTest, I422Rotate0_Opt) {
I422TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
benchmark_height_, kRotate0, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I422Rotate90_Opt) {
I422TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
benchmark_width_, kRotate90, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I422Rotate180_Opt) {
I422TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
benchmark_height_, kRotate180, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I422Rotate270_Opt) {
I422TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
benchmark_width_, kRotate270, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
static void I444TestRotate(int src_width,
int src_height,
int dst_width,
@ -391,4 +481,119 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
disable_cpu_flags_, benchmark_cpu_info_);
}
// Test Android 420 to I420 Rotate
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF, PN, OFF_U, OFF_V, ROT) \
TEST_F(LibYUVRotateTest, \
SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate##ROT##To##PN##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSizeUV = \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, \
kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
uint8_t* src_u = src_uv + OFF_U; \
uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
(fastrand() & 0xff); \
src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
(fastrand() & 0xff); \
} \
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_u_c, 2, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_c, 3, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_u_opt, 102, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_opt, 103, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \
kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight, \
(libyuv::RotationMode)ROT); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \
dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight, \
(libyuv::RotationMode)ROT); \
} \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
} \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
} \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_u_c); \
free_aligned_buffer_page_end(dst_v_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_u_opt); \
free_aligned_buffer_page_end(dst_v_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
}
#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \
SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \
_Any, +, 0, PN, OFF_U, OFF_V, 0) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \
_Unaligned, +, 2, PN, OFF_U, OFF_V, 0) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
-, 0, PN, OFF_U, OFF_V, 0) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
0, PN, OFF_U, OFF_V, 0) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
0, PN, OFF_U, OFF_V, 180)
TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
#undef TESTAPLANARTOP
#undef TESTAPLANARTOPI
} // namespace libyuv

View File

@ -22,6 +22,12 @@ namespace libyuv {
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
// SLOW TESTS are those that are unoptimized C code.
// FULL TESTS are optimized but test many variations of the same code.
#define ENABLE_FULL_TESTS
#endif
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int ARGBTestFilter(int src_width,
int src_height,
@ -251,23 +257,30 @@ static int ARGBClipTestFilter(int src_width,
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(, name, None, nom, denom, 0) \
TEST_FACTOR1(, name, Linear, nom, denom, 3) \
TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(, name, Box, nom, denom, 3)
#else
#if defined(ENABLE_FULL_TESTS)
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(DISABLED_, name, Box, nom, denom, 3)
#else
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3)
#endif
#endif
TEST_FACTOR(2, 1, 2)
TEST_FACTOR(4, 1, 4)
// TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
#ifndef DISABLE_SLOW_TESTS
TEST_FACTOR(8, 1, 8)
#endif
TEST_FACTOR(3by4, 3, 4)
TEST_FACTOR(3by8, 3, 8)
TEST_FACTOR(3, 1, 3)
@ -305,28 +318,33 @@ TEST_FACTOR(3, 1, 3)
EXPECT_LE(diff, max_diff); \
}
/// Test scale to a specified size with all 4 filters.
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(, name, width, height, None, 0) \
TEST_SCALETO1(, name, width, height, Linear, 3) \
TEST_SCALETO1(, name, width, height, Bilinear, 3)
#else
#if defined(ENABLE_FULL_TESTS)
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
#else
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
#endif
#endif
TEST_SCALETO(ARGBScale, 1, 1)
TEST_SCALETO(ARGBScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 569, 480)
TEST_SCALETO(ARGBScale, 640, 360)
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
TEST_SCALETO(ARGBScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 1280, 720)
TEST_SCALETO(ARGBScale, 1920, 1080)
#endif // ENABLE_SLOW_TESTS
#endif // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
@ -339,10 +357,14 @@ TEST_SCALETO(ARGBScale, 1920, 1080)
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
// Test scale with swapped width and height with all 3 filters.
TEST_SCALESWAPXY1(ARGBScale, None, 0)
TEST_SCALESWAPXY1(ARGBScale, Linear, 0)
TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
#else
TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
#endif
#undef TEST_SCALESWAPXY1
// Scale with YUV conversion to ARGB and clipping.
@ -490,11 +512,11 @@ TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
}
TEST_F(LibYUVScaleTest, ARGBTest3x) {
const int kSrcStride = 48 * 4;
const int kDstStride = 16 * 4;
const int kSrcStride = 480 * 4;
const int kDstStride = 160 * 4;
const int kSize = kSrcStride * 3;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 48 * 3; ++i) {
for (int i = 0; i < 480 * 3; ++i) {
orig_pixels[i * 4 + 0] = i;
orig_pixels[i * 4 + 1] = 255 - i;
orig_pixels[i * 4 + 2] = i + 1;
@ -502,36 +524,36 @@ TEST_F(LibYUVScaleTest, ARGBTest3x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations16 =
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
for (int i = 0; i < iterations16; ++i) {
ARGBScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ(49, dest_pixels[0]);
EXPECT_EQ(255 - 49, dest_pixels[1]);
EXPECT_EQ(50, dest_pixels[2]);
EXPECT_EQ(59, dest_pixels[3]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(226, dest_pixels[2]);
EXPECT_EQ(235, dest_pixels[3]);
ARGBScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
EXPECT_EQ(49, dest_pixels[0]);
EXPECT_EQ(255 - 49, dest_pixels[1]);
EXPECT_EQ(50, dest_pixels[2]);
EXPECT_EQ(59, dest_pixels[3]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(226, dest_pixels[2]);
EXPECT_EQ(235, dest_pixels[3]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVScaleTest, ARGBTest4x) {
const int kSrcStride = 64 * 4;
const int kDstStride = 16 * 4;
const int kSrcStride = 640 * 4;
const int kDstStride = 160 * 4;
const int kSize = kSrcStride * 4;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 64 * 4; ++i) {
for (int i = 0; i < 640 * 4; ++i) {
orig_pixels[i * 4 + 0] = i;
orig_pixels[i * 4 + 1] = 255 - i;
orig_pixels[i * 4 + 2] = i + 1;
@ -539,26 +561,25 @@ TEST_F(LibYUVScaleTest, ARGBTest4x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations16 =
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
for (int i = 0; i < iterations16; ++i) {
ARGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_NEAR((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0], 4);
EXPECT_NEAR((255 - 65 + 255 - 66 + 255 - 129 + 255 - 130 + 2) / 4,
dest_pixels[1], 4);
EXPECT_NEAR((1 * 4 + 65 + 66 + 129 + 130 + 2) / 4, dest_pixels[2], 4);
EXPECT_NEAR((10 * 4 + 65 + 66 + 129 + 130 + 2) / 4, dest_pixels[3], 4);
EXPECT_NEAR(66, dest_pixels[0], 4);
EXPECT_NEAR(255 - 66, dest_pixels[1], 4);
EXPECT_NEAR(67, dest_pixels[2], 4);
EXPECT_NEAR(76, dest_pixels[3], 4);
ARGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterNone);
EXPECT_EQ(130, dest_pixels[0]);
EXPECT_EQ(255 - 130, dest_pixels[1]);
EXPECT_EQ(130 + 1, dest_pixels[2]);
EXPECT_EQ(130 + 10, dest_pixels[3]);
EXPECT_EQ(2, dest_pixels[0]);
EXPECT_EQ(255 - 2, dest_pixels[1]);
EXPECT_EQ(3, dest_pixels[2]);
EXPECT_EQ(12, dest_pixels[3]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);

View File

@ -0,0 +1,280 @@
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include <time.h>
#include "../unit_test/unit_test.h"
#include "libyuv/cpu_id.h"
#include "libyuv/scale_rgb.h"
namespace libyuv {
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
// SLOW TESTS are those that are unoptimized C code.
// FULL TESTS are optimized but test many variations of the same code.
#define ENABLE_FULL_TESTS
#endif
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int RGBTestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
int i, j;
const int b = 0; // 128 to test for padding/stride.
int64_t src_rgb_plane_size =
(Abs(src_width) + b * 3) * (Abs(src_height) + b * 3) * 3LL;
int src_stride_rgb = (b * 3 + Abs(src_width)) * 3;
align_buffer_page_end(src_rgb, src_rgb_plane_size);
if (!src_rgb) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
MemRandomize(src_rgb, src_rgb_plane_size);
int64_t dst_rgb_plane_size = (dst_width + b * 3) * (dst_height + b * 3) * 3LL;
int dst_stride_rgb = (b * 3 + dst_width) * 3;
align_buffer_page_end(dst_rgb_c, dst_rgb_plane_size);
align_buffer_page_end(dst_rgb_opt, dst_rgb_plane_size);
if (!dst_rgb_c || !dst_rgb_opt) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
memset(dst_rgb_c, 2, dst_rgb_plane_size);
memset(dst_rgb_opt, 3, dst_rgb_plane_size);
// Warm up both versions for consistent benchmarks.
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
src_height, dst_rgb_c + (dst_stride_rgb * b) + b * 3, dst_stride_rgb,
dst_width, dst_height, f);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
src_height, dst_rgb_opt + (dst_stride_rgb * b) + b * 3,
dst_stride_rgb, dst_width, dst_height, f);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
src_height, dst_rgb_c + (dst_stride_rgb * b) + b * 3, dst_stride_rgb,
dst_width, dst_height, f);
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
src_height, dst_rgb_opt + (dst_stride_rgb * b) + b * 3,
dst_stride_rgb, dst_width, dst_height, f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
// Report performance of C vs OPT
printf("filter %d - %8d us C - %8d us OPT\n", f,
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
// C version may be a little off from the optimized. Order of
// operations may introduce rounding somewhere. So do a difference
// of the buffers and look to see that the max difference isn't
// over 2.
int max_diff = 0;
for (i = b; i < (dst_height + b); ++i) {
for (j = b * 3; j < (dst_width + b) * 3; ++j) {
int abs_diff = Abs(dst_rgb_c[(i * dst_stride_rgb) + j] -
dst_rgb_opt[(i * dst_stride_rgb) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
}
free_aligned_buffer_page_end(dst_rgb_c);
free_aligned_buffer_page_end(dst_rgb_opt);
free_aligned_buffer_page_end(src_rgb);
return max_diff;
}
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
TEST_F(LibYUVScaleTest, RGBScaleDownBy##name##_##filter) { \
int diff = RGBTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(name, None, nom, denom, 0) \
TEST_FACTOR1(name, Linear, nom, denom, 3) \
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(name, Box, nom, denom, 3)
#else
// Test a scale factor with Bilinear.
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(name, Bilinear, nom, denom, 3)
#endif
TEST_FACTOR(2, 1, 2)
#ifndef DISABLE_SLOW_TESTS
TEST_FACTOR(4, 1, 4)
// TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
TEST_FACTOR(3by4, 3, 4)
TEST_FACTOR(3by8, 3, 8)
TEST_FACTOR(3, 1, 3)
#endif
#undef TEST_FACTOR1
#undef TEST_FACTOR
#undef SX
#undef DX
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
int diff = RGBTestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
int diff = RGBTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
/// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(name, width, height, None, 0) \
TEST_SCALETO1(name, width, height, Linear, 3) \
TEST_SCALETO1(name, width, height, Bilinear, 3)
#else
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(name, width, height, Bilinear, 3)
#endif
TEST_SCALETO(RGBScale, 640, 360)
#ifndef DISABLE_SLOW_TESTS
TEST_SCALETO(RGBScale, 1, 1)
TEST_SCALETO(RGBScale, 256, 144) /* 128x72 * 3 */
TEST_SCALETO(RGBScale, 320, 240)
TEST_SCALETO(RGBScale, 569, 480)
TEST_SCALETO(RGBScale, 1280, 720)
TEST_SCALETO(RGBScale, 1920, 1080)
#endif // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
#define TEST_SCALESWAPXY1(name, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \
int diff = RGBTestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
// Test scale with swapped width and height with all 3 filters.
TEST_SCALESWAPXY1(RGBScale, None, 0)
TEST_SCALESWAPXY1(RGBScale, Linear, 0)
TEST_SCALESWAPXY1(RGBScale, Bilinear, 0)
#else
TEST_SCALESWAPXY1(RGBScale, Bilinear, 0)
#endif
#undef TEST_SCALESWAPXY1
TEST_F(LibYUVScaleTest, RGBTest3x) {
const int kSrcStride = 480 * 3;
const int kDstStride = 160 * 3;
const int kSize = kSrcStride * 3;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 480 * 3; ++i) {
orig_pixels[i * 3 + 0] = i;
orig_pixels[i * 3 + 1] = 255 - i;
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVScaleTest, RGBTest4x) {
const int kSrcStride = 640 * 3;
const int kDstStride = 160 * 3;
const int kSize = kSrcStride * 4;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 640 * 4; ++i) {
orig_pixels[i * 3 + 0] = i;
orig_pixels[i * 3 + 1] = 255 - i;
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
RGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ(66, dest_pixels[0]);
EXPECT_EQ(190, dest_pixels[1]);
RGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
kFilterNone);
EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(255 - 2, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
}
} // namespace libyuv

View File

@ -22,6 +22,12 @@
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
// SLOW TESTS are those that are unoptimized C code.
// FULL TESTS are optimized but test many variations of the same code.
#define ENABLE_FULL_TESTS
#endif
namespace libyuv {
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
@ -882,23 +888,31 @@ static int NV12TestFilter(int src_width,
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
#define TEST_FACTOR(name, nom, denom, boxdiff) \
TEST_FACTOR1(, name, None, nom, denom, 0) \
TEST_FACTOR1(, name, Linear, nom, denom, 3) \
TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
#else
#if defined(ENABLE_FULL_TESTS)
#define TEST_FACTOR(name, nom, denom, boxdiff) \
TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
#else
#define TEST_FACTOR(name, nom, denom, boxdiff) \
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
#endif
#endif
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
#ifndef DISABLE_SLOW_TESTS
TEST_FACTOR(8, 1, 8, 0)
#endif
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
@ -1008,7 +1022,7 @@ TEST_FACTOR(3, 1, 3, 0)
EXPECT_LE(diff, max_diff); \
}
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(, name, width, height, None, 0) \
@ -1016,23 +1030,28 @@ TEST_FACTOR(3, 1, 3, 0)
TEST_SCALETO1(, name, width, height, Bilinear, 3) \
TEST_SCALETO1(, name, width, height, Box, 3)
#else
// Test scale to a specified size with all 4 filters.
#if defined(ENABLE_FULL_TESTS)
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
#else
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
#endif
#endif
TEST_SCALETO(Scale, 1, 1)
TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(Scale, 320, 240)
TEST_SCALETO(Scale, 569, 480)
TEST_SCALETO(Scale, 640, 360)
#ifndef DISABLE_SLOW_TESTS
TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(Scale, 320, 240)
TEST_SCALETO(Scale, 1280, 720)
#ifdef ENABLE_SLOW_TESTS
TEST_SCALETO(Scale, 1920, 1080)
#endif // ENABLE_SLOW_TESTS
#endif // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
@ -1088,16 +1107,21 @@ TEST_SCALETO(Scale, 1920, 1080)
}
// Test scale to a specified size with all 4 filters.
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
TEST_SCALESWAPXY1(, Scale, None, 0)
TEST_SCALESWAPXY1(, Scale, Linear, 3)
TEST_SCALESWAPXY1(, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(, Scale, Box, 3)
#else
#if defined(ENABLE_FULL_TESTS)
TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0)
TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
#else
TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
#endif
#endif
#undef TEST_SCALESWAPXY1
@ -1197,10 +1221,6 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
@ -1227,13 +1247,6 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
} else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
}
#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
int has_mmi = TestCpuFlag(kCpuHasMMI);
if (has_mmi) {
ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
} else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
}
#else
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
#endif
@ -1385,56 +1398,56 @@ TEST_FACTOR(3, 1, 3, 0)
#undef DX
TEST_F(LibYUVScaleTest, PlaneTest3x) {
const int kSrcStride = 48;
const int kDstStride = 16;
const int kSrcStride = 480;
const int kDstStride = 160;
const int kSize = kSrcStride * 3;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 48 * 3; ++i) {
for (int i = 0; i < 480 * 3; ++i) {
orig_pixels[i] = i;
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations16 =
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
for (int i = 0; i < iterations16; ++i) {
ScalePlane(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ(49, dest_pixels[0]);
EXPECT_EQ(225, dest_pixels[0]);
ScalePlane(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
EXPECT_EQ(49, dest_pixels[0]);
EXPECT_EQ(225, dest_pixels[0]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVScaleTest, PlaneTest4x) {
const int kSrcStride = 64;
const int kDstStride = 16;
const int kSrcStride = 640;
const int kDstStride = 160;
const int kSize = kSrcStride * 4;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 64 * 4; ++i) {
for (int i = 0; i < 640 * 4; ++i) {
orig_pixels[i] = i;
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations16 =
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
for (int i = 0; i < iterations16; ++i) {
ScalePlane(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0]);
EXPECT_EQ(66, dest_pixels[0]);
ScalePlane(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterNone);
EXPECT_EQ(130, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -1532,4 +1545,57 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
align_buffer_page_end(orig_pixels, 3);
align_buffer_page_end(dst_pixels, 3);
// Pad the 1x1 byte image with invalid values before and after in case libyuv
// reads outside the memory boundaries.
orig_pixels[0] = 0;
orig_pixels[1] = 1; // scale this pixel
orig_pixels[2] = 2;
dst_pixels[0] = 3;
dst_pixels[1] = 3;
dst_pixels[2] = 3;
libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
/* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
/* dst_width= */ 1, /* dst_height= */ 2,
libyuv::kFilterBox);
EXPECT_EQ(dst_pixels[0], 1);
EXPECT_EQ(dst_pixels[1], 1);
EXPECT_EQ(dst_pixels[2], 3);
free_aligned_buffer_page_end(dst_pixels);
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
align_buffer_page_end(orig_pixels_alloc, 3 * 2);
align_buffer_page_end(dst_pixels_alloc, 3 * 2);
uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
// Pad the 1x1 byte image with invalid values before and after in case libyuv
// reads outside the memory boundaries.
orig_pixels[0] = 0;
orig_pixels[1] = 1; // scale this pixel
orig_pixels[2] = 2;
dst_pixels[0] = 3;
dst_pixels[1] = 3;
dst_pixels[2] = 3;
libyuv::ScalePlane_16(
orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
/* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
/* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
EXPECT_EQ(dst_pixels[0], 1);
EXPECT_EQ(dst_pixels[1], 1);
EXPECT_EQ(dst_pixels[2], 3);
free_aligned_buffer_page_end(dst_pixels_alloc);
free_aligned_buffer_page_end(orig_pixels_alloc);
}
} // namespace libyuv

View File

@ -20,6 +20,12 @@ namespace libyuv {
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
// SLOW TESTS are those that are unoptimized C code.
// FULL TESTS are optimized but test many variations of the same code.
#define ENABLE_FULL_TESTS
#endif
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int UVTestFilter(int src_width,
int src_height,
@ -125,6 +131,7 @@ static int UVTestFilter(int src_width,
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
#define TEST_FACTOR(name, nom, denom) \
@ -132,6 +139,11 @@ static int UVTestFilter(int src_width,
TEST_FACTOR1(name, Linear, nom, denom, 3) \
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(name, Box, nom, denom, 3)
#else
// Test a scale factor with Bilinear.
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(name, Bilinear, nom, denom, 3)
#endif
TEST_FACTOR(2, 1, 2)
TEST_FACTOR(4, 1, 4)
@ -159,21 +171,26 @@ TEST_FACTOR(3, 1, 3)
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
/// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(name, width, height, None, 0) \
TEST_SCALETO1(name, width, height, Linear, 3) \
TEST_SCALETO1(name, width, height, Bilinear, 3)
#else
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(name, width, height, Bilinear, 3)
#endif
TEST_SCALETO(UVScale, 1, 1)
TEST_SCALETO(UVScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(UVScale, 320, 240)
TEST_SCALETO(UVScale, 569, 480)
TEST_SCALETO(UVScale, 640, 360)
#ifdef ENABLE_SLOW_TESTS
#ifndef DISABLE_SLOW_TESTS
TEST_SCALETO(UVScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(UVScale, 320, 240)
TEST_SCALETO(UVScale, 1280, 720)
TEST_SCALETO(UVScale, 1920, 1080)
#endif // ENABLE_SLOW_TESTS
#endif // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
@ -186,70 +203,73 @@ TEST_SCALETO(UVScale, 1920, 1080)
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
// Test scale with swapped width and height with all 3 filters.
TEST_SCALESWAPXY1(UVScale, None, 0)
TEST_SCALESWAPXY1(UVScale, Linear, 0)
TEST_SCALESWAPXY1(UVScale, Bilinear, 0)
#else
TEST_SCALESWAPXY1(UVScale, Bilinear, 0)
#endif
#undef TEST_SCALESWAPXY1
TEST_F(LibYUVScaleTest, UVTest3x) {
const int kSrcStride = 48 * 2;
const int kDstStride = 16 * 2;
const int kSrcStride = 480 * 2;
const int kDstStride = 160 * 2;
const int kSize = kSrcStride * 3;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 48 * 3; ++i) {
for (int i = 0; i < 480 * 3; ++i) {
orig_pixels[i * 2 + 0] = i;
orig_pixels[i * 2 + 1] = 255 - i;
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations16 =
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
for (int i = 0; i < iterations16; ++i) {
UVScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ(49, dest_pixels[0]);
EXPECT_EQ(255 - 49, dest_pixels[1]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
UVScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
EXPECT_EQ(49, dest_pixels[0]);
EXPECT_EQ(255 - 49, dest_pixels[1]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVScaleTest, UVTest4x) {
const int kSrcStride = 64 * 2;
const int kDstStride = 16 * 2;
const int kSrcStride = 640 * 2;
const int kDstStride = 160 * 2;
const int kSize = kSrcStride * 4;
align_buffer_page_end(orig_pixels, kSize);
for (int i = 0; i < 64 * 4; ++i) {
for (int i = 0; i < 640 * 4; ++i) {
orig_pixels[i * 2 + 0] = i;
orig_pixels[i * 2 + 1] = 255 - i;
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations16 =
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
for (int i = 0; i < iterations16; ++i) {
UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
UVScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
}
EXPECT_EQ((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0]);
EXPECT_EQ((255 - 65 + 255 - 66 + 255 - 129 + 255 - 130 + 2) / 4,
dest_pixels[1]);
EXPECT_EQ(66, dest_pixels[0]);
EXPECT_EQ(190, dest_pixels[1]);
UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
kFilterNone);
EXPECT_EQ(130, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(255 - 130, dest_pixels[1]);
EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(255 - 2, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);

View File

@ -77,8 +77,15 @@ int TestCpuEnv(int cpu_info) {
if (TestEnv("LIBYUV_DISABLE_MSA")) {
cpu_info &= ~libyuv::kCpuHasMSA;
}
if (TestEnv("LIBYUV_DISABLE_MMI")) {
cpu_info &= ~libyuv::kCpuHasMMI;
#endif
#if defined(__longarch__) && defined(__linux__)
if (TestEnv("LIBYUV_DISABLE_LSX")) {
cpu_info &= ~libyuv::kCpuHasLSX;
}
#endif
#if defined(__longarch__) && defined(__linux__)
if (TestEnv("LIBYUV_DISABLE_LASX")) {
cpu_info &= ~libyuv::kCpuHasLASX;
}
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
@ -120,6 +127,9 @@ int TestCpuEnv(int cpu_info) {
if (TestEnv("LIBYUV_DISABLE_AVX512VL")) {
cpu_info &= ~libyuv::kCpuHasAVX512VL;
}
if (TestEnv("LIBYUV_DISABLE_AVX512VNNI")) {
cpu_info &= ~libyuv::kCpuHasAVX512VNNI;
}
if (TestEnv("LIBYUV_DISABLE_AVX512VBMI")) {
cpu_info &= ~libyuv::kCpuHasAVX512VBMI;
}

View File

@ -14,7 +14,6 @@
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/resource.h>
#include <sys/time.h>
#endif
@ -111,10 +110,13 @@ inline int fastrand() {
return static_cast<int>((fastrand_seed >> 16) & 0xffff);
}
// ubsan fails if dst is unaligned unless we use uint8
static inline void MemRandomize(uint8_t* dst, int64_t len) {
int64_t i;
for (i = 0; i < len - 1; i += 2) {
*reinterpret_cast<uint16_t*>(dst) = fastrand();
int r = fastrand();
dst[0] = static_cast<uint8_t>(r);
dst[1] = static_cast<uint8_t>(r >> 8);
dst += 2;
}
for (; i < len; ++i) {

View File

@ -23,6 +23,7 @@ int main(int argc, const char* argv[]) {
int has_arm = TestCpuFlag(kCpuHasARM);
int has_mips = TestCpuFlag(kCpuHasMIPS);
int has_x86 = TestCpuFlag(kCpuHasX86);
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
(void)argc;
(void)argv;
@ -65,6 +66,7 @@ int main(int argc, const char* argv[]) {
printf("Has ARM %x\n", has_arm);
printf("Has MIPS %x\n", has_mips);
printf("Has X86 %x\n", has_x86);
printf("Has LOONGARCH %x\n", has_loongarch);
if (has_arm) {
int has_neon = TestCpuFlag(kCpuHasNEON);
printf("Has NEON %x\n", has_neon);
@ -72,8 +74,12 @@ int main(int argc, const char* argv[]) {
if (has_mips) {
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa);
int has_mmi = TestCpuFlag(kCpuHasMMI);
printf("Has MMI %x\n", has_mmi);
}
if (has_loongarch) {
int has_lsx = TestCpuFlag(kCpuHasLSX);
printf("Has LSX %x\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
printf("Has LASX %x\n", has_lasx);
}
if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
@ -88,6 +94,7 @@ int main(int argc, const char* argv[]) {
int has_gfni = TestCpuFlag(kCpuHasGFNI);
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
@ -104,6 +111,7 @@ int main(int argc, const char* argv[]) {
printf("Has GFNI %x\n", has_gfni);
printf("Has AVX512BW %x\n", has_avx512bw);
printf("Has AVX512VL %x\n", has_avx512vl);
printf("Has AVX512VNNI %x\n", has_avx512vnni);
printf("Has AVX512VBMI %x\n", has_avx512vbmi);
printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2);
printf("Has AVX512VBITALG %x\n", has_avx512vbitalg);

View File

@ -248,13 +248,13 @@ bool UpdateMetrics(uint8_t* ch_org,
int number_of_frames,
metric* cur_distortion_psnr,
metric* distorted_frame,
bool do_psnr) {
bool compute_psnr) {
const int uv_offset = (do_swap_uv ? uv_size : 0);
const uint8_t* const u_org = ch_org + y_size + uv_offset;
const uint8_t* const u_rec = ch_rec + y_size;
const uint8_t* const v_org = ch_org + y_size + (uv_size - uv_offset);
const uint8_t* const v_rec = ch_rec + y_size + uv_size;
if (do_psnr) {
if (compute_psnr) {
#ifdef HAVE_JPEG
double y_err = static_cast<double>(
libyuv::ComputeSumSquareError(ch_org, ch_rec, y_size));

View File

@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) {
}
// Parse PYUV format. ie name.1920x800_24Hz_P420.yuv
bool ExtractResolutionFromFilename(const char* name,
int* width_ptr,
int* height_ptr) {
static bool ExtractResolutionFromFilename(const char* name,
int* width_ptr,
int* height_ptr) {
// Isolate the .width_height. section of the filename by searching for a
// dot or underscore followed by a digit.
for (int i = 0; name[i]; ++i) {
@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name,
return false;
}
void PrintHelp(const char* program) {
static void PrintHelp(const char* program) {
printf("%s [-options] src_argb.raw dst_yuv.raw\n", program);
printf(
" -s <width> <height> .... specify source resolution. "
@ -78,7 +78,7 @@ void PrintHelp(const char* program) {
exit(0);
}
void ParseOptions(int argc, const char* argv[]) {
static void ParseOptions(int argc, const char* argv[]) {
if (argc <= 1) {
PrintHelp(argv[0]);
}
@ -165,23 +165,23 @@ static int TileARGBScale(const uint8_t* src_argb,
int src_height,
uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
int destination_width,
int destination_height,
libyuv::FilterMode filtering) {
for (int y = 0; y < dst_height; y += kTileY) {
for (int x = 0; x < dst_width; x += kTileX) {
for (int y = 0; y < destination_height; y += kTileY) {
for (int x = 0; x < destination_width; x += kTileX) {
int clip_width = kTileX;
if (x + clip_width > dst_width) {
clip_width = dst_width - x;
if (x + clip_width > destination_width) {
clip_width = destination_width - x;
}
int clip_height = kTileY;
if (y + clip_height > dst_height) {
clip_height = dst_height - y;
if (y + clip_height > destination_height) {
clip_height = destination_height - y;
}
int r = libyuv::ARGBScaleClip(src_argb, src_stride_argb, src_width,
src_height, dst_argb, dst_stride_argb,
dst_width, dst_height, x, y, clip_width,
clip_height, filtering);
destination_width, destination_height, x, y,
clip_width, clip_height, filtering);
if (r) {
return r;
}

View File

@ -6,7 +6,7 @@ ARCH="$1"
BUILD_DIR=$(echo "$(cd "$(dirname "$2")"; pwd -P)/$(basename "$2")")
SOURCE_CODE_ARCHIVE="$3"
MINIOSVERSION="9.0"
MINIOSVERSION="11.0"
OPT_CFLAGS="-Os -g"
OPT_LDFLAGS=""

View File

@ -45,9 +45,7 @@ absl_sources = [ "dependencies/third_party/abseil-cpp/" + x for x in [
"absl/strings/internal/string_constant.h",
"absl/base/internal/inline_variable.h",
"absl/base/internal/cycleclock.cc",
"absl/base/internal/exponential_biased.cc",
"absl/base/internal/low_level_alloc.cc",
"absl/base/internal/periodic_sampler.cc",
"absl/base/internal/raw_logging.cc",
"absl/base/internal/scoped_set_env.cc",
"absl/base/internal/spinlock.cc",
@ -72,7 +70,6 @@ absl_sources = [ "dependencies/third_party/abseil-cpp/" + x for x in [
"absl/debugging/internal/examine_stack.cc",
"absl/debugging/internal/stack_consumption.cc",
"absl/debugging/internal/vdso_support.cc",
"absl/debugging/leak_check_disable.cc",
"absl/debugging/stacktrace.cc",
"absl/debugging/symbolize.cc",
"absl/flags/flag.cc",
@ -241,10 +238,8 @@ absl_sources = [ "dependencies/third_party/abseil-cpp/" + x for x in [
"absl/base/log_severity.h",
"absl/base/internal/sysinfo.h",
"absl/base/internal/scoped_set_env.h",
"absl/base/internal/periodic_sampler.h",
"absl/base/internal/spinlock.h",
"absl/base/internal/raw_logging.h",
"absl/base/internal/exponential_biased.h",
"absl/base/dynamic_annotations.h",
"absl/strings/match.h",
"absl/algorithm/container.h",
@ -315,7 +310,6 @@ absl_sources = [ "dependencies/third_party/abseil-cpp/" + x for x in [
"absl/base/internal/scheduling_mode.h",
"absl/base/internal/tsan_mutex_interface.h",
"absl/base/internal/unaligned_access.h",
"absl/container/internal/have_sse.h",
"absl/container/internal/inlined_vector.h",
"absl/debugging/internal/stacktrace_unimplemented-inl.inc",
"absl/debugging/internal/stacktrace_generic-inl.inc",
@ -334,7 +328,6 @@ absl_sources = [ "dependencies/third_party/abseil-cpp/" + x for x in [
"absl/strings/str_format.h",
"absl/synchronization/internal/kernel_timeout.h",
"absl/synchronization/internal/per_thread_sem.h",
"absl/time/internal/zoneinfo.inc",
"absl/types/internal/optional.h",
"absl/types/internal/span.h",
"absl/types/variant.h",
@ -358,18 +351,31 @@ absl_sources = [ "dependencies/third_party/abseil-cpp/" + x for x in [
"absl/functional/internal/function_ref.h",
"absl/functional/bind_front.h",
"absl/functional/internal/front_binder.h",
"absl/base/internal/cycleclock_config.h",
"absl/base/internal/prefetch.h",
"absl/base/internal/unscaledcycleclock_config.h",
"absl/functional/any_invocable.h",
"absl/profiling/internal/exponential_biased.h",
"absl/strings/cord_analysis.h",
"absl/strings/internal/has_absl_stringify.h",
"absl/container/internal/common_policy_traits.h",
"absl/functional/internal/any_invocable.h",
"absl/strings/cord_buffer.h",
"absl/strings/internal/stringify_sink.h",
"absl/cleanup/cleanup.h",
"absl/strings/internal/cord_data_edge.h",
"absl/cleanup/internal/cleanup.h",
"absl/strings/internal/cord_rep_crc.h",
]]
webrtc_sources = [
"rtc_base/socket_address.h",
"rtc_base/arraysize.h",
"rtc_base/strings/string_builder.h",
"rtc_base/atomic_ops.h",
"rtc_base/weak_ptr.h",
"rtc_base/zero_memory.h",
"rtc_base/unique_id_generator.h",
"rtc_base/numerics/safe_conversions.h",
"rtc_base/time/timestamp_extrapolator.h",
"rtc_base/third_party/base64/base64.h",
"rtc_base/platform_thread_types.h",
"rtc_base/task_queue.h",
@ -411,9 +417,7 @@ webrtc_sources = [
"rtc_base/network_monitor.h",
"rtc_base/message_digest.h",
"rtc_base/network.h",
"rtc_base/message_handler.h",
"rtc_base/memory/aligned_malloc.h",
"rtc_base/location.h",
"rtc_base/byte_order.h",
"rtc_base/ifaddrs_converter.h",
"rtc_base/helpers.h",
@ -426,7 +430,6 @@ webrtc_sources = [
"rtc_base/experiments/min_video_bitrate_experiment.h",
"rtc_base/experiments/field_trial_list.h",
"rtc_base/experiments/field_trial_units.h",
"rtc_base/experiments/jitter_upper_bound_experiment.h",
"rtc_base/experiments/field_trial_parser.h",
"rtc_base/data_rate_limiter.h",
"rtc_base/experiments/cpu_speed_experiment.h",
@ -441,7 +444,6 @@ webrtc_sources = [
"rtc_base/async_socket.h",
"rtc_base/async_udp_socket.h",
"rtc_base/async_resolver_interface.h",
"rtc_base/async_invoker.h",
"rtc_base/async_tcp_socket.h",
"rtc_base/byte_buffer.h",
"rtc_base/crypt_string.h",
@ -472,7 +474,6 @@ webrtc_sources = [
"rtc_base/system/inline.h",
"rtc_base/type_traits.h",
"rtc_base/numerics/safe_compare.h",
"rtc_base/async_invoker.cc",
"rtc_base/async_packet_socket.cc",
"rtc_base/async_resolver_interface.cc",
"rtc_base/async_socket.cc",
@ -494,7 +495,6 @@ webrtc_sources = [
"rtc_base/experiments/field_trial_list.cc",
"rtc_base/experiments/field_trial_parser.cc",
"rtc_base/experiments/field_trial_units.cc",
"rtc_base/experiments/jitter_upper_bound_experiment.cc",
"rtc_base/experiments/keyframe_interval_settings.cc",
"rtc_base/experiments/min_video_bitrate_experiment.cc",
"rtc_base/experiments/normalize_simulcast_size_experiment.cc",
@ -510,13 +510,11 @@ webrtc_sources = [
"rtc_base/http_common.cc",
"rtc_base/ifaddrs_converter.cc",
"rtc_base/ip_address.cc",
"rtc_base/location.cc",
"rtc_base/log_sinks.cc",
"rtc_base/logging.cc",
"rtc_base/memory/aligned_malloc.cc",
"rtc_base/memory/fifo_buffer.cc",
"rtc_base/message_digest.cc",
"rtc_base/message_handler.cc",
"rtc_base/net_helper.cc",
"rtc_base/net_helpers.cc",
"rtc_base/network.cc",
@ -576,7 +574,6 @@ webrtc_sources = [
"rtc_base/third_party/base64/base64.cc",
"rtc_base/third_party/sigslot/sigslot.cc",
"rtc_base/thread.cc",
"rtc_base/time/timestamp_extrapolator.cc",
"rtc_base/time_utils.cc",
"rtc_base/timestamp_aligner.cc",
"rtc_base/unique_id_generator.cc",
@ -585,7 +582,6 @@ webrtc_sources = [
"rtc_base/ref_count.h",
"rtc_base/ref_counter.h",
"rtc_base/logging.h",
"api/task_queue/queued_task.h",
"api/audio/audio_frame.cc",
"api/audio/channel_layout.h",
"api/audio/channel_layout.cc",
@ -643,7 +639,6 @@ webrtc_sources = [
"api/rtp_packet_info.cc",
"api/rtp_parameters.cc",
"api/rtp_receiver_interface.cc",
"api/rtp_sender_interface.cc",
"api/rtp_transceiver_interface.cc",
"api/sctp_transport_interface.cc",
"api/stats_types.cc",
@ -694,7 +689,6 @@ webrtc_sources = [
"pc/audio_rtp_receiver.cc",
"pc/audio_track.cc",
"pc/channel.cc",
"pc/channel_manager.cc",
"pc/data_channel_controller.cc",
"pc/dtls_srtp_transport.h",
"pc/dtls_srtp_transport.cc",
@ -725,7 +719,6 @@ webrtc_sources = [
"pc/rtp_sender.cc",
"pc/rtp_transceiver.cc",
"pc/rtp_transport.cc",
"pc/sctp_data_channel_transport.cc",
"pc/sctp_transport.cc",
"pc/sctp_utils.cc",
"pc/sdp_serializer.cc",
@ -735,7 +728,6 @@ webrtc_sources = [
"pc/srtp_filter.cc",
"pc/srtp_session.cc",
"pc/srtp_transport.cc",
"pc/stats_collector.cc",
"pc/track_media_info_map.cc",
"pc/transport_stats.cc",
"pc/video_rtp_receiver.cc",
@ -764,7 +756,6 @@ webrtc_sources = [
"media/engine/internal_encoder_factory.cc",
"media/engine/multiplex_codec_factory.cc",
"media/engine/payload_type_mapper.cc",
"media/engine/simulcast.cc",
"media/engine/simulcast_encoder_adapter.cc",
"media/engine/unhandled_packets_buffer.cc",
"media/engine/webrtc_media_engine.h",
@ -1129,7 +1120,6 @@ webrtc_sources = [
"modules/audio_processing/transient/transient_suppressor_impl.cc",
"modules/audio_processing/transient/wpd_node.cc",
"modules/audio_processing/transient/wpd_tree.cc",
"modules/audio_processing/typing_detection.cc",
"modules/audio_processing/utility/cascaded_biquad_filter.cc",
"modules/audio_processing/utility/delay_estimator.cc",
"modules/audio_processing/utility/delay_estimator_wrapper.cc",
@ -1168,10 +1158,8 @@ webrtc_sources = [
"modules/congestion_controller/goog_cc/trendline_estimator.cc",
"modules/pacing/bitrate_prober.cc",
"modules/pacing/interval_budget.cc",
"modules/pacing/paced_sender.cc",
"modules/pacing/pacing_controller.cc",
"modules/pacing/packet_router.cc",
"modules/pacing/round_robin_packet_queue.cc",
"modules/pacing/task_queue_paced_sender.cc",
"modules/rtp_rtcp/include/report_block_data.cc",
"modules/rtp_rtcp/include/rtp_rtcp_defines.cc",
@ -1195,7 +1183,6 @@ webrtc_sources = [
"modules/rtp_rtcp/source/rtcp_packet/common_header.cc",
"modules/rtp_rtcp/source/rtcp_packet/compound_packet.cc",
"modules/rtp_rtcp/source/rtcp_packet/dlrr.cc",
"modules/rtp_rtcp/source/rtcp_packet/extended_jitter_report.cc",
"modules/rtp_rtcp/source/rtcp_packet/extended_reports.cc",
"modules/rtp_rtcp/source/rtcp_packet/fir.cc",
"modules/rtp_rtcp/source/rtcp_packet/loss_notification.cc",
@ -1253,7 +1240,6 @@ webrtc_sources = [
"modules/rtp_rtcp/source/tmmbr_help.cc",
"modules/rtp_rtcp/source/ulpfec_generator.cc",
"modules/rtp_rtcp/source/ulpfec_header_reader_writer.cc",
"modules/rtp_rtcp/source/ulpfec_receiver_impl.cc",
"modules/rtp_rtcp/source/video_rtp_depacketizer.cc",
"modules/rtp_rtcp/source/video_rtp_depacketizer_av1.cc",
"modules/rtp_rtcp/source/video_rtp_depacketizer_generic.cc",
@ -1262,11 +1248,9 @@ webrtc_sources = [
"modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.cc",
"modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.cc",
"modules/rtp_rtcp/source/receive_statistics_impl.cc",
"modules/utility/source/process_thread_impl.cc",
"modules/video_capture/device_info_impl.cc",
"modules/video_capture/video_capture_factory.cc",
"modules/video_capture/video_capture_impl.cc",
"modules/video_coding/codec_timer.cc",
"modules/video_coding/codecs/h264/h264.cc",
"modules/video_coding/codecs/h264/h264_color_space.cc",
"modules/video_coding/codecs/h264/h264_decoder_impl.cc",
@ -1281,7 +1265,6 @@ webrtc_sources = [
"modules/video_coding/encoded_frame.cc",
"modules/video_coding/fec_controller_default.cc",
"modules/video_coding/frame_buffer2.cc",
"modules/video_coding/frame_buffer3.cc",
"modules/video_coding/frame_dependencies_calculator.cc",
"modules/video_coding/frame_object.cc",
"modules/video_coding/generic_decoder.cc",
@ -1289,16 +1272,10 @@ webrtc_sources = [
"modules/video_coding/h264_sps_pps_tracker.cc",
"modules/video_coding/histogram.cc",
"modules/video_coding/include/video_codec_interface.cc",
"modules/video_coding/inter_frame_delay.cc",
"modules/video_coding/jitter_estimator.cc",
"modules/video_coding/loss_notification_controller.cc",
"modules/video_coding/media_opt_util.cc",
"modules/video_coding/packet_buffer.cc",
"modules/video_coding/rtp_frame_reference_finder.cc",
"modules/video_coding/rtt_filter.cc",
"modules/video_coding/timestamp_map.cc",
"modules/video_coding/timing.cc",
"modules/video_coding/unique_timestamp_counter.cc",
"modules/video_coding/utility/decoded_frames_history.cc",
"modules/video_coding/utility/frame_dropper.cc",
"modules/video_coding/utility/ivf_file_reader.cc",
@ -1319,11 +1296,6 @@ webrtc_sources = [
"modules/video_coding/codecs/vp9/svc_config.cc",
"modules/video_coding/codecs/vp9/vp9.cc",
"modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc",
"modules/video_processing/util/denoiser_filter.cc",
"modules/video_processing/util/denoiser_filter_c.cc",
"modules/video_processing/util/noise_estimation.cc",
"modules/video_processing/util/skin_detection.cc",
"modules/video_processing/video_denoiser.cc",
"call/adaptation/encoder_settings.cc",
"call/adaptation/resource_adaptation_processor_interface.cc",
"call/adaptation/video_source_restrictions.cc",
@ -1414,10 +1386,8 @@ webrtc_sources = [
"common_video/h264/pps_parser.cc",
"common_video/h264/sps_parser.cc",
"common_video/h264/sps_vui_rewriter.cc",
"common_video/incoming_video_stream.cc",
"common_video/libyuv/webrtc_libyuv.cc",
"common_video/video_frame_buffer.cc",
"common_video/video_render_frames.cc",
"p2p/base/async_stun_tcp_socket.cc",
"p2p/base/basic_async_resolver_factory.cc",
"p2p/base/basic_ice_controller.cc",
@ -1487,7 +1457,6 @@ webrtc_sources = [
"video/adaptation/overuse_frame_detector.cc",
"video/adaptation/quality_scaler_resource.cc",
"video/buffered_frame_decryptor.cc",
"video/call_stats.cc",
"video/encoder_bitrate_adjuster.cc",
"video/encoder_overshoot_detector.cc",
"video/encoder_rtcp_feedback.cc",
@ -1495,20 +1464,16 @@ webrtc_sources = [
"video/frame_encode_metadata_writer.cc",
"video/quality_limitation_reason_tracker.cc",
"video/quality_threshold.cc",
"video/receive_statistics_proxy.cc",
"video/report_block_stats.cc",
"video/rtp_video_stream_receiver.cc",
"video/rtp_video_stream_receiver_frame_transformer_delegate.cc",
"video/send_delay_stats.cc",
"video/send_statistics_proxy.cc",
"video/stats_counter.cc",
"video/stream_synchronization.cc",
"video/transport_adapter.cc",
"video/video_quality_observer.cc",
"video/video_send_stream.cc",
"video/video_send_stream_impl.cc",
"video/video_source_sink_controller.cc",
"video/video_stream_decoder.cc",
"video/video_stream_decoder_impl.cc",
"video/video_stream_encoder.cc",
"audio/audio_level.cc",
@ -1520,7 +1485,6 @@ webrtc_sources = [
"audio/channel_receive_frame_transformer_delegate.cc",
"audio/channel_send.cc",
"audio/channel_send_frame_transformer_delegate.cc",
"audio/null_audio_poller.cc",
"audio/remix_resample.cc",
"audio/utility/audio_frame_operations.cc",
"audio/utility/channel_mixer.cc",
@ -1644,7 +1608,6 @@ webrtc_sources = [
"api/video_codecs/video_decoder_factory.h",
"api/video_codecs/video_decoder_software_fallback_wrapper.h",
"api/video_codecs/video_encoder.h",
"api/video_codecs/video_encoder_config.h",
"api/video_codecs/video_encoder_factory.h",
"api/video_codecs/video_encoder_software_fallback_wrapper.h",
"api/video_codecs/vp8_frame_config.h",
@ -1717,10 +1680,8 @@ webrtc_sources = [
"common_video/h264/sps_parser.h",
"common_video/h264/sps_vui_rewriter.h",
"common_video/include/bitrate_adjuster.h",
"common_video/include/incoming_video_stream.h",
"common_video/include/video_frame_buffer.h",
"common_video/libyuv/include/webrtc_libyuv.h",
"common_video/video_render_frames.h",
"logging/rtc_event_log/encoder/blob_encoding.h",
"logging/rtc_event_log/encoder/delta_encoding.h",
"logging/rtc_event_log/encoder/rtc_event_log_encoder_common.h",
@ -1772,7 +1733,6 @@ webrtc_sources = [
"media/engine/internal_encoder_factory.h",
"media/engine/multiplex_codec_factory.h",
"media/engine/payload_type_mapper.h",
"media/engine/simulcast.h",
"media/engine/simulcast_encoder_adapter.h",
"media/engine/unhandled_packets_buffer.h",
"media/engine/webrtc_media_engine_defaults.h",
@ -1861,7 +1821,6 @@ webrtc_sources = [
"modules/audio_coding/codecs/pcm16b/pcm16b_common.h",
"modules/audio_coding/codecs/red/audio_encoder_copy_red.h",
"modules/audio_coding/include/audio_coding_module.h",
"modules/audio_coding/neteq/relative_arrival_delay_tracker.h",
"modules/audio_coding/neteq/reorder_optimizer.h",
"modules/audio_coding/neteq/underrun_optimizer.h",
"modules/audio_coding/neteq/accelerate.h",
@ -2034,7 +1993,6 @@ webrtc_sources = [
"modules/audio_processing/transient/transient_suppressor_impl.h",
"modules/audio_processing/transient/wpd_node.h",
"modules/audio_processing/transient/wpd_tree.h",
"modules/audio_processing/typing_detection.h",
"modules/audio_processing/utility/cascaded_biquad_filter.h",
"modules/audio_processing/utility/delay_estimator.h",
"modules/audio_processing/utility/delay_estimator_wrapper.h",
@ -2074,10 +2032,8 @@ webrtc_sources = [
"modules/include/module_common_types.h",
"modules/pacing/bitrate_prober.h",
"modules/pacing/interval_budget.h",
"modules/pacing/paced_sender.h",
"modules/pacing/pacing_controller.h",
"modules/pacing/packet_router.h",
"modules/pacing/round_robin_packet_queue.h",
"modules/pacing/task_queue_paced_sender.h",
"modules/remote_bitrate_estimator/aimd_rate_control.h",
"modules/remote_bitrate_estimator/include/bwe_defines.h",
@ -2112,7 +2068,6 @@ webrtc_sources = [
"modules/rtp_rtcp/source/rtcp_packet/common_header.h",
"modules/rtp_rtcp/source/rtcp_packet/compound_packet.h",
"modules/rtp_rtcp/source/rtcp_packet/dlrr.h",
"modules/rtp_rtcp/source/rtcp_packet/extended_jitter_report.h",
"modules/rtp_rtcp/source/rtcp_packet/extended_reports.h",
"modules/rtp_rtcp/source/rtcp_packet/fir.h",
"modules/rtp_rtcp/source/rtcp_packet/loss_notification.h",
@ -2169,7 +2124,6 @@ webrtc_sources = [
"modules/rtp_rtcp/source/tmmbr_help.h",
"modules/rtp_rtcp/source/ulpfec_generator.h",
"modules/rtp_rtcp/source/ulpfec_header_reader_writer.h",
"modules/rtp_rtcp/source/ulpfec_receiver_impl.h",
"modules/rtp_rtcp/source/video_rtp_depacketizer.h",
"modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h",
"modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h",
@ -2180,14 +2134,11 @@ webrtc_sources = [
"modules/third_party/fft/fft.h",
"modules/third_party/g711/g711.h",
"modules/third_party/g722/g722_enc_dec.h",
"modules/utility/source/process_thread_impl.h",
"modules/video_capture/device_info_impl.h",
"modules/video_capture/video_capture_factory.h",
"modules/video_capture/video_capture_impl.h",
"modules/video_coding/utility/vp9_constants.h",
"modules/video_coding/utility/bandwidth_quality_scaler.h",
"modules/video_coding/codec_timer.h",
"modules/video_coding/codecs/av1/libaom_av1_decoder.h",
"modules/video_coding/codecs/av1/libaom_av1_encoder.h",
"modules/video_coding/codecs/h264/include/h264.h",
"modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h",
@ -2219,20 +2170,14 @@ webrtc_sources = [
"modules/video_coding/include/video_codec_interface.h",
"modules/video_coding/include/video_coding_defines.h",
"modules/video_coding/include/video_error_codes.h",
"modules/video_coding/inter_frame_delay.h",
"modules/video_coding/jitter_buffer.h",
"modules/video_coding/jitter_estimator.h",
"modules/video_coding/loss_notification_controller.h",
"modules/video_coding/media_opt_util.h",
"modules/video_coding/packet.h",
"modules/video_coding/packet_buffer.h",
"modules/video_coding/receiver.h",
"modules/video_coding/rtp_frame_reference_finder.h",
"modules/video_coding/rtt_filter.h",
"modules/video_coding/session_info.h",
"modules/video_coding/timestamp_map.h",
"modules/video_coding/timing.h",
"modules/video_coding/unique_timestamp_counter.h",
"modules/video_coding/utility/decoded_frames_history.h",
"modules/video_coding/utility/frame_dropper.h",
"modules/video_coding/utility/ivf_file_reader.h",
@ -2244,12 +2189,6 @@ webrtc_sources = [
"modules/video_coding/utility/vp9_uncompressed_header_parser.h",
"modules/video_coding/video_coding_impl.h",
"modules/video_coding/video_receiver2.h",
"modules/video_processing/util/denoiser_filter.h",
"modules/video_processing/util/denoiser_filter_c.h",
"modules/video_processing/util/denoiser_filter_neon.h",
"modules/video_processing/util/noise_estimation.h",
"modules/video_processing/util/skin_detection.h",
"modules/video_processing/video_denoiser.h",
"p2p/base/async_stun_tcp_socket.h",
"p2p/base/basic_async_resolver_factory.h",
"p2p/base/basic_ice_controller.h",
@ -2285,7 +2224,6 @@ webrtc_sources = [
"pc/audio_rtp_receiver.h",
"pc/audio_track.h",
"pc/channel.h",
"pc/channel_manager.h",
"pc/data_channel_controller.h",
"pc/dtls_transport.h",
"pc/dtmf_sender.h",
@ -2317,7 +2255,6 @@ webrtc_sources = [
"pc/rtp_sender_proxy.h",
"pc/rtp_transceiver.h",
"pc/rtp_transport.h",
"pc/sctp_data_channel_transport.h",
"pc/sctp_transport.h",
"pc/sctp_utils.h",
"pc/sdp_serializer.h",
@ -2327,7 +2264,6 @@ webrtc_sources = [
"pc/srtp_filter.h",
"pc/srtp_session.h",
"pc/srtp_transport.h",
"pc/stats_collector.h",
"pc/track_media_info_map.h",
"pc/transport_stats.h",
"pc/video_rtp_receiver.h",
@ -2354,7 +2290,6 @@ webrtc_sources = [
"modules/audio_processing/transient/transient_suppressor.h",
"modules/audio_processing/utility/delay_estimator_internal.h",
"modules/audio_processing/vad/common.h",
"modules/include/module.h",
"modules/include/module_common_types_public.h",
"modules/include/module_fec_types.h",
"modules/rtp_rtcp/source/byte_io.h",
@ -2385,7 +2320,6 @@ webrtc_sources = [
"api/transport/data_channel_transport_interface.h",
"api/transport/enums.h",
"api/transport/network_control.h",
"api/transport/webrtc_key_value_config.h",
"api/turn_customizer.h",
"api/video/video_bitrate_allocator_factory.h",
"api/video/video_codec_type.h",
@ -2397,13 +2331,11 @@ webrtc_sources = [
"audio/channel_receive.h",
"audio/channel_send.h",
"audio/channel_send_frame_transformer_delegate.h",
"audio/null_audio_poller.h",
"audio/utility/channel_mixer.h",
"audio/utility/channel_mixing_matrix.h",
"audio/voip/audio_egress.h",
"p2p/base/ice_controller_factory_interface.h",
"rtc_base/bitstream_reader.h",
"rtc_base/async_invoker_inl.h",
"rtc_base/buffer.h",
"rtc_base/compile_assert_c.h",
"rtc_base/dscp.h",
@ -2422,28 +2354,22 @@ webrtc_sources = [
"rtc_base/units/unit_base.h",
"rtc_base/containers/flat_map.h",
"rtc_base/containers/flat_tree.h",
"rtc_base/containers/as_const.h",
"rtc_base/containers/not_fn.h",
"rtc_base/containers/invoke.h",
"rtc_base/containers/void_t.h",
"rtc_base/containers/flat_set.h",
"rtc_base/containers/identity.h",
"video/adaptation/encode_usage_resource.h",
"video/adaptation/overuse_frame_detector.h",
"video/call_stats.h",
"video/encoder_bitrate_adjuster.h",
"video/encoder_overshoot_detector.h",
"video/encoder_rtcp_feedback.h",
"video/frame_dumping_decoder.h",
"video/frame_encode_metadata_writer.h",
"video/quality_limitation_reason_tracker.h",
"video/receive_statistics_proxy.h",
"video/rtp_video_stream_receiver_frame_transformer_delegate.h",
"video/send_delay_stats.h",
"video/send_statistics_proxy.h",
"video/stream_synchronization.h",
"video/transport_adapter.h",
"video/video_quality_observer.h",
"video/video_send_stream_impl.h",
"video/video_source_sink_controller.h",
"video/video_stream_decoder_impl.h",
@ -2458,7 +2384,6 @@ webrtc_sources = [
"api/video/video_codec_constants.h",
"api/video/video_frame_type.h",
"api/video/video_sink_interface.h",
"api/video/video_stream_encoder_interface.h",
"audio/audio_send_stream.h",
"audio/channel_receive_frame_transformer_delegate.h",
"audio/remix_resample.h",
@ -2509,7 +2434,6 @@ webrtc_sources = [
"modules/audio_processing/vad/noise_gmm_tables.h",
"modules/congestion_controller/goog_cc/delay_increase_detector_interface.h",
"modules/rtp_rtcp/include/receive_statistics.h",
"modules/utility/include/process_thread.h",
"modules/video_coding/codecs/vp9/include/vp9_globals.h",
"pc/rtp_transport_internal.h",
"rtc_base/mdns_responder_interface.h",
@ -2517,15 +2441,12 @@ webrtc_sources = [
"rtc_base/numerics/sequence_number_util.h",
"rtc_base/openssl.h",
"rtc_base/socket_server.h",
"rtc_base/task_utils/to_queued_task.h",
"video/adaptation/quality_scaler_resource.h",
"video/buffered_frame_decryptor.h",
"video/quality_threshold.h",
"video/report_block_stats.h",
"video/rtp_video_stream_receiver.h",
"video/stats_counter.h",
"video/video_send_stream.h",
"video/video_stream_decoder.h",
"video/video_stream_encoder.h",
"rtc_base/socket_factory.h",
"api/audio_codecs/audio_decoder_factory_template.h",
@ -2583,11 +2504,9 @@ webrtc_sources = [
"rtc_base/ignore_wundef.h",
"rtc_base/numerics/math_utils.h",
"rtc_base/numerics/mod_ops.h",
"rtc_base/numerics/moving_median_filter.h",
"rtc_base/socket_adapters.h",
"rtc_base/ssl_roots.h",
"rtc_base/system/warn_current_thread_is_deadlocked.h",
"rtc_base/thread_message.h",
"rtc_base/trace_event.h",
"api/audio_codecs/isac/audio_decoder_isac.h",
"api/audio_codecs/isac/audio_encoder_isac.h",
@ -2608,7 +2527,6 @@ webrtc_sources = [
"modules/audio_coding/codecs/ilbc/state_search.h",
"modules/audio_coding/codecs/isac/audio_decoder_isac_t_impl.h",
"modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h",
"modules/audio_processing/agc/gain_map_internal.h",
"modules/video_capture/video_capture_config.h",
"modules/video_coding/codecs/vp8/include/vp8.h",
"p2p/base/p2p_transport_channel_ice_field_trials.h",
@ -2634,7 +2552,6 @@ webrtc_sources = [
"pc/channel_interface.h",
"call/packet_receiver.h",
"p2p/base/transport_info.h",
"modules/rtp_rtcp/include/ulpfec_receiver.h",
"modules/rtp_rtcp/include/rtp_packet_sender.h",
"rtc_base/numerics/moving_max_counter.h",
"modules/audio_coding/codecs/ilbc/enhancer_interface.h",
@ -2656,9 +2573,7 @@ webrtc_sources = [
"pc/peer_connection_proxy.h",
"pc/used_ids.h",
"rtc_base/numerics/divide_round.h",
"rtc_base/system/thread_registry.h",
"rtc_base/one_time_event.h",
"rtc_base/format_macros.h",
"audio/conversion.h",
"modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h",
"modules/audio_coding/codecs/ilbc/unpack_bits.h",
@ -2678,7 +2593,6 @@ webrtc_sources = [
"api/priority.h",
"api/transport/sctp_transport_factory_interface.h",
"api/video/video_adaptation_reason.h",
"api/video/video_stream_encoder_observer.h",
"api/video_codecs/spatial_layer.h",
"common_audio/third_party/ooura/fft_size_128/ooura_fft.h",
"common_audio/third_party/ooura/fft_size_128/ooura_fft.cc",
@ -2702,13 +2616,10 @@ webrtc_sources = [
"rtc_base/openssl_key_pair.h",
"rtc_base/openssl_key_pair.cc",
"rtc_base/synchronization/mutex.h",
"rtc_base/synchronization/mutex.cc",
"rtc_base/synchronization/mutex_critical_section.h",
"rtc_base/synchronization/mutex_pthread.h",
"rtc_base/deprecated/recursive_critical_section.h",
"rtc_base/deprecated/recursive_critical_section.cc",
"rtc_base/task_utils/pending_task_safety_flag.h",
"rtc_base/task_utils/pending_task_safety_flag.cc",
"api/video/video_frame_metadata.h",
"api/video/video_frame_metadata.cc",
"modules/rtp_rtcp/source/rtp_rtcp_impl2.h",
@ -2728,7 +2639,6 @@ webrtc_sources = [
"modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.cc",
"modules/video_coding/chain_diff_calculator.h",
"modules/video_coding/chain_diff_calculator.cc",
"modules/video_coding/deprecated/nack_module.h",
"modules/video_coding/rtp_frame_id_only_ref_finder.h",
"modules/video_coding/rtp_frame_id_only_ref_finder.cc",
"modules/video_coding/svc/scalable_video_controller.h",
@ -2764,7 +2674,6 @@ webrtc_sources = [
"modules/video_coding/rtp_generic_ref_finder.cc",
"pc/sctp_data_channel.h",
"pc/sctp_data_channel.cc",
"pc/stats_collector_interface.h",
"rtc_base/callback_list.h",
"rtc_base/callback_list.cc",
"call/adaptation/broadcast_resource_listener.h",
@ -2885,8 +2794,6 @@ webrtc_sources = [
"api/video_track_source_proxy_factory.h",
"modules/remote_bitrate_estimator/packet_arrival_map.h",
"modules/remote_bitrate_estimator/packet_arrival_map.cc",
"modules/audio_processing/agc/clipping_predictor.h",
"modules/audio_processing/agc/clipping_predictor.cc",
"modules/rtp_rtcp/source/capture_clock_offset_updater.h",
"modules/rtp_rtcp/source/capture_clock_offset_updater.cc",
"pc/video_track_source_proxy.h",
@ -2895,10 +2802,6 @@ webrtc_sources = [
"modules/rtp_rtcp/source/absolute_capture_time_interpolator.cc",
"media/base/sdp_video_format_utils.h",
"media/base/sdp_video_format_utils.cc",
"modules/audio_processing/agc/clipping_predictor_evaluator.h",
"modules/audio_processing/agc/clipping_predictor_evaluator.cc",
"modules/audio_processing/agc/clipping_predictor_level_buffer.h",
"modules/audio_processing/agc/clipping_predictor_level_buffer.cc",
"media/sctp/sctp_transport_factory.h",
"media/sctp/sctp_transport_factory.cc",
"media/sctp/dcsctp_transport.h",
@ -2908,7 +2811,6 @@ webrtc_sources = [
"modules/audio_coding/neteq/underrun_optimizer.cc",
"common_video/framerate_controller.cc",
"modules/audio_processing/agc/analog_gain_stats_reporter.cc",
"modules/audio_coding/neteq/relative_arrival_delay_tracker.cc",
"modules/audio_processing/agc2/vad_wrapper.cc",
"modules/audio_processing/agc2/adaptive_digital_gain_controller.cc",
"modules/video_coding/utility/framerate_controller_deprecated.h",
@ -2927,14 +2829,12 @@ webrtc_sources = [
"modules/video_coding/h265_vps_sps_pps_tracker.cc",
"modules/rtp_rtcp/source/video_rtp_depacketizer_h265.cc",
"common_video/h265/h265_bitstream_parser.cc",
"api/video_codecs/video_encoder_config.cc",
"video/frame_cadence_adapter.cc",
"modules/video_coding/codecs/h265/include/h265_globals.h",
"video/frame_cadence_adapter.h",
"common_video/h265/h265_common.h",
"modules/video_coding/h265_vps_sps_pps_tracker.h",
"common_video/h265/h265_pps_parser.h",
"modules/video_coding/frame_buffer3.h",
"common_video/h265/h265_bitstream_parser.h",
"modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h",
"common_video/h265/h265_sps_parser.h",
@ -2945,8 +2845,6 @@ webrtc_sources = [
"api/video/i444_buffer.cc",
"logging/rtc_event_log/events/rtc_event_field_encoding_parser.cc",
"logging/rtc_event_log/events/rtc_event_field_encoding_parser.h",
"video/frame_buffer_proxy.cc",
"video/frame_buffer_proxy.h",
"video/decode_synchronizer.cc",
"video/decode_synchronizer.h",
"logging/rtc_event_log/events/fixed_length_encoding_parameters_v3.cc",
@ -2969,7 +2867,93 @@ webrtc_sources = [
"logging/rtc_event_log/events/rtc_event_definition.h",
"modules/video_coding/utility/ivf_defines.h",
"video/frame_decode_scheduler.h",
"modules/video_coding/codecs/av1/libaom_av1_encoder_supported.h",
"api/video/i210_buffer.cc",
"api/video/i422_buffer.cc",
"video/config/video_encoder_config.cc",
"modules/audio_processing/aec3/config_selector.cc",
"modules/rtp_rtcp/source/ulpfec_receiver.cc",
"api/video_codecs/av1_profile.cc",
"modules/utility/maybe_worker_thread.cc",
"pc/legacy_stats_collector.cc",
"modules/audio_coding/neteq/packet_arrival_history.cc",
"api/task_queue/pending_task_safety_flag.cc",
"video/render/incoming_video_stream.cc",
"modules/pacing/prioritized_packet_queue.cc",
"video/unique_timestamp_counter.cc",
"modules/audio_processing/agc2/clipping_predictor.cc",
"api/video_codecs/scalability_mode.cc",
"modules/video_coding/codecs/vp8/vp8_scalability.cc",
"video/render/video_render_frames.cc",
"modules/video_coding/svc/scalability_mode_util.cc",
"modules/audio_processing/transient/voice_probability_delay_unit.cc",
"modules/audio_processing/aec3/multi_channel_content_detector.cc",
"video/video_stream_buffer_controller.cc",
"modules/audio_processing/agc2/clipping_predictor_level_buffer.cc",
"api/video/frame_buffer.cc",
"modules/video_coding/timing/inter_frame_delay.cc",
"modules/video_coding/timing/jitter_estimator.cc",
"modules/video_coding/timing/timing.cc",
"video/config/encoder_stream_factory.cc",
"p2p/base/ice_switch_reason.cc",
"p2p/base/wrapping_active_ice_controller.cc",
"modules/video_coding/timing/codec_timer.cc",
"modules/video_coding/timing/timestamp_extrapolator.cc",
"modules/video_coding/timing/frame_delay_variation_kalman_filter.cc",
"modules/video_coding/timing/rtt_filter.cc",
"video/config/simulcast.cc",
"api/field_trials_view.h",
"api/make_ref_counted.h",
"api/task_queue/pending_task_safety_flag.h",
"api/video/frame_buffer.h",
"api/video/i210_buffer.h",
"api/video/i422_buffer.h",
"api/video/resolution.h",
"api/video_codecs/av1_profile.h",
"api/video_codecs/scalability_mode.h",
"modules/audio_coding/neteq/packet_arrival_history.h",
"modules/audio_processing/aec3/block.h",
"modules/audio_processing/aec3/config_selector.h",
"modules/audio_processing/aec3/multi_channel_content_detector.h",
"modules/audio_processing/agc2/clipping_predictor.h",
"modules/audio_processing/agc2/clipping_predictor_level_buffer.h",
"modules/audio_processing/transient/voice_probability_delay_unit.h",
"modules/pacing/prioritized_packet_queue.h",
"modules/rtp_rtcp/source/ulpfec_receiver.h",
"modules/utility/maybe_worker_thread.h",
"modules/video_coding/codecs/vp8/vp8_scalability.h",
"modules/video_coding/svc/scalability_mode_util.h",
"modules/video_coding/timing/codec_timer.h",
"modules/video_coding/timing/frame_delay_variation_kalman_filter.h",
"modules/video_coding/timing/inter_frame_delay.h",
"modules/video_coding/timing/jitter_estimator.h",
"modules/video_coding/timing/rtt_filter.h",
"modules/video_coding/timing/timestamp_extrapolator.h",
"modules/video_coding/timing/timing.h",
"video/config/encoder_stream_factory.h",
"video/config/simulcast.h",
"video/config/video_encoder_config.h",
"video/render/incoming_video_stream.h",
"video/render/video_render_frames.h",
"video/unique_timestamp_counter.h",
"video/video_stream_buffer_controller.h",
"api/video_codecs/simulcast_stream.h",
"api/video_codecs/video_encoder_factory_template.h",
"modules/audio_processing/agc2/gain_map_internal.h",
"p2p/base/ice_switch_reason.h",
"p2p/base/wrapping_active_ice_controller.h",
"pc/legacy_stats_collector.h",
"rtc_base/numerics/moving_percentile_filter.h",
"api/video_codecs/video_encoder_factory_template_libvpx_vp8_adapter.h",
"modules/video_coding/utility/vp8_constants.h",
"p2p/base/active_ice_controller_factory_interface.h",
"p2p/base/active_ice_controller_interface.h",
"pc/legacy_stats_collector_interface.h",
"rtc_base/memory/always_valid_pointer.h",
"video/video_stream_encoder_interface.h",
"video/video_stream_encoder_observer.h",
"api/video_codecs/video_encoder_factory_template_libvpx_vp9_adapter.h",
"p2p/base/ice_agent_interface.h",
"api/video_codecs/video_encoder_factory_template_open_h264_adapter.h",
]
ios_objc_sources = [
@ -3150,7 +3134,6 @@ ios_sources = [
"objc/api/peerconnection/RTCRtpReceiver.mm",
"objc/api/peerconnection/RTCMediaStream.mm",
"objc/api/peerconnection/RTCRtpTransceiver.mm",
"objc/api/peerconnection/RTCPeerConnectionFactory.mm",
"objc/api/peerconnection/RTCCertificate.mm",
"objc/api/peerconnection/RTCDtmfSender.mm",
"objc/api/peerconnection/RTCMediaStreamTrack.mm",
@ -3220,6 +3203,11 @@ ios_sources = [
"objc/api/peerconnection/RTCIceCandidateErrorEvent.h",
"objc/api/peerconnection/RTCIceCandidateErrorEvent+Private.h",
"objc/api/peerconnection/RTCIceCandidateErrorEvent.mm",
"objc/native/api/ssl_certificate_verifier.mm",
"objc/native/api/ssl_certificate_verifier.h",
"objc/base/RTCSSLCertificateVerifier.h",
"objc/native/api/objc_audio_device_module.h",
"objc/components/audio/RTCAudioDevice.h",
]
common_arm_specific_sources = [webrtc_source_dir + "/" + path for path in [
@ -3229,7 +3217,6 @@ common_arm_specific_sources = [webrtc_source_dir + "/" + path for path in [
"modules/audio_coding/codecs/isac/fix/source/lattice_neon.c",
"modules/audio_coding/codecs/isac/fix/source/transform_neon.c",
"modules/audio_processing/aecm/aecm_core_neon.cc",
"modules/video_processing/util/denoiser_filter_neon.cc",
"common_audio/fir_filter_neon.cc",
"common_audio/signal_processing/cross_correlation_neon.c",
"common_audio/signal_processing/downsample_fast_neon.c",
@ -3325,105 +3312,6 @@ arch_specific_cflags = select({
"@build_bazel_rules_apple//apple:ios_x86_64": common_flags + x86_64_specific_flags,
})
'''cc_library(
name = "usrsctp",
srcs = [ "dependencies/third_party/usrsctp/" + path for path in [
"usrsctplib/usrsctplib/netinet/sctp.h",
"usrsctplib/usrsctplib/netinet/sctp_asconf.c",
"usrsctplib/usrsctplib/netinet/sctp_asconf.h",
"usrsctplib/usrsctplib/netinet/sctp_auth.c",
"usrsctplib/usrsctplib/netinet/sctp_auth.h",
"usrsctplib/usrsctplib/netinet/sctp_bsd_addr.c",
"usrsctplib/usrsctplib/netinet/sctp_bsd_addr.h",
"usrsctplib/usrsctplib/netinet/sctp_callout.c",
"usrsctplib/usrsctplib/netinet/sctp_callout.h",
"usrsctplib/usrsctplib/netinet/sctp_cc_functions.c",
"usrsctplib/usrsctplib/netinet/sctp_constants.h",
"usrsctplib/usrsctplib/netinet/sctp_crc32.c",
"usrsctplib/usrsctplib/netinet/sctp_crc32.h",
"usrsctplib/usrsctplib/netinet/sctp_header.h",
"usrsctplib/usrsctplib/netinet/sctp_indata.c",
"usrsctplib/usrsctplib/netinet/sctp_indata.h",
"usrsctplib/usrsctplib/netinet/sctp_input.c",
"usrsctplib/usrsctplib/netinet/sctp_input.h",
"usrsctplib/usrsctplib/netinet/sctp_lock_userspace.h",
"usrsctplib/usrsctplib/netinet/sctp_os.h",
"usrsctplib/usrsctplib/netinet/sctp_os_userspace.h",
"usrsctplib/usrsctplib/netinet/sctp_output.c",
"usrsctplib/usrsctplib/netinet/sctp_output.h",
"usrsctplib/usrsctplib/netinet/sctp_pcb.c",
"usrsctplib/usrsctplib/netinet/sctp_pcb.h",
"usrsctplib/usrsctplib/netinet/sctp_peeloff.c",
"usrsctplib/usrsctplib/netinet/sctp_peeloff.h",
"usrsctplib/usrsctplib/netinet/sctp_process_lock.h",
"usrsctplib/usrsctplib/netinet/sctp_sha1.c",
"usrsctplib/usrsctplib/netinet/sctp_sha1.h",
"usrsctplib/usrsctplib/netinet/sctp_ss_functions.c",
"usrsctplib/usrsctplib/netinet/sctp_structs.h",
"usrsctplib/usrsctplib/netinet/sctp_sysctl.c",
"usrsctplib/usrsctplib/netinet/sctp_sysctl.h",
"usrsctplib/usrsctplib/netinet/sctp_timer.c",
"usrsctplib/usrsctplib/netinet/sctp_timer.h",
"usrsctplib/usrsctplib/netinet/sctp_uio.h",
"usrsctplib/usrsctplib/netinet/sctp_userspace.c",
"usrsctplib/usrsctplib/netinet/sctp_usrreq.c",
"usrsctplib/usrsctplib/netinet/sctp_var.h",
"usrsctplib/usrsctplib/netinet/sctputil.c",
"usrsctplib/usrsctplib/netinet/sctputil.h",
"usrsctplib/usrsctplib/netinet6/sctp6_usrreq.c",
"usrsctplib/usrsctplib/netinet6/sctp6_var.h",
"usrsctplib/usrsctplib/user_atomic.h",
"usrsctplib/usrsctplib/user_environment.c",
"usrsctplib/usrsctplib/user_environment.h",
"usrsctplib/usrsctplib/user_inpcb.h",
"usrsctplib/usrsctplib/user_ip6_var.h",
"usrsctplib/usrsctplib/user_ip_icmp.h",
"usrsctplib/usrsctplib/user_malloc.h",
"usrsctplib/usrsctplib/user_mbuf.c",
"usrsctplib/usrsctplib/user_mbuf.h",
"usrsctplib/usrsctplib/user_queue.h",
"usrsctplib/usrsctplib/user_recv_thread.c",
"usrsctplib/usrsctplib/user_recv_thread.h",
"usrsctplib/usrsctplib/user_route.h",
"usrsctplib/usrsctplib/user_socket.c",
"usrsctplib/usrsctplib/user_socketvar.h",
"usrsctplib/usrsctplib/user_uma.h",
"usrsctplib/usrsctplib/usrsctp.h",
]],
copts = [
"-Ithird-party/webrtc/dependencies/third_party/usrsctp/usrsctplib/usrsctplib",
"-DHAVE_SA_LEN",
"-DHAVE_SCONN_LEN",
"-D__APPLE_USE_RFC_2292",
"-D__Userspace_os_Darwin",
"-UINET",
"-UINET6",
"-U__APPLE__",
"-DWEBRTC_IOS",
"-DWEBRTC_MAC",
"-DWEBRTC_POSIX",
"-DRTC_ENABLE_VP9",
"-DBSD=1",
"-DUSE_KISS_FFT",
"-DHAVE_PTHREAD",
"-DWEBRTC_APM_DEBUG_DUMP=0",
"-DWEBRTC_USE_BUILTIN_ISAC_FLOAT",
"-DWEBRTC_OPUS_VARIABLE_COMPLEXITY=0",
"-DHAVE_NETINET_IN_H",
"-DWEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE",
"-DSCTP_SIMPLE_ALLOCATOR",
"-DSCTP_PROCESS_LEVEL_LOCKS",
"-D__Userspace__",
"-D__Userspace_os_Darwin",
"-DPACKAGE_VERSION=''",
"-DHAVE_SCTP",
"-DWEBRTC_HAVE_USRSCTP",
"-DWEBRTC_HAVE_SCTP",
"-DNON_WINDOWS_DEFINE",
] + arch_specific_cflags + optimization_flags,
visibility = ["//visibility:public"],
)'''
arch_specific_crc32c_sources = select({
"@build_bazel_rules_apple//apple:ios_armv7": [
],
@ -3596,6 +3484,8 @@ dcsctp_sources = [ "webrtc/net/dcsctp/" + path for path in [
"rx/data_tracker.h",
"rx/reassembly_queue.h",
"rx/reassembly_streams.h",
"rx/interleaved_reassembly_streams.cc",
"rx/interleaved_reassembly_streams.h",
"tx/outstanding_data.cc",
"tx/retransmission_error_counter.cc",
@ -3608,6 +3498,8 @@ dcsctp_sources = [ "webrtc/net/dcsctp/" + path for path in [
"tx/outstanding_data.h",
"tx/rr_send_queue.h",
"tx/send_queue.h",
"tx/stream_scheduler.h",
"tx/stream_scheduler.cc",
"timer/task_queue_timeout.cc",
"timer/timer.cc",
@ -3704,8 +3596,13 @@ fft4g_sources = [
"fft4g/fft4g.cc",
]
opus_headers = [
"dependencies/third_party/opus/src/include/opus.h",
"dependencies/third_party/opus/src/include/opus_multistream.h",
]
raw_combined_sources = webrtc_sources
combined_sources = [webrtc_source_dir + "/" + path for path in raw_combined_sources] + arch_specific_sources + [ webrtc_source_dir + "/" + "sdk/" + path for path in ios_sources + ios_objc_sources] + absl_sources + fft4g_sources + rnnoise_sources + pffft_sources + crc32c_sources + dcsctp_sources
combined_sources = [webrtc_source_dir + "/" + path for path in raw_combined_sources] + arch_specific_sources + [ webrtc_source_dir + "/" + "sdk/" + path for path in ios_sources + ios_objc_sources] + absl_sources + fft4g_sources + rnnoise_sources + pffft_sources + crc32c_sources + dcsctp_sources + opus_headers
objc_library(
name = "webrtc_lib",
@ -3716,11 +3613,10 @@ objc_library(
"-Ithird-party/webrtc/" + webrtc_source_dir + "/",
"-Ithird-party/webrtc/dependencies",
"-Ithird-party/webrtc/dependencies/third_party/abseil-cpp",
#"-Ithird-party/webrtc/dependencies/third_party/usrsctp/usrsctplib",
#"-Ithird-party/webrtc/dependencies/third_party/usrsctp/usrsctplib/usrsctplib",
"-Ithird-party/webrtc/dependencies/third_party/crc32c/src/include",
"-Ithird-party/webrtc/dependencies/third_party/libsrtp/include",
"-Ithird-party/webrtc/dependencies/third_party/libsrtp/crypto/include",
"-Ithird-party/webrtc/dependencies/third_party/opus/src/include",
"-Ithird-party/libyuv",
"-Ithird-party/libyuv/third_party/libyuv/include",
"-Ithird-party/webrtc/" + webrtc_source_dir + "/" + "testing/gtest/include",
@ -3734,7 +3630,6 @@ objc_library(
"-DWEBRTC_USE_BUILTIN_ISAC_FLOAT",
"-DWEBRTC_OPUS_VARIABLE_COMPLEXITY=0",
"-DHAVE_NETINET_IN_H",
"-DWEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE",
"-DSCTP_SIMPLE_ALLOCATOR",
"-DSCTP_PROCESS_LEVEL_LOCKS",
"-D__Userspace__",

View File

@ -13,12 +13,11 @@ set(ABSL_INTERNAL_DLL_FILES
"base/internal/atomic_hook.h"
"base/internal/cycleclock.cc"
"base/internal/cycleclock.h"
"base/internal/cycleclock_config.h"
"base/internal/direct_mmap.h"
"base/internal/dynamic_annotations.h"
"base/internal/endian.h"
"base/internal/errno_saver.h"
"base/internal/exponential_biased.cc"
"base/internal/exponential_biased.h"
"base/internal/fast_type_id.h"
"base/internal/hide_ptr.h"
"base/internal/identity.h"
@ -28,8 +27,7 @@ set(ABSL_INTERNAL_DLL_FILES
"base/internal/low_level_alloc.h"
"base/internal/low_level_scheduling.h"
"base/internal/per_thread_tls.h"
"base/internal/periodic_sampler.cc"
"base/internal/periodic_sampler.h"
"base/internal/prefetch.h"
"base/internal/pretty_function.h"
"base/internal/raw_logging.cc"
"base/internal/raw_logging.h"
@ -53,6 +51,7 @@ set(ABSL_INTERNAL_DLL_FILES
"base/internal/unaligned_access.h"
"base/internal/unscaledcycleclock.cc"
"base/internal/unscaledcycleclock.h"
"base/internal/unscaledcycleclock_config.h"
"base/log_severity.cc"
"base/log_severity.h"
"base/macros.h"
@ -72,6 +71,7 @@ set(ABSL_INTERNAL_DLL_FILES
"container/internal/btree.h"
"container/internal/btree_container.h"
"container/internal/common.h"
"container/internal/common_policy_traits.h"
"container/internal/compressed_tuple.h"
"container/internal/container_memory.h"
"container/internal/counting_allocator.h"
@ -82,20 +82,36 @@ set(ABSL_INTERNAL_DLL_FILES
"container/internal/hashtablez_sampler.cc"
"container/internal/hashtablez_sampler.h"
"container/internal/hashtablez_sampler_force_weak_definition.cc"
"container/internal/have_sse.h"
"container/internal/inlined_vector.h"
"container/internal/layout.h"
"container/internal/node_hash_policy.h"
"container/internal/node_slot_policy.h"
"container/internal/raw_hash_map.h"
"container/internal/raw_hash_set.cc"
"container/internal/raw_hash_set.h"
"container/internal/tracked.h"
"container/node_hash_map.h"
"container/node_hash_set.h"
"crc/crc32c.cc"
"crc/crc32c.h"
"crc/internal/cpu_detect.cc"
"crc/internal/cpu_detect.h"
"crc/internal/crc32c.h"
"crc/internal/crc32c_inline.h"
"crc/internal/crc32_x86_arm_combined_simd.h"
"crc/internal/crc.cc"
"crc/internal/crc.h"
"crc/internal/crc_internal.h"
"crc/internal/crc_x86_arm_combined.cc"
"crc/internal/crc_memcpy_fallback.cc"
"crc/internal/crc_memcpy.h"
"crc/internal/crc_memcpy_x86_64.cc"
"crc/internal/crc_non_temporal_memcpy.cc"
"crc/internal/crc_x86_arm_combined.cc"
"crc/internal/non_temporal_arm_intrinsics.h"
"crc/internal/non_temporal_memcpy.h"
"debugging/failure_signal_handler.cc"
"debugging/failure_signal_handler.h"
"debugging/leak_check.h"
"debugging/leak_check_disable.cc"
"debugging/stacktrace.cc"
"debugging/stacktrace.h"
"debugging/symbolize.cc"
@ -114,9 +130,11 @@ set(ABSL_INTERNAL_DLL_FILES
"debugging/internal/symbolize.h"
"debugging/internal/vdso_support.cc"
"debugging/internal/vdso_support.h"
"functional/any_invocable.h"
"functional/internal/front_binder.h"
"functional/bind_front.h"
"functional/function_ref.h"
"functional/internal/any_invocable.h"
"functional/internal/function_ref.h"
"hash/hash.h"
"hash/internal/city.h"
@ -133,6 +151,10 @@ set(ABSL_INTERNAL_DLL_FILES
"numeric/int128.h"
"numeric/internal/bits.h"
"numeric/internal/representation.h"
"profiling/internal/exponential_biased.cc"
"profiling/internal/exponential_biased.h"
"profiling/internal/periodic_sampler.cc"
"profiling/internal/periodic_sampler.h"
"profiling/internal/sample_recorder.h"
"random/bernoulli_distribution.h"
"random/beta_distribution.h"
@ -196,22 +218,29 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/charconv.h"
"strings/cord.cc"
"strings/cord.h"
"strings/cord_analysis.cc"
"strings/cord_analysis.h"
"strings/cord_buffer.cc"
"strings/cord_buffer.h"
"strings/escaping.cc"
"strings/escaping.h"
"strings/internal/charconv_bigint.cc"
"strings/internal/charconv_bigint.h"
"strings/internal/charconv_parse.cc"
"strings/internal/charconv_parse.h"
"strings/internal/cord_data_edge.h"
"strings/internal/cord_internal.cc"
"strings/internal/cord_internal.h"
"strings/internal/cord_rep_consume.h"
"strings/internal/cord_rep_consume.cc"
"strings/internal/cord_rep_btree.cc"
"strings/internal/cord_rep_btree.h"
"strings/internal/cord_rep_btree_navigator.cc"
"strings/internal/cord_rep_btree_navigator.h"
"strings/internal/cord_rep_btree_reader.cc"
"strings/internal/cord_rep_btree_reader.h"
"strings/internal/cord_rep_crc.cc"
"strings/internal/cord_rep_crc.h"
"strings/internal/cord_rep_consume.h"
"strings/internal/cord_rep_consume.cc"
"strings/internal/cord_rep_flat.h"
"strings/internal/cord_rep_ring.cc"
"strings/internal/cord_rep_ring.h"
@ -227,8 +256,13 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/internal/cordz_statistics.h"
"strings/internal/cordz_update_scope.h"
"strings/internal/cordz_update_tracker.h"
"strings/internal/damerau_levenshtein_distance.h"
"strings/internal/damerau_levenshtein_distance.cc"
"strings/internal/stl_type_traits.h"
"strings/internal/string_constant.h"
"strings/internal/stringify_sink.h"
"strings/internal/stringify_sink.cc"
"strings/internal/has_absl_stringify.h"
"strings/match.cc"
"strings/match.h"
"strings/numbers.cc"
@ -341,126 +375,160 @@ set(ABSL_INTERNAL_DLL_FILES
"types/internal/span.h"
"types/variant.h"
"utility/utility.h"
"debugging/leak_check.cc"
)
set(ABSL_INTERNAL_DLL_TARGETS
"stacktrace"
"symbolize"
"examine_stack"
"failure_signal_handler"
"debugging_internal"
"demangle_internal"
"leak_check"
"leak_check_disable"
"stack_consumption"
"debugging"
"hash"
"spy_hash_state"
"city"
"memory"
"strings"
"strings_internal"
"cord"
"str_format"
"str_format_internal"
"pow10_helper"
"int128"
"numeric"
"utility"
"any"
"bad_any_cast"
"bad_any_cast_impl"
"span"
"optional"
"bad_optional_access"
"bad_variant_access"
"variant"
"compare"
"algorithm"
"algorithm_container"
"graphcycles_internal"
"kernel_timeout_internal"
"synchronization"
"thread_pool"
"bind_front"
"function_ref"
"any"
"any_invocable"
"atomic_hook"
"log_severity"
"raw_logging_internal"
"spinlock_wait"
"config"
"dynamic_annotations"
"core_headers"
"malloc_internal"
"base_internal"
"bad_any_cast"
"bad_any_cast_impl"
"bad_optional_access"
"bad_variant_access"
"base"
"throw_delegate"
"pretty_function"
"endian"
"base_internal"
"bind_front"
"bits"
"exponential_biased"
"periodic_sampler"
"scoped_set_env"
"type_traits"
"meta"
"random_random"
"random_bit_gen_ref"
"random_distributions"
"random_seed_gen_exception"
"random_seed_sequences"
"random_internal_traits"
"random_internal_distribution_caller"
"random_internal_distributions"
"random_internal_fast_uniform_bits"
"random_internal_seed_material"
"random_internal_pool_urbg"
"random_internal_explicit_seed_seq"
"random_internal_sequence_urbg"
"random_internal_salted_seed_seq"
"random_internal_iostream_state_saver"
"random_internal_generate_real"
"random_internal_wide_multiply"
"random_internal_fastmath"
"random_internal_nonsecure_base"
"random_internal_pcg_engine"
"random_internal_randen_engine"
"random_internal_platform"
"random_internal_randen"
"random_internal_randen_slow"
"random_internal_randen_hwaes"
"random_internal_randen_hwaes_impl"
"random_internal_uniform_helper"
"status"
"time"
"civil_time"
"time_zone"
"container"
"btree"
"city"
"civil_time"
"compare"
"compressed_tuple"
"fixed_array"
"inlined_vector_internal"
"inlined_vector"
"config"
"container"
"container_common"
"container_memory"
"cord"
"core_headers"
"counting_allocator"
"crc_cpu_detect",
"crc_internal",
"crc32c",
"debugging"
"debugging_internal"
"demangle_internal"
"dynamic_annotations"
"endian"
"examine_stack"
"exponential_biased"
"failure_signal_handler"
"fixed_array"
"flat_hash_map"
"flat_hash_set"
"node_hash_map"
"node_hash_set"
"container_memory"
"function_ref"
"graphcycles_internal"
"hash"
"hash_function_defaults"
"hash_policy_traits"
"hashtablez_sampler"
"hashtable_debug"
"hashtable_debug_hooks"
"have_sse"
"node_hash_policy"
"raw_hash_map"
"container_common"
"raw_hash_set"
"hashtablez_sampler"
"inlined_vector"
"inlined_vector_internal"
"int128"
"kernel_timeout_internal"
"layout"
"tracked"
"leak_check"
"log_severity"
"malloc_internal"
"memory"
"meta"
"node_hash_map"
"node_hash_set"
"node_slot_policy"
"non_temporal_arm_intrinsics",
"non_temporal_memcpy",
"numeric"
"optional"
"periodic_sampler"
"pow10_helper"
"pretty_function"
"random_bit_gen_ref"
"random_distributions"
"random_internal_distribution_caller"
"random_internal_distributions"
"random_internal_explicit_seed_seq"
"random_internal_fastmath"
"random_internal_fast_uniform_bits"
"random_internal_generate_real"
"random_internal_iostream_state_saver"
"random_internal_nonsecure_base"
"random_internal_pcg_engine"
"random_internal_platform"
"random_internal_pool_urbg"
"random_internal_randen"
"random_internal_randen_engine"
"random_internal_randen_hwaes"
"random_internal_randen_hwaes_impl"
"random_internal_randen_slow"
"random_internal_salted_seed_seq"
"random_internal_seed_material"
"random_internal_sequence_urbg"
"random_internal_traits"
"random_internal_uniform_helper"
"random_internal_wide_multiply"
"random_random"
"random_seed_gen_exception"
"random_seed_sequences"
"raw_hash_map"
"raw_hash_set"
"raw_logging_internal"
"sample_recorder"
"scoped_set_env"
"span"
"spinlock_wait"
"spy_hash_state"
"stack_consumption"
"stacktrace"
"status"
"str_format"
"str_format_internal"
"strings"
"strings_internal"
"symbolize"
"synchronization"
"thread_pool"
"throw_delegate"
"time"
"time_zone"
"tracked"
"type_traits"
"utility"
"variant"
)
function(_absl_target_compile_features_if_available TARGET TYPE FEATURE)
if(FEATURE IN_LIST CMAKE_CXX_COMPILE_FEATURES)
target_compile_features(${TARGET} ${TYPE} ${FEATURE})
else()
message(WARNING "Feature ${FEATURE} is unknown for the CXX compiler")
endif()
endfunction()
include(CheckCXXSourceCompiles)
check_cxx_source_compiles(
[==[
#ifdef _MSC_VER
# if _MSVC_LANG < 201700L
# error "The compiler defaults or is configured for C++ < 17"
# endif
#elif __cplusplus < 201700L
# error "The compiler defaults or is configured for C++ < 17"
#endif
int main() { return 0; }
]==]
ABSL_INTERNAL_AT_LEAST_CXX17)
if(ABSL_INTERNAL_AT_LEAST_CXX17)
set(ABSL_INTERNAL_CXX_STD_FEATURE cxx_std_17)
else()
set(ABSL_INTERNAL_CXX_STD_FEATURE cxx_std_14)
endif()
function(absl_internal_dll_contains)
cmake_parse_arguments(ABSL_INTERNAL_DLL
""
@ -538,7 +606,27 @@ function(absl_make_dll)
NOMINMAX
INTERFACE
${ABSL_CC_LIB_DEFINES}
ABSL_CONSUME_DLL
)
if(ABSL_PROPAGATE_CXX_STD)
# Abseil libraries require C++14 as the current minimum standard. When
# compiled with C++17 (either because it is the compiler's default or
# explicitly requested), then Abseil requires C++17.
_absl_target_compile_features_if_available(${_NAME} PUBLIC ${ABSL_INTERNAL_CXX_STD_FEATURE})
else()
# Note: This is legacy (before CMake 3.8) behavior. Setting the
# target-level CXX_STANDARD property to ABSL_CXX_STANDARD (which is
# initialized by CMAKE_CXX_STANDARD) should have no real effect, since
# that is the default value anyway.
#
# CXX_STANDARD_REQUIRED does guard against the top-level CMake project
# not having enabled CMAKE_CXX_STANDARD_REQUIRED (which prevents
# "decaying" to an older standard if the requested one isn't available).
set_property(TARGET ${_NAME} PROPERTY CXX_STANDARD ${ABSL_CXX_STANDARD})
set_property(TARGET ${_NAME} PROPERTY CXX_STANDARD_REQUIRED ON)
endif()
install(TARGETS abseil_dll EXPORT ${PROJECT_NAME}Targets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}

View File

@ -26,6 +26,12 @@ if(NOT DEFINED ABSL_IDE_FOLDER)
set(ABSL_IDE_FOLDER Abseil)
endif()
if(ABSL_USE_SYSTEM_INCLUDES)
set(ABSL_INTERNAL_INCLUDE_WARNING_GUARD SYSTEM)
else()
set(ABSL_INTERNAL_INCLUDE_WARNING_GUARD "")
endif()
# absl_cc_library()
#
# CMake function to imitate Bazel's cc_library rule.
@ -40,7 +46,8 @@ endif()
# LINKOPTS: List of link options
# PUBLIC: Add this so that this library will be exported under absl::
# Also in IDE, target will appear in Abseil folder while non PUBLIC will be in Abseil/internal.
# TESTONLY: When added, this target will only be built if BUILD_TESTING=ON.
# TESTONLY: When added, this target will only be built if both
# BUILD_TESTING=ON and ABSL_BUILD_TESTING=ON.
#
# Note:
# By default, absl_cc_library will always create a library named absl_${NAME},
@ -82,7 +89,9 @@ function(absl_cc_library)
${ARGN}
)
if(ABSL_CC_LIB_TESTONLY AND NOT BUILD_TESTING)
if(ABSL_CC_LIB_TESTONLY AND
NOT ((BUILD_TESTING AND ABSL_BUILD_TESTING) OR
(ABSL_BUILD_TEST_HELPERS AND ABSL_CC_LIB_PUBLIC)))
return()
endif()
@ -164,10 +173,14 @@ function(absl_cc_library)
set(PC_CFLAGS "${PC_CFLAGS} ${cflag}")
elseif(${cflag} MATCHES "^(-W|/w[1234eo])")
# Don't impose our warnings on others.
elseif(${cflag} MATCHES "^-m")
# Don't impose CPU instruction requirements on others, as
# the code performs feature detection on runtime.
else()
set(PC_CFLAGS "${PC_CFLAGS} ${cflag}")
endif()
endforeach()
string(REPLACE ";" " " PC_LINKOPTS "${ABSL_CC_LIB_LINKOPTS}")
FILE(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/lib/pkgconfig/absl_${_NAME}.pc" CONTENT "\
prefix=${CMAKE_INSTALL_PREFIX}\n\
exec_prefix=\${prefix}\n\
@ -179,7 +192,7 @@ Description: Abseil ${_NAME} library\n\
URL: https://abseil.io/\n\
Version: ${PC_VERSION}\n\
Requires:${PC_DEPS}\n\
Libs: -L\${libdir} $<JOIN:${ABSL_CC_LIB_LINKOPTS}, > $<$<NOT:$<BOOL:${ABSL_CC_LIB_IS_INTERFACE}>>:-labsl_${_NAME}>\n\
Libs: -L\${libdir} ${PC_LINKOPTS} $<$<NOT:$<BOOL:${ABSL_CC_LIB_IS_INTERFACE}>>:-labsl_${_NAME}>\n\
Cflags: -I\${includedir}${PC_CFLAGS}\n")
INSTALL(FILES "${CMAKE_BINARY_DIR}/lib/pkgconfig/absl_${_NAME}.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
@ -236,7 +249,7 @@ Cflags: -I\${includedir}${PC_CFLAGS}\n")
# unconditionally.
set_property(TARGET ${_NAME} PROPERTY LINKER_LANGUAGE "CXX")
target_include_directories(${_NAME}
target_include_directories(${_NAME} ${ABSL_INTERNAL_INCLUDE_WARNING_GUARD}
PUBLIC
"$<BUILD_INTERFACE:${ABSL_COMMON_INCLUDE_DIRS}>"
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
@ -255,10 +268,10 @@ Cflags: -I\${includedir}${PC_CFLAGS}\n")
endif()
if(ABSL_PROPAGATE_CXX_STD)
# Abseil libraries require C++11 as the current minimum standard.
# Top-level application CMake projects should ensure a consistent C++
# standard for all compiled sources by setting CMAKE_CXX_STANDARD.
target_compile_features(${_NAME} PUBLIC cxx_std_11)
# Abseil libraries require C++14 as the current minimum standard. When
# compiled with C++17 (either because it is the compiler's default or
# explicitly requested), then Abseil requires C++17.
_absl_target_compile_features_if_available(${_NAME} PUBLIC ${ABSL_INTERNAL_CXX_STD_FEATURE})
else()
# Note: This is legacy (before CMake 3.8) behavior. Setting the
# target-level CXX_STANDARD property to ABSL_CXX_STANDARD (which is
@ -284,7 +297,7 @@ Cflags: -I\${includedir}${PC_CFLAGS}\n")
else()
# Generating header-only library
add_library(${_NAME} INTERFACE)
target_include_directories(${_NAME}
target_include_directories(${_NAME} ${ABSL_INTERNAL_INCLUDE_WARNING_GUARD}
INTERFACE
"$<BUILD_INTERFACE:${ABSL_COMMON_INCLUDE_DIRS}>"
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
@ -303,10 +316,10 @@ Cflags: -I\${includedir}${PC_CFLAGS}\n")
target_compile_definitions(${_NAME} INTERFACE ${ABSL_CC_LIB_DEFINES})
if(ABSL_PROPAGATE_CXX_STD)
# Abseil libraries require C++11 as the current minimum standard.
# Abseil libraries require C++14 as the current minimum standard.
# Top-level application CMake projects should ensure a consistent C++
# standard for all compiled sources by setting CMAKE_CXX_STANDARD.
target_compile_features(${_NAME} INTERFACE cxx_std_11)
_absl_target_compile_features_if_available(${_NAME} INTERFACE ${ABSL_INTERNAL_CXX_STD_FEATURE})
# (INTERFACE libraries can't have the CXX_STANDARD property set, so there
# is no legacy behavior else case).
@ -364,7 +377,7 @@ endfunction()
# GTest::gtest_main
# )
function(absl_cc_test)
if(NOT BUILD_TESTING)
if(NOT (BUILD_TESTING AND ABSL_BUILD_TESTING))
return()
endif()
@ -415,10 +428,10 @@ function(absl_cc_test)
set_property(TARGET ${_NAME} PROPERTY FOLDER ${ABSL_IDE_FOLDER}/test)
if(ABSL_PROPAGATE_CXX_STD)
# Abseil libraries require C++11 as the current minimum standard.
# Abseil libraries require C++14 as the current minimum standard.
# Top-level application CMake projects should ensure a consistent C++
# standard for all compiled sources by setting CMAKE_CXX_STANDARD.
target_compile_features(${_NAME} PUBLIC cxx_std_11)
_absl_target_compile_features_if_available(${_NAME} PUBLIC ${ABSL_INTERNAL_CXX_STD_FEATURE})
else()
# Note: This is legacy (before CMake 3.8) behavior. Setting the
# target-level CXX_STANDARD property to ABSL_CXX_STANDARD (which is
@ -434,11 +447,3 @@ function(absl_cc_test)
add_test(NAME ${_NAME} COMMAND ${_NAME})
endfunction()
function(check_target my_target)
if(NOT TARGET ${my_target})
message(FATAL_ERROR " ABSL: compiling absl requires a ${my_target} CMake target in your project,
see CMake/README.md for more details")
endif(NOT TARGET ${my_target})
endfunction()

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.2)
cmake_minimum_required(VERSION 3.10)
project(googletest-external NONE)

View File

@ -20,8 +20,10 @@ googletest framework
### Step-by-Step Instructions
1. If you want to build the Abseil tests, integrate the Abseil dependency
[Google Test](https://github.com/google/googletest) into your CMake project. To disable Abseil tests, you have to pass
`-DBUILD_TESTING=OFF` when configuring your project with CMake.
[Google Test](https://github.com/google/googletest) into your CMake
project. To disable Abseil tests, you have to pass either
`-DBUILD_TESTING=OFF` or `-DABSL_BUILD_TESTING=OFF` when configuring your
project with CMake.
2. Download Abseil and copy it into a subdirectory in your CMake project or add
Abseil as a [git submodule](https://git-scm.com/docs/git-submodule) in your
@ -37,12 +39,12 @@ section of your executable or of your library.<br>
Here is a short CMakeLists.txt example of an application project using Abseil.
```cmake
cmake_minimum_required(VERSION 3.8.2)
cmake_minimum_required(VERSION 3.10)
project(my_app_project)
# Pick the C++ standard to compile with.
# Abseil currently supports C++11, C++14, and C++17.
set(CMAKE_CXX_STANDARD 11)
# Abseil currently supports C++14, C++17, and C++20.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
add_subdirectory(abseil-cpp)
@ -60,7 +62,7 @@ will control Abseil library targets) is set to at least that minimum. For
example:
```cmake
cmake_minimum_required(VERSION 3.8.2)
cmake_minimum_required(VERSION 3.10)
project(my_lib_project)
# Leave C++ standard up to the root application, so set it only if this is the
@ -91,7 +93,8 @@ setting a consistent `CMAKE_CXX_STANDARD` that is sufficiently high.
### Running Abseil Tests with CMake
Use the `-DBUILD_TESTING=ON` flag to run Abseil tests.
Use the `-DABSL_BUILD_TESTING=ON` flag to run Abseil tests. Note that
BUILD_TESTING must also be on (the default).
You will need to provide Abseil with a Googletest dependency. There are two
options for how to do this:
@ -109,7 +112,7 @@ For example, to run just the Abseil tests, you could use this script:
cd path/to/abseil-cpp
mkdir build
cd build
cmake -DBUILD_TESTING=ON -DABSL_USE_GOOGLETEST_HEAD=ON ..
cmake -DABSL_BUILD_TESTING=ON -DABSL_USE_GOOGLETEST_HEAD=ON ..
make -j
ctest
```
@ -175,7 +178,7 @@ cmake --build /temporary/build/abseil-cpp --target install
## Google Test Options
`-DBUILD_TESTING=ON` must be set to enable testing
`-DABSL_BUILD_TESTING=ON` must be set to enable testing
- Have Abseil download and build Google Test for you: `-DABSL_USE_EXTERNAL_GOOGLETEST=OFF` (default)
- Download and build latest Google Test: `-DABSL_USE_GOOGLETEST_HEAD=ON`

View File

@ -15,7 +15,7 @@
# A simple CMakeLists.txt for testing cmake installation
cmake_minimum_required(VERSION 3.5)
cmake_minimum_required(VERSION 3.10)
project(absl_cmake_testing CXX)
add_executable(simple simple.cc)

View File

@ -55,10 +55,10 @@ cmake "${absl_dir}" \
-DABSL_USE_EXTERNAL_GOOGLETEST=ON \
-DABSL_FIND_GOOGLETEST=ON \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_TESTING=ON \
-DABSL_BUILD_TESTING=ON \
-DBUILD_SHARED_LIBS="${build_shared_libs}"
make -j $(nproc)
ctest -j $(nproc)
ctest -j $(nproc) --output-on-failure
make install
ldconfig
popd

Some files were not shown because too many files have changed in this diff Show More