mirror of
https://github.com/Swiftgram/Telegram-iOS.git
synced 2025-11-06 17:00:13 +00:00
Upgrade webrtc dependencies and build script
This commit is contained in:
parent
31f1820b30
commit
17fd673361
151
third-party/boringssl/BUILD
vendored
151
third-party/boringssl/BUILD
vendored
@ -12,36 +12,70 @@
|
||||
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
exports_files(["LICENSE"])
|
||||
|
||||
#load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
|
||||
|
||||
load(
|
||||
":BUILD.generated.bzl",
|
||||
"crypto_headers",
|
||||
"crypto_internal_headers",
|
||||
"crypto_sources",
|
||||
"crypto_sources_mac_x86_64",
|
||||
"crypto_sources_ios_aarch64",
|
||||
"crypto_sources_ios_arm",
|
||||
"crypto_sources_asm",
|
||||
"fips_fragments",
|
||||
"ssl_headers",
|
||||
"ssl_internal_headers",
|
||||
"ssl_sources",
|
||||
"tool_sources",
|
||||
"tool_headers",
|
||||
"tool_sources",
|
||||
)
|
||||
|
||||
posix_copts = [
|
||||
licenses(["notice"])
|
||||
|
||||
exports_files(["LICENSE"])
|
||||
|
||||
# By default, the C files will expect assembly files, if any, to be linked in
|
||||
# with the build. This default can be flipped with -DOPENSSL_NO_ASM. If building
|
||||
# in a configuration where we have no assembly optimizations, -DOPENSSL_NO_ASM
|
||||
# has no effect, and either value is fine.
|
||||
#
|
||||
# Like C files, assembly files are wrapped in #ifdef (or NASM equivalent), so it
|
||||
# is safe to include a file for the wrong platform in the build. It will just
|
||||
# output an empty object file. However, we need some platform selectors to
|
||||
# distinguish between gas or NASM syntax.
|
||||
#
|
||||
# For all non-Windows platforms, we use gas assembly syntax and can assume any
|
||||
# GCC-compatible toolchain includes a gas-compatible assembler.
|
||||
#
|
||||
# For Windows, we use NASM on x86 and x86_64 and gas, specifically
|
||||
# clang-assembler, on aarch64. We have not yet added NASM support to this build,
|
||||
# and would need to detect MSVC vs clang-cl for aarch64 so, for now, we just
|
||||
# disable assembly on Windows across the board.
|
||||
#
|
||||
# These two selects for asm_sources and asm_copts must be kept in sync. If we
|
||||
# specify assembly, we don't want OPENSSL_NO_ASM. If we don't specify assembly,
|
||||
# we want OPENSSL_NO_ASM, in case the C files expect them in some format (e.g.
|
||||
# NASM) this build file doesn't yet support.
|
||||
#
|
||||
# TODO(https://crbug.com/boringssl/531): Enable assembly for Windows.
|
||||
asm_sources = select({
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": crypto_sources_asm,
|
||||
})
|
||||
asm_copts = select({
|
||||
"@platforms//os:windows": ["-DOPENSSL_NO_ASM"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
||||
# Configure C, C++, and common flags for GCC-compatible toolchains.
|
||||
#
|
||||
# TODO(davidben): Can we remove some of these? In Bazel, are warnings the
|
||||
# toolchain or project's responsibility? -Wa,--noexecstack should be unnecessary
|
||||
# now, though https://crbug.com/boringssl/292 tracks testing this in CI.
|
||||
# -fno-common did not become default until
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85678.
|
||||
gcc_copts = [
|
||||
# Assembler option --noexecstack adds .note.GNU-stack to each object to
|
||||
# ensure that binaries can be built with non-executable stack.
|
||||
"-Wa,--noexecstack",
|
||||
|
||||
# This is needed on Linux systems (at least) to get rwlock in pthread.
|
||||
"-D_XOPEN_SOURCE=700",
|
||||
|
||||
# This list of warnings should match those in the top-level CMakeLists.txt.
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
@ -51,72 +85,63 @@ posix_copts = [
|
||||
"-Wwrite-strings",
|
||||
"-Wshadow",
|
||||
"-fno-common",
|
||||
"-Wno-unused-but-set-variable",
|
||||
|
||||
# Modern build environments should be able to set this to use atomic
|
||||
# operations for reference counting rather than locks. However, it's
|
||||
# known not to work on some Android builds.
|
||||
# "-DOPENSSL_C11_ATOMIC",
|
||||
]
|
||||
|
||||
boringssl_copts = select({
|
||||
"@build_bazel_rules_apple//apple:ios_arm64": posix_copts,
|
||||
"//build-system:ios_sim_arm64": posix_copts,
|
||||
"@build_bazel_rules_apple//apple:ios_x86_64": posix_copts,
|
||||
})
|
||||
|
||||
crypto_sources_asm = select({
|
||||
"@build_bazel_rules_apple//apple:ios_arm64": crypto_sources_ios_aarch64,
|
||||
"//build-system:ios_sim_arm64": crypto_sources_ios_aarch64,
|
||||
"@build_bazel_rules_apple//apple:ios_x86_64": crypto_sources_mac_x86_64,
|
||||
})
|
||||
|
||||
# For C targets only (not C++), compile with C11 support.
|
||||
posix_copts_c11 = [
|
||||
gcc_copts_c11 = [
|
||||
"-std=c11",
|
||||
"-Wmissing-prototypes",
|
||||
"-Wold-style-definition",
|
||||
"-Wstrict-prototypes",
|
||||
]
|
||||
|
||||
boringssl_copts_c11 = boringssl_copts + select({
|
||||
"@build_bazel_rules_apple//apple:ios_arm64": posix_copts_c11,
|
||||
"//build-system:ios_sim_arm64": posix_copts_c11,
|
||||
"@build_bazel_rules_apple//apple:ios_x86_64": posix_copts_c11,
|
||||
})
|
||||
|
||||
# For C++ targets only (not C), compile with C++11 support.
|
||||
posix_copts_cxx = [
|
||||
"-std=c++11",
|
||||
gcc_copts_cxx = [
|
||||
"-std=c++14",
|
||||
"-Wmissing-declarations",
|
||||
]
|
||||
|
||||
boringssl_copts = [
|
||||
"-DBORINGSSL_IMPLEMENTATION",
|
||||
] + select({
|
||||
# We assume that non-Windows builds use a GCC-compatible toolchain and that
|
||||
# Windows builds do not.
|
||||
#
|
||||
# TODO(davidben): Should these be querying something in @bazel_tools?
|
||||
# Unfortunately, @bazel_tools is undocumented. See
|
||||
# https://github.com/bazelbuild/bazel/issues/14914
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": gcc_copts,
|
||||
}) + select({
|
||||
# This is needed on glibc systems to get rwlock in pthreads, but it should
|
||||
# not be set on Apple platforms or FreeBSD, where it instead disables APIs
|
||||
# we use.
|
||||
# See compat(5), sys/cdefs.h, and https://crbug.com/boringssl/471
|
||||
"@platforms//os:linux": ["-D_XOPEN_SOURCE=700"],
|
||||
# Without WIN32_LEAN_AND_MEAN, <windows.h> pulls in wincrypt.h, which
|
||||
# conflicts with our <openssl/x509.h>.
|
||||
"@platforms//os:windows": ["-DWIN32_LEAN_AND_MEAN"],
|
||||
"//conditions:default": [],
|
||||
}) + asm_copts
|
||||
|
||||
boringssl_copts_c11 = boringssl_copts + select({
|
||||
"@platforms//os:windows": ["/std:c11"],
|
||||
"//conditions:default": gcc_copts_c11,
|
||||
})
|
||||
|
||||
boringssl_copts_cxx = boringssl_copts + select({
|
||||
"@build_bazel_rules_apple//apple:ios_arm64": posix_copts_cxx,
|
||||
"//build-system:ios_sim_arm64": posix_copts_cxx,
|
||||
"@build_bazel_rules_apple//apple:ios_x86_64": posix_copts_cxx,
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": gcc_copts_cxx,
|
||||
})
|
||||
|
||||
cc_library(
|
||||
name = "crypto",
|
||||
srcs = crypto_sources + crypto_internal_headers + crypto_sources_asm + ["aes_ige.c"],
|
||||
srcs = crypto_sources + crypto_internal_headers + asm_sources,
|
||||
hdrs = crypto_headers + fips_fragments,
|
||||
copts = boringssl_copts_c11,
|
||||
includes = ["src/include"],
|
||||
linkopts = select({
|
||||
"@build_bazel_rules_apple//apple:ios_arm64": [],
|
||||
"//build-system:ios_sim_arm64": [],
|
||||
"@build_bazel_rules_apple//apple:ios_x86_64": [],
|
||||
"@platforms//os:windows": ["-defaultlib:advapi32.lib"],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name='libcrypto',
|
||||
srcs=[':crypto'],
|
||||
#output_group = 'library',
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
@ -130,3 +155,11 @@ cc_library(
|
||||
":crypto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "bssl",
|
||||
srcs = tool_sources + tool_headers,
|
||||
copts = boringssl_copts_cxx,
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":ssl"],
|
||||
)
|
||||
|
||||
661
third-party/boringssl/BUILD.generated.bzl
vendored
661
third-party/boringssl/BUILD.generated.bzl
vendored
@ -37,17 +37,21 @@ fips_fragments = [
|
||||
"src/crypto/fipsmodule/cipher/aead.c",
|
||||
"src/crypto/fipsmodule/cipher/cipher.c",
|
||||
"src/crypto/fipsmodule/cipher/e_aes.c",
|
||||
"src/crypto/fipsmodule/cipher/e_des.c",
|
||||
"src/crypto/fipsmodule/des/des.c",
|
||||
"src/crypto/fipsmodule/cipher/e_aesccm.c",
|
||||
"src/crypto/fipsmodule/cmac/cmac.c",
|
||||
"src/crypto/fipsmodule/dh/check.c",
|
||||
"src/crypto/fipsmodule/dh/dh.c",
|
||||
"src/crypto/fipsmodule/digest/digest.c",
|
||||
"src/crypto/fipsmodule/digest/digests.c",
|
||||
"src/crypto/fipsmodule/digestsign/digestsign.c",
|
||||
"src/crypto/fipsmodule/ec/ec.c",
|
||||
"src/crypto/fipsmodule/ec/ec_key.c",
|
||||
"src/crypto/fipsmodule/ec/ec_montgomery.c",
|
||||
"src/crypto/fipsmodule/ec/felem.c",
|
||||
"src/crypto/fipsmodule/ec/oct.c",
|
||||
"src/crypto/fipsmodule/ec/p224-64.c",
|
||||
"src/crypto/fipsmodule/ec/p256-x86_64.c",
|
||||
"src/crypto/fipsmodule/ec/p256-nistz.c",
|
||||
"src/crypto/fipsmodule/ec/p256.c",
|
||||
"src/crypto/fipsmodule/ec/scalar.c",
|
||||
"src/crypto/fipsmodule/ec/simple.c",
|
||||
"src/crypto/fipsmodule/ec/simple_mul.c",
|
||||
@ -55,6 +59,7 @@ fips_fragments = [
|
||||
"src/crypto/fipsmodule/ec/wnaf.c",
|
||||
"src/crypto/fipsmodule/ecdh/ecdh.c",
|
||||
"src/crypto/fipsmodule/ecdsa/ecdsa.c",
|
||||
"src/crypto/fipsmodule/hkdf/hkdf.c",
|
||||
"src/crypto/fipsmodule/hmac/hmac.c",
|
||||
"src/crypto/fipsmodule/md4/md4.c",
|
||||
"src/crypto/fipsmodule/md5/md5.c",
|
||||
@ -66,19 +71,20 @@ fips_fragments = [
|
||||
"src/crypto/fipsmodule/modes/ofb.c",
|
||||
"src/crypto/fipsmodule/modes/polyval.c",
|
||||
"src/crypto/fipsmodule/rand/ctrdrbg.c",
|
||||
"src/crypto/fipsmodule/rand/fork_detect.c",
|
||||
"src/crypto/fipsmodule/rand/rand.c",
|
||||
"src/crypto/fipsmodule/rand/urandom.c",
|
||||
"src/crypto/fipsmodule/rsa/blinding.c",
|
||||
"src/crypto/fipsmodule/rsa/padding.c",
|
||||
"src/crypto/fipsmodule/rsa/rsa.c",
|
||||
"src/crypto/fipsmodule/rsa/rsa_impl.c",
|
||||
"src/crypto/fipsmodule/self_check/fips.c",
|
||||
"src/crypto/fipsmodule/self_check/self_check.c",
|
||||
"src/crypto/fipsmodule/sha/sha1-altivec.c",
|
||||
"src/crypto/fipsmodule/service_indicator/service_indicator.c",
|
||||
"src/crypto/fipsmodule/sha/sha1.c",
|
||||
"src/crypto/fipsmodule/sha/sha256.c",
|
||||
"src/crypto/fipsmodule/sha/sha512.c",
|
||||
"src/crypto/fipsmodule/tls/kdf.c",
|
||||
"src/third_party/fiat/p256.c",
|
||||
]
|
||||
|
||||
ssl_internal_headers = [
|
||||
@ -93,6 +99,8 @@ ssl_sources = [
|
||||
"src/ssl/d1_srtp.cc",
|
||||
"src/ssl/dtls_method.cc",
|
||||
"src/ssl/dtls_record.cc",
|
||||
"src/ssl/encrypted_client_hello.cc",
|
||||
"src/ssl/extensions.cc",
|
||||
"src/ssl/handoff.cc",
|
||||
"src/ssl/handshake.cc",
|
||||
"src/ssl/handshake_client.cc",
|
||||
@ -115,7 +123,6 @@ ssl_sources = [
|
||||
"src/ssl/ssl_versions.cc",
|
||||
"src/ssl/ssl_x509.cc",
|
||||
"src/ssl/t1_enc.cc",
|
||||
"src/ssl/t1_lib.cc",
|
||||
"src/ssl/tls13_both.cc",
|
||||
"src/ssl/tls13_client.cc",
|
||||
"src/ssl/tls13_enc.cc",
|
||||
@ -128,12 +135,14 @@ crypto_headers = [
|
||||
"src/include/openssl/aead.h",
|
||||
"src/include/openssl/aes.h",
|
||||
"src/include/openssl/arm_arch.h",
|
||||
"src/include/openssl/asm_base.h",
|
||||
"src/include/openssl/asn1.h",
|
||||
"src/include/openssl/asn1_mac.h",
|
||||
"src/include/openssl/asn1t.h",
|
||||
"src/include/openssl/base.h",
|
||||
"src/include/openssl/base64.h",
|
||||
"src/include/openssl/bio.h",
|
||||
"src/include/openssl/blake2.h",
|
||||
"src/include/openssl/blowfish.h",
|
||||
"src/include/openssl/bn.h",
|
||||
"src/include/openssl/buf.h",
|
||||
@ -146,6 +155,7 @@ crypto_headers = [
|
||||
"src/include/openssl/conf.h",
|
||||
"src/include/openssl/cpu.h",
|
||||
"src/include/openssl/crypto.h",
|
||||
"src/include/openssl/ctrdrbg.h",
|
||||
"src/include/openssl/curve25519.h",
|
||||
"src/include/openssl/des.h",
|
||||
"src/include/openssl/dh.h",
|
||||
@ -159,11 +169,17 @@ crypto_headers = [
|
||||
"src/include/openssl/engine.h",
|
||||
"src/include/openssl/err.h",
|
||||
"src/include/openssl/evp.h",
|
||||
"src/include/openssl/evp_errors.h",
|
||||
"src/include/openssl/ex_data.h",
|
||||
"src/include/openssl/experimental/dilithium.h",
|
||||
"src/include/openssl/experimental/kyber.h",
|
||||
"src/include/openssl/experimental/spx.h",
|
||||
"src/include/openssl/hkdf.h",
|
||||
"src/include/openssl/hmac.h",
|
||||
"src/include/openssl/hpke.h",
|
||||
"src/include/openssl/hrss.h",
|
||||
"src/include/openssl/is_boringssl.h",
|
||||
"src/include/openssl/kdf.h",
|
||||
"src/include/openssl/lhash.h",
|
||||
"src/include/openssl/md4.h",
|
||||
"src/include/openssl/md5.h",
|
||||
@ -181,31 +197,43 @@ crypto_headers = [
|
||||
"src/include/openssl/pkcs8.h",
|
||||
"src/include/openssl/poly1305.h",
|
||||
"src/include/openssl/pool.h",
|
||||
"src/include/openssl/posix_time.h",
|
||||
"src/include/openssl/rand.h",
|
||||
"src/include/openssl/rc4.h",
|
||||
"src/include/openssl/ripemd.h",
|
||||
"src/include/openssl/rsa.h",
|
||||
"src/include/openssl/safestack.h",
|
||||
"src/include/openssl/service_indicator.h",
|
||||
"src/include/openssl/sha.h",
|
||||
"src/include/openssl/siphash.h",
|
||||
"src/include/openssl/span.h",
|
||||
"src/include/openssl/stack.h",
|
||||
"src/include/openssl/target.h",
|
||||
"src/include/openssl/thread.h",
|
||||
"src/include/openssl/time.h",
|
||||
"src/include/openssl/trust_token.h",
|
||||
"src/include/openssl/type_check.h",
|
||||
"src/include/openssl/x509.h",
|
||||
"src/include/openssl/x509_vfy.h",
|
||||
"src/include/openssl/x509v3.h",
|
||||
"src/include/openssl/x509v3_errors.h",
|
||||
]
|
||||
|
||||
crypto_internal_headers = [
|
||||
"src/crypto/asn1/asn1_locl.h",
|
||||
"src/crypto/asn1/internal.h",
|
||||
"src/crypto/bio/internal.h",
|
||||
"src/crypto/bytestring/internal.h",
|
||||
"src/crypto/chacha/internal.h",
|
||||
"src/crypto/cipher_extra/internal.h",
|
||||
"src/crypto/conf/conf_def.h",
|
||||
"src/crypto/conf/internal.h",
|
||||
"src/crypto/cpu-arm-linux.h",
|
||||
"src/crypto/cpu_arm_linux.h",
|
||||
"src/crypto/curve25519/curve25519_tables.h",
|
||||
"src/crypto/curve25519/internal.h",
|
||||
"src/crypto/des/internal.h",
|
||||
"src/crypto/dilithium/internal.h",
|
||||
"src/crypto/dsa/internal.h",
|
||||
"src/crypto/ec_extra/internal.h",
|
||||
"src/crypto/err/internal.h",
|
||||
"src/crypto/evp/internal.h",
|
||||
"src/crypto/fipsmodule/aes/internal.h",
|
||||
@ -213,38 +241,52 @@ crypto_internal_headers = [
|
||||
"src/crypto/fipsmodule/bn/rsaz_exp.h",
|
||||
"src/crypto/fipsmodule/cipher/internal.h",
|
||||
"src/crypto/fipsmodule/delocate.h",
|
||||
"src/crypto/fipsmodule/des/internal.h",
|
||||
"src/crypto/fipsmodule/dh/internal.h",
|
||||
"src/crypto/fipsmodule/digest/internal.h",
|
||||
"src/crypto/fipsmodule/digest/md32_common.h",
|
||||
"src/crypto/fipsmodule/ec/builtin_curves.h",
|
||||
"src/crypto/fipsmodule/ec/internal.h",
|
||||
"src/crypto/fipsmodule/ec/p256-x86_64-table.h",
|
||||
"src/crypto/fipsmodule/ec/p256-x86_64.h",
|
||||
"src/crypto/fipsmodule/ec/p256-nistz-table.h",
|
||||
"src/crypto/fipsmodule/ec/p256-nistz.h",
|
||||
"src/crypto/fipsmodule/ec/p256_table.h",
|
||||
"src/crypto/fipsmodule/ecdsa/internal.h",
|
||||
"src/crypto/fipsmodule/md5/internal.h",
|
||||
"src/crypto/fipsmodule/modes/internal.h",
|
||||
"src/crypto/fipsmodule/rand/fork_detect.h",
|
||||
"src/crypto/fipsmodule/rand/getrandom_fillin.h",
|
||||
"src/crypto/fipsmodule/rand/internal.h",
|
||||
"src/crypto/fipsmodule/rsa/internal.h",
|
||||
"src/crypto/fipsmodule/service_indicator/internal.h",
|
||||
"src/crypto/fipsmodule/sha/internal.h",
|
||||
"src/crypto/fipsmodule/tls/internal.h",
|
||||
"src/crypto/hrss/internal.h",
|
||||
"src/crypto/internal.h",
|
||||
"src/crypto/keccak/internal.h",
|
||||
"src/crypto/kyber/internal.h",
|
||||
"src/crypto/lhash/internal.h",
|
||||
"src/crypto/obj/obj_dat.h",
|
||||
"src/crypto/pkcs7/internal.h",
|
||||
"src/crypto/pkcs8/internal.h",
|
||||
"src/crypto/poly1305/internal.h",
|
||||
"src/crypto/pool/internal.h",
|
||||
"src/crypto/x509/charmap.h",
|
||||
"src/crypto/rsa_extra/internal.h",
|
||||
"src/crypto/spx/address.h",
|
||||
"src/crypto/spx/fors.h",
|
||||
"src/crypto/spx/merkle.h",
|
||||
"src/crypto/spx/params.h",
|
||||
"src/crypto/spx/spx_util.h",
|
||||
"src/crypto/spx/thash.h",
|
||||
"src/crypto/spx/wots.h",
|
||||
"src/crypto/trust_token/internal.h",
|
||||
"src/crypto/x509/ext_dat.h",
|
||||
"src/crypto/x509/internal.h",
|
||||
"src/crypto/x509/vpm_int.h",
|
||||
"src/crypto/x509v3/ext_dat.h",
|
||||
"src/crypto/x509v3/internal.h",
|
||||
"src/crypto/x509v3/pcy_int.h",
|
||||
"src/third_party/fiat/curve25519_32.h",
|
||||
"src/third_party/fiat/curve25519_64.h",
|
||||
"src/third_party/fiat/curve25519_tables.h",
|
||||
"src/third_party/fiat/internal.h",
|
||||
"src/third_party/fiat/curve25519_64_adx.h",
|
||||
"src/third_party/fiat/curve25519_64_msvc.h",
|
||||
"src/third_party/fiat/p256_32.h",
|
||||
"src/third_party/fiat/p256_64.h",
|
||||
"src/third_party/fiat/p256_64_msvc.h",
|
||||
]
|
||||
|
||||
crypto_sources = [
|
||||
@ -253,36 +295,34 @@ crypto_sources = [
|
||||
"src/crypto/asn1/a_bool.c",
|
||||
"src/crypto/asn1/a_d2i_fp.c",
|
||||
"src/crypto/asn1/a_dup.c",
|
||||
"src/crypto/asn1/a_enum.c",
|
||||
"src/crypto/asn1/a_gentm.c",
|
||||
"src/crypto/asn1/a_i2d_fp.c",
|
||||
"src/crypto/asn1/a_int.c",
|
||||
"src/crypto/asn1/a_mbstr.c",
|
||||
"src/crypto/asn1/a_object.c",
|
||||
"src/crypto/asn1/a_octet.c",
|
||||
"src/crypto/asn1/a_print.c",
|
||||
"src/crypto/asn1/a_strex.c",
|
||||
"src/crypto/asn1/a_strnid.c",
|
||||
"src/crypto/asn1/a_time.c",
|
||||
"src/crypto/asn1/a_type.c",
|
||||
"src/crypto/asn1/a_utctm.c",
|
||||
"src/crypto/asn1/a_utf8.c",
|
||||
"src/crypto/asn1/asn1_lib.c",
|
||||
"src/crypto/asn1/asn1_par.c",
|
||||
"src/crypto/asn1/asn_pack.c",
|
||||
"src/crypto/asn1/f_enum.c",
|
||||
"src/crypto/asn1/f_int.c",
|
||||
"src/crypto/asn1/f_string.c",
|
||||
"src/crypto/asn1/posix_time.c",
|
||||
"src/crypto/asn1/tasn_dec.c",
|
||||
"src/crypto/asn1/tasn_enc.c",
|
||||
"src/crypto/asn1/tasn_fre.c",
|
||||
"src/crypto/asn1/tasn_new.c",
|
||||
"src/crypto/asn1/tasn_typ.c",
|
||||
"src/crypto/asn1/tasn_utl.c",
|
||||
"src/crypto/asn1/time_support.c",
|
||||
"src/crypto/base64/base64.c",
|
||||
"src/crypto/bio/bio.c",
|
||||
"src/crypto/bio/bio_mem.c",
|
||||
"src/crypto/bio/connect.c",
|
||||
"src/crypto/bio/errno.c",
|
||||
"src/crypto/bio/fd.c",
|
||||
"src/crypto/bio/file.c",
|
||||
"src/crypto/bio/hexdump.c",
|
||||
@ -290,6 +330,7 @@ crypto_sources = [
|
||||
"src/crypto/bio/printf.c",
|
||||
"src/crypto/bio/socket.c",
|
||||
"src/crypto/bio/socket_helper.c",
|
||||
"src/crypto/blake2/blake2.c",
|
||||
"src/crypto/bn_extra/bn_asn1.c",
|
||||
"src/crypto/bn_extra/convert.c",
|
||||
"src/crypto/buf/buf.c",
|
||||
@ -301,39 +342,43 @@ crypto_sources = [
|
||||
"src/crypto/chacha/chacha.c",
|
||||
"src/crypto/cipher_extra/cipher_extra.c",
|
||||
"src/crypto/cipher_extra/derive_key.c",
|
||||
"src/crypto/cipher_extra/e_aesccm.c",
|
||||
"src/crypto/cipher_extra/e_aesctrhmac.c",
|
||||
"src/crypto/cipher_extra/e_aesgcmsiv.c",
|
||||
"src/crypto/cipher_extra/e_chacha20poly1305.c",
|
||||
"src/crypto/cipher_extra/e_des.c",
|
||||
"src/crypto/cipher_extra/e_null.c",
|
||||
"src/crypto/cipher_extra/e_rc2.c",
|
||||
"src/crypto/cipher_extra/e_rc4.c",
|
||||
"src/crypto/cipher_extra/e_tls.c",
|
||||
"src/crypto/cipher_extra/tls_cbc.c",
|
||||
"src/crypto/cmac/cmac.c",
|
||||
"src/crypto/conf/conf.c",
|
||||
"src/crypto/cpu-aarch64-fuchsia.c",
|
||||
"src/crypto/cpu-aarch64-linux.c",
|
||||
"src/crypto/cpu-arm-linux.c",
|
||||
"src/crypto/cpu-arm.c",
|
||||
"src/crypto/cpu-intel.c",
|
||||
"src/crypto/cpu-ppc64le.c",
|
||||
"src/crypto/cpu_aarch64_apple.c",
|
||||
"src/crypto/cpu_aarch64_fuchsia.c",
|
||||
"src/crypto/cpu_aarch64_linux.c",
|
||||
"src/crypto/cpu_aarch64_openbsd.c",
|
||||
"src/crypto/cpu_aarch64_sysreg.c",
|
||||
"src/crypto/cpu_aarch64_win.c",
|
||||
"src/crypto/cpu_arm_freebsd.c",
|
||||
"src/crypto/cpu_arm_linux.c",
|
||||
"src/crypto/cpu_intel.c",
|
||||
"src/crypto/crypto.c",
|
||||
"src/crypto/curve25519/curve25519.c",
|
||||
"src/crypto/curve25519/curve25519_64_adx.c",
|
||||
"src/crypto/curve25519/spake25519.c",
|
||||
"src/crypto/dh/check.c",
|
||||
"src/crypto/dh/dh.c",
|
||||
"src/crypto/dh/dh_asn1.c",
|
||||
"src/crypto/dh/params.c",
|
||||
"src/crypto/des/des.c",
|
||||
"src/crypto/dh_extra/dh_asn1.c",
|
||||
"src/crypto/dh_extra/params.c",
|
||||
"src/crypto/digest_extra/digest_extra.c",
|
||||
"src/crypto/dilithium/dilithium.c",
|
||||
"src/crypto/dsa/dsa.c",
|
||||
"src/crypto/dsa/dsa_asn1.c",
|
||||
"src/crypto/ec_extra/ec_asn1.c",
|
||||
"src/crypto/ec_extra/ec_derive.c",
|
||||
"src/crypto/ec_extra/hash_to_curve.c",
|
||||
"src/crypto/ecdh_extra/ecdh_extra.c",
|
||||
"src/crypto/ecdsa_extra/ecdsa_asn1.c",
|
||||
"src/crypto/engine/engine.c",
|
||||
"src/crypto/err/err.c",
|
||||
"src/crypto/evp/digestsign.c",
|
||||
"src/crypto/evp/evp.c",
|
||||
"src/crypto/evp/evp_asn1.c",
|
||||
"src/crypto/evp/evp_ctx.c",
|
||||
@ -342,6 +387,7 @@ crypto_sources = [
|
||||
"src/crypto/evp/p_ec_asn1.c",
|
||||
"src/crypto/evp/p_ed25519.c",
|
||||
"src/crypto/evp/p_ed25519_asn1.c",
|
||||
"src/crypto/evp/p_hkdf.c",
|
||||
"src/crypto/evp/p_rsa.c",
|
||||
"src/crypto/evp/p_rsa_asn1.c",
|
||||
"src/crypto/evp/p_x25519.c",
|
||||
@ -353,9 +399,10 @@ crypto_sources = [
|
||||
"src/crypto/ex_data.c",
|
||||
"src/crypto/fipsmodule/bcm.c",
|
||||
"src/crypto/fipsmodule/fips_shared_support.c",
|
||||
"src/crypto/fipsmodule/is_fips.c",
|
||||
"src/crypto/hkdf/hkdf.c",
|
||||
"src/crypto/hpke/hpke.c",
|
||||
"src/crypto/hrss/hrss.c",
|
||||
"src/crypto/keccak/keccak.c",
|
||||
"src/crypto/kyber/kyber.c",
|
||||
"src/crypto/lhash/lhash.c",
|
||||
"src/crypto/mem.c",
|
||||
"src/crypto/obj/obj.c",
|
||||
@ -379,34 +426,71 @@ crypto_sources = [
|
||||
"src/crypto/pool/pool.c",
|
||||
"src/crypto/rand_extra/deterministic.c",
|
||||
"src/crypto/rand_extra/forkunsafe.c",
|
||||
"src/crypto/rand_extra/fuchsia.c",
|
||||
"src/crypto/rand_extra/getentropy.c",
|
||||
"src/crypto/rand_extra/ios.c",
|
||||
"src/crypto/rand_extra/passive.c",
|
||||
"src/crypto/rand_extra/rand_extra.c",
|
||||
"src/crypto/rand_extra/trusty.c",
|
||||
"src/crypto/rand_extra/windows.c",
|
||||
"src/crypto/rc4/rc4.c",
|
||||
"src/crypto/refcount_c11.c",
|
||||
"src/crypto/refcount_lock.c",
|
||||
"src/crypto/refcount.c",
|
||||
"src/crypto/rsa_extra/rsa_asn1.c",
|
||||
"src/crypto/rsa_extra/rsa_crypt.c",
|
||||
"src/crypto/rsa_extra/rsa_print.c",
|
||||
"src/crypto/siphash/siphash.c",
|
||||
"src/crypto/spx/address.c",
|
||||
"src/crypto/spx/fors.c",
|
||||
"src/crypto/spx/merkle.c",
|
||||
"src/crypto/spx/spx.c",
|
||||
"src/crypto/spx/spx_util.c",
|
||||
"src/crypto/spx/thash.c",
|
||||
"src/crypto/spx/wots.c",
|
||||
"src/crypto/stack/stack.c",
|
||||
"src/crypto/thread.c",
|
||||
"src/crypto/thread_none.c",
|
||||
"src/crypto/thread_pthread.c",
|
||||
"src/crypto/thread_win.c",
|
||||
"src/crypto/trust_token/pmbtoken.c",
|
||||
"src/crypto/trust_token/trust_token.c",
|
||||
"src/crypto/trust_token/voprf.c",
|
||||
"src/crypto/x509/a_digest.c",
|
||||
"src/crypto/x509/a_sign.c",
|
||||
"src/crypto/x509/a_strex.c",
|
||||
"src/crypto/x509/a_verify.c",
|
||||
"src/crypto/x509/algorithm.c",
|
||||
"src/crypto/x509/asn1_gen.c",
|
||||
"src/crypto/x509/by_dir.c",
|
||||
"src/crypto/x509/by_file.c",
|
||||
"src/crypto/x509/i2d_pr.c",
|
||||
"src/crypto/x509/name_print.c",
|
||||
"src/crypto/x509/policy.c",
|
||||
"src/crypto/x509/rsa_pss.c",
|
||||
"src/crypto/x509/t_crl.c",
|
||||
"src/crypto/x509/t_req.c",
|
||||
"src/crypto/x509/t_x509.c",
|
||||
"src/crypto/x509/t_x509a.c",
|
||||
"src/crypto/x509/v3_akey.c",
|
||||
"src/crypto/x509/v3_akeya.c",
|
||||
"src/crypto/x509/v3_alt.c",
|
||||
"src/crypto/x509/v3_bcons.c",
|
||||
"src/crypto/x509/v3_bitst.c",
|
||||
"src/crypto/x509/v3_conf.c",
|
||||
"src/crypto/x509/v3_cpols.c",
|
||||
"src/crypto/x509/v3_crld.c",
|
||||
"src/crypto/x509/v3_enum.c",
|
||||
"src/crypto/x509/v3_extku.c",
|
||||
"src/crypto/x509/v3_genn.c",
|
||||
"src/crypto/x509/v3_ia5.c",
|
||||
"src/crypto/x509/v3_info.c",
|
||||
"src/crypto/x509/v3_int.c",
|
||||
"src/crypto/x509/v3_lib.c",
|
||||
"src/crypto/x509/v3_ncons.c",
|
||||
"src/crypto/x509/v3_ocsp.c",
|
||||
"src/crypto/x509/v3_pcons.c",
|
||||
"src/crypto/x509/v3_pmaps.c",
|
||||
"src/crypto/x509/v3_prn.c",
|
||||
"src/crypto/x509/v3_purp.c",
|
||||
"src/crypto/x509/v3_skey.c",
|
||||
"src/crypto/x509/v3_utl.c",
|
||||
"src/crypto/x509/x509.c",
|
||||
"src/crypto/x509/x509_att.c",
|
||||
"src/crypto/x509/x509_cmp.c",
|
||||
@ -415,7 +499,6 @@ crypto_sources = [
|
||||
"src/crypto/x509/x509_ext.c",
|
||||
"src/crypto/x509/x509_lu.c",
|
||||
"src/crypto/x509/x509_obj.c",
|
||||
"src/crypto/x509/x509_r2x.c",
|
||||
"src/crypto/x509/x509_req.c",
|
||||
"src/crypto/x509/x509_set.c",
|
||||
"src/crypto/x509/x509_trs.c",
|
||||
@ -432,9 +515,7 @@ crypto_sources = [
|
||||
"src/crypto/x509/x_attrib.c",
|
||||
"src/crypto/x509/x_crl.c",
|
||||
"src/crypto/x509/x_exten.c",
|
||||
"src/crypto/x509/x_info.c",
|
||||
"src/crypto/x509/x_name.c",
|
||||
"src/crypto/x509/x_pkey.c",
|
||||
"src/crypto/x509/x_pubkey.c",
|
||||
"src/crypto/x509/x_req.c",
|
||||
"src/crypto/x509/x_sig.c",
|
||||
@ -442,40 +523,283 @@ crypto_sources = [
|
||||
"src/crypto/x509/x_val.c",
|
||||
"src/crypto/x509/x_x509.c",
|
||||
"src/crypto/x509/x_x509a.c",
|
||||
"src/crypto/x509v3/pcy_cache.c",
|
||||
"src/crypto/x509v3/pcy_data.c",
|
||||
"src/crypto/x509v3/pcy_lib.c",
|
||||
"src/crypto/x509v3/pcy_map.c",
|
||||
"src/crypto/x509v3/pcy_node.c",
|
||||
"src/crypto/x509v3/pcy_tree.c",
|
||||
"src/crypto/x509v3/v3_akey.c",
|
||||
"src/crypto/x509v3/v3_akeya.c",
|
||||
"src/crypto/x509v3/v3_alt.c",
|
||||
"src/crypto/x509v3/v3_bcons.c",
|
||||
"src/crypto/x509v3/v3_bitst.c",
|
||||
"src/crypto/x509v3/v3_conf.c",
|
||||
"src/crypto/x509v3/v3_cpols.c",
|
||||
"src/crypto/x509v3/v3_crld.c",
|
||||
"src/crypto/x509v3/v3_enum.c",
|
||||
"src/crypto/x509v3/v3_extku.c",
|
||||
"src/crypto/x509v3/v3_genn.c",
|
||||
"src/crypto/x509v3/v3_ia5.c",
|
||||
"src/crypto/x509v3/v3_info.c",
|
||||
"src/crypto/x509v3/v3_int.c",
|
||||
"src/crypto/x509v3/v3_lib.c",
|
||||
"src/crypto/x509v3/v3_ncons.c",
|
||||
"src/crypto/x509v3/v3_ocsp.c",
|
||||
"src/crypto/x509v3/v3_pci.c",
|
||||
"src/crypto/x509v3/v3_pcia.c",
|
||||
"src/crypto/x509v3/v3_pcons.c",
|
||||
"src/crypto/x509v3/v3_pku.c",
|
||||
"src/crypto/x509v3/v3_pmaps.c",
|
||||
"src/crypto/x509v3/v3_prn.c",
|
||||
"src/crypto/x509v3/v3_purp.c",
|
||||
"src/crypto/x509v3/v3_skey.c",
|
||||
"src/crypto/x509v3/v3_sxnet.c",
|
||||
"src/crypto/x509v3/v3_utl.c",
|
||||
"src/third_party/fiat/curve25519.c",
|
||||
]
|
||||
|
||||
crypto_sources_asm = [
|
||||
"apple-aarch64/crypto/chacha/chacha-armv8-apple.S",
|
||||
"apple-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/aesv8-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/armv8-mont-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/bn-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/ghash-neon-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/ghashv8-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/p256-armv8-asm-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/sha1-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/sha256-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/sha512-armv8-apple.S",
|
||||
"apple-aarch64/crypto/fipsmodule/vpaes-armv8-apple.S",
|
||||
"apple-aarch64/crypto/test/trampoline-armv8-apple.S",
|
||||
"apple-x86/crypto/chacha/chacha-x86-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/aesni-x86-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/bn-586-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/co-586-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/ghash-ssse3-x86-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/ghash-x86-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/md5-586-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/sha1-586-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/sha256-586-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/sha512-586-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/vpaes-x86-apple.S",
|
||||
"apple-x86/crypto/fipsmodule/x86-mont-apple.S",
|
||||
"apple-x86/crypto/test/trampoline-x86-apple.S",
|
||||
"apple-x86_64/crypto/chacha/chacha-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/aesni-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/ghash-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/md5-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/p256-x86_64-asm-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/rdrand-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/rsaz-avx2-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/sha1-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/sha256-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/sha512-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/vpaes-x86_64-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/x86_64-mont-apple.S",
|
||||
"apple-x86_64/crypto/fipsmodule/x86_64-mont5-apple.S",
|
||||
"apple-x86_64/crypto/test/trampoline-x86_64-apple.S",
|
||||
"linux-aarch64/crypto/chacha/chacha-armv8-linux.S",
|
||||
"linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/aesv8-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/armv8-mont-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/bn-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/ghash-neon-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/ghashv8-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/p256-armv8-asm-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/sha1-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/sha256-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/sha512-armv8-linux.S",
|
||||
"linux-aarch64/crypto/fipsmodule/vpaes-armv8-linux.S",
|
||||
"linux-aarch64/crypto/test/trampoline-armv8-linux.S",
|
||||
"linux-arm/crypto/chacha/chacha-armv4-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/aesv8-armv7-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/armv4-mont-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/ghash-armv4-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/ghashv8-armv7-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/sha256-armv4-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/sha512-armv4-linux.S",
|
||||
"linux-arm/crypto/fipsmodule/vpaes-armv7-linux.S",
|
||||
"linux-arm/crypto/test/trampoline-armv4-linux.S",
|
||||
"linux-x86/crypto/chacha/chacha-x86-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/aesni-x86-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/bn-586-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/co-586-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/ghash-ssse3-x86-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/ghash-x86-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/md5-586-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/sha1-586-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/sha256-586-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/sha512-586-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/vpaes-x86-linux.S",
|
||||
"linux-x86/crypto/fipsmodule/x86-mont-linux.S",
|
||||
"linux-x86/crypto/test/trampoline-x86-linux.S",
|
||||
"linux-x86_64/crypto/chacha/chacha-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/aesni-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/ghash-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/md5-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/p256-x86_64-asm-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/rsaz-avx2-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/sha1-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/sha256-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/sha512-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/x86_64-mont-linux.S",
|
||||
"linux-x86_64/crypto/fipsmodule/x86_64-mont5-linux.S",
|
||||
"linux-x86_64/crypto/test/trampoline-x86_64-linux.S",
|
||||
"src/crypto/curve25519/asm/x25519-asm-arm.S",
|
||||
"src/crypto/hrss/asm/poly_rq_mul.S",
|
||||
"src/crypto/poly1305/poly1305_arm_asm.S",
|
||||
"src/third_party/fiat/asm/fiat_curve25519_adx_mul.S",
|
||||
"src/third_party/fiat/asm/fiat_curve25519_adx_square.S",
|
||||
"src/third_party/fiat/asm/fiat_p256_adx_mul.S",
|
||||
"src/third_party/fiat/asm/fiat_p256_adx_sqr.S",
|
||||
"win-aarch64/crypto/chacha/chacha-armv8-win.S",
|
||||
"win-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/aesv8-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/armv8-mont-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/bn-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/ghash-neon-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/ghashv8-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/p256-armv8-asm-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/sha1-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/sha256-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/sha512-armv8-win.S",
|
||||
"win-aarch64/crypto/fipsmodule/vpaes-armv8-win.S",
|
||||
"win-aarch64/crypto/test/trampoline-armv8-win.S",
|
||||
]
|
||||
|
||||
crypto_sources_nasm = [
|
||||
"win-x86/crypto/chacha/chacha-x86-win.asm",
|
||||
"win-x86/crypto/fipsmodule/aesni-x86-win.asm",
|
||||
"win-x86/crypto/fipsmodule/bn-586-win.asm",
|
||||
"win-x86/crypto/fipsmodule/co-586-win.asm",
|
||||
"win-x86/crypto/fipsmodule/ghash-ssse3-x86-win.asm",
|
||||
"win-x86/crypto/fipsmodule/ghash-x86-win.asm",
|
||||
"win-x86/crypto/fipsmodule/md5-586-win.asm",
|
||||
"win-x86/crypto/fipsmodule/sha1-586-win.asm",
|
||||
"win-x86/crypto/fipsmodule/sha256-586-win.asm",
|
||||
"win-x86/crypto/fipsmodule/sha512-586-win.asm",
|
||||
"win-x86/crypto/fipsmodule/vpaes-x86-win.asm",
|
||||
"win-x86/crypto/fipsmodule/x86-mont-win.asm",
|
||||
"win-x86/crypto/test/trampoline-x86-win.asm",
|
||||
"win-x86_64/crypto/chacha/chacha-x86_64-win.asm",
|
||||
"win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-win.asm",
|
||||
"win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/aesni-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/ghash-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/md5-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/p256-x86_64-asm-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/rdrand-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/rsaz-avx2-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/sha1-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/sha256-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/sha512-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/vpaes-x86_64-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/x86_64-mont-win.asm",
|
||||
"win-x86_64/crypto/fipsmodule/x86_64-mont5-win.asm",
|
||||
"win-x86_64/crypto/test/trampoline-x86_64-win.asm",
|
||||
]
|
||||
|
||||
pki_headers = [
|
||||
"src/include/openssl/pki/certificate.h",
|
||||
"src/include/openssl/pki/signature_verify_cache.h",
|
||||
]
|
||||
|
||||
pki_internal_headers = [
|
||||
"src/pki/cert_error_id.h",
|
||||
"src/pki/cert_error_params.h",
|
||||
"src/pki/cert_errors.h",
|
||||
"src/pki/cert_issuer_source.h",
|
||||
"src/pki/cert_issuer_source_static.h",
|
||||
"src/pki/cert_issuer_source_sync_unittest.h",
|
||||
"src/pki/certificate_policies.h",
|
||||
"src/pki/common_cert_errors.h",
|
||||
"src/pki/crl.h",
|
||||
"src/pki/encode_values.h",
|
||||
"src/pki/extended_key_usage.h",
|
||||
"src/pki/general_names.h",
|
||||
"src/pki/input.h",
|
||||
"src/pki/ip_util.h",
|
||||
"src/pki/mock_signature_verify_cache.h",
|
||||
"src/pki/name_constraints.h",
|
||||
"src/pki/nist_pkits_unittest.h",
|
||||
"src/pki/ocsp.h",
|
||||
"src/pki/ocsp_revocation_status.h",
|
||||
"src/pki/ocsp_verify_result.h",
|
||||
"src/pki/parse_certificate.h",
|
||||
"src/pki/parse_name.h",
|
||||
"src/pki/parse_values.h",
|
||||
"src/pki/parsed_certificate.h",
|
||||
"src/pki/parser.h",
|
||||
"src/pki/path_builder.h",
|
||||
"src/pki/pem.h",
|
||||
"src/pki/revocation_util.h",
|
||||
"src/pki/signature_algorithm.h",
|
||||
"src/pki/simple_path_builder_delegate.h",
|
||||
"src/pki/string_util.h",
|
||||
"src/pki/test_helpers.h",
|
||||
"src/pki/testdata/nist-pkits/pkits_testcases-inl.h",
|
||||
"src/pki/trust_store.h",
|
||||
"src/pki/trust_store_collection.h",
|
||||
"src/pki/trust_store_in_memory.h",
|
||||
"src/pki/verify_certificate_chain.h",
|
||||
"src/pki/verify_certificate_chain_typed_unittest.h",
|
||||
"src/pki/verify_name_match.h",
|
||||
"src/pki/verify_signed_data.h",
|
||||
]
|
||||
|
||||
pki_sources = [
|
||||
"src/pki/cert_error_id.cc",
|
||||
"src/pki/cert_error_params.cc",
|
||||
"src/pki/cert_errors.cc",
|
||||
"src/pki/cert_issuer_source_static.cc",
|
||||
"src/pki/certificate.cc",
|
||||
"src/pki/certificate_policies.cc",
|
||||
"src/pki/common_cert_errors.cc",
|
||||
"src/pki/crl.cc",
|
||||
"src/pki/encode_values.cc",
|
||||
"src/pki/extended_key_usage.cc",
|
||||
"src/pki/general_names.cc",
|
||||
"src/pki/input.cc",
|
||||
"src/pki/ip_util.cc",
|
||||
"src/pki/name_constraints.cc",
|
||||
"src/pki/ocsp.cc",
|
||||
"src/pki/ocsp_verify_result.cc",
|
||||
"src/pki/parse_certificate.cc",
|
||||
"src/pki/parse_name.cc",
|
||||
"src/pki/parse_values.cc",
|
||||
"src/pki/parsed_certificate.cc",
|
||||
"src/pki/parser.cc",
|
||||
"src/pki/path_builder.cc",
|
||||
"src/pki/pem.cc",
|
||||
"src/pki/revocation_util.cc",
|
||||
"src/pki/signature_algorithm.cc",
|
||||
"src/pki/simple_path_builder_delegate.cc",
|
||||
"src/pki/string_util.cc",
|
||||
"src/pki/trust_store.cc",
|
||||
"src/pki/trust_store_collection.cc",
|
||||
"src/pki/trust_store_in_memory.cc",
|
||||
"src/pki/verify_certificate_chain.cc",
|
||||
"src/pki/verify_name_match.cc",
|
||||
"src/pki/verify_signed_data.cc",
|
||||
]
|
||||
|
||||
rust_bssl_sys = [
|
||||
"src/rust/bssl-sys/src/lib.rs",
|
||||
]
|
||||
|
||||
rust_bssl_crypto = [
|
||||
"src/rust/bssl-crypto/src/aead.rs",
|
||||
"src/rust/bssl-crypto/src/aes.rs",
|
||||
"src/rust/bssl-crypto/src/cipher/aes_cbc.rs",
|
||||
"src/rust/bssl-crypto/src/cipher/aes_ctr.rs",
|
||||
"src/rust/bssl-crypto/src/cipher/mod.rs",
|
||||
"src/rust/bssl-crypto/src/digest.rs",
|
||||
"src/rust/bssl-crypto/src/ec.rs",
|
||||
"src/rust/bssl-crypto/src/ecdh.rs",
|
||||
"src/rust/bssl-crypto/src/ecdsa.rs",
|
||||
"src/rust/bssl-crypto/src/ed25519.rs",
|
||||
"src/rust/bssl-crypto/src/hkdf.rs",
|
||||
"src/rust/bssl-crypto/src/hmac.rs",
|
||||
"src/rust/bssl-crypto/src/hpke.rs",
|
||||
"src/rust/bssl-crypto/src/lib.rs",
|
||||
"src/rust/bssl-crypto/src/macros.rs",
|
||||
"src/rust/bssl-crypto/src/mem.rs",
|
||||
"src/rust/bssl-crypto/src/rand.rs",
|
||||
"src/rust/bssl-crypto/src/rsa.rs",
|
||||
"src/rust/bssl-crypto/src/scoped.rs",
|
||||
"src/rust/bssl-crypto/src/test_helpers.rs",
|
||||
"src/rust/bssl-crypto/src/x25519.rs",
|
||||
]
|
||||
|
||||
tool_sources = [
|
||||
@ -484,7 +808,9 @@ tool_sources = [
|
||||
"src/tool/client.cc",
|
||||
"src/tool/const.cc",
|
||||
"src/tool/digest.cc",
|
||||
"src/tool/fd.cc",
|
||||
"src/tool/file.cc",
|
||||
"src/tool/generate_ech.cc",
|
||||
"src/tool/generate_ed25519.cc",
|
||||
"src/tool/genrsa.cc",
|
||||
"src/tool/pkcs12.cc",
|
||||
@ -500,180 +826,3 @@ tool_headers = [
|
||||
"src/tool/internal.h",
|
||||
"src/tool/transport_common.h",
|
||||
]
|
||||
|
||||
crypto_sources_ios_aarch64 = [
|
||||
"ios-aarch64/crypto/chacha/chacha-armv8.S",
|
||||
"ios-aarch64/crypto/fipsmodule/aesv8-armx64.S",
|
||||
"ios-aarch64/crypto/fipsmodule/armv8-mont.S",
|
||||
"ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
|
||||
"ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
|
||||
"ios-aarch64/crypto/fipsmodule/sha1-armv8.S",
|
||||
"ios-aarch64/crypto/fipsmodule/sha256-armv8.S",
|
||||
"ios-aarch64/crypto/fipsmodule/sha512-armv8.S",
|
||||
"ios-aarch64/crypto/fipsmodule/vpaes-armv8.S",
|
||||
"ios-aarch64/crypto/test/trampoline-armv8.S",
|
||||
]
|
||||
|
||||
crypto_sources_ios_arm = [
|
||||
"ios-arm/crypto/chacha/chacha-armv4.S",
|
||||
"ios-arm/crypto/fipsmodule/aesv8-armx32.S",
|
||||
"ios-arm/crypto/fipsmodule/armv4-mont.S",
|
||||
"ios-arm/crypto/fipsmodule/bsaes-armv7.S",
|
||||
"ios-arm/crypto/fipsmodule/ghash-armv4.S",
|
||||
"ios-arm/crypto/fipsmodule/ghashv8-armx32.S",
|
||||
"ios-arm/crypto/fipsmodule/sha1-armv4-large.S",
|
||||
"ios-arm/crypto/fipsmodule/sha256-armv4.S",
|
||||
"ios-arm/crypto/fipsmodule/sha512-armv4.S",
|
||||
"ios-arm/crypto/fipsmodule/vpaes-armv7.S",
|
||||
"ios-arm/crypto/test/trampoline-armv4.S",
|
||||
]
|
||||
|
||||
crypto_sources_linux_aarch64 = [
|
||||
"linux-aarch64/crypto/chacha/chacha-armv8.S",
|
||||
"linux-aarch64/crypto/fipsmodule/aesv8-armx64.S",
|
||||
"linux-aarch64/crypto/fipsmodule/armv8-mont.S",
|
||||
"linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
|
||||
"linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
|
||||
"linux-aarch64/crypto/fipsmodule/sha1-armv8.S",
|
||||
"linux-aarch64/crypto/fipsmodule/sha256-armv8.S",
|
||||
"linux-aarch64/crypto/fipsmodule/sha512-armv8.S",
|
||||
"linux-aarch64/crypto/fipsmodule/vpaes-armv8.S",
|
||||
"linux-aarch64/crypto/test/trampoline-armv8.S",
|
||||
]
|
||||
|
||||
crypto_sources_linux_arm = [
|
||||
"linux-arm/crypto/chacha/chacha-armv4.S",
|
||||
"linux-arm/crypto/fipsmodule/aesv8-armx32.S",
|
||||
"linux-arm/crypto/fipsmodule/armv4-mont.S",
|
||||
"linux-arm/crypto/fipsmodule/bsaes-armv7.S",
|
||||
"linux-arm/crypto/fipsmodule/ghash-armv4.S",
|
||||
"linux-arm/crypto/fipsmodule/ghashv8-armx32.S",
|
||||
"linux-arm/crypto/fipsmodule/sha1-armv4-large.S",
|
||||
"linux-arm/crypto/fipsmodule/sha256-armv4.S",
|
||||
"linux-arm/crypto/fipsmodule/sha512-armv4.S",
|
||||
"linux-arm/crypto/fipsmodule/vpaes-armv7.S",
|
||||
"linux-arm/crypto/test/trampoline-armv4.S",
|
||||
"src/crypto/curve25519/asm/x25519-asm-arm.S",
|
||||
"src/crypto/poly1305/poly1305_arm_asm.S",
|
||||
]
|
||||
|
||||
crypto_sources_linux_ppc64le = [
|
||||
"linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S",
|
||||
"linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S",
|
||||
"linux-ppc64le/crypto/test/trampoline-ppc.S",
|
||||
]
|
||||
|
||||
crypto_sources_linux_x86 = [
|
||||
"linux-x86/crypto/chacha/chacha-x86.S",
|
||||
"linux-x86/crypto/fipsmodule/aesni-x86.S",
|
||||
"linux-x86/crypto/fipsmodule/bn-586.S",
|
||||
"linux-x86/crypto/fipsmodule/co-586.S",
|
||||
"linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
|
||||
"linux-x86/crypto/fipsmodule/ghash-x86.S",
|
||||
"linux-x86/crypto/fipsmodule/md5-586.S",
|
||||
"linux-x86/crypto/fipsmodule/sha1-586.S",
|
||||
"linux-x86/crypto/fipsmodule/sha256-586.S",
|
||||
"linux-x86/crypto/fipsmodule/sha512-586.S",
|
||||
"linux-x86/crypto/fipsmodule/vpaes-x86.S",
|
||||
"linux-x86/crypto/fipsmodule/x86-mont.S",
|
||||
"linux-x86/crypto/test/trampoline-x86.S",
|
||||
]
|
||||
|
||||
crypto_sources_linux_x86_64 = [
|
||||
"linux-x86_64/crypto/chacha/chacha-x86_64.S",
|
||||
"linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
|
||||
"linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
|
||||
"linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
|
||||
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/rsaz-avx2.S",
|
||||
"linux-x86_64/crypto/fipsmodule/sha1-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/sha256-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/sha512-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
|
||||
"linux-x86_64/crypto/fipsmodule/x86_64-mont.S",
|
||||
"linux-x86_64/crypto/fipsmodule/x86_64-mont5.S",
|
||||
"linux-x86_64/crypto/test/trampoline-x86_64.S",
|
||||
"src/crypto/hrss/asm/poly_rq_mul.S",
|
||||
]
|
||||
|
||||
crypto_sources_mac_x86 = [
|
||||
"mac-x86/crypto/chacha/chacha-x86.S",
|
||||
"mac-x86/crypto/fipsmodule/aesni-x86.S",
|
||||
"mac-x86/crypto/fipsmodule/bn-586.S",
|
||||
"mac-x86/crypto/fipsmodule/co-586.S",
|
||||
"mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
|
||||
"mac-x86/crypto/fipsmodule/ghash-x86.S",
|
||||
"mac-x86/crypto/fipsmodule/md5-586.S",
|
||||
"mac-x86/crypto/fipsmodule/sha1-586.S",
|
||||
"mac-x86/crypto/fipsmodule/sha256-586.S",
|
||||
"mac-x86/crypto/fipsmodule/sha512-586.S",
|
||||
"mac-x86/crypto/fipsmodule/vpaes-x86.S",
|
||||
"mac-x86/crypto/fipsmodule/x86-mont.S",
|
||||
"mac-x86/crypto/test/trampoline-x86.S",
|
||||
]
|
||||
|
||||
crypto_sources_mac_x86_64 = [
|
||||
"mac-x86_64/crypto/chacha/chacha-x86_64.S",
|
||||
"mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
|
||||
"mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/aesni-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/ghash-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/md5-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
|
||||
"mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
|
||||
"mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/rsaz-avx2.S",
|
||||
"mac-x86_64/crypto/fipsmodule/sha1-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/sha256-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/sha512-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
|
||||
"mac-x86_64/crypto/fipsmodule/x86_64-mont.S",
|
||||
"mac-x86_64/crypto/fipsmodule/x86_64-mont5.S",
|
||||
"mac-x86_64/crypto/test/trampoline-x86_64.S",
|
||||
]
|
||||
|
||||
crypto_sources_win_x86 = [
|
||||
"win-x86/crypto/chacha/chacha-x86.asm",
|
||||
"win-x86/crypto/fipsmodule/aesni-x86.asm",
|
||||
"win-x86/crypto/fipsmodule/bn-586.asm",
|
||||
"win-x86/crypto/fipsmodule/co-586.asm",
|
||||
"win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm",
|
||||
"win-x86/crypto/fipsmodule/ghash-x86.asm",
|
||||
"win-x86/crypto/fipsmodule/md5-586.asm",
|
||||
"win-x86/crypto/fipsmodule/sha1-586.asm",
|
||||
"win-x86/crypto/fipsmodule/sha256-586.asm",
|
||||
"win-x86/crypto/fipsmodule/sha512-586.asm",
|
||||
"win-x86/crypto/fipsmodule/vpaes-x86.asm",
|
||||
"win-x86/crypto/fipsmodule/x86-mont.asm",
|
||||
"win-x86/crypto/test/trampoline-x86.asm",
|
||||
]
|
||||
|
||||
crypto_sources_win_x86_64 = [
|
||||
"win-x86_64/crypto/chacha/chacha-x86_64.asm",
|
||||
"win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm",
|
||||
"win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/aesni-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/ghash-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/md5-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm",
|
||||
"win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm",
|
||||
"win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/rsaz-avx2.asm",
|
||||
"win-x86_64/crypto/fipsmodule/sha1-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/sha256-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/sha512-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm",
|
||||
"win-x86_64/crypto/fipsmodule/x86_64-mont.asm",
|
||||
"win-x86_64/crypto/fipsmodule/x86_64-mont5.asm",
|
||||
"win-x86_64/crypto/test/trampoline-x86_64.asm",
|
||||
]
|
||||
|
||||
2314
third-party/boringssl/BUILD.generated_tests.bzl
vendored
Normal file
2314
third-party/boringssl/BUILD.generated_tests.bzl
vendored
Normal file
File diff suppressed because it is too large
Load Diff
576
third-party/boringssl/CMakeLists.txt
vendored
Normal file
576
third-party/boringssl/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,576 @@
|
||||
# Copyright (c) 2015, Google Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and/or distribute this software for any
|
||||
# purpose with or without fee is hereby granted, provided that the above
|
||||
# copyright notice and this permission notice appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
# This file is created by generate_build_files.py. Do not edit manually.
|
||||
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
project(BoringSSL LANGUAGES C CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common")
|
||||
endif()
|
||||
|
||||
# pthread_rwlock_t requires a feature flag on glibc.
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
add_definitions(-D_HAS_EXCEPTIONS=0)
|
||||
add_definitions(-DWIN32_LEAN_AND_MEAN)
|
||||
add_definitions(-DNOMINMAX)
|
||||
# Allow use of fopen.
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
endif()
|
||||
|
||||
add_definitions(-DBORINGSSL_IMPLEMENTATION)
|
||||
|
||||
if(OPENSSL_NO_ASM)
|
||||
add_definitions(-DOPENSSL_NO_ASM)
|
||||
else()
|
||||
# On x86 and x86_64 Windows, we use the NASM output.
|
||||
if(WIN32 AND CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|x86_64|amd64|x86|i[3-6]86")
|
||||
enable_language(ASM_NASM)
|
||||
set(OPENSSL_NASM TRUE)
|
||||
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
|
||||
else()
|
||||
enable_language(ASM)
|
||||
set(OPENSSL_ASM TRUE)
|
||||
# Work around https://gitlab.kitware.com/cmake/cmake/-/issues/20771 in older
|
||||
# CMake versions.
|
||||
if(APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
|
||||
if(CMAKE_OSX_SYSROOT)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
|
||||
endif()
|
||||
foreach(arch ${CMAKE_OSX_ARCHITECTURES})
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
|
||||
endforeach()
|
||||
endif()
|
||||
if(NOT WIN32)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,--noexecstack")
|
||||
endif()
|
||||
# Clang's integerated assembler does not support debug symbols.
|
||||
if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_definitions(-DBORINGSSL_SHARED_LIBRARY)
|
||||
# Enable position-independent code globally. This is needed because
|
||||
# some library targets are OBJECT libraries.
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
|
||||
endif()
|
||||
|
||||
set(
|
||||
CRYPTO_SOURCES_ASM
|
||||
|
||||
apple-aarch64/crypto/chacha/chacha-armv8-apple.S
|
||||
apple-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/aesv8-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/armv8-mont-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/bn-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/ghash-neon-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/ghashv8-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/p256-armv8-asm-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/sha1-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/sha256-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/sha512-armv8-apple.S
|
||||
apple-aarch64/crypto/fipsmodule/vpaes-armv8-apple.S
|
||||
apple-aarch64/crypto/test/trampoline-armv8-apple.S
|
||||
apple-x86/crypto/chacha/chacha-x86-apple.S
|
||||
apple-x86/crypto/fipsmodule/aesni-x86-apple.S
|
||||
apple-x86/crypto/fipsmodule/bn-586-apple.S
|
||||
apple-x86/crypto/fipsmodule/co-586-apple.S
|
||||
apple-x86/crypto/fipsmodule/ghash-ssse3-x86-apple.S
|
||||
apple-x86/crypto/fipsmodule/ghash-x86-apple.S
|
||||
apple-x86/crypto/fipsmodule/md5-586-apple.S
|
||||
apple-x86/crypto/fipsmodule/sha1-586-apple.S
|
||||
apple-x86/crypto/fipsmodule/sha256-586-apple.S
|
||||
apple-x86/crypto/fipsmodule/sha512-586-apple.S
|
||||
apple-x86/crypto/fipsmodule/vpaes-x86-apple.S
|
||||
apple-x86/crypto/fipsmodule/x86-mont-apple.S
|
||||
apple-x86/crypto/test/trampoline-x86-apple.S
|
||||
apple-x86_64/crypto/chacha/chacha-x86_64-apple.S
|
||||
apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-apple.S
|
||||
apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/aesni-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/ghash-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/md5-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/p256-x86_64-asm-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/rdrand-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/rsaz-avx2-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/sha1-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/sha256-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/sha512-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/vpaes-x86_64-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/x86_64-mont-apple.S
|
||||
apple-x86_64/crypto/fipsmodule/x86_64-mont5-apple.S
|
||||
apple-x86_64/crypto/test/trampoline-x86_64-apple.S
|
||||
linux-aarch64/crypto/chacha/chacha-armv8-linux.S
|
||||
linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/aesv8-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/armv8-mont-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/bn-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/ghash-neon-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/ghashv8-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/p256-armv8-asm-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/sha1-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/sha256-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/sha512-armv8-linux.S
|
||||
linux-aarch64/crypto/fipsmodule/vpaes-armv8-linux.S
|
||||
linux-aarch64/crypto/test/trampoline-armv8-linux.S
|
||||
linux-arm/crypto/chacha/chacha-armv4-linux.S
|
||||
linux-arm/crypto/fipsmodule/aesv8-armv7-linux.S
|
||||
linux-arm/crypto/fipsmodule/armv4-mont-linux.S
|
||||
linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S
|
||||
linux-arm/crypto/fipsmodule/ghash-armv4-linux.S
|
||||
linux-arm/crypto/fipsmodule/ghashv8-armv7-linux.S
|
||||
linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S
|
||||
linux-arm/crypto/fipsmodule/sha256-armv4-linux.S
|
||||
linux-arm/crypto/fipsmodule/sha512-armv4-linux.S
|
||||
linux-arm/crypto/fipsmodule/vpaes-armv7-linux.S
|
||||
linux-arm/crypto/test/trampoline-armv4-linux.S
|
||||
linux-x86/crypto/chacha/chacha-x86-linux.S
|
||||
linux-x86/crypto/fipsmodule/aesni-x86-linux.S
|
||||
linux-x86/crypto/fipsmodule/bn-586-linux.S
|
||||
linux-x86/crypto/fipsmodule/co-586-linux.S
|
||||
linux-x86/crypto/fipsmodule/ghash-ssse3-x86-linux.S
|
||||
linux-x86/crypto/fipsmodule/ghash-x86-linux.S
|
||||
linux-x86/crypto/fipsmodule/md5-586-linux.S
|
||||
linux-x86/crypto/fipsmodule/sha1-586-linux.S
|
||||
linux-x86/crypto/fipsmodule/sha256-586-linux.S
|
||||
linux-x86/crypto/fipsmodule/sha512-586-linux.S
|
||||
linux-x86/crypto/fipsmodule/vpaes-x86-linux.S
|
||||
linux-x86/crypto/fipsmodule/x86-mont-linux.S
|
||||
linux-x86/crypto/test/trampoline-x86-linux.S
|
||||
linux-x86_64/crypto/chacha/chacha-x86_64-linux.S
|
||||
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-linux.S
|
||||
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/md5-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/p256-x86_64-asm-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/rdrand-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/rsaz-avx2-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/sha1-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/sha256-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/sha512-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/vpaes-x86_64-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont-linux.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont5-linux.S
|
||||
linux-x86_64/crypto/test/trampoline-x86_64-linux.S
|
||||
src/crypto/curve25519/asm/x25519-asm-arm.S
|
||||
src/crypto/hrss/asm/poly_rq_mul.S
|
||||
src/crypto/poly1305/poly1305_arm_asm.S
|
||||
src/third_party/fiat/asm/fiat_curve25519_adx_mul.S
|
||||
src/third_party/fiat/asm/fiat_curve25519_adx_square.S
|
||||
src/third_party/fiat/asm/fiat_p256_adx_mul.S
|
||||
src/third_party/fiat/asm/fiat_p256_adx_sqr.S
|
||||
win-aarch64/crypto/chacha/chacha-armv8-win.S
|
||||
win-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/aesv8-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/armv8-mont-win.S
|
||||
win-aarch64/crypto/fipsmodule/bn-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/ghash-neon-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/ghashv8-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/p256-armv8-asm-win.S
|
||||
win-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-win.S
|
||||
win-aarch64/crypto/fipsmodule/sha1-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/sha256-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/sha512-armv8-win.S
|
||||
win-aarch64/crypto/fipsmodule/vpaes-armv8-win.S
|
||||
win-aarch64/crypto/test/trampoline-armv8-win.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_SOURCES_NASM
|
||||
|
||||
win-x86/crypto/chacha/chacha-x86-win.asm
|
||||
win-x86/crypto/fipsmodule/aesni-x86-win.asm
|
||||
win-x86/crypto/fipsmodule/bn-586-win.asm
|
||||
win-x86/crypto/fipsmodule/co-586-win.asm
|
||||
win-x86/crypto/fipsmodule/ghash-ssse3-x86-win.asm
|
||||
win-x86/crypto/fipsmodule/ghash-x86-win.asm
|
||||
win-x86/crypto/fipsmodule/md5-586-win.asm
|
||||
win-x86/crypto/fipsmodule/sha1-586-win.asm
|
||||
win-x86/crypto/fipsmodule/sha256-586-win.asm
|
||||
win-x86/crypto/fipsmodule/sha512-586-win.asm
|
||||
win-x86/crypto/fipsmodule/vpaes-x86-win.asm
|
||||
win-x86/crypto/fipsmodule/x86-mont-win.asm
|
||||
win-x86/crypto/test/trampoline-x86-win.asm
|
||||
win-x86_64/crypto/chacha/chacha-x86_64-win.asm
|
||||
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-win.asm
|
||||
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/md5-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/p256-x86_64-asm-win.asm
|
||||
win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-win.asm
|
||||
win-x86_64/crypto/fipsmodule/rdrand-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/rsaz-avx2-win.asm
|
||||
win-x86_64/crypto/fipsmodule/sha1-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/sha256-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/sha512-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/vpaes-x86_64-win.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont-win.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont5-win.asm
|
||||
win-x86_64/crypto/test/trampoline-x86_64-win.asm
|
||||
)
|
||||
|
||||
if(OPENSSL_ASM)
|
||||
list(APPEND CRYPTO_SOURCES_ASM_USED ${CRYPTO_SOURCES_ASM})
|
||||
endif()
|
||||
if(OPENSSL_NASM)
|
||||
list(APPEND CRYPTO_SOURCES_ASM_USED ${CRYPTO_SOURCES_NASM})
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
crypto
|
||||
|
||||
${CRYPTO_SOURCES_ASM_USED}
|
||||
err_data.c
|
||||
src/crypto/asn1/a_bitstr.c
|
||||
src/crypto/asn1/a_bool.c
|
||||
src/crypto/asn1/a_d2i_fp.c
|
||||
src/crypto/asn1/a_dup.c
|
||||
src/crypto/asn1/a_gentm.c
|
||||
src/crypto/asn1/a_i2d_fp.c
|
||||
src/crypto/asn1/a_int.c
|
||||
src/crypto/asn1/a_mbstr.c
|
||||
src/crypto/asn1/a_object.c
|
||||
src/crypto/asn1/a_octet.c
|
||||
src/crypto/asn1/a_strex.c
|
||||
src/crypto/asn1/a_strnid.c
|
||||
src/crypto/asn1/a_time.c
|
||||
src/crypto/asn1/a_type.c
|
||||
src/crypto/asn1/a_utctm.c
|
||||
src/crypto/asn1/asn1_lib.c
|
||||
src/crypto/asn1/asn1_par.c
|
||||
src/crypto/asn1/asn_pack.c
|
||||
src/crypto/asn1/f_int.c
|
||||
src/crypto/asn1/f_string.c
|
||||
src/crypto/asn1/posix_time.c
|
||||
src/crypto/asn1/tasn_dec.c
|
||||
src/crypto/asn1/tasn_enc.c
|
||||
src/crypto/asn1/tasn_fre.c
|
||||
src/crypto/asn1/tasn_new.c
|
||||
src/crypto/asn1/tasn_typ.c
|
||||
src/crypto/asn1/tasn_utl.c
|
||||
src/crypto/base64/base64.c
|
||||
src/crypto/bio/bio.c
|
||||
src/crypto/bio/bio_mem.c
|
||||
src/crypto/bio/connect.c
|
||||
src/crypto/bio/errno.c
|
||||
src/crypto/bio/fd.c
|
||||
src/crypto/bio/file.c
|
||||
src/crypto/bio/hexdump.c
|
||||
src/crypto/bio/pair.c
|
||||
src/crypto/bio/printf.c
|
||||
src/crypto/bio/socket.c
|
||||
src/crypto/bio/socket_helper.c
|
||||
src/crypto/blake2/blake2.c
|
||||
src/crypto/bn_extra/bn_asn1.c
|
||||
src/crypto/bn_extra/convert.c
|
||||
src/crypto/buf/buf.c
|
||||
src/crypto/bytestring/asn1_compat.c
|
||||
src/crypto/bytestring/ber.c
|
||||
src/crypto/bytestring/cbb.c
|
||||
src/crypto/bytestring/cbs.c
|
||||
src/crypto/bytestring/unicode.c
|
||||
src/crypto/chacha/chacha.c
|
||||
src/crypto/cipher_extra/cipher_extra.c
|
||||
src/crypto/cipher_extra/derive_key.c
|
||||
src/crypto/cipher_extra/e_aesctrhmac.c
|
||||
src/crypto/cipher_extra/e_aesgcmsiv.c
|
||||
src/crypto/cipher_extra/e_chacha20poly1305.c
|
||||
src/crypto/cipher_extra/e_des.c
|
||||
src/crypto/cipher_extra/e_null.c
|
||||
src/crypto/cipher_extra/e_rc2.c
|
||||
src/crypto/cipher_extra/e_rc4.c
|
||||
src/crypto/cipher_extra/e_tls.c
|
||||
src/crypto/cipher_extra/tls_cbc.c
|
||||
src/crypto/conf/conf.c
|
||||
src/crypto/cpu_aarch64_apple.c
|
||||
src/crypto/cpu_aarch64_fuchsia.c
|
||||
src/crypto/cpu_aarch64_linux.c
|
||||
src/crypto/cpu_aarch64_openbsd.c
|
||||
src/crypto/cpu_aarch64_sysreg.c
|
||||
src/crypto/cpu_aarch64_win.c
|
||||
src/crypto/cpu_arm_freebsd.c
|
||||
src/crypto/cpu_arm_linux.c
|
||||
src/crypto/cpu_intel.c
|
||||
src/crypto/crypto.c
|
||||
src/crypto/curve25519/curve25519.c
|
||||
src/crypto/curve25519/curve25519_64_adx.c
|
||||
src/crypto/curve25519/spake25519.c
|
||||
src/crypto/des/des.c
|
||||
src/crypto/dh_extra/dh_asn1.c
|
||||
src/crypto/dh_extra/params.c
|
||||
src/crypto/digest_extra/digest_extra.c
|
||||
src/crypto/dilithium/dilithium.c
|
||||
src/crypto/dsa/dsa.c
|
||||
src/crypto/dsa/dsa_asn1.c
|
||||
src/crypto/ec_extra/ec_asn1.c
|
||||
src/crypto/ec_extra/ec_derive.c
|
||||
src/crypto/ec_extra/hash_to_curve.c
|
||||
src/crypto/ecdh_extra/ecdh_extra.c
|
||||
src/crypto/ecdsa_extra/ecdsa_asn1.c
|
||||
src/crypto/engine/engine.c
|
||||
src/crypto/err/err.c
|
||||
src/crypto/evp/evp.c
|
||||
src/crypto/evp/evp_asn1.c
|
||||
src/crypto/evp/evp_ctx.c
|
||||
src/crypto/evp/p_dsa_asn1.c
|
||||
src/crypto/evp/p_ec.c
|
||||
src/crypto/evp/p_ec_asn1.c
|
||||
src/crypto/evp/p_ed25519.c
|
||||
src/crypto/evp/p_ed25519_asn1.c
|
||||
src/crypto/evp/p_hkdf.c
|
||||
src/crypto/evp/p_rsa.c
|
||||
src/crypto/evp/p_rsa_asn1.c
|
||||
src/crypto/evp/p_x25519.c
|
||||
src/crypto/evp/p_x25519_asn1.c
|
||||
src/crypto/evp/pbkdf.c
|
||||
src/crypto/evp/print.c
|
||||
src/crypto/evp/scrypt.c
|
||||
src/crypto/evp/sign.c
|
||||
src/crypto/ex_data.c
|
||||
src/crypto/fipsmodule/bcm.c
|
||||
src/crypto/fipsmodule/fips_shared_support.c
|
||||
src/crypto/hpke/hpke.c
|
||||
src/crypto/hrss/hrss.c
|
||||
src/crypto/keccak/keccak.c
|
||||
src/crypto/kyber/kyber.c
|
||||
src/crypto/lhash/lhash.c
|
||||
src/crypto/mem.c
|
||||
src/crypto/obj/obj.c
|
||||
src/crypto/obj/obj_xref.c
|
||||
src/crypto/pem/pem_all.c
|
||||
src/crypto/pem/pem_info.c
|
||||
src/crypto/pem/pem_lib.c
|
||||
src/crypto/pem/pem_oth.c
|
||||
src/crypto/pem/pem_pk8.c
|
||||
src/crypto/pem/pem_pkey.c
|
||||
src/crypto/pem/pem_x509.c
|
||||
src/crypto/pem/pem_xaux.c
|
||||
src/crypto/pkcs7/pkcs7.c
|
||||
src/crypto/pkcs7/pkcs7_x509.c
|
||||
src/crypto/pkcs8/p5_pbev2.c
|
||||
src/crypto/pkcs8/pkcs8.c
|
||||
src/crypto/pkcs8/pkcs8_x509.c
|
||||
src/crypto/poly1305/poly1305.c
|
||||
src/crypto/poly1305/poly1305_arm.c
|
||||
src/crypto/poly1305/poly1305_vec.c
|
||||
src/crypto/pool/pool.c
|
||||
src/crypto/rand_extra/deterministic.c
|
||||
src/crypto/rand_extra/forkunsafe.c
|
||||
src/crypto/rand_extra/getentropy.c
|
||||
src/crypto/rand_extra/ios.c
|
||||
src/crypto/rand_extra/passive.c
|
||||
src/crypto/rand_extra/rand_extra.c
|
||||
src/crypto/rand_extra/trusty.c
|
||||
src/crypto/rand_extra/windows.c
|
||||
src/crypto/rc4/rc4.c
|
||||
src/crypto/refcount.c
|
||||
src/crypto/rsa_extra/rsa_asn1.c
|
||||
src/crypto/rsa_extra/rsa_crypt.c
|
||||
src/crypto/rsa_extra/rsa_print.c
|
||||
src/crypto/siphash/siphash.c
|
||||
src/crypto/spx/address.c
|
||||
src/crypto/spx/fors.c
|
||||
src/crypto/spx/merkle.c
|
||||
src/crypto/spx/spx.c
|
||||
src/crypto/spx/spx_util.c
|
||||
src/crypto/spx/thash.c
|
||||
src/crypto/spx/wots.c
|
||||
src/crypto/stack/stack.c
|
||||
src/crypto/thread.c
|
||||
src/crypto/thread_none.c
|
||||
src/crypto/thread_pthread.c
|
||||
src/crypto/thread_win.c
|
||||
src/crypto/trust_token/pmbtoken.c
|
||||
src/crypto/trust_token/trust_token.c
|
||||
src/crypto/trust_token/voprf.c
|
||||
src/crypto/x509/a_digest.c
|
||||
src/crypto/x509/a_sign.c
|
||||
src/crypto/x509/a_verify.c
|
||||
src/crypto/x509/algorithm.c
|
||||
src/crypto/x509/asn1_gen.c
|
||||
src/crypto/x509/by_dir.c
|
||||
src/crypto/x509/by_file.c
|
||||
src/crypto/x509/i2d_pr.c
|
||||
src/crypto/x509/name_print.c
|
||||
src/crypto/x509/policy.c
|
||||
src/crypto/x509/rsa_pss.c
|
||||
src/crypto/x509/t_crl.c
|
||||
src/crypto/x509/t_req.c
|
||||
src/crypto/x509/t_x509.c
|
||||
src/crypto/x509/t_x509a.c
|
||||
src/crypto/x509/v3_akey.c
|
||||
src/crypto/x509/v3_akeya.c
|
||||
src/crypto/x509/v3_alt.c
|
||||
src/crypto/x509/v3_bcons.c
|
||||
src/crypto/x509/v3_bitst.c
|
||||
src/crypto/x509/v3_conf.c
|
||||
src/crypto/x509/v3_cpols.c
|
||||
src/crypto/x509/v3_crld.c
|
||||
src/crypto/x509/v3_enum.c
|
||||
src/crypto/x509/v3_extku.c
|
||||
src/crypto/x509/v3_genn.c
|
||||
src/crypto/x509/v3_ia5.c
|
||||
src/crypto/x509/v3_info.c
|
||||
src/crypto/x509/v3_int.c
|
||||
src/crypto/x509/v3_lib.c
|
||||
src/crypto/x509/v3_ncons.c
|
||||
src/crypto/x509/v3_ocsp.c
|
||||
src/crypto/x509/v3_pcons.c
|
||||
src/crypto/x509/v3_pmaps.c
|
||||
src/crypto/x509/v3_prn.c
|
||||
src/crypto/x509/v3_purp.c
|
||||
src/crypto/x509/v3_skey.c
|
||||
src/crypto/x509/v3_utl.c
|
||||
src/crypto/x509/x509.c
|
||||
src/crypto/x509/x509_att.c
|
||||
src/crypto/x509/x509_cmp.c
|
||||
src/crypto/x509/x509_d2.c
|
||||
src/crypto/x509/x509_def.c
|
||||
src/crypto/x509/x509_ext.c
|
||||
src/crypto/x509/x509_lu.c
|
||||
src/crypto/x509/x509_obj.c
|
||||
src/crypto/x509/x509_req.c
|
||||
src/crypto/x509/x509_set.c
|
||||
src/crypto/x509/x509_trs.c
|
||||
src/crypto/x509/x509_txt.c
|
||||
src/crypto/x509/x509_v3.c
|
||||
src/crypto/x509/x509_vfy.c
|
||||
src/crypto/x509/x509_vpm.c
|
||||
src/crypto/x509/x509cset.c
|
||||
src/crypto/x509/x509name.c
|
||||
src/crypto/x509/x509rset.c
|
||||
src/crypto/x509/x509spki.c
|
||||
src/crypto/x509/x_algor.c
|
||||
src/crypto/x509/x_all.c
|
||||
src/crypto/x509/x_attrib.c
|
||||
src/crypto/x509/x_crl.c
|
||||
src/crypto/x509/x_exten.c
|
||||
src/crypto/x509/x_name.c
|
||||
src/crypto/x509/x_pubkey.c
|
||||
src/crypto/x509/x_req.c
|
||||
src/crypto/x509/x_sig.c
|
||||
src/crypto/x509/x_spki.c
|
||||
src/crypto/x509/x_val.c
|
||||
src/crypto/x509/x_x509.c
|
||||
src/crypto/x509/x_x509a.c
|
||||
)
|
||||
|
||||
target_include_directories(crypto PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/include>)
|
||||
|
||||
add_library(
|
||||
ssl
|
||||
|
||||
src/ssl/bio_ssl.cc
|
||||
src/ssl/d1_both.cc
|
||||
src/ssl/d1_lib.cc
|
||||
src/ssl/d1_pkt.cc
|
||||
src/ssl/d1_srtp.cc
|
||||
src/ssl/dtls_method.cc
|
||||
src/ssl/dtls_record.cc
|
||||
src/ssl/encrypted_client_hello.cc
|
||||
src/ssl/extensions.cc
|
||||
src/ssl/handoff.cc
|
||||
src/ssl/handshake.cc
|
||||
src/ssl/handshake_client.cc
|
||||
src/ssl/handshake_server.cc
|
||||
src/ssl/s3_both.cc
|
||||
src/ssl/s3_lib.cc
|
||||
src/ssl/s3_pkt.cc
|
||||
src/ssl/ssl_aead_ctx.cc
|
||||
src/ssl/ssl_asn1.cc
|
||||
src/ssl/ssl_buffer.cc
|
||||
src/ssl/ssl_cert.cc
|
||||
src/ssl/ssl_cipher.cc
|
||||
src/ssl/ssl_file.cc
|
||||
src/ssl/ssl_key_share.cc
|
||||
src/ssl/ssl_lib.cc
|
||||
src/ssl/ssl_privkey.cc
|
||||
src/ssl/ssl_session.cc
|
||||
src/ssl/ssl_stat.cc
|
||||
src/ssl/ssl_transcript.cc
|
||||
src/ssl/ssl_versions.cc
|
||||
src/ssl/ssl_x509.cc
|
||||
src/ssl/t1_enc.cc
|
||||
src/ssl/tls13_both.cc
|
||||
src/ssl/tls13_client.cc
|
||||
src/ssl/tls13_enc.cc
|
||||
src/ssl/tls13_server.cc
|
||||
src/ssl/tls_method.cc
|
||||
src/ssl/tls_record.cc
|
||||
)
|
||||
|
||||
target_link_libraries(ssl crypto)
|
||||
|
||||
add_executable(
|
||||
bssl
|
||||
|
||||
src/tool/args.cc
|
||||
src/tool/ciphers.cc
|
||||
src/tool/client.cc
|
||||
src/tool/const.cc
|
||||
src/tool/digest.cc
|
||||
src/tool/fd.cc
|
||||
src/tool/file.cc
|
||||
src/tool/generate_ech.cc
|
||||
src/tool/generate_ed25519.cc
|
||||
src/tool/genrsa.cc
|
||||
src/tool/pkcs12.cc
|
||||
src/tool/rand.cc
|
||||
src/tool/server.cc
|
||||
src/tool/sign.cc
|
||||
src/tool/speed.cc
|
||||
src/tool/tool.cc
|
||||
src/tool/transport_common.cc
|
||||
)
|
||||
|
||||
target_link_libraries(bssl ssl crypto)
|
||||
|
||||
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(crypto Threads::Threads)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
target_link_libraries(crypto ws2_32)
|
||||
endif()
|
||||
|
||||
@ -21,6 +21,7 @@ record keeping.)
|
||||
27287199
|
||||
27287880
|
||||
27287883
|
||||
263291445
|
||||
|
||||
OpenSSL License
|
||||
---------------
|
||||
1
third-party/boringssl/WORKSPACE
vendored
Normal file
1
third-party/boringssl/WORKSPACE
vendored
Normal file
@ -0,0 +1 @@
|
||||
workspace(name = "boringssl")
|
||||
@ -1,21 +1,11 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
|
||||
|
||||
.section __TEXT,__const
|
||||
|
||||
.align 5
|
||||
@ -28,24 +18,12 @@ Lone:
|
||||
|
||||
.text
|
||||
|
||||
.globl _ChaCha20_ctr32
|
||||
.private_extern _ChaCha20_ctr32
|
||||
.globl _ChaCha20_ctr32_nohw
|
||||
.private_extern _ChaCha20_ctr32_nohw
|
||||
|
||||
.align 5
|
||||
_ChaCha20_ctr32:
|
||||
cbz x2,Labort
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
#else
|
||||
adrp x5,_OPENSSL_armcap_P@PAGE
|
||||
#endif
|
||||
cmp x2,#192
|
||||
b.lo Lshort
|
||||
ldr w17,[x5,_OPENSSL_armcap_P@PAGEOFF]
|
||||
tst w17,#ARMV7_NEON
|
||||
b.ne ChaCha20_neon
|
||||
|
||||
Lshort:
|
||||
_ChaCha20_ctr32_nohw:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -62,7 +40,7 @@ Lshort:
|
||||
ldp x24,x25,[x3] // load key
|
||||
ldp x26,x27,[x3,#16]
|
||||
ldp x28,x30,[x4] // load counter
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x24,x24,#32
|
||||
ror x25,x25,#32
|
||||
ror x26,x26,#32
|
||||
@ -223,7 +201,7 @@ Loop:
|
||||
add x20,x20,x21,lsl#32
|
||||
ldp x19,x21,[x1,#48]
|
||||
add x1,x1,#64
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev x5,x5
|
||||
rev x7,x7
|
||||
rev x9,x9
|
||||
@ -258,7 +236,7 @@ Loop:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
Labort:
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.align 4
|
||||
@ -279,7 +257,7 @@ Less_than_64:
|
||||
add x15,x15,x16,lsl#32
|
||||
add x17,x17,x19,lsl#32
|
||||
add x20,x20,x21,lsl#32
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev x5,x5
|
||||
rev x7,x7
|
||||
rev x9,x9
|
||||
@ -314,12 +292,16 @@ Loop_tail:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.globl _ChaCha20_ctr32_neon
|
||||
.private_extern _ChaCha20_ctr32_neon
|
||||
|
||||
.align 5
|
||||
ChaCha20_neon:
|
||||
_ChaCha20_ctr32_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -343,7 +325,7 @@ ChaCha20_neon:
|
||||
ldp x28,x30,[x4] // load counter
|
||||
ld1 {v27.4s},[x4]
|
||||
ld1 {v31.4s},[x5]
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev64 v24.4s,v24.4s
|
||||
ror x24,x24,#32
|
||||
ror x25,x25,#32
|
||||
@ -641,7 +623,7 @@ Loop_neon:
|
||||
add x20,x20,x21,lsl#32
|
||||
ldp x19,x21,[x1,#48]
|
||||
add x1,x1,#64
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev x5,x5
|
||||
rev x7,x7
|
||||
rev x9,x9
|
||||
@ -700,6 +682,7 @@ Loop_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
Ltail_neon:
|
||||
@ -720,7 +703,7 @@ Ltail_neon:
|
||||
add x20,x20,x21,lsl#32
|
||||
ldp x19,x21,[x1,#48]
|
||||
add x1,x1,#64
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev x5,x5
|
||||
rev x7,x7
|
||||
rev x9,x9
|
||||
@ -809,11 +792,13 @@ Ldone_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
ChaCha20_512_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -836,7 +821,7 @@ L512_or_more_neon:
|
||||
ldp x28,x30,[x4] // load counter
|
||||
ld1 {v27.4s},[x4]
|
||||
ld1 {v31.4s},[x5]
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev64 v24.4s,v24.4s
|
||||
ror x24,x24,#32
|
||||
ror x25,x25,#32
|
||||
@ -1349,7 +1334,7 @@ Loop_upper_neon:
|
||||
add x20,x20,x21,lsl#32
|
||||
ldp x19,x21,[x1,#48]
|
||||
add x1,x1,#64
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev x5,x5
|
||||
rev x7,x7
|
||||
rev x9,x9
|
||||
@ -1863,7 +1848,7 @@ Loop_lower_neon:
|
||||
add x1,x1,#64
|
||||
add v21.4s,v21.4s,v25.4s
|
||||
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
rev x5,x5
|
||||
rev x7,x7
|
||||
rev x9,x9
|
||||
@ -1977,6 +1962,7 @@ Ldone_512_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
3009
third-party/boringssl/apple-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-apple.S
vendored
Normal file
3009
third-party/boringssl/apple-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,17 +1,9 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ -32,6 +24,8 @@ Lrcon:
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
@ -200,6 +194,7 @@ Lenc_key_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl Lenc_key
|
||||
@ -233,6 +228,7 @@ Loop_imc:
|
||||
eor x0,x0,x0 // return value
|
||||
Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
@ -240,6 +236,7 @@ Ldec_key_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -270,6 +267,7 @@ Loop_enc:
|
||||
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -300,6 +298,8 @@ Loop_dec:
|
||||
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
@ -591,6 +591,8 @@ Lcbc_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
@ -610,20 +612,34 @@ _aes_hw_ctr32_encrypt_blocks:
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
csel x12,xzr,x12,lo
|
||||
#ifndef __ARMEB__
|
||||
|
||||
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
|
||||
// affected by silicon errata #1742098 [0] and #1655431 [1],
|
||||
// respectively, where the second instruction of an aese/aesmc
|
||||
// instruction pair may execute twice if an interrupt is taken right
|
||||
// after the first instruction consumes an input register of which a
|
||||
// single 32-bit lane has been updated the last time it was modified.
|
||||
//
|
||||
// This function uses a counter in one 32-bit lane. The vmov lines
|
||||
// could write to v1.16b and v18.16b directly, but that trips this bugs.
|
||||
// We write to v6.16b and copy to the final register as a workaround.
|
||||
//
|
||||
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
|
||||
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
|
||||
#ifndef __AARCH64EB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
add w10, w8, #1
|
||||
orr v18.16b,v0.16b,v0.16b
|
||||
add w8, w8, #2
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v1.s[3],w10
|
||||
mov v6.s[3],w10
|
||||
add w8, w8, #2
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
b.ls Lctr32_tail
|
||||
rev w12, w8
|
||||
mov v6.s[3],w12
|
||||
sub x2,x2,#3 // bias
|
||||
mov v18.s[3],w12
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
b Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
@ -650,11 +666,11 @@ Loop3x_ctr32:
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
rev w9,w9
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
@ -663,8 +679,6 @@ Loop3x_ctr32:
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
@ -679,21 +693,26 @@ Loop3x_ctr32:
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
// Note the logic to update v0.16b, v1.16b, and v1.16b is written to work
|
||||
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
|
||||
// 32-bit mode. See the comment above.
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
rev w9,w9
|
||||
mov v6.s[3], w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v0.s[3], w9
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
mov v6.s[3], w10
|
||||
rev w12,w8
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
mov v1.s[3], w10
|
||||
rev w12,w8
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
mov v6.s[3], w12
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v18.s[3], w12
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
@ -769,4 +788,4 @@ Lctr32_done:
|
||||
ret
|
||||
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
1555
third-party/boringssl/apple-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-apple.S
vendored
Normal file
1555
third-party/boringssl/apple-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,17 +1,11 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
.globl _bn_mul_mont
|
||||
@ -19,6 +13,7 @@
|
||||
|
||||
.align 5
|
||||
_bn_mul_mont:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
tst x5,#7
|
||||
b.eq __bn_sqr8x_mont
|
||||
tst x5,#3
|
||||
@ -216,11 +211,14 @@ Lcond_copy:
|
||||
mov x0,#1
|
||||
ldp x23,x24,[x29,#48]
|
||||
ldr x29,[sp],#64
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
__bn_sqr8x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
|
||||
// only from bn_mul_mont which has already signed the return address.
|
||||
cmp x1,x2
|
||||
b.ne __bn_mul4x_mont
|
||||
Lsqr8x_mont:
|
||||
@ -974,11 +972,16 @@ Lsqr8x_done:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
__bn_mul4x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
|
||||
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
|
||||
// return address.
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
@ -1412,9 +1415,11 @@ Lmul4x_done:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 4
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
89
third-party/boringssl/apple-aarch64/crypto/fipsmodule/bn-armv8-apple.S
vendored
Normal file
89
third-party/boringssl/apple-aarch64/crypto/fipsmodule/bn-armv8-apple.S
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
// size_t num);
|
||||
|
||||
.globl _bn_add_words
|
||||
.private_extern _bn_add_words
|
||||
.align 4
|
||||
_bn_add_words:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
# Clear the carry flag.
|
||||
cmn xzr, xzr
|
||||
|
||||
# aarch64 can load two registers at a time, so we do two loop iterations at
|
||||
# at a time. Split x3 = 2 * x8 + x3. This allows loop
|
||||
# operations to use CBNZ without clobbering the carry flag.
|
||||
lsr x8, x3, #1
|
||||
and x3, x3, #1
|
||||
|
||||
cbz x8, Ladd_tail
|
||||
Ladd_loop:
|
||||
ldp x4, x5, [x1], #16
|
||||
ldp x6, x7, [x2], #16
|
||||
sub x8, x8, #1
|
||||
adcs x4, x4, x6
|
||||
adcs x5, x5, x7
|
||||
stp x4, x5, [x0], #16
|
||||
cbnz x8, Ladd_loop
|
||||
|
||||
Ladd_tail:
|
||||
cbz x3, Ladd_exit
|
||||
ldr x4, [x1], #8
|
||||
ldr x6, [x2], #8
|
||||
adcs x4, x4, x6
|
||||
str x4, [x0], #8
|
||||
|
||||
Ladd_exit:
|
||||
cset x0, cs
|
||||
ret
|
||||
|
||||
|
||||
// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
// size_t num);
|
||||
|
||||
.globl _bn_sub_words
|
||||
.private_extern _bn_sub_words
|
||||
.align 4
|
||||
_bn_sub_words:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
|
||||
# so we want C = 1 here.
|
||||
cmp xzr, xzr
|
||||
|
||||
# aarch64 can load two registers at a time, so we do two loop iterations at
|
||||
# at a time. Split x3 = 2 * x8 + x3. This allows loop
|
||||
# operations to use CBNZ without clobbering the carry flag.
|
||||
lsr x8, x3, #1
|
||||
and x3, x3, #1
|
||||
|
||||
cbz x8, Lsub_tail
|
||||
Lsub_loop:
|
||||
ldp x4, x5, [x1], #16
|
||||
ldp x6, x7, [x2], #16
|
||||
sub x8, x8, #1
|
||||
sbcs x4, x4, x6
|
||||
sbcs x5, x5, x7
|
||||
stp x4, x5, [x0], #16
|
||||
cbnz x8, Lsub_loop
|
||||
|
||||
Lsub_tail:
|
||||
cbz x3, Lsub_exit
|
||||
ldr x4, [x1], #8
|
||||
ldr x6, [x2], #8
|
||||
sbcs x4, x4, x6
|
||||
str x4, [x0], #8
|
||||
|
||||
Lsub_exit:
|
||||
cset x0, cc
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
@ -1,17 +1,11 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
.globl _gcm_init_neon
|
||||
@ -19,6 +13,7 @@
|
||||
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
@ -44,6 +39,7 @@ _gcm_init_neon:
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -63,6 +59,7 @@ _gcm_gmult_neon:
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -335,4 +332,4 @@ Lmasks:
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
565
third-party/boringssl/apple-aarch64/crypto/fipsmodule/ghashv8-armv8-apple.S
vendored
Normal file
565
third-party/boringssl/apple-aarch64/crypto/fipsmodule/ghashv8-armv8-apple.S
vendored
Normal file
@ -0,0 +1,565 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
|
||||
//calculate H^3 and H^4
|
||||
pmull v0.1q,v20.1d, v22.1d
|
||||
pmull v5.1q,v22.1d,v22.1d
|
||||
pmull2 v2.1q,v20.2d, v22.2d
|
||||
pmull2 v7.1q,v22.2d,v22.2d
|
||||
pmull v1.1q,v16.1d,v17.1d
|
||||
pmull v6.1q,v17.1d,v17.1d
|
||||
|
||||
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
ext v17.16b,v5.16b,v7.16b,#8
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v16.16b
|
||||
eor v4.16b,v5.16b,v7.16b
|
||||
eor v6.16b,v6.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
eor v6.16b,v6.16b,v4.16b
|
||||
pmull v4.1q,v5.1d,v19.1d
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v7.d[0],v6.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ins v6.d[1],v5.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v4.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
ext v4.16b,v5.16b,v5.16b,#8
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v5.1q,v5.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v4.16b,v4.16b,v7.16b
|
||||
eor v20.16b, v0.16b,v18.16b //H^3
|
||||
eor v22.16b,v5.16b,v4.16b //H^4
|
||||
|
||||
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
|
||||
ext v17.16b,v22.16b,v22.16b,#8
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
|
||||
ret
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cmp x3,#64
|
||||
b.hs Lgcm_ghash_v8_4x
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq Ldone_v8 //is x3 zero?
|
||||
Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
Ldone_v8:
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
|
||||
.align 4
|
||||
gcm_ghash_v8_4x:
|
||||
Lgcm_ghash_v8_4x:
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
|
||||
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v0.16b,v0.16b
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v7.16b,v7.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
ext v25.16b,v7.16b,v7.16b,#8
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
|
||||
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
|
||||
eor v7.16b,v7.16b,v25.16b
|
||||
pmull2 v31.1q,v20.2d,v25.2d
|
||||
pmull v30.1q,v21.1d,v7.1d
|
||||
|
||||
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
pmull2 v24.1q,v22.2d,v24.2d
|
||||
pmull2 v6.1q,v21.2d,v6.2d
|
||||
|
||||
eor v29.16b,v29.16b,v16.16b
|
||||
eor v31.16b,v31.16b,v24.16b
|
||||
eor v30.16b,v30.16b,v6.16b
|
||||
|
||||
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
pmull2 v23.1q,v26.2d,v23.2d
|
||||
pmull v5.1q,v27.1d,v5.1d
|
||||
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
subs x3,x3,#128
|
||||
b.lo Ltail4x
|
||||
|
||||
b Loop4x
|
||||
|
||||
.align 4
|
||||
Loop4x:
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v7.16b,v7.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v28.2d,v3.2d
|
||||
ext v25.16b,v7.16b,v7.16b,#8
|
||||
pmull2 v1.1q,v27.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
|
||||
eor v7.16b,v7.16b,v25.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
pmull2 v31.1q,v20.2d,v25.2d
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v30.1q,v21.1d,v7.1d
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
pmull2 v24.1q,v22.2d,v24.2d
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull2 v6.1q,v21.2d,v6.2d
|
||||
|
||||
eor v29.16b,v29.16b,v16.16b
|
||||
eor v31.16b,v31.16b,v24.16b
|
||||
eor v30.16b,v30.16b,v6.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
pmull2 v23.1q,v26.2d,v23.2d
|
||||
pmull v5.1q,v27.1d,v5.1d
|
||||
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
subs x3,x3,#64
|
||||
b.hs Loop4x
|
||||
|
||||
Ltail4x:
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v28.2d,v3.2d
|
||||
pmull2 v1.1q,v27.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
|
||||
adds x3,x3,#64
|
||||
b.eq Ldone4x
|
||||
|
||||
cmp x3,#32
|
||||
b.lo Lone
|
||||
b.eq Ltwo
|
||||
Lthree:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d,v5.2d,v6.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
pmull v29.1q,v20.1d,v24.1d //H·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
pmull2 v31.1q,v20.2d,v24.2d
|
||||
pmull v30.1q,v21.1d,v6.1d
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
pmull2 v23.1q,v22.2d,v23.2d
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
pmull2 v5.1q,v21.2d,v5.2d
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v26.2d,v3.2d
|
||||
pmull v1.1q,v27.1d,v16.1d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
b Ldone4x
|
||||
|
||||
.align 4
|
||||
Ltwo:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d,v5.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
pmull v29.1q,v20.1d,v23.1d //H·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull2 v31.1q,v20.2d,v23.2d
|
||||
pmull v30.1q,v21.1d,v5.1d
|
||||
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v22.2d,v3.2d
|
||||
pmull2 v1.1q,v21.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
b Ldone4x
|
||||
|
||||
.align 4
|
||||
Lone:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v20.2d,v3.2d
|
||||
pmull v1.1q,v21.1d,v16.1d
|
||||
|
||||
Ldone4x:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
1726
third-party/boringssl/apple-aarch64/crypto/fipsmodule/p256-armv8-asm-apple.S
vendored
Normal file
1726
third-party/boringssl/apple-aarch64/crypto/fipsmodule/p256-armv8-asm-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
309
third-party/boringssl/apple-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-apple.S
vendored
Normal file
309
third-party/boringssl/apple-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-apple.S
vendored
Normal file
@ -0,0 +1,309 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include "openssl/arm_arch.h"
|
||||
|
||||
.text
|
||||
.globl _beeu_mod_inverse_vartime
|
||||
.private_extern _beeu_mod_inverse_vartime
|
||||
|
||||
.align 4
|
||||
_beeu_mod_inverse_vartime:
|
||||
// Reserve enough space for 14 8-byte registers on the stack
|
||||
// in the first stp call for x29, x30.
|
||||
// Then store the remaining callee-saved registers.
|
||||
//
|
||||
// | x29 | x30 | x19 | x20 | ... | x27 | x28 | x0 | x2 |
|
||||
// ^ ^
|
||||
// sp <------------------- 112 bytes ----------------> old sp
|
||||
// x29 (FP)
|
||||
//
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-112]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
stp x25,x26,[sp,#64]
|
||||
stp x27,x28,[sp,#80]
|
||||
stp x0,x2,[sp,#96]
|
||||
|
||||
// B = b3..b0 := a
|
||||
ldp x25,x26,[x1]
|
||||
ldp x27,x28,[x1,#16]
|
||||
|
||||
// n3..n0 := n
|
||||
// Note: the value of input params are changed in the following.
|
||||
ldp x0,x1,[x2]
|
||||
ldp x2,x30,[x2,#16]
|
||||
|
||||
// A = a3..a0 := n
|
||||
mov x21, x0
|
||||
mov x22, x1
|
||||
mov x23, x2
|
||||
mov x24, x30
|
||||
|
||||
// X = x4..x0 := 1
|
||||
mov x3, #1
|
||||
eor x4, x4, x4
|
||||
eor x5, x5, x5
|
||||
eor x6, x6, x6
|
||||
eor x7, x7, x7
|
||||
|
||||
// Y = y4..y0 := 0
|
||||
eor x8, x8, x8
|
||||
eor x9, x9, x9
|
||||
eor x10, x10, x10
|
||||
eor x11, x11, x11
|
||||
eor x12, x12, x12
|
||||
|
||||
Lbeeu_loop:
|
||||
// if B == 0, jump to .Lbeeu_loop_end
|
||||
orr x14, x25, x26
|
||||
orr x14, x14, x27
|
||||
|
||||
// reverse the bit order of x25. This is needed for clz after this macro
|
||||
rbit x15, x25
|
||||
|
||||
orr x14, x14, x28
|
||||
cbz x14,Lbeeu_loop_end
|
||||
|
||||
|
||||
// 0 < B < |n|,
|
||||
// 0 < A <= |n|,
|
||||
// (1) X*a == B (mod |n|),
|
||||
// (2) (-1)*Y*a == A (mod |n|)
|
||||
|
||||
// Now divide B by the maximum possible power of two in the
|
||||
// integers, and divide X by the same value mod |n|.
|
||||
// When we're done, (1) still holds.
|
||||
|
||||
// shift := number of trailing 0s in x25
|
||||
// ( = number of leading 0s in x15; see the "rbit" instruction in TEST_B_ZERO)
|
||||
clz x13, x15
|
||||
|
||||
// If there is no shift, goto shift_A_Y
|
||||
cbz x13, Lbeeu_shift_A_Y
|
||||
|
||||
// Shift B right by "x13" bits
|
||||
neg x14, x13
|
||||
lsr x25, x25, x13
|
||||
lsl x15, x26, x14
|
||||
|
||||
lsr x26, x26, x13
|
||||
lsl x19, x27, x14
|
||||
|
||||
orr x25, x25, x15
|
||||
|
||||
lsr x27, x27, x13
|
||||
lsl x20, x28, x14
|
||||
|
||||
orr x26, x26, x19
|
||||
|
||||
lsr x28, x28, x13
|
||||
|
||||
orr x27, x27, x20
|
||||
|
||||
|
||||
// Shift X right by "x13" bits, adding n whenever X becomes odd.
|
||||
// x13--;
|
||||
// x14 := 0; needed in the addition to the most significant word in SHIFT1
|
||||
eor x14, x14, x14
|
||||
Lbeeu_shift_loop_X:
|
||||
tbz x3, #0, Lshift1_0
|
||||
adds x3, x3, x0
|
||||
adcs x4, x4, x1
|
||||
adcs x5, x5, x2
|
||||
adcs x6, x6, x30
|
||||
adc x7, x7, x14
|
||||
Lshift1_0:
|
||||
// var0 := [var1|var0]<64..1>;
|
||||
// i.e. concatenate var1 and var0,
|
||||
// extract bits <64..1> from the resulting 128-bit value
|
||||
// and put them in var0
|
||||
extr x3, x4, x3, #1
|
||||
extr x4, x5, x4, #1
|
||||
extr x5, x6, x5, #1
|
||||
extr x6, x7, x6, #1
|
||||
lsr x7, x7, #1
|
||||
|
||||
subs x13, x13, #1
|
||||
bne Lbeeu_shift_loop_X
|
||||
|
||||
// Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl
|
||||
// with the following differences:
|
||||
// - "x13" is set directly to the number of trailing 0s in B
|
||||
// (using rbit and clz instructions)
|
||||
// - The loop is only used to call SHIFT1(X)
|
||||
// and x13 is decreased while executing the X loop.
|
||||
// - SHIFT256(B, x13) is performed before right-shifting X; they are independent
|
||||
|
||||
Lbeeu_shift_A_Y:
|
||||
// Same for A and Y.
|
||||
// Afterwards, (2) still holds.
|
||||
// Reverse the bit order of x21
|
||||
// x13 := number of trailing 0s in x21 (= number of leading 0s in x15)
|
||||
rbit x15, x21
|
||||
clz x13, x15
|
||||
|
||||
// If there is no shift, goto |B-A|, X+Y update
|
||||
cbz x13, Lbeeu_update_B_X_or_A_Y
|
||||
|
||||
// Shift A right by "x13" bits
|
||||
neg x14, x13
|
||||
lsr x21, x21, x13
|
||||
lsl x15, x22, x14
|
||||
|
||||
lsr x22, x22, x13
|
||||
lsl x19, x23, x14
|
||||
|
||||
orr x21, x21, x15
|
||||
|
||||
lsr x23, x23, x13
|
||||
lsl x20, x24, x14
|
||||
|
||||
orr x22, x22, x19
|
||||
|
||||
lsr x24, x24, x13
|
||||
|
||||
orr x23, x23, x20
|
||||
|
||||
|
||||
// Shift Y right by "x13" bits, adding n whenever Y becomes odd.
|
||||
// x13--;
|
||||
// x14 := 0; needed in the addition to the most significant word in SHIFT1
|
||||
eor x14, x14, x14
|
||||
Lbeeu_shift_loop_Y:
|
||||
tbz x8, #0, Lshift1_1
|
||||
adds x8, x8, x0
|
||||
adcs x9, x9, x1
|
||||
adcs x10, x10, x2
|
||||
adcs x11, x11, x30
|
||||
adc x12, x12, x14
|
||||
Lshift1_1:
|
||||
// var0 := [var1|var0]<64..1>;
|
||||
// i.e. concatenate var1 and var0,
|
||||
// extract bits <64..1> from the resulting 128-bit value
|
||||
// and put them in var0
|
||||
extr x8, x9, x8, #1
|
||||
extr x9, x10, x9, #1
|
||||
extr x10, x11, x10, #1
|
||||
extr x11, x12, x11, #1
|
||||
lsr x12, x12, #1
|
||||
|
||||
subs x13, x13, #1
|
||||
bne Lbeeu_shift_loop_Y
|
||||
|
||||
Lbeeu_update_B_X_or_A_Y:
|
||||
// Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow)
|
||||
// Note: this is a case of unsigned arithmetic, where T fits in 4 64-bit words
|
||||
// without taking a sign bit if generated. The lack of a carry would
|
||||
// indicate a negative result. See, for example,
|
||||
// https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes
|
||||
subs x14, x25, x21
|
||||
sbcs x15, x26, x22
|
||||
sbcs x19, x27, x23
|
||||
sbcs x20, x28, x24
|
||||
bcs Lbeeu_B_greater_than_A
|
||||
|
||||
// Else A > B =>
|
||||
// A := A - B; Y := Y + X; goto beginning of the loop
|
||||
subs x21, x21, x25
|
||||
sbcs x22, x22, x26
|
||||
sbcs x23, x23, x27
|
||||
sbcs x24, x24, x28
|
||||
|
||||
adds x8, x8, x3
|
||||
adcs x9, x9, x4
|
||||
adcs x10, x10, x5
|
||||
adcs x11, x11, x6
|
||||
adc x12, x12, x7
|
||||
b Lbeeu_loop
|
||||
|
||||
Lbeeu_B_greater_than_A:
|
||||
// Continue with B > A =>
|
||||
// B := B - A; X := X + Y; goto beginning of the loop
|
||||
mov x25, x14
|
||||
mov x26, x15
|
||||
mov x27, x19
|
||||
mov x28, x20
|
||||
|
||||
adds x3, x3, x8
|
||||
adcs x4, x4, x9
|
||||
adcs x5, x5, x10
|
||||
adcs x6, x6, x11
|
||||
adc x7, x7, x12
|
||||
b Lbeeu_loop
|
||||
|
||||
Lbeeu_loop_end:
|
||||
// The Euclid's algorithm loop ends when A == gcd(a,n);
|
||||
// this would be 1, when a and n are co-prime (i.e. do not have a common factor).
|
||||
// Since (-1)*Y*a == A (mod |n|), Y>0
|
||||
// then out = -Y mod n
|
||||
|
||||
// Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|)
|
||||
// Is A-1 == 0?
|
||||
// If not, fail.
|
||||
sub x14, x21, #1
|
||||
orr x14, x14, x22
|
||||
orr x14, x14, x23
|
||||
orr x14, x14, x24
|
||||
cbnz x14, Lbeeu_err
|
||||
|
||||
// If Y>n ==> Y:=Y-n
|
||||
Lbeeu_reduction_loop:
|
||||
// x_i := y_i - n_i (X is no longer needed, use it as temp)
|
||||
// (x14 = 0 from above)
|
||||
subs x3, x8, x0
|
||||
sbcs x4, x9, x1
|
||||
sbcs x5, x10, x2
|
||||
sbcs x6, x11, x30
|
||||
sbcs x7, x12, x14
|
||||
|
||||
// If result is non-negative (i.e., cs = carry set = no borrow),
|
||||
// y_i := x_i; goto reduce again
|
||||
// else
|
||||
// y_i := y_i; continue
|
||||
csel x8, x3, x8, cs
|
||||
csel x9, x4, x9, cs
|
||||
csel x10, x5, x10, cs
|
||||
csel x11, x6, x11, cs
|
||||
csel x12, x7, x12, cs
|
||||
bcs Lbeeu_reduction_loop
|
||||
|
||||
// Now Y < n (Y cannot be equal to n, since the inverse cannot be 0)
|
||||
// out = -Y = n-Y
|
||||
subs x8, x0, x8
|
||||
sbcs x9, x1, x9
|
||||
sbcs x10, x2, x10
|
||||
sbcs x11, x30, x11
|
||||
|
||||
// Save Y in output (out (x0) was saved on the stack)
|
||||
ldr x3, [sp,#96]
|
||||
stp x8, x9, [x3]
|
||||
stp x10, x11, [x3,#16]
|
||||
// return 1 (success)
|
||||
mov x0, #1
|
||||
b Lbeeu_finish
|
||||
|
||||
Lbeeu_err:
|
||||
// return 0 (error)
|
||||
eor x0, x0, x0
|
||||
|
||||
Lbeeu_finish:
|
||||
// Restore callee-saved registers, except x0, x2
|
||||
add sp,x29,#0
|
||||
ldp x19,x20,[sp,#16]
|
||||
ldp x21,x22,[sp,#32]
|
||||
ldp x23,x24,[sp,#48]
|
||||
ldp x25,x26,[sp,#64]
|
||||
ldp x27,x28,[sp,#80]
|
||||
ldp x29,x30,[sp],#112
|
||||
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
@ -1,35 +1,20 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
|
||||
.globl _sha1_block_data_order
|
||||
.private_extern _sha1_block_data_order
|
||||
.globl _sha1_block_data_order_nohw
|
||||
.private_extern _sha1_block_data_order_nohw
|
||||
|
||||
.align 6
|
||||
_sha1_block_data_order:
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
#else
|
||||
adrp x16,_OPENSSL_armcap_P@PAGE
|
||||
#endif
|
||||
ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF]
|
||||
tst w16,#ARMV8_SHA1
|
||||
b.ne Lv8_entry
|
||||
_sha1_block_data_order_nohw:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
@ -48,7 +33,7 @@ Loop:
|
||||
movz w28,#0x7999
|
||||
sub x2,x2,#1
|
||||
movk w28,#0x5a82,lsl#16
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x3,x3,#32
|
||||
#else
|
||||
rev32 x3,x3
|
||||
@ -66,7 +51,7 @@ Loop:
|
||||
ror w21,w21,#2
|
||||
add w23,w23,w4 // future e+=X[i]
|
||||
add w24,w24,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x5,x5,#32
|
||||
#else
|
||||
rev32 x5,x5
|
||||
@ -91,7 +76,7 @@ Loop:
|
||||
ror w24,w24,#2
|
||||
add w21,w21,w6 // future e+=X[i]
|
||||
add w22,w22,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x7,x7,#32
|
||||
#else
|
||||
rev32 x7,x7
|
||||
@ -116,7 +101,7 @@ Loop:
|
||||
ror w22,w22,#2
|
||||
add w24,w24,w8 // future e+=X[i]
|
||||
add w20,w20,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x9,x9,#32
|
||||
#else
|
||||
rev32 x9,x9
|
||||
@ -141,7 +126,7 @@ Loop:
|
||||
ror w20,w20,#2
|
||||
add w22,w22,w10 // future e+=X[i]
|
||||
add w23,w23,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x11,x11,#32
|
||||
#else
|
||||
rev32 x11,x11
|
||||
@ -166,7 +151,7 @@ Loop:
|
||||
ror w23,w23,#2
|
||||
add w20,w20,w12 // future e+=X[i]
|
||||
add w21,w21,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x13,x13,#32
|
||||
#else
|
||||
rev32 x13,x13
|
||||
@ -191,7 +176,7 @@ Loop:
|
||||
ror w21,w21,#2
|
||||
add w23,w23,w14 // future e+=X[i]
|
||||
add w24,w24,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x15,x15,#32
|
||||
#else
|
||||
rev32 x15,x15
|
||||
@ -216,7 +201,7 @@ Loop:
|
||||
ror w24,w24,#2
|
||||
add w21,w21,w16 // future e+=X[i]
|
||||
add w22,w22,w25 // e+=F(b,c,d)
|
||||
#ifdef __ARMEB__
|
||||
#ifdef __AARCH64EB__
|
||||
ror x17,x17,#32
|
||||
#else
|
||||
rev32 x17,x17
|
||||
@ -1086,10 +1071,13 @@ Loop:
|
||||
ldr x29,[sp],#96
|
||||
ret
|
||||
|
||||
.globl _sha1_block_data_order_hw
|
||||
.private_extern _sha1_block_data_order_hw
|
||||
|
||||
.align 6
|
||||
sha1_block_armv8:
|
||||
Lv8_entry:
|
||||
_sha1_block_data_order_hw:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1227,6 +1215,4 @@ Lconst:
|
||||
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
.comm _OPENSSL_armcap_P,4,4
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
@ -1,18 +1,10 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
// this file except in compliance with the License. You can obtain a copy
|
||||
@ -40,6 +32,7 @@
|
||||
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
|
||||
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
|
||||
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
|
||||
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
|
||||
//
|
||||
// (*) Software SHA256 results are of lesser relevance, presented
|
||||
// mostly for informational purposes.
|
||||
@ -48,7 +41,7 @@
|
||||
// on Cortex-A53 (or by 4 cycles per round).
|
||||
// (***) Super-impressive coefficients over gcc-generated code are
|
||||
// indication of some compiler "pathology", most notably code
|
||||
// generated with -mgeneral-regs-only is significanty faster
|
||||
// generated with -mgeneral-regs-only is significantly faster
|
||||
// and the gap is only 40-90%.
|
||||
|
||||
#ifndef __KERNEL__
|
||||
@ -57,22 +50,12 @@
|
||||
|
||||
.text
|
||||
|
||||
|
||||
.globl _sha256_block_data_order
|
||||
.private_extern _sha256_block_data_order
|
||||
.globl _sha256_block_data_order_nohw
|
||||
.private_extern _sha256_block_data_order_nohw
|
||||
|
||||
.align 6
|
||||
_sha256_block_data_order:
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
#else
|
||||
adrp x16,_OPENSSL_armcap_P@PAGE
|
||||
#endif
|
||||
ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF]
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne Lv8_entry
|
||||
#endif
|
||||
_sha256_block_data_order_nohw:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -97,7 +80,7 @@ Loop:
|
||||
ldr w19,[x30],#4 // *K++
|
||||
eor w28,w21,w22 // magic seed
|
||||
str x1,[x29,#112]
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w3,w3 // 0
|
||||
#endif
|
||||
ror w16,w24,#6
|
||||
@ -120,7 +103,7 @@ Loop:
|
||||
add w27,w27,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w27,w27,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w4,w4 // 1
|
||||
#endif
|
||||
ldp w5,w6,[x1],#2*4
|
||||
@ -145,7 +128,7 @@ Loop:
|
||||
add w26,w26,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w26,w26,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w5,w5 // 2
|
||||
#endif
|
||||
add w26,w26,w17 // h+=Sigma0(a)
|
||||
@ -169,7 +152,7 @@ Loop:
|
||||
add w25,w25,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w25,w25,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w6,w6 // 3
|
||||
#endif
|
||||
ldp w7,w8,[x1],#2*4
|
||||
@ -194,7 +177,7 @@ Loop:
|
||||
add w24,w24,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w24,w24,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w7,w7 // 4
|
||||
#endif
|
||||
add w24,w24,w17 // h+=Sigma0(a)
|
||||
@ -218,7 +201,7 @@ Loop:
|
||||
add w23,w23,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w23,w23,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w8,w8 // 5
|
||||
#endif
|
||||
ldp w9,w10,[x1],#2*4
|
||||
@ -243,7 +226,7 @@ Loop:
|
||||
add w22,w22,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w22,w22,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w9,w9 // 6
|
||||
#endif
|
||||
add w22,w22,w17 // h+=Sigma0(a)
|
||||
@ -267,7 +250,7 @@ Loop:
|
||||
add w21,w21,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w21,w21,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w10,w10 // 7
|
||||
#endif
|
||||
ldp w11,w12,[x1],#2*4
|
||||
@ -292,7 +275,7 @@ Loop:
|
||||
add w20,w20,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w20,w20,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w11,w11 // 8
|
||||
#endif
|
||||
add w20,w20,w17 // h+=Sigma0(a)
|
||||
@ -316,7 +299,7 @@ Loop:
|
||||
add w27,w27,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w27,w27,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w12,w12 // 9
|
||||
#endif
|
||||
ldp w13,w14,[x1],#2*4
|
||||
@ -341,7 +324,7 @@ Loop:
|
||||
add w26,w26,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w26,w26,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w13,w13 // 10
|
||||
#endif
|
||||
add w26,w26,w17 // h+=Sigma0(a)
|
||||
@ -365,7 +348,7 @@ Loop:
|
||||
add w25,w25,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w25,w25,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w14,w14 // 11
|
||||
#endif
|
||||
ldp w15,w0,[x1],#2*4
|
||||
@ -391,7 +374,7 @@ Loop:
|
||||
add w24,w24,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w24,w24,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w15,w15 // 12
|
||||
#endif
|
||||
add w24,w24,w17 // h+=Sigma0(a)
|
||||
@ -416,7 +399,7 @@ Loop:
|
||||
add w23,w23,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w23,w23,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w0,w0 // 13
|
||||
#endif
|
||||
ldp w1,w2,[x1]
|
||||
@ -442,7 +425,7 @@ Loop:
|
||||
add w22,w22,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w22,w22,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w1,w1 // 14
|
||||
#endif
|
||||
ldr w6,[sp,#12]
|
||||
@ -468,7 +451,7 @@ Loop:
|
||||
add w21,w21,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w21,w21,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w2,w2 // 15
|
||||
#endif
|
||||
ldr w7,[sp,#0]
|
||||
@ -1033,6 +1016,7 @@ Loop_16_xx:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1063,10 +1047,13 @@ LK256:
|
||||
.align 2
|
||||
.text
|
||||
#ifndef __KERNEL__
|
||||
.globl _sha256_block_data_order_hw
|
||||
.private_extern _sha256_block_data_order_hw
|
||||
|
||||
.align 6
|
||||
sha256_block_armv8:
|
||||
Lv8_entry:
|
||||
_sha256_block_data_order_hw:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1203,8 +1190,4 @@ Loop_hw:
|
||||
ret
|
||||
|
||||
#endif
|
||||
#ifndef __KERNEL__
|
||||
.comm _OPENSSL_armcap_P,4,4
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
@ -1,18 +1,10 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
// this file except in compliance with the License. You can obtain a copy
|
||||
@ -40,6 +32,7 @@
|
||||
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
|
||||
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
|
||||
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
|
||||
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
|
||||
//
|
||||
// (*) Software SHA256 results are of lesser relevance, presented
|
||||
// mostly for informational purposes.
|
||||
@ -48,7 +41,7 @@
|
||||
// on Cortex-A53 (or by 4 cycles per round).
|
||||
// (***) Super-impressive coefficients over gcc-generated code are
|
||||
// indication of some compiler "pathology", most notably code
|
||||
// generated with -mgeneral-regs-only is significanty faster
|
||||
// generated with -mgeneral-regs-only is significantly faster
|
||||
// and the gap is only 40-90%.
|
||||
|
||||
#ifndef __KERNEL__
|
||||
@ -57,12 +50,12 @@
|
||||
|
||||
.text
|
||||
|
||||
|
||||
.globl _sha512_block_data_order
|
||||
.private_extern _sha512_block_data_order
|
||||
.globl _sha512_block_data_order_nohw
|
||||
.private_extern _sha512_block_data_order_nohw
|
||||
|
||||
.align 6
|
||||
_sha512_block_data_order:
|
||||
_sha512_block_data_order_nohw:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -87,7 +80,7 @@ Loop:
|
||||
ldr x19,[x30],#8 // *K++
|
||||
eor x28,x21,x22 // magic seed
|
||||
str x1,[x29,#112]
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x3,x3 // 0
|
||||
#endif
|
||||
ror x16,x24,#14
|
||||
@ -110,7 +103,7 @@ Loop:
|
||||
add x27,x27,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x27,x27,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x4,x4 // 1
|
||||
#endif
|
||||
ldp x5,x6,[x1],#2*8
|
||||
@ -135,7 +128,7 @@ Loop:
|
||||
add x26,x26,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x26,x26,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x5,x5 // 2
|
||||
#endif
|
||||
add x26,x26,x17 // h+=Sigma0(a)
|
||||
@ -159,7 +152,7 @@ Loop:
|
||||
add x25,x25,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x25,x25,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x6,x6 // 3
|
||||
#endif
|
||||
ldp x7,x8,[x1],#2*8
|
||||
@ -184,7 +177,7 @@ Loop:
|
||||
add x24,x24,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x24,x24,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x7,x7 // 4
|
||||
#endif
|
||||
add x24,x24,x17 // h+=Sigma0(a)
|
||||
@ -208,7 +201,7 @@ Loop:
|
||||
add x23,x23,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x23,x23,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x8,x8 // 5
|
||||
#endif
|
||||
ldp x9,x10,[x1],#2*8
|
||||
@ -233,7 +226,7 @@ Loop:
|
||||
add x22,x22,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x22,x22,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x9,x9 // 6
|
||||
#endif
|
||||
add x22,x22,x17 // h+=Sigma0(a)
|
||||
@ -257,7 +250,7 @@ Loop:
|
||||
add x21,x21,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x21,x21,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x10,x10 // 7
|
||||
#endif
|
||||
ldp x11,x12,[x1],#2*8
|
||||
@ -282,7 +275,7 @@ Loop:
|
||||
add x20,x20,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x20,x20,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x11,x11 // 8
|
||||
#endif
|
||||
add x20,x20,x17 // h+=Sigma0(a)
|
||||
@ -306,7 +299,7 @@ Loop:
|
||||
add x27,x27,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x27,x27,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x12,x12 // 9
|
||||
#endif
|
||||
ldp x13,x14,[x1],#2*8
|
||||
@ -331,7 +324,7 @@ Loop:
|
||||
add x26,x26,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x26,x26,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x13,x13 // 10
|
||||
#endif
|
||||
add x26,x26,x17 // h+=Sigma0(a)
|
||||
@ -355,7 +348,7 @@ Loop:
|
||||
add x25,x25,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x25,x25,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x14,x14 // 11
|
||||
#endif
|
||||
ldp x15,x0,[x1],#2*8
|
||||
@ -381,7 +374,7 @@ Loop:
|
||||
add x24,x24,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x24,x24,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x15,x15 // 12
|
||||
#endif
|
||||
add x24,x24,x17 // h+=Sigma0(a)
|
||||
@ -406,7 +399,7 @@ Loop:
|
||||
add x23,x23,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x23,x23,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x0,x0 // 13
|
||||
#endif
|
||||
ldp x1,x2,[x1]
|
||||
@ -432,7 +425,7 @@ Loop:
|
||||
add x22,x22,x19 // h+=Maj(a,b,c)
|
||||
ldr x19,[x30],#8 // *K++, x28 in next round
|
||||
//add x22,x22,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x1,x1 // 14
|
||||
#endif
|
||||
ldr x6,[sp,#24]
|
||||
@ -458,7 +451,7 @@ Loop:
|
||||
add x21,x21,x28 // h+=Maj(a,b,c)
|
||||
ldr x28,[x30],#8 // *K++, x19 in next round
|
||||
//add x21,x21,x17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev x2,x2 // 15
|
||||
#endif
|
||||
ldr x7,[sp,#0]
|
||||
@ -1023,6 +1016,7 @@ Loop_16_xx:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1075,8 +1069,528 @@ LK512:
|
||||
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
.text
|
||||
#ifndef __KERNEL__
|
||||
.comm _OPENSSL_armcap_P,4,4
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
.globl _sha512_block_data_order_hw
|
||||
.private_extern _sha512_block_data_order_hw
|
||||
|
||||
.align 6
|
||||
_sha512_block_data_order_hw:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
|
||||
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
|
||||
|
||||
ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
|
||||
adrp x3,LK512@PAGE
|
||||
add x3,x3,LK512@PAGEOFF
|
||||
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v17.16b,v17.16b
|
||||
rev64 v18.16b,v18.16b
|
||||
rev64 v19.16b,v19.16b
|
||||
rev64 v20.16b,v20.16b
|
||||
rev64 v21.16b,v21.16b
|
||||
rev64 v22.16b,v22.16b
|
||||
rev64 v23.16b,v23.16b
|
||||
b Loop_hw
|
||||
|
||||
.align 4
|
||||
Loop_hw:
|
||||
ld1 {v24.2d},[x3],#16
|
||||
subs x2,x2,#1
|
||||
sub x4,x1,#128
|
||||
orr v26.16b,v0.16b,v0.16b // offload
|
||||
orr v27.16b,v1.16b,v1.16b
|
||||
orr v28.16b,v2.16b,v2.16b
|
||||
orr v29.16b,v3.16b,v3.16b
|
||||
csel x1,x1,x4,ne // conditional rewind
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v16.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
rev64 v16.16b,v16.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
ld1 {v24.2d},[x3],#16
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v17.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
rev64 v17.16b,v17.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v18.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
rev64 v18.16b,v18.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
ld1 {v24.2d},[x3],#16
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v19.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
rev64 v19.16b,v19.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v20.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
rev64 v20.16b,v20.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
ld1 {v24.2d},[x3],#16
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v21.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
rev64 v21.16b,v21.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v22.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
rev64 v22.16b,v22.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
sub x3,x3,#80*8 // rewind
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v23.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
rev64 v23.16b,v23.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v0.2d,v0.2d,v26.2d // accumulate
|
||||
add v1.2d,v1.2d,v27.2d
|
||||
add v2.2d,v2.2d,v28.2d
|
||||
add v3.2d,v3.2d,v29.2d
|
||||
|
||||
cbnz x2,Loop_hw
|
||||
|
||||
st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context
|
||||
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
@ -1,17 +1,11 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.section __TEXT,__const
|
||||
|
||||
|
||||
@ -214,6 +208,7 @@ Lenc_entry:
|
||||
|
||||
.align 4
|
||||
_vpaes_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -223,6 +218,7 @@ _vpaes_encrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -451,6 +447,7 @@ Ldec_entry:
|
||||
|
||||
.align 4
|
||||
_vpaes_decrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -460,6 +457,7 @@ _vpaes_decrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -629,6 +627,7 @@ _vpaes_key_preheat:
|
||||
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -798,6 +797,7 @@ Lschedule_mangle_last_dec:
|
||||
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
|
||||
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
|
||||
ldp x29, x30, [sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1000,7 +1000,7 @@ Lschedule_mangle_dec:
|
||||
|
||||
Lschedule_mangle_both:
|
||||
tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3
|
||||
add x8, x8, #64-16 // add $-16, %r8
|
||||
add x8, x8, #48 // add $-16, %r8
|
||||
and x8, x8, #~(1<<6) // and $0x30, %r8
|
||||
st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx)
|
||||
ret
|
||||
@ -1011,6 +1011,7 @@ Lschedule_mangle_both:
|
||||
|
||||
.align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1026,6 +1027,7 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1034,6 +1036,7 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
.align 4
|
||||
_vpaes_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1053,6 +1056,7 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _vpaes_cbc_encrypt
|
||||
@ -1060,6 +1064,7 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
.align 4
|
||||
_vpaes_cbc_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
cbz x2, Lcbc_abort
|
||||
cmp w5, #0 // check direction
|
||||
b.eq vpaes_cbc_decrypt
|
||||
@ -1087,12 +1092,15 @@ Lcbc_enc_loop:
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
Lcbc_abort:
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.align 4
|
||||
vpaes_cbc_decrypt:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
|
||||
// only from vpaes_cbc_encrypt which has already signed the return address.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1134,6 +1142,7 @@ Lcbc_dec_done:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _vpaes_ctr32_encrypt_blocks
|
||||
@ -1141,6 +1150,7 @@ Lcbc_dec_done:
|
||||
|
||||
.align 4
|
||||
_vpaes_ctr32_encrypt_blocks:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1208,6 +1218,7 @@ Lctr32_done:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
@ -1,17 +1,11 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ -25,6 +19,8 @@
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
@ -127,6 +123,7 @@ Lx29_ok:
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -134,6 +131,7 @@ Lx29_ok:
|
||||
.private_extern _abi_test_clobber_x0
|
||||
.align 4
|
||||
_abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
|
||||
@ -142,6 +140,7 @@ _abi_test_clobber_x0:
|
||||
.private_extern _abi_test_clobber_x1
|
||||
.align 4
|
||||
_abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
|
||||
@ -150,6 +149,7 @@ _abi_test_clobber_x1:
|
||||
.private_extern _abi_test_clobber_x2
|
||||
.align 4
|
||||
_abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
|
||||
@ -158,6 +158,7 @@ _abi_test_clobber_x2:
|
||||
.private_extern _abi_test_clobber_x3
|
||||
.align 4
|
||||
_abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
|
||||
@ -166,6 +167,7 @@ _abi_test_clobber_x3:
|
||||
.private_extern _abi_test_clobber_x4
|
||||
.align 4
|
||||
_abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
|
||||
@ -174,6 +176,7 @@ _abi_test_clobber_x4:
|
||||
.private_extern _abi_test_clobber_x5
|
||||
.align 4
|
||||
_abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
|
||||
@ -182,6 +185,7 @@ _abi_test_clobber_x5:
|
||||
.private_extern _abi_test_clobber_x6
|
||||
.align 4
|
||||
_abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
|
||||
@ -190,6 +194,7 @@ _abi_test_clobber_x6:
|
||||
.private_extern _abi_test_clobber_x7
|
||||
.align 4
|
||||
_abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
|
||||
@ -198,6 +203,7 @@ _abi_test_clobber_x7:
|
||||
.private_extern _abi_test_clobber_x8
|
||||
.align 4
|
||||
_abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
|
||||
@ -206,6 +212,7 @@ _abi_test_clobber_x8:
|
||||
.private_extern _abi_test_clobber_x9
|
||||
.align 4
|
||||
_abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
|
||||
@ -214,6 +221,7 @@ _abi_test_clobber_x9:
|
||||
.private_extern _abi_test_clobber_x10
|
||||
.align 4
|
||||
_abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
|
||||
@ -222,6 +230,7 @@ _abi_test_clobber_x10:
|
||||
.private_extern _abi_test_clobber_x11
|
||||
.align 4
|
||||
_abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
|
||||
@ -230,6 +239,7 @@ _abi_test_clobber_x11:
|
||||
.private_extern _abi_test_clobber_x12
|
||||
.align 4
|
||||
_abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
|
||||
@ -238,6 +248,7 @@ _abi_test_clobber_x12:
|
||||
.private_extern _abi_test_clobber_x13
|
||||
.align 4
|
||||
_abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
|
||||
@ -246,6 +257,7 @@ _abi_test_clobber_x13:
|
||||
.private_extern _abi_test_clobber_x14
|
||||
.align 4
|
||||
_abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
|
||||
@ -254,6 +266,7 @@ _abi_test_clobber_x14:
|
||||
.private_extern _abi_test_clobber_x15
|
||||
.align 4
|
||||
_abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
|
||||
@ -262,6 +275,7 @@ _abi_test_clobber_x15:
|
||||
.private_extern _abi_test_clobber_x16
|
||||
.align 4
|
||||
_abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
|
||||
@ -270,6 +284,7 @@ _abi_test_clobber_x16:
|
||||
.private_extern _abi_test_clobber_x17
|
||||
.align 4
|
||||
_abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
|
||||
@ -278,6 +293,7 @@ _abi_test_clobber_x17:
|
||||
.private_extern _abi_test_clobber_x19
|
||||
.align 4
|
||||
_abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
|
||||
@ -286,6 +302,7 @@ _abi_test_clobber_x19:
|
||||
.private_extern _abi_test_clobber_x20
|
||||
.align 4
|
||||
_abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
|
||||
@ -294,6 +311,7 @@ _abi_test_clobber_x20:
|
||||
.private_extern _abi_test_clobber_x21
|
||||
.align 4
|
||||
_abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
|
||||
@ -302,6 +320,7 @@ _abi_test_clobber_x21:
|
||||
.private_extern _abi_test_clobber_x22
|
||||
.align 4
|
||||
_abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
|
||||
@ -310,6 +329,7 @@ _abi_test_clobber_x22:
|
||||
.private_extern _abi_test_clobber_x23
|
||||
.align 4
|
||||
_abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
|
||||
@ -318,6 +338,7 @@ _abi_test_clobber_x23:
|
||||
.private_extern _abi_test_clobber_x24
|
||||
.align 4
|
||||
_abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
|
||||
@ -326,6 +347,7 @@ _abi_test_clobber_x24:
|
||||
.private_extern _abi_test_clobber_x25
|
||||
.align 4
|
||||
_abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
|
||||
@ -334,6 +356,7 @@ _abi_test_clobber_x25:
|
||||
.private_extern _abi_test_clobber_x26
|
||||
.align 4
|
||||
_abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
|
||||
@ -342,6 +365,7 @@ _abi_test_clobber_x26:
|
||||
.private_extern _abi_test_clobber_x27
|
||||
.align 4
|
||||
_abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
|
||||
@ -350,6 +374,7 @@ _abi_test_clobber_x27:
|
||||
.private_extern _abi_test_clobber_x28
|
||||
.align 4
|
||||
_abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
|
||||
@ -358,6 +383,7 @@ _abi_test_clobber_x28:
|
||||
.private_extern _abi_test_clobber_x29
|
||||
.align 4
|
||||
_abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
|
||||
@ -366,6 +392,7 @@ _abi_test_clobber_x29:
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
|
||||
@ -374,6 +401,7 @@ _abi_test_clobber_d0:
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
|
||||
@ -382,6 +410,7 @@ _abi_test_clobber_d1:
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
|
||||
@ -390,6 +419,7 @@ _abi_test_clobber_d2:
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
|
||||
@ -398,6 +428,7 @@ _abi_test_clobber_d3:
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
|
||||
@ -406,6 +437,7 @@ _abi_test_clobber_d4:
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
|
||||
@ -414,6 +446,7 @@ _abi_test_clobber_d5:
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
|
||||
@ -422,6 +455,7 @@ _abi_test_clobber_d6:
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
|
||||
@ -430,6 +464,7 @@ _abi_test_clobber_d7:
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
|
||||
@ -438,6 +473,7 @@ _abi_test_clobber_d8:
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
|
||||
@ -446,6 +482,7 @@ _abi_test_clobber_d9:
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
|
||||
@ -454,6 +491,7 @@ _abi_test_clobber_d10:
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
|
||||
@ -462,6 +500,7 @@ _abi_test_clobber_d11:
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
|
||||
@ -470,6 +509,7 @@ _abi_test_clobber_d12:
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
|
||||
@ -478,6 +518,7 @@ _abi_test_clobber_d13:
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
|
||||
@ -486,6 +527,7 @@ _abi_test_clobber_d14:
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
|
||||
@ -494,6 +536,7 @@ _abi_test_clobber_d15:
|
||||
.private_extern _abi_test_clobber_d16
|
||||
.align 4
|
||||
_abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
|
||||
@ -502,6 +545,7 @@ _abi_test_clobber_d16:
|
||||
.private_extern _abi_test_clobber_d17
|
||||
.align 4
|
||||
_abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
|
||||
@ -510,6 +554,7 @@ _abi_test_clobber_d17:
|
||||
.private_extern _abi_test_clobber_d18
|
||||
.align 4
|
||||
_abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
|
||||
@ -518,6 +563,7 @@ _abi_test_clobber_d18:
|
||||
.private_extern _abi_test_clobber_d19
|
||||
.align 4
|
||||
_abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
|
||||
@ -526,6 +572,7 @@ _abi_test_clobber_d19:
|
||||
.private_extern _abi_test_clobber_d20
|
||||
.align 4
|
||||
_abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
|
||||
@ -534,6 +581,7 @@ _abi_test_clobber_d20:
|
||||
.private_extern _abi_test_clobber_d21
|
||||
.align 4
|
||||
_abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
|
||||
@ -542,6 +590,7 @@ _abi_test_clobber_d21:
|
||||
.private_extern _abi_test_clobber_d22
|
||||
.align 4
|
||||
_abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
|
||||
@ -550,6 +599,7 @@ _abi_test_clobber_d22:
|
||||
.private_extern _abi_test_clobber_d23
|
||||
.align 4
|
||||
_abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
|
||||
@ -558,6 +608,7 @@ _abi_test_clobber_d23:
|
||||
.private_extern _abi_test_clobber_d24
|
||||
.align 4
|
||||
_abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
|
||||
@ -566,6 +617,7 @@ _abi_test_clobber_d24:
|
||||
.private_extern _abi_test_clobber_d25
|
||||
.align 4
|
||||
_abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
|
||||
@ -574,6 +626,7 @@ _abi_test_clobber_d25:
|
||||
.private_extern _abi_test_clobber_d26
|
||||
.align 4
|
||||
_abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
|
||||
@ -582,6 +635,7 @@ _abi_test_clobber_d26:
|
||||
.private_extern _abi_test_clobber_d27
|
||||
.align 4
|
||||
_abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
|
||||
@ -590,6 +644,7 @@ _abi_test_clobber_d27:
|
||||
.private_extern _abi_test_clobber_d28
|
||||
.align 4
|
||||
_abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
|
||||
@ -598,6 +653,7 @@ _abi_test_clobber_d28:
|
||||
.private_extern _abi_test_clobber_d29
|
||||
.align 4
|
||||
_abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
|
||||
@ -606,6 +662,7 @@ _abi_test_clobber_d29:
|
||||
.private_extern _abi_test_clobber_d30
|
||||
.align 4
|
||||
_abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
|
||||
@ -614,6 +671,7 @@ _abi_test_clobber_d30:
|
||||
.private_extern _abi_test_clobber_d31
|
||||
.align 4
|
||||
_abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
|
||||
@ -622,6 +680,7 @@ _abi_test_clobber_d31:
|
||||
.private_extern _abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -630,6 +689,7 @@ _abi_test_clobber_v8_upper:
|
||||
.private_extern _abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -638,6 +698,7 @@ _abi_test_clobber_v9_upper:
|
||||
.private_extern _abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -646,6 +707,7 @@ _abi_test_clobber_v10_upper:
|
||||
.private_extern _abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -654,6 +716,7 @@ _abi_test_clobber_v11_upper:
|
||||
.private_extern _abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -662,6 +725,7 @@ _abi_test_clobber_v12_upper:
|
||||
.private_extern _abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -670,6 +734,7 @@ _abi_test_clobber_v13_upper:
|
||||
.private_extern _abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -678,7 +743,8 @@ _abi_test_clobber_v14_upper:
|
||||
.private_extern _abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
|
||||
957
third-party/boringssl/apple-x86/crypto/chacha/chacha-x86-apple.S
vendored
Normal file
957
third-party/boringssl/apple-x86/crypto/chacha/chacha-x86-apple.S
vendored
Normal file
@ -0,0 +1,957 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _ChaCha20_ctr32_nohw
|
||||
.private_extern _ChaCha20_ctr32_nohw
|
||||
.align 4
|
||||
_ChaCha20_ctr32_nohw:
|
||||
L_ChaCha20_ctr32_nohw_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 32(%esp),%esi
|
||||
movl 36(%esp),%edi
|
||||
subl $132,%esp
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%edx
|
||||
movl %eax,80(%esp)
|
||||
movl %ebx,84(%esp)
|
||||
movl %ecx,88(%esp)
|
||||
movl %edx,92(%esp)
|
||||
movl 16(%esi),%eax
|
||||
movl 20(%esi),%ebx
|
||||
movl 24(%esi),%ecx
|
||||
movl 28(%esi),%edx
|
||||
movl %eax,96(%esp)
|
||||
movl %ebx,100(%esp)
|
||||
movl %ecx,104(%esp)
|
||||
movl %edx,108(%esp)
|
||||
movl (%edi),%eax
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl $1,%eax
|
||||
movl %eax,112(%esp)
|
||||
movl %ebx,116(%esp)
|
||||
movl %ecx,120(%esp)
|
||||
movl %edx,124(%esp)
|
||||
jmp L000entry
|
||||
.align 4,0x90
|
||||
L001outer_loop:
|
||||
movl %ebx,156(%esp)
|
||||
movl %eax,152(%esp)
|
||||
movl %ecx,160(%esp)
|
||||
L000entry:
|
||||
movl $1634760805,%eax
|
||||
movl $857760878,4(%esp)
|
||||
movl $2036477234,8(%esp)
|
||||
movl $1797285236,12(%esp)
|
||||
movl 84(%esp),%ebx
|
||||
movl 88(%esp),%ebp
|
||||
movl 104(%esp),%ecx
|
||||
movl 108(%esp),%esi
|
||||
movl 116(%esp),%edx
|
||||
movl 120(%esp),%edi
|
||||
movl %ebx,20(%esp)
|
||||
movl %ebp,24(%esp)
|
||||
movl %ecx,40(%esp)
|
||||
movl %esi,44(%esp)
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 92(%esp),%ebx
|
||||
movl 124(%esp),%edi
|
||||
movl 112(%esp),%edx
|
||||
movl 80(%esp),%ebp
|
||||
movl 96(%esp),%ecx
|
||||
movl 100(%esp),%esi
|
||||
addl $1,%edx
|
||||
movl %ebx,28(%esp)
|
||||
movl %edi,60(%esp)
|
||||
movl %edx,112(%esp)
|
||||
movl $10,%ebx
|
||||
jmp L002loop
|
||||
.align 4,0x90
|
||||
L002loop:
|
||||
addl %ebp,%eax
|
||||
movl %ebx,128(%esp)
|
||||
movl %ebp,%ebx
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 52(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 20(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,48(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,32(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,16(%esp)
|
||||
addl %edi,%esi
|
||||
movl 40(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 56(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 24(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,52(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,36(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,20(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 44(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 60(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 28(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,56(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,24(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
roll $12,%ebp
|
||||
movl 20(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,%edx
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
movl %ebp,28(%esp)
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 48(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 24(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,60(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,40(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,20(%esp)
|
||||
addl %edi,%esi
|
||||
movl 32(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 52(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 28(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,48(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,44(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,24(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 36(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 56(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 16(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,52(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,28(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
movl 48(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 128(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,56(%esp)
|
||||
xorl %esi,%ebp
|
||||
roll $7,%ebp
|
||||
decl %ebx
|
||||
jnz L002loop
|
||||
movl 160(%esp),%ebx
|
||||
addl $1634760805,%eax
|
||||
addl 80(%esp),%ebp
|
||||
addl 96(%esp),%ecx
|
||||
addl 100(%esp),%esi
|
||||
cmpl $64,%ebx
|
||||
jb L003tail
|
||||
movl 156(%esp),%ebx
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
xorl (%ebx),%eax
|
||||
xorl 16(%ebx),%ebp
|
||||
movl %eax,(%esp)
|
||||
movl 152(%esp),%eax
|
||||
xorl 32(%ebx),%ecx
|
||||
xorl 36(%ebx),%esi
|
||||
xorl 48(%ebx),%edx
|
||||
xorl 56(%ebx),%edi
|
||||
movl %ebp,16(%eax)
|
||||
movl %ecx,32(%eax)
|
||||
movl %esi,36(%eax)
|
||||
movl %edx,48(%eax)
|
||||
movl %edi,56(%eax)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
xorl 4(%ebx),%ebp
|
||||
xorl 8(%ebx),%ecx
|
||||
xorl 12(%ebx),%esi
|
||||
xorl 20(%ebx),%edx
|
||||
xorl 24(%ebx),%edi
|
||||
movl %ebp,4(%eax)
|
||||
movl %ecx,8(%eax)
|
||||
movl %esi,12(%eax)
|
||||
movl %edx,20(%eax)
|
||||
movl %edi,24(%eax)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
xorl 28(%ebx),%ebp
|
||||
xorl 40(%ebx),%ecx
|
||||
xorl 44(%ebx),%esi
|
||||
xorl 52(%ebx),%edx
|
||||
xorl 60(%ebx),%edi
|
||||
leal 64(%ebx),%ebx
|
||||
movl %ebp,28(%eax)
|
||||
movl (%esp),%ebp
|
||||
movl %ecx,40(%eax)
|
||||
movl 160(%esp),%ecx
|
||||
movl %esi,44(%eax)
|
||||
movl %edx,52(%eax)
|
||||
movl %edi,60(%eax)
|
||||
movl %ebp,(%eax)
|
||||
leal 64(%eax),%eax
|
||||
subl $64,%ecx
|
||||
jnz L001outer_loop
|
||||
jmp L004done
|
||||
L003tail:
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
movl %eax,(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %ecx,32(%esp)
|
||||
movl %esi,36(%esp)
|
||||
movl %edx,48(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
movl %ebp,4(%esp)
|
||||
movl %ecx,8(%esp)
|
||||
movl %esi,12(%esp)
|
||||
movl %edx,20(%esp)
|
||||
movl %edi,24(%esp)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
movl %ebp,28(%esp)
|
||||
movl 156(%esp),%ebp
|
||||
movl %ecx,40(%esp)
|
||||
movl 152(%esp),%ecx
|
||||
movl %esi,44(%esp)
|
||||
xorl %esi,%esi
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,60(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
L005tail_loop:
|
||||
movb (%esi,%ebp,1),%al
|
||||
movb (%esp,%esi,1),%dl
|
||||
leal 1(%esi),%esi
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%ecx,%esi,1)
|
||||
decl %ebx
|
||||
jnz L005tail_loop
|
||||
L004done:
|
||||
addl $132,%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _ChaCha20_ctr32_ssse3
|
||||
.private_extern _ChaCha20_ctr32_ssse3
|
||||
.align 4
|
||||
_ChaCha20_ctr32_ssse3:
|
||||
L_ChaCha20_ctr32_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
call Lpic_point
|
||||
Lpic_point:
|
||||
popl %eax
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl 36(%esp),%ebx
|
||||
movl %esp,%ebp
|
||||
subl $524,%esp
|
||||
andl $-64,%esp
|
||||
movl %ebp,512(%esp)
|
||||
leal Lssse3_data-Lpic_point(%eax),%eax
|
||||
movdqu (%ebx),%xmm3
|
||||
cmpl $256,%ecx
|
||||
jb L0061x
|
||||
movl %edx,516(%esp)
|
||||
movl %ebx,520(%esp)
|
||||
subl $256,%ecx
|
||||
leal 384(%esp),%ebp
|
||||
movdqu (%edx),%xmm7
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
paddd 48(%eax),%xmm0
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
psubd 64(%eax),%xmm0
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,64(%ebp)
|
||||
movdqa %xmm1,80(%ebp)
|
||||
movdqa %xmm2,96(%ebp)
|
||||
movdqa %xmm3,112(%ebp)
|
||||
movdqu 16(%edx),%xmm3
|
||||
movdqa %xmm4,-64(%ebp)
|
||||
movdqa %xmm5,-48(%ebp)
|
||||
movdqa %xmm6,-32(%ebp)
|
||||
movdqa %xmm7,-16(%ebp)
|
||||
movdqa 32(%eax),%xmm7
|
||||
leal 128(%esp),%ebx
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,(%ebp)
|
||||
movdqa %xmm1,16(%ebp)
|
||||
movdqa %xmm2,32(%ebp)
|
||||
movdqa %xmm3,48(%ebp)
|
||||
movdqa %xmm4,-128(%ebp)
|
||||
movdqa %xmm5,-112(%ebp)
|
||||
movdqa %xmm6,-96(%ebp)
|
||||
movdqa %xmm7,-80(%ebp)
|
||||
leal 128(%esi),%esi
|
||||
leal 128(%edi),%edi
|
||||
jmp L007outer_loop
|
||||
.align 4,0x90
|
||||
L007outer_loop:
|
||||
movdqa -112(%ebp),%xmm1
|
||||
movdqa -96(%ebp),%xmm2
|
||||
movdqa -80(%ebp),%xmm3
|
||||
movdqa -48(%ebp),%xmm5
|
||||
movdqa -32(%ebp),%xmm6
|
||||
movdqa -16(%ebp),%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
movdqa %xmm2,-96(%ebx)
|
||||
movdqa %xmm3,-80(%ebx)
|
||||
movdqa %xmm5,-48(%ebx)
|
||||
movdqa %xmm6,-32(%ebx)
|
||||
movdqa %xmm7,-16(%ebx)
|
||||
movdqa 32(%ebp),%xmm2
|
||||
movdqa 48(%ebp),%xmm3
|
||||
movdqa 64(%ebp),%xmm4
|
||||
movdqa 80(%ebp),%xmm5
|
||||
movdqa 96(%ebp),%xmm6
|
||||
movdqa 112(%ebp),%xmm7
|
||||
paddd 64(%eax),%xmm4
|
||||
movdqa %xmm2,32(%ebx)
|
||||
movdqa %xmm3,48(%ebx)
|
||||
movdqa %xmm4,64(%ebx)
|
||||
movdqa %xmm5,80(%ebx)
|
||||
movdqa %xmm6,96(%ebx)
|
||||
movdqa %xmm7,112(%ebx)
|
||||
movdqa %xmm4,64(%ebp)
|
||||
movdqa -128(%ebp),%xmm0
|
||||
movdqa %xmm4,%xmm6
|
||||
movdqa -64(%ebp),%xmm3
|
||||
movdqa (%ebp),%xmm4
|
||||
movdqa 16(%ebp),%xmm5
|
||||
movl $10,%edx
|
||||
nop
|
||||
.align 4,0x90
|
||||
L008loop:
|
||||
paddd %xmm3,%xmm0
|
||||
movdqa %xmm3,%xmm2
|
||||
pxor %xmm0,%xmm6
|
||||
pshufb (%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -48(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 80(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,64(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-64(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa 32(%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -32(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 96(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,80(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,16(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-48(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 48(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 112(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,96(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-32(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -48(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,%xmm6
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-16(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -32(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 64(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,112(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,32(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-48(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa (%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -16(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 80(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,64(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,48(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-32(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 16(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -64(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 96(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,80(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-16(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 64(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,96(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
por %xmm1,%xmm3
|
||||
decl %edx
|
||||
jnz L008loop
|
||||
movdqa %xmm3,-64(%ebx)
|
||||
movdqa %xmm4,(%ebx)
|
||||
movdqa %xmm5,16(%ebx)
|
||||
movdqa %xmm6,64(%ebx)
|
||||
movdqa %xmm7,96(%ebx)
|
||||
movdqa -112(%ebx),%xmm1
|
||||
movdqa -96(%ebx),%xmm2
|
||||
movdqa -80(%ebx),%xmm3
|
||||
paddd -128(%ebp),%xmm0
|
||||
paddd -112(%ebp),%xmm1
|
||||
paddd -96(%ebp),%xmm2
|
||||
paddd -80(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa -64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa -48(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa -32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd -64(%ebp),%xmm0
|
||||
paddd -48(%ebp),%xmm1
|
||||
paddd -32(%ebp),%xmm2
|
||||
paddd -16(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa (%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 48(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd (%ebp),%xmm0
|
||||
paddd 16(%ebp),%xmm1
|
||||
paddd 32(%ebp),%xmm2
|
||||
paddd 48(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 112(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd 64(%ebp),%xmm0
|
||||
paddd 80(%ebp),%xmm1
|
||||
paddd 96(%ebp),%xmm2
|
||||
paddd 112(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 208(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
pxor %xmm1,%xmm5
|
||||
pxor %xmm2,%xmm6
|
||||
pxor %xmm3,%xmm7
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 208(%edi),%edi
|
||||
subl $256,%ecx
|
||||
jnc L007outer_loop
|
||||
addl $256,%ecx
|
||||
jz L009done
|
||||
movl 520(%esp),%ebx
|
||||
leal -128(%esi),%esi
|
||||
movl 516(%esp),%edx
|
||||
leal -128(%edi),%edi
|
||||
movd 64(%ebp),%xmm2
|
||||
movdqu (%ebx),%xmm3
|
||||
paddd 96(%eax),%xmm2
|
||||
pand 112(%eax),%xmm3
|
||||
por %xmm2,%xmm3
|
||||
L0061x:
|
||||
movdqa 32(%eax),%xmm0
|
||||
movdqu (%edx),%xmm1
|
||||
movdqu 16(%edx),%xmm2
|
||||
movdqa (%eax),%xmm6
|
||||
movdqa 16(%eax),%xmm7
|
||||
movl %ebp,48(%esp)
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
movl $10,%edx
|
||||
jmp L010loop1x
|
||||
.align 4,0x90
|
||||
L011outer1x:
|
||||
movdqa 80(%eax),%xmm3
|
||||
movdqa (%esp),%xmm0
|
||||
movdqa 16(%esp),%xmm1
|
||||
movdqa 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
movl $10,%edx
|
||||
movdqa %xmm3,48(%esp)
|
||||
jmp L010loop1x
|
||||
.align 4,0x90
|
||||
L010loop1x:
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $57,%xmm1,%xmm1
|
||||
pshufd $147,%xmm3,%xmm3
|
||||
nop
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $147,%xmm1,%xmm1
|
||||
pshufd $57,%xmm3,%xmm3
|
||||
decl %edx
|
||||
jnz L010loop1x
|
||||
paddd (%esp),%xmm0
|
||||
paddd 16(%esp),%xmm1
|
||||
paddd 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
cmpl $64,%ecx
|
||||
jb L012tail
|
||||
movdqu (%esi),%xmm4
|
||||
movdqu 16(%esi),%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
movdqu 32(%esi),%xmm4
|
||||
pxor %xmm5,%xmm1
|
||||
movdqu 48(%esi),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm3
|
||||
leal 64(%esi),%esi
|
||||
movdqu %xmm0,(%edi)
|
||||
movdqu %xmm1,16(%edi)
|
||||
movdqu %xmm2,32(%edi)
|
||||
movdqu %xmm3,48(%edi)
|
||||
leal 64(%edi),%edi
|
||||
subl $64,%ecx
|
||||
jnz L011outer1x
|
||||
jmp L009done
|
||||
L012tail:
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
xorl %ebp,%ebp
|
||||
L013tail_loop:
|
||||
movb (%esp,%ebp,1),%al
|
||||
movb (%esi,%ebp,1),%dl
|
||||
leal 1(%ebp),%ebp
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%edi,%ebp,1)
|
||||
decl %ecx
|
||||
jnz L013tail_loop
|
||||
L009done:
|
||||
movl 512(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.align 6,0x90
|
||||
Lssse3_data:
|
||||
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
|
||||
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
|
||||
.long 1634760805,857760878,2036477234,1797285236
|
||||
.long 0,1,2,3
|
||||
.long 4,4,4,4
|
||||
.long 1,0,0,0
|
||||
.long 4,0,0,0
|
||||
.long 0,-1,-1,-1
|
||||
.align 6,0x90
|
||||
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
|
||||
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
|
||||
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
||||
.byte 114,103,62,0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
2475
third-party/boringssl/apple-x86/crypto/fipsmodule/aesni-x86-apple.S
vendored
Normal file
2475
third-party/boringssl/apple-x86/crypto/fipsmodule/aesni-x86-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
987
third-party/boringssl/apple-x86/crypto/fipsmodule/bn-586-apple.S
vendored
Normal file
987
third-party/boringssl/apple-x86/crypto/fipsmodule/bn-586-apple.S
vendored
Normal file
@ -0,0 +1,987 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _bn_mul_add_words
|
||||
.private_extern _bn_mul_add_words
|
||||
.align 4
|
||||
_bn_mul_add_words:
|
||||
L_bn_mul_add_words_begin:
|
||||
call L000PIC_me_up
|
||||
L000PIC_me_up:
|
||||
popl %eax
|
||||
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc L001maw_non_sse2
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
movl 12(%esp),%ecx
|
||||
movd 16(%esp),%mm0
|
||||
pxor %mm1,%mm1
|
||||
jmp L002maw_sse2_entry
|
||||
.align 4,0x90
|
||||
L003maw_sse2_unrolled:
|
||||
movd (%eax),%mm3
|
||||
paddq %mm3,%mm1
|
||||
movd (%edx),%mm2
|
||||
pmuludq %mm0,%mm2
|
||||
movd 4(%edx),%mm4
|
||||
pmuludq %mm0,%mm4
|
||||
movd 8(%edx),%mm6
|
||||
pmuludq %mm0,%mm6
|
||||
movd 12(%edx),%mm7
|
||||
pmuludq %mm0,%mm7
|
||||
paddq %mm2,%mm1
|
||||
movd 4(%eax),%mm3
|
||||
paddq %mm4,%mm3
|
||||
movd 8(%eax),%mm5
|
||||
paddq %mm6,%mm5
|
||||
movd 12(%eax),%mm4
|
||||
paddq %mm4,%mm7
|
||||
movd %mm1,(%eax)
|
||||
movd 16(%edx),%mm2
|
||||
pmuludq %mm0,%mm2
|
||||
psrlq $32,%mm1
|
||||
movd 20(%edx),%mm4
|
||||
pmuludq %mm0,%mm4
|
||||
paddq %mm3,%mm1
|
||||
movd 24(%edx),%mm6
|
||||
pmuludq %mm0,%mm6
|
||||
movd %mm1,4(%eax)
|
||||
psrlq $32,%mm1
|
||||
movd 28(%edx),%mm3
|
||||
addl $32,%edx
|
||||
pmuludq %mm0,%mm3
|
||||
paddq %mm5,%mm1
|
||||
movd 16(%eax),%mm5
|
||||
paddq %mm5,%mm2
|
||||
movd %mm1,8(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm7,%mm1
|
||||
movd 20(%eax),%mm5
|
||||
paddq %mm5,%mm4
|
||||
movd %mm1,12(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm2,%mm1
|
||||
movd 24(%eax),%mm5
|
||||
paddq %mm5,%mm6
|
||||
movd %mm1,16(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm4,%mm1
|
||||
movd 28(%eax),%mm5
|
||||
paddq %mm5,%mm3
|
||||
movd %mm1,20(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm6,%mm1
|
||||
movd %mm1,24(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm3,%mm1
|
||||
movd %mm1,28(%eax)
|
||||
leal 32(%eax),%eax
|
||||
psrlq $32,%mm1
|
||||
subl $8,%ecx
|
||||
jz L004maw_sse2_exit
|
||||
L002maw_sse2_entry:
|
||||
testl $4294967288,%ecx
|
||||
jnz L003maw_sse2_unrolled
|
||||
.align 2,0x90
|
||||
L005maw_sse2_loop:
|
||||
movd (%edx),%mm2
|
||||
movd (%eax),%mm3
|
||||
pmuludq %mm0,%mm2
|
||||
leal 4(%edx),%edx
|
||||
paddq %mm3,%mm1
|
||||
paddq %mm2,%mm1
|
||||
movd %mm1,(%eax)
|
||||
subl $1,%ecx
|
||||
psrlq $32,%mm1
|
||||
leal 4(%eax),%eax
|
||||
jnz L005maw_sse2_loop
|
||||
L004maw_sse2_exit:
|
||||
movd %mm1,%eax
|
||||
emms
|
||||
ret
|
||||
.align 4,0x90
|
||||
L001maw_non_sse2:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
xorl %esi,%esi
|
||||
movl 20(%esp),%edi
|
||||
movl 28(%esp),%ecx
|
||||
movl 24(%esp),%ebx
|
||||
andl $4294967288,%ecx
|
||||
movl 32(%esp),%ebp
|
||||
pushl %ecx
|
||||
jz L006maw_finish
|
||||
.align 4,0x90
|
||||
L007maw_loop:
|
||||
# Round 0
|
||||
movl (%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl (%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 4
|
||||
movl 4(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 4(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 8
|
||||
movl 8(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 8(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 12
|
||||
movl 12(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 12(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 16
|
||||
movl 16(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 16(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 20
|
||||
movl 20(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 20(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 24
|
||||
movl 24(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 24(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 28
|
||||
movl 28(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 28(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,28(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
subl $8,%ecx
|
||||
leal 32(%ebx),%ebx
|
||||
leal 32(%edi),%edi
|
||||
jnz L007maw_loop
|
||||
L006maw_finish:
|
||||
movl 32(%esp),%ecx
|
||||
andl $7,%ecx
|
||||
jnz L008maw_finish2
|
||||
jmp L009maw_end
|
||||
L008maw_finish2:
|
||||
# Tail Round 0
|
||||
movl (%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl (%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
jz L009maw_end
|
||||
# Tail Round 1
|
||||
movl 4(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 4(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
jz L009maw_end
|
||||
# Tail Round 2
|
||||
movl 8(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 8(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
jz L009maw_end
|
||||
# Tail Round 3
|
||||
movl 12(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 12(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
jz L009maw_end
|
||||
# Tail Round 4
|
||||
movl 16(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 16(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
jz L009maw_end
|
||||
# Tail Round 5
|
||||
movl 20(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 20(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
jz L009maw_end
|
||||
# Tail Round 6
|
||||
movl 24(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 24(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
L009maw_end:
|
||||
movl %esi,%eax
|
||||
popl %ecx
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _bn_mul_words
|
||||
.private_extern _bn_mul_words
|
||||
.align 4
|
||||
_bn_mul_words:
|
||||
L_bn_mul_words_begin:
|
||||
call L010PIC_me_up
|
||||
L010PIC_me_up:
|
||||
popl %eax
|
||||
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc L011mw_non_sse2
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
movl 12(%esp),%ecx
|
||||
movd 16(%esp),%mm0
|
||||
pxor %mm1,%mm1
|
||||
.align 4,0x90
|
||||
L012mw_sse2_loop:
|
||||
movd (%edx),%mm2
|
||||
pmuludq %mm0,%mm2
|
||||
leal 4(%edx),%edx
|
||||
paddq %mm2,%mm1
|
||||
movd %mm1,(%eax)
|
||||
subl $1,%ecx
|
||||
psrlq $32,%mm1
|
||||
leal 4(%eax),%eax
|
||||
jnz L012mw_sse2_loop
|
||||
movd %mm1,%eax
|
||||
emms
|
||||
ret
|
||||
.align 4,0x90
|
||||
L011mw_non_sse2:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
xorl %esi,%esi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%ebx
|
||||
movl 28(%esp),%ebp
|
||||
movl 32(%esp),%ecx
|
||||
andl $4294967288,%ebp
|
||||
jz L013mw_finish
|
||||
L014mw_loop:
|
||||
# Round 0
|
||||
movl (%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 4
|
||||
movl 4(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 8
|
||||
movl 8(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 12
|
||||
movl 12(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 16
|
||||
movl 16(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 20
|
||||
movl 20(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 24
|
||||
movl 24(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
# Round 28
|
||||
movl 28(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,28(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
addl $32,%ebx
|
||||
addl $32,%edi
|
||||
subl $8,%ebp
|
||||
jz L013mw_finish
|
||||
jmp L014mw_loop
|
||||
L013mw_finish:
|
||||
movl 28(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jnz L015mw_finish2
|
||||
jmp L016mw_end
|
||||
L015mw_finish2:
|
||||
# Tail Round 0
|
||||
movl (%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz L016mw_end
|
||||
# Tail Round 1
|
||||
movl 4(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz L016mw_end
|
||||
# Tail Round 2
|
||||
movl 8(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz L016mw_end
|
||||
# Tail Round 3
|
||||
movl 12(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz L016mw_end
|
||||
# Tail Round 4
|
||||
movl 16(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz L016mw_end
|
||||
# Tail Round 5
|
||||
movl 20(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz L016mw_end
|
||||
# Tail Round 6
|
||||
movl 24(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
L016mw_end:
|
||||
movl %esi,%eax
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _bn_sqr_words
|
||||
.private_extern _bn_sqr_words
|
||||
.align 4
|
||||
_bn_sqr_words:
|
||||
L_bn_sqr_words_begin:
|
||||
call L017PIC_me_up
|
||||
L017PIC_me_up:
|
||||
popl %eax
|
||||
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc L018sqr_non_sse2
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
movl 12(%esp),%ecx
|
||||
.align 4,0x90
|
||||
L019sqr_sse2_loop:
|
||||
movd (%edx),%mm0
|
||||
pmuludq %mm0,%mm0
|
||||
leal 4(%edx),%edx
|
||||
movq %mm0,(%eax)
|
||||
subl $1,%ecx
|
||||
leal 8(%eax),%eax
|
||||
jnz L019sqr_sse2_loop
|
||||
emms
|
||||
ret
|
||||
.align 4,0x90
|
||||
L018sqr_non_sse2:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%esi
|
||||
movl 24(%esp),%edi
|
||||
movl 28(%esp),%ebx
|
||||
andl $4294967288,%ebx
|
||||
jz L020sw_finish
|
||||
L021sw_loop:
|
||||
# Round 0
|
||||
movl (%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
# Round 4
|
||||
movl 4(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,8(%esi)
|
||||
movl %edx,12(%esi)
|
||||
# Round 8
|
||||
movl 8(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,16(%esi)
|
||||
movl %edx,20(%esi)
|
||||
# Round 12
|
||||
movl 12(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,24(%esi)
|
||||
movl %edx,28(%esi)
|
||||
# Round 16
|
||||
movl 16(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,32(%esi)
|
||||
movl %edx,36(%esi)
|
||||
# Round 20
|
||||
movl 20(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,40(%esi)
|
||||
movl %edx,44(%esi)
|
||||
# Round 24
|
||||
movl 24(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,48(%esi)
|
||||
movl %edx,52(%esi)
|
||||
# Round 28
|
||||
movl 28(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,56(%esi)
|
||||
movl %edx,60(%esi)
|
||||
|
||||
addl $32,%edi
|
||||
addl $64,%esi
|
||||
subl $8,%ebx
|
||||
jnz L021sw_loop
|
||||
L020sw_finish:
|
||||
movl 28(%esp),%ebx
|
||||
andl $7,%ebx
|
||||
jz L022sw_end
|
||||
# Tail Round 0
|
||||
movl (%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,(%esi)
|
||||
decl %ebx
|
||||
movl %edx,4(%esi)
|
||||
jz L022sw_end
|
||||
# Tail Round 1
|
||||
movl 4(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,8(%esi)
|
||||
decl %ebx
|
||||
movl %edx,12(%esi)
|
||||
jz L022sw_end
|
||||
# Tail Round 2
|
||||
movl 8(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,16(%esi)
|
||||
decl %ebx
|
||||
movl %edx,20(%esi)
|
||||
jz L022sw_end
|
||||
# Tail Round 3
|
||||
movl 12(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,24(%esi)
|
||||
decl %ebx
|
||||
movl %edx,28(%esi)
|
||||
jz L022sw_end
|
||||
# Tail Round 4
|
||||
movl 16(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,32(%esi)
|
||||
decl %ebx
|
||||
movl %edx,36(%esi)
|
||||
jz L022sw_end
|
||||
# Tail Round 5
|
||||
movl 20(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,40(%esi)
|
||||
decl %ebx
|
||||
movl %edx,44(%esi)
|
||||
jz L022sw_end
|
||||
# Tail Round 6
|
||||
movl 24(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,48(%esi)
|
||||
movl %edx,52(%esi)
|
||||
L022sw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _bn_div_words
|
||||
.private_extern _bn_div_words
|
||||
.align 4
|
||||
_bn_div_words:
|
||||
L_bn_div_words_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
movl 12(%esp),%ecx
|
||||
divl %ecx
|
||||
ret
|
||||
.globl _bn_add_words
|
||||
.private_extern _bn_add_words
|
||||
.align 4
|
||||
_bn_add_words:
|
||||
L_bn_add_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz L023aw_finish
|
||||
L024aw_loop:
|
||||
# Round 0
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
# Round 1
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
# Round 2
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
# Round 3
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
# Round 4
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
# Round 5
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
# Round 6
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
# Round 7
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz L024aw_loop
|
||||
L023aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz L025aw_end
|
||||
# Tail Round 0
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz L025aw_end
|
||||
# Tail Round 1
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz L025aw_end
|
||||
# Tail Round 2
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz L025aw_end
|
||||
# Tail Round 3
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz L025aw_end
|
||||
# Tail Round 4
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz L025aw_end
|
||||
# Tail Round 5
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz L025aw_end
|
||||
# Tail Round 6
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
L025aw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _bn_sub_words
|
||||
.private_extern _bn_sub_words
|
||||
.align 4
|
||||
_bn_sub_words:
|
||||
L_bn_sub_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz L026aw_finish
|
||||
L027aw_loop:
|
||||
# Round 0
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
# Round 1
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
# Round 2
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
# Round 3
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
# Round 4
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
# Round 5
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
# Round 6
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
# Round 7
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz L027aw_loop
|
||||
L026aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz L028aw_end
|
||||
# Tail Round 0
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz L028aw_end
|
||||
# Tail Round 1
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz L028aw_end
|
||||
# Tail Round 2
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz L028aw_end
|
||||
# Tail Round 3
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz L028aw_end
|
||||
# Tail Round 4
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz L028aw_end
|
||||
# Tail Round 5
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz L028aw_end
|
||||
# Tail Round 6
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
L028aw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.section __IMPORT,__pointers,non_lazy_symbol_pointers
|
||||
L_OPENSSL_ia32cap_P$non_lazy_ptr:
|
||||
.indirect_symbol _OPENSSL_ia32cap_P
|
||||
.long 0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
1256
third-party/boringssl/apple-x86/crypto/fipsmodule/co-586-apple.S
vendored
Normal file
1256
third-party/boringssl/apple-x86/crypto/fipsmodule/co-586-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
288
third-party/boringssl/apple-x86/crypto/fipsmodule/ghash-ssse3-x86-apple.S
vendored
Normal file
288
third-party/boringssl/apple-x86/crypto/fipsmodule/ghash-ssse3-x86-apple.S
vendored
Normal file
@ -0,0 +1,288 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _gcm_gmult_ssse3
|
||||
.private_extern _gcm_gmult_ssse3
|
||||
.align 4
|
||||
_gcm_gmult_ssse3:
|
||||
L_gcm_gmult_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movdqu (%edi),%xmm0
|
||||
call L000pic_point
|
||||
L000pic_point:
|
||||
popl %eax
|
||||
movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7
|
||||
movdqa Llow4_mask-L000pic_point(%eax),%xmm2
|
||||
.byte 102,15,56,0,199
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
L001loop_row_1:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L001loop_row_1
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
L002loop_row_2:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L002loop_row_2
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
L003loop_row_3:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L003loop_row_3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
.byte 102,15,56,0,215
|
||||
movdqu %xmm2,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _gcm_ghash_ssse3
|
||||
.private_extern _gcm_ghash_ssse3
|
||||
.align 4
|
||||
_gcm_ghash_ssse3:
|
||||
L_gcm_ghash_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edx
|
||||
movl 32(%esp),%ecx
|
||||
movdqu (%edi),%xmm0
|
||||
call L004pic_point
|
||||
L004pic_point:
|
||||
popl %ebx
|
||||
movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7
|
||||
andl $-16,%ecx
|
||||
.byte 102,15,56,0,199
|
||||
pxor %xmm3,%xmm3
|
||||
L005loop_ghash:
|
||||
movdqa Llow4_mask-L004pic_point(%ebx),%xmm2
|
||||
movdqu (%edx),%xmm1
|
||||
.byte 102,15,56,0,207
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
movl $5,%eax
|
||||
L006loop_row_4:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L006loop_row_4
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
L007loop_row_5:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L007loop_row_5
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
L008loop_row_6:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L008loop_row_6
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movdqa %xmm2,%xmm0
|
||||
leal -256(%esi),%esi
|
||||
leal 16(%edx),%edx
|
||||
subl $16,%ecx
|
||||
jnz L005loop_ghash
|
||||
.byte 102,15,56,0,199
|
||||
movdqu %xmm0,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.align 4,0x90
|
||||
Lreverse_bytes:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.align 4,0x90
|
||||
Llow4_mask:
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
322
third-party/boringssl/apple-x86/crypto/fipsmodule/ghash-x86-apple.S
vendored
Normal file
322
third-party/boringssl/apple-x86/crypto/fipsmodule/ghash-x86-apple.S
vendored
Normal file
@ -0,0 +1,322 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _gcm_init_clmul
|
||||
.private_extern _gcm_init_clmul
|
||||
.align 4
|
||||
_gcm_init_clmul:
|
||||
L_gcm_init_clmul_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
call L000pic
|
||||
L000pic:
|
||||
popl %ecx
|
||||
leal Lbswap-L000pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm2
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $255,%xmm2,%xmm4
|
||||
movdqa %xmm2,%xmm3
|
||||
psllq $1,%xmm2
|
||||
pxor %xmm5,%xmm5
|
||||
psrlq $63,%xmm3
|
||||
pcmpgtd %xmm4,%xmm5
|
||||
pslldq $8,%xmm3
|
||||
por %xmm3,%xmm2
|
||||
pand 16(%ecx),%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm2,%xmm0
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pshufd $78,%xmm2,%xmm4
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm2,%xmm4
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
pshufd $78,%xmm2,%xmm3
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
pxor %xmm2,%xmm3
|
||||
movdqu %xmm2,(%edx)
|
||||
pxor %xmm0,%xmm4
|
||||
movdqu %xmm0,16(%edx)
|
||||
.byte 102,15,58,15,227,8
|
||||
movdqu %xmm4,32(%edx)
|
||||
ret
|
||||
.globl _gcm_gmult_clmul
|
||||
.private_extern _gcm_gmult_clmul
|
||||
.align 4
|
||||
_gcm_gmult_clmul:
|
||||
L_gcm_gmult_clmul_begin:
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
call L001pic
|
||||
L001pic:
|
||||
popl %ecx
|
||||
leal Lbswap-L001pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movups (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
movups 32(%edx),%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
ret
|
||||
.globl _gcm_ghash_clmul
|
||||
.private_extern _gcm_ghash_clmul
|
||||
.align 4
|
||||
_gcm_ghash_clmul:
|
||||
L_gcm_ghash_clmul_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%eax
|
||||
movl 24(%esp),%edx
|
||||
movl 28(%esp),%esi
|
||||
movl 32(%esp),%ebx
|
||||
call L002pic
|
||||
L002pic:
|
||||
popl %ecx
|
||||
leal Lbswap-L002pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movdqu (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
subl $16,%ebx
|
||||
jz L003odd_tail
|
||||
movdqu (%esi),%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
.byte 102,15,56,0,221
|
||||
.byte 102,15,56,0,245
|
||||
movdqu 32(%edx),%xmm5
|
||||
pxor %xmm3,%xmm0
|
||||
pshufd $78,%xmm6,%xmm3
|
||||
movdqa %xmm6,%xmm7
|
||||
pxor %xmm6,%xmm3
|
||||
leal 32(%esi),%esi
|
||||
.byte 102,15,58,68,242,0
|
||||
.byte 102,15,58,68,250,17
|
||||
.byte 102,15,58,68,221,0
|
||||
movups 16(%edx),%xmm2
|
||||
nop
|
||||
subl $32,%ebx
|
||||
jbe L004even_tail
|
||||
jmp L005mod_loop
|
||||
.align 5,0x90
|
||||
L005mod_loop:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
nop
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,229,16
|
||||
movups (%edx),%xmm2
|
||||
xorps %xmm6,%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
xorps %xmm7,%xmm1
|
||||
movdqu (%esi),%xmm7
|
||||
pxor %xmm0,%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
pxor %xmm1,%xmm3
|
||||
.byte 102,15,56,0,253
|
||||
pxor %xmm3,%xmm4
|
||||
movdqa %xmm4,%xmm3
|
||||
psrldq $8,%xmm4
|
||||
pslldq $8,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm3,%xmm0
|
||||
.byte 102,15,56,0,245
|
||||
pxor %xmm7,%xmm1
|
||||
movdqa %xmm6,%xmm7
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
.byte 102,15,58,68,242,0
|
||||
movups 32(%edx),%xmm5
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
pshufd $78,%xmm7,%xmm3
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm7,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
.byte 102,15,58,68,250,17
|
||||
movups 16(%edx),%xmm2
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,58,68,221,0
|
||||
leal 32(%esi),%esi
|
||||
subl $32,%ebx
|
||||
ja L005mod_loop
|
||||
L004even_tail:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,229,16
|
||||
movdqa (%ecx),%xmm5
|
||||
xorps %xmm6,%xmm0
|
||||
xorps %xmm7,%xmm1
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm1,%xmm3
|
||||
pxor %xmm3,%xmm4
|
||||
movdqa %xmm4,%xmm3
|
||||
psrldq $8,%xmm4
|
||||
pslldq $8,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm3,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
testl %ebx,%ebx
|
||||
jnz L006done
|
||||
movups (%edx),%xmm2
|
||||
L003odd_tail:
|
||||
movdqu (%esi),%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
pxor %xmm3,%xmm0
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pshufd $78,%xmm2,%xmm4
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm2,%xmm4
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
L006done:
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.align 6,0x90
|
||||
Lbswap:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
.byte 0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
684
third-party/boringssl/apple-x86/crypto/fipsmodule/md5-586-apple.S
vendored
Normal file
684
third-party/boringssl/apple-x86/crypto/fipsmodule/md5-586-apple.S
vendored
Normal file
@ -0,0 +1,684 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _md5_block_asm_data_order
|
||||
.private_extern _md5_block_asm_data_order
|
||||
.align 4
|
||||
_md5_block_asm_data_order:
|
||||
L_md5_block_asm_data_order_begin:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
movl 20(%esp),%ecx
|
||||
pushl %ebp
|
||||
shll $6,%ecx
|
||||
pushl %ebx
|
||||
addl %esi,%ecx
|
||||
subl $64,%ecx
|
||||
movl (%edi),%eax
|
||||
pushl %ecx
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
L000start:
|
||||
|
||||
# R0 section
|
||||
movl %ecx,%edi
|
||||
movl (%esi),%ebp
|
||||
# R0 0
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 3614090360(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 4(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
# R0 1
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 3905402710(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 8(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
# R0 2
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 606105819(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 12(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
# R0 3
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 3250441966(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 16(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
# R0 4
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 4118548399(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 20(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
# R0 5
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 1200080426(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 24(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
# R0 6
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 2821735955(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 28(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
# R0 7
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 4249261313(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 32(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
# R0 8
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 1770035416(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 36(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
# R0 9
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 2336552879(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 40(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
# R0 10
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 4294925233(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 44(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
# R0 11
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 2304563134(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 48(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
# R0 12
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 1804603682(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 52(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
# R0 13
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 4254626195(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 56(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
# R0 14
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 2792965006(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 60(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
# R0 15
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 1236535329(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 4(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
# R1 section
|
||||
# R1 16
|
||||
leal 4129170786(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 24(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
# R1 17
|
||||
leal 3225465664(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 44(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
# R1 18
|
||||
leal 643717713(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl (%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
# R1 19
|
||||
leal 3921069994(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
# R1 20
|
||||
leal 3593408605(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 40(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
# R1 21
|
||||
leal 38016083(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 60(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
# R1 22
|
||||
leal 3634488961(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 16(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
# R1 23
|
||||
leal 3889429448(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 36(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
# R1 24
|
||||
leal 568446438(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 56(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
# R1 25
|
||||
leal 3275163606(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 12(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
# R1 26
|
||||
leal 4107603335(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 32(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
# R1 27
|
||||
leal 1163531501(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 52(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
# R1 28
|
||||
leal 2850285829(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 8(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
# R1 29
|
||||
leal 4243563512(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 28(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
# R1 30
|
||||
leal 1735328473(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 48(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
# R1 31
|
||||
leal 2368359562(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
# R2 section
|
||||
# R2 32
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 4294588738(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 32(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
# R2 33
|
||||
leal 2272392833(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 44(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
# R2 34
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 1839030562(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 56(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
# R2 35
|
||||
leal 4259657740(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 4(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
# R2 36
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 2763975236(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 16(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
# R2 37
|
||||
leal 1272893353(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 28(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
# R2 38
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 4139469664(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 40(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
# R2 39
|
||||
leal 3200236656(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 52(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
# R2 40
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 681279174(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl (%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
# R2 41
|
||||
leal 3936430074(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 12(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
# R2 42
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 3572445317(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 24(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
# R2 43
|
||||
leal 76029189(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 36(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
# R2 44
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 3654602809(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 48(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
# R2 45
|
||||
leal 3873151461(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 60(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
# R2 46
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 530742520(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 8(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
# R2 47
|
||||
leal 3299628645(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl (%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
# R3 section
|
||||
# R3 48
|
||||
xorl %edx,%edi
|
||||
orl %ebx,%edi
|
||||
leal 4096336452(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 28(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
# R3 49
|
||||
orl %eax,%edi
|
||||
leal 1126891415(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 56(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
# R3 50
|
||||
orl %edx,%edi
|
||||
leal 2878612391(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
# R3 51
|
||||
orl %ecx,%edi
|
||||
leal 4237533241(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 48(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
# R3 52
|
||||
orl %ebx,%edi
|
||||
leal 1700485571(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 12(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
# R3 53
|
||||
orl %eax,%edi
|
||||
leal 2399980690(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 40(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
# R3 54
|
||||
orl %edx,%edi
|
||||
leal 4293915773(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 4(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
# R3 55
|
||||
orl %ecx,%edi
|
||||
leal 2240044497(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 32(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
# R3 56
|
||||
orl %ebx,%edi
|
||||
leal 1873313359(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 60(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
# R3 57
|
||||
orl %eax,%edi
|
||||
leal 4264355552(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 24(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
# R3 58
|
||||
orl %edx,%edi
|
||||
leal 2734768916(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 52(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
# R3 59
|
||||
orl %ecx,%edi
|
||||
leal 1309151649(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 16(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
# R3 60
|
||||
orl %ebx,%edi
|
||||
leal 4149444226(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 44(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
# R3 61
|
||||
orl %eax,%edi
|
||||
leal 3174756917(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 8(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
# R3 62
|
||||
orl %edx,%edi
|
||||
leal 718787259(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 36(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
# R3 63
|
||||
orl %ecx,%edi
|
||||
leal 3951481745(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 24(%esp),%ebp
|
||||
addl %edi,%ebx
|
||||
addl $64,%esi
|
||||
roll $21,%ebx
|
||||
movl (%ebp),%edi
|
||||
addl %ecx,%ebx
|
||||
addl %edi,%eax
|
||||
movl 4(%ebp),%edi
|
||||
addl %edi,%ebx
|
||||
movl 8(%ebp),%edi
|
||||
addl %edi,%ecx
|
||||
movl 12(%ebp),%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,(%ebp)
|
||||
movl %ebx,4(%ebp)
|
||||
movl (%esp),%edi
|
||||
movl %ecx,8(%ebp)
|
||||
movl %edx,12(%ebp)
|
||||
cmpl %esi,%edi
|
||||
jae L000start
|
||||
popl %eax
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
3782
third-party/boringssl/apple-x86/crypto/fipsmodule/sha1-586-apple.S
vendored
Normal file
3782
third-party/boringssl/apple-x86/crypto/fipsmodule/sha1-586-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5593
third-party/boringssl/apple-x86/crypto/fipsmodule/sha256-586-apple.S
vendored
Normal file
5593
third-party/boringssl/apple-x86/crypto/fipsmodule/sha256-586-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2837
third-party/boringssl/apple-x86/crypto/fipsmodule/sha512-586-apple.S
vendored
Normal file
2837
third-party/boringssl/apple-x86/crypto/fipsmodule/sha512-586-apple.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
680
third-party/boringssl/apple-x86/crypto/fipsmodule/vpaes-x86-apple.S
vendored
Normal file
680
third-party/boringssl/apple-x86/crypto/fipsmodule/vpaes-x86-apple.S
vendored
Normal file
@ -0,0 +1,680 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.align 6,0x90
|
||||
L_vpaes_consts:
|
||||
.long 218628480,235210255,168496130,67568393
|
||||
.long 252381056,17041926,33884169,51187212
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
.long 1512730624,3266504856,1377990664,3401244816
|
||||
.long 830229760,1275146365,2969422977,3447763452
|
||||
.long 3411033600,2979783055,338359620,2782886510
|
||||
.long 4209124096,907596821,221174255,1006095553
|
||||
.long 191964160,3799684038,3164090317,1589111125
|
||||
.long 182528256,1777043520,2877432650,3265356744
|
||||
.long 1874708224,3503451415,3305285752,363511674
|
||||
.long 1606117888,3487855781,1093350906,2384367825
|
||||
.long 197121,67569157,134941193,202313229
|
||||
.long 67569157,134941193,202313229,197121
|
||||
.long 134941193,202313229,197121,67569157
|
||||
.long 202313229,197121,67569157,134941193
|
||||
.long 33619971,100992007,168364043,235736079
|
||||
.long 235736079,33619971,100992007,168364043
|
||||
.long 168364043,235736079,33619971,100992007
|
||||
.long 100992007,168364043,235736079,33619971
|
||||
.long 50462976,117835012,185207048,252579084
|
||||
.long 252314880,51251460,117574920,184942860
|
||||
.long 184682752,252054788,50987272,118359308
|
||||
.long 118099200,185467140,251790600,50727180
|
||||
.long 2946363062,528716217,1300004225,1881839624
|
||||
.long 1532713819,1532713819,1532713819,1532713819
|
||||
.long 3602276352,4288629033,3737020424,4153884961
|
||||
.long 1354558464,32357713,2958822624,3775749553
|
||||
.long 1201988352,132424512,1572796698,503232858
|
||||
.long 2213177600,1597421020,4103937655,675398315
|
||||
.long 2749646592,4273543773,1511898873,121693092
|
||||
.long 3040248576,1103263732,2871565598,1608280554
|
||||
.long 2236667136,2588920351,482954393,64377734
|
||||
.long 3069987328,291237287,2117370568,3650299247
|
||||
.long 533321216,3573750986,2572112006,1401264716
|
||||
.long 1339849704,2721158661,548607111,3445553514
|
||||
.long 2128193280,3054596040,2183486460,1257083700
|
||||
.long 655635200,1165381986,3923443150,2344132524
|
||||
.long 190078720,256924420,290342170,357187870
|
||||
.long 1610966272,2263057382,4103205268,309794674
|
||||
.long 2592527872,2233205587,1335446729,3402964816
|
||||
.long 3973531904,3225098121,3002836325,1918774430
|
||||
.long 3870401024,2102906079,2284471353,4117666579
|
||||
.long 617007872,1021508343,366931923,691083277
|
||||
.long 2528395776,3491914898,2968704004,1613121270
|
||||
.long 3445188352,3247741094,844474987,4093578302
|
||||
.long 651481088,1190302358,1689581232,574775300
|
||||
.long 4289380608,206939853,2555985458,2489840491
|
||||
.long 2130264064,327674451,3566485037,3349835193
|
||||
.long 2470714624,316102159,3636825756,3393945945
|
||||
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
|
||||
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
|
||||
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
|
||||
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
|
||||
.byte 118,101,114,115,105,116,121,41,0
|
||||
.align 6,0x90
|
||||
.private_extern __vpaes_preheat
|
||||
.align 4
|
||||
__vpaes_preheat:
|
||||
addl (%esp),%ebp
|
||||
movdqa -48(%ebp),%xmm7
|
||||
movdqa -16(%ebp),%xmm6
|
||||
ret
|
||||
.private_extern __vpaes_encrypt_core
|
||||
.align 4
|
||||
__vpaes_encrypt_core:
|
||||
movl $16,%ecx
|
||||
movl 240(%edx),%eax
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa (%ebp),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
pand %xmm6,%xmm0
|
||||
movdqu (%edx),%xmm5
|
||||
.byte 102,15,56,0,208
|
||||
movdqa 16(%ebp),%xmm0
|
||||
pxor %xmm5,%xmm2
|
||||
psrld $4,%xmm1
|
||||
addl $16,%edx
|
||||
.byte 102,15,56,0,193
|
||||
leal 192(%ebp),%ebx
|
||||
pxor %xmm2,%xmm0
|
||||
jmp L000enc_entry
|
||||
.align 4,0x90
|
||||
L001enc_loop:
|
||||
movdqa 32(%ebp),%xmm4
|
||||
movdqa 48(%ebp),%xmm0
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm5,%xmm4
|
||||
movdqa 64(%ebp),%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa -64(%ebx,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,234
|
||||
movdqa 80(%ebp),%xmm2
|
||||
movdqa (%ebx,%ecx,1),%xmm4
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm0,%xmm3
|
||||
pxor %xmm5,%xmm2
|
||||
.byte 102,15,56,0,193
|
||||
addl $16,%edx
|
||||
pxor %xmm2,%xmm0
|
||||
.byte 102,15,56,0,220
|
||||
addl $16,%ecx
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,193
|
||||
andl $48,%ecx
|
||||
subl $1,%eax
|
||||
pxor %xmm3,%xmm0
|
||||
L000enc_entry:
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa -32(%ebp),%xmm5
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm6,%xmm0
|
||||
.byte 102,15,56,0,232
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,217
|
||||
movdqa %xmm7,%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
.byte 102,15,56,0,224
|
||||
movdqa %xmm7,%xmm2
|
||||
pxor %xmm5,%xmm4
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm0,%xmm2
|
||||
.byte 102,15,56,0,220
|
||||
movdqu (%edx),%xmm5
|
||||
pxor %xmm1,%xmm3
|
||||
jnz L001enc_loop
|
||||
movdqa 96(%ebp),%xmm4
|
||||
movdqa 112(%ebp),%xmm0
|
||||
.byte 102,15,56,0,226
|
||||
pxor %xmm5,%xmm4
|
||||
.byte 102,15,56,0,195
|
||||
movdqa 64(%ebx,%ecx,1),%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
ret
|
||||
.private_extern __vpaes_decrypt_core
|
||||
.align 4
|
||||
__vpaes_decrypt_core:
|
||||
leal 608(%ebp),%ebx
|
||||
movl 240(%edx),%eax
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa -64(%ebx),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
movl %eax,%ecx
|
||||
psrld $4,%xmm1
|
||||
movdqu (%edx),%xmm5
|
||||
shll $4,%ecx
|
||||
pand %xmm6,%xmm0
|
||||
.byte 102,15,56,0,208
|
||||
movdqa -48(%ebx),%xmm0
|
||||
xorl $48,%ecx
|
||||
.byte 102,15,56,0,193
|
||||
andl $48,%ecx
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa 176(%ebp),%xmm5
|
||||
pxor %xmm2,%xmm0
|
||||
addl $16,%edx
|
||||
leal -352(%ebx,%ecx,1),%ecx
|
||||
jmp L002dec_entry
|
||||
.align 4,0x90
|
||||
L003dec_loop:
|
||||
movdqa -32(%ebx),%xmm4
|
||||
movdqa -16(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa (%ebx),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 16(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 32(%ebx),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 48(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 64(%ebx),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 80(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
addl $16,%edx
|
||||
.byte 102,15,58,15,237,12
|
||||
pxor %xmm1,%xmm0
|
||||
subl $1,%eax
|
||||
L002dec_entry:
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa -32(%ebp),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
pand %xmm6,%xmm0
|
||||
psrld $4,%xmm1
|
||||
.byte 102,15,56,0,208
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,217
|
||||
movdqa %xmm7,%xmm4
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,224
|
||||
pxor %xmm2,%xmm4
|
||||
movdqa %xmm7,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm0,%xmm2
|
||||
.byte 102,15,56,0,220
|
||||
movdqu (%edx),%xmm0
|
||||
pxor %xmm1,%xmm3
|
||||
jnz L003dec_loop
|
||||
movdqa 96(%ebx),%xmm4
|
||||
.byte 102,15,56,0,226
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 112(%ebx),%xmm0
|
||||
movdqa (%ecx),%xmm2
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,194
|
||||
ret
|
||||
.private_extern __vpaes_schedule_core
|
||||
.align 4
|
||||
__vpaes_schedule_core:
|
||||
addl (%esp),%ebp
|
||||
movdqu (%esi),%xmm0
|
||||
movdqa 320(%ebp),%xmm2
|
||||
movdqa %xmm0,%xmm3
|
||||
leal (%ebp),%ebx
|
||||
movdqa %xmm2,4(%esp)
|
||||
call __vpaes_schedule_transform
|
||||
movdqa %xmm0,%xmm7
|
||||
testl %edi,%edi
|
||||
jnz L004schedule_am_decrypting
|
||||
movdqu %xmm0,(%edx)
|
||||
jmp L005schedule_go
|
||||
L004schedule_am_decrypting:
|
||||
movdqa 256(%ebp,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,217
|
||||
movdqu %xmm3,(%edx)
|
||||
xorl $48,%ecx
|
||||
L005schedule_go:
|
||||
cmpl $192,%eax
|
||||
ja L006schedule_256
|
||||
je L007schedule_192
|
||||
L008schedule_128:
|
||||
movl $10,%eax
|
||||
L009loop_schedule_128:
|
||||
call __vpaes_schedule_round
|
||||
decl %eax
|
||||
jz L010schedule_mangle_last
|
||||
call __vpaes_schedule_mangle
|
||||
jmp L009loop_schedule_128
|
||||
.align 4,0x90
|
||||
L007schedule_192:
|
||||
movdqu 8(%esi),%xmm0
|
||||
call __vpaes_schedule_transform
|
||||
movdqa %xmm0,%xmm6
|
||||
pxor %xmm4,%xmm4
|
||||
movhlps %xmm4,%xmm6
|
||||
movl $4,%eax
|
||||
L011loop_schedule_192:
|
||||
call __vpaes_schedule_round
|
||||
.byte 102,15,58,15,198,8
|
||||
call __vpaes_schedule_mangle
|
||||
call __vpaes_schedule_192_smear
|
||||
call __vpaes_schedule_mangle
|
||||
call __vpaes_schedule_round
|
||||
decl %eax
|
||||
jz L010schedule_mangle_last
|
||||
call __vpaes_schedule_mangle
|
||||
call __vpaes_schedule_192_smear
|
||||
jmp L011loop_schedule_192
|
||||
.align 4,0x90
|
||||
L006schedule_256:
|
||||
movdqu 16(%esi),%xmm0
|
||||
call __vpaes_schedule_transform
|
||||
movl $7,%eax
|
||||
L012loop_schedule_256:
|
||||
call __vpaes_schedule_mangle
|
||||
movdqa %xmm0,%xmm6
|
||||
call __vpaes_schedule_round
|
||||
decl %eax
|
||||
jz L010schedule_mangle_last
|
||||
call __vpaes_schedule_mangle
|
||||
pshufd $255,%xmm0,%xmm0
|
||||
movdqa %xmm7,20(%esp)
|
||||
movdqa %xmm6,%xmm7
|
||||
call L_vpaes_schedule_low_round
|
||||
movdqa 20(%esp),%xmm7
|
||||
jmp L012loop_schedule_256
|
||||
.align 4,0x90
|
||||
L010schedule_mangle_last:
|
||||
leal 384(%ebp),%ebx
|
||||
testl %edi,%edi
|
||||
jnz L013schedule_mangle_last_dec
|
||||
movdqa 256(%ebp,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,193
|
||||
leal 352(%ebp),%ebx
|
||||
addl $32,%edx
|
||||
L013schedule_mangle_last_dec:
|
||||
addl $-16,%edx
|
||||
pxor 336(%ebp),%xmm0
|
||||
call __vpaes_schedule_transform
|
||||
movdqu %xmm0,(%edx)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
ret
|
||||
.private_extern __vpaes_schedule_192_smear
|
||||
.align 4
|
||||
__vpaes_schedule_192_smear:
|
||||
pshufd $128,%xmm6,%xmm1
|
||||
pshufd $254,%xmm7,%xmm0
|
||||
pxor %xmm1,%xmm6
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm6,%xmm0
|
||||
movhlps %xmm1,%xmm6
|
||||
ret
|
||||
.private_extern __vpaes_schedule_round
|
||||
.align 4
|
||||
__vpaes_schedule_round:
|
||||
movdqa 8(%esp),%xmm2
|
||||
pxor %xmm1,%xmm1
|
||||
.byte 102,15,58,15,202,15
|
||||
.byte 102,15,58,15,210,15
|
||||
pxor %xmm1,%xmm7
|
||||
pshufd $255,%xmm0,%xmm0
|
||||
.byte 102,15,58,15,192,1
|
||||
movdqa %xmm2,8(%esp)
|
||||
L_vpaes_schedule_low_round:
|
||||
movdqa %xmm7,%xmm1
|
||||
pslldq $4,%xmm7
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm7,%xmm1
|
||||
pslldq $8,%xmm7
|
||||
pxor %xmm1,%xmm7
|
||||
pxor 336(%ebp),%xmm7
|
||||
movdqa -16(%ebp),%xmm4
|
||||
movdqa -48(%ebp),%xmm5
|
||||
movdqa %xmm4,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movdqa -32(%ebp),%xmm2
|
||||
.byte 102,15,56,0,208
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm5,%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
movdqa %xmm5,%xmm4
|
||||
.byte 102,15,56,0,224
|
||||
pxor %xmm2,%xmm4
|
||||
movdqa %xmm5,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
pxor %xmm0,%xmm2
|
||||
movdqa %xmm5,%xmm3
|
||||
.byte 102,15,56,0,220
|
||||
pxor %xmm1,%xmm3
|
||||
movdqa 32(%ebp),%xmm4
|
||||
.byte 102,15,56,0,226
|
||||
movdqa 48(%ebp),%xmm0
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm7,%xmm0
|
||||
movdqa %xmm0,%xmm7
|
||||
ret
|
||||
.private_extern __vpaes_schedule_transform
|
||||
.align 4
|
||||
__vpaes_schedule_transform:
|
||||
movdqa -16(%ebp),%xmm2
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
movdqa (%ebx),%xmm2
|
||||
.byte 102,15,56,0,208
|
||||
movdqa 16(%ebx),%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
pxor %xmm2,%xmm0
|
||||
ret
|
||||
.private_extern __vpaes_schedule_mangle
|
||||
.align 4
|
||||
__vpaes_schedule_mangle:
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa 128(%ebp),%xmm5
|
||||
testl %edi,%edi
|
||||
jnz L014schedule_mangle_dec
|
||||
addl $16,%edx
|
||||
pxor 336(%ebp),%xmm4
|
||||
.byte 102,15,56,0,229
|
||||
movdqa %xmm4,%xmm3
|
||||
.byte 102,15,56,0,229
|
||||
pxor %xmm4,%xmm3
|
||||
.byte 102,15,56,0,229
|
||||
pxor %xmm4,%xmm3
|
||||
jmp L015schedule_mangle_both
|
||||
.align 4,0x90
|
||||
L014schedule_mangle_dec:
|
||||
movdqa -16(%ebp),%xmm2
|
||||
leal 416(%ebp),%esi
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm4,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm4
|
||||
movdqa (%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
movdqa 16(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
movdqa 32(%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 48(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
movdqa 64(%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 80(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
movdqa 96(%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 112(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
addl $-16,%edx
|
||||
L015schedule_mangle_both:
|
||||
movdqa 256(%ebp,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,217
|
||||
addl $-16,%ecx
|
||||
andl $48,%ecx
|
||||
movdqu %xmm3,(%edx)
|
||||
ret
|
||||
.globl _vpaes_set_encrypt_key
|
||||
.private_extern _vpaes_set_encrypt_key
|
||||
.align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
L_vpaes_set_encrypt_key_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L016pic
|
||||
L016pic:
|
||||
popl %ebx
|
||||
leal _BORINGSSL_function_hit+5-L016pic(%ebx),%ebx
|
||||
movl $1,%edx
|
||||
movb %dl,(%ebx)
|
||||
popl %edx
|
||||
popl %ebx
|
||||
#endif
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%eax
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movl %eax,%ebx
|
||||
shrl $5,%ebx
|
||||
addl $5,%ebx
|
||||
movl %ebx,240(%edx)
|
||||
movl $48,%ecx
|
||||
movl $0,%edi
|
||||
leal L_vpaes_consts+0x30-L017pic_point,%ebp
|
||||
call __vpaes_schedule_core
|
||||
L017pic_point:
|
||||
movl 48(%esp),%esp
|
||||
xorl %eax,%eax
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _vpaes_set_decrypt_key
|
||||
.private_extern _vpaes_set_decrypt_key
|
||||
.align 4
|
||||
_vpaes_set_decrypt_key:
|
||||
L_vpaes_set_decrypt_key_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%eax
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movl %eax,%ebx
|
||||
shrl $5,%ebx
|
||||
addl $5,%ebx
|
||||
movl %ebx,240(%edx)
|
||||
shll $4,%ebx
|
||||
leal 16(%edx,%ebx,1),%edx
|
||||
movl $1,%edi
|
||||
movl %eax,%ecx
|
||||
shrl $1,%ecx
|
||||
andl $32,%ecx
|
||||
xorl $32,%ecx
|
||||
leal L_vpaes_consts+0x30-L018pic_point,%ebp
|
||||
call __vpaes_schedule_core
|
||||
L018pic_point:
|
||||
movl 48(%esp),%esp
|
||||
xorl %eax,%eax
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _vpaes_encrypt
|
||||
.private_extern _vpaes_encrypt
|
||||
.align 4
|
||||
_vpaes_encrypt:
|
||||
L_vpaes_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L019pic
|
||||
L019pic:
|
||||
popl %ebx
|
||||
leal _BORINGSSL_function_hit+4-L019pic(%ebx),%ebx
|
||||
movl $1,%edx
|
||||
movb %dl,(%ebx)
|
||||
popl %edx
|
||||
popl %ebx
|
||||
#endif
|
||||
leal L_vpaes_consts+0x30-L020pic_point,%ebp
|
||||
call __vpaes_preheat
|
||||
L020pic_point:
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%edi
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movdqu (%esi),%xmm0
|
||||
call __vpaes_encrypt_core
|
||||
movdqu %xmm0,(%edi)
|
||||
movl 48(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _vpaes_decrypt
|
||||
.private_extern _vpaes_decrypt
|
||||
.align 4
|
||||
_vpaes_decrypt:
|
||||
L_vpaes_decrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
leal L_vpaes_consts+0x30-L021pic_point,%ebp
|
||||
call __vpaes_preheat
|
||||
L021pic_point:
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%edi
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movdqu (%esi),%xmm0
|
||||
call __vpaes_decrypt_core
|
||||
movdqu %xmm0,(%edi)
|
||||
movl 48(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _vpaes_cbc_encrypt
|
||||
.private_extern _vpaes_cbc_encrypt
|
||||
.align 4
|
||||
_vpaes_cbc_encrypt:
|
||||
L_vpaes_cbc_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%esi
|
||||
movl 24(%esp),%edi
|
||||
movl 28(%esp),%eax
|
||||
movl 32(%esp),%edx
|
||||
subl $16,%eax
|
||||
jc L022cbc_abort
|
||||
leal -56(%esp),%ebx
|
||||
movl 36(%esp),%ebp
|
||||
andl $-16,%ebx
|
||||
movl 40(%esp),%ecx
|
||||
xchgl %esp,%ebx
|
||||
movdqu (%ebp),%xmm1
|
||||
subl %esi,%edi
|
||||
movl %ebx,48(%esp)
|
||||
movl %edi,(%esp)
|
||||
movl %edx,4(%esp)
|
||||
movl %ebp,8(%esp)
|
||||
movl %eax,%edi
|
||||
leal L_vpaes_consts+0x30-L023pic_point,%ebp
|
||||
call __vpaes_preheat
|
||||
L023pic_point:
|
||||
cmpl $0,%ecx
|
||||
je L024cbc_dec_loop
|
||||
jmp L025cbc_enc_loop
|
||||
.align 4,0x90
|
||||
L025cbc_enc_loop:
|
||||
movdqu (%esi),%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
call __vpaes_encrypt_core
|
||||
movl (%esp),%ebx
|
||||
movl 4(%esp),%edx
|
||||
movdqa %xmm0,%xmm1
|
||||
movdqu %xmm0,(%ebx,%esi,1)
|
||||
leal 16(%esi),%esi
|
||||
subl $16,%edi
|
||||
jnc L025cbc_enc_loop
|
||||
jmp L026cbc_done
|
||||
.align 4,0x90
|
||||
L024cbc_dec_loop:
|
||||
movdqu (%esi),%xmm0
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm0,32(%esp)
|
||||
call __vpaes_decrypt_core
|
||||
movl (%esp),%ebx
|
||||
movl 4(%esp),%edx
|
||||
pxor 16(%esp),%xmm0
|
||||
movdqa 32(%esp),%xmm1
|
||||
movdqu %xmm0,(%ebx,%esi,1)
|
||||
leal 16(%esi),%esi
|
||||
subl $16,%edi
|
||||
jnc L024cbc_dec_loop
|
||||
L026cbc_done:
|
||||
movl 8(%esp),%ebx
|
||||
movl 48(%esp),%esp
|
||||
movdqu %xmm1,(%ebx)
|
||||
L022cbc_abort:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
484
third-party/boringssl/apple-x86/crypto/fipsmodule/x86-mont-apple.S
vendored
Normal file
484
third-party/boringssl/apple-x86/crypto/fipsmodule/x86-mont-apple.S
vendored
Normal file
@ -0,0 +1,484 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _bn_mul_mont
|
||||
.private_extern _bn_mul_mont
|
||||
.align 4
|
||||
_bn_mul_mont:
|
||||
L_bn_mul_mont_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
xorl %eax,%eax
|
||||
movl 40(%esp),%edi
|
||||
cmpl $4,%edi
|
||||
jl L000just_leave
|
||||
leal 20(%esp),%esi
|
||||
leal 24(%esp),%edx
|
||||
addl $2,%edi
|
||||
negl %edi
|
||||
leal -32(%esp,%edi,4),%ebp
|
||||
negl %edi
|
||||
movl %ebp,%eax
|
||||
subl %edx,%eax
|
||||
andl $2047,%eax
|
||||
subl %eax,%ebp
|
||||
xorl %ebp,%edx
|
||||
andl $2048,%edx
|
||||
xorl $2048,%edx
|
||||
subl %edx,%ebp
|
||||
andl $-64,%ebp
|
||||
movl %esp,%eax
|
||||
subl %ebp,%eax
|
||||
andl $-4096,%eax
|
||||
movl %esp,%edx
|
||||
leal (%ebp,%eax,1),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja L001page_walk
|
||||
jmp L002page_walk_done
|
||||
.align 4,0x90
|
||||
L001page_walk:
|
||||
leal -4096(%esp),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja L001page_walk
|
||||
L002page_walk_done:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%ebp
|
||||
movl 16(%esi),%esi
|
||||
movl (%esi),%esi
|
||||
movl %eax,4(%esp)
|
||||
movl %ebx,8(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %esi,20(%esp)
|
||||
leal -3(%edi),%ebx
|
||||
movl %edx,24(%esp)
|
||||
call L003PIC_me_up
|
||||
L003PIC_me_up:
|
||||
popl %eax
|
||||
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc L004non_sse2
|
||||
movl $-1,%eax
|
||||
movd %eax,%mm7
|
||||
movl 8(%esp),%esi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebp
|
||||
xorl %edx,%edx
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
movq %mm5,%mm2
|
||||
movq %mm5,%mm0
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
incl %ecx
|
||||
.align 4,0x90
|
||||
L0051st:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
leal 1(%ecx),%ecx
|
||||
cmpl %ebx,%ecx
|
||||
jl L0051st
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm2,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
incl %edx
|
||||
L006outer:
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi,%edx,4),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd 32(%esp),%mm6
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
paddq %mm6,%mm5
|
||||
movq %mm5,%mm0
|
||||
movq %mm5,%mm2
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 36(%esp),%mm6
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
incl %ecx
|
||||
decl %ebx
|
||||
L007inner:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
movd 36(%esp,%ecx,4),%mm6
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
decl %ebx
|
||||
leal 1(%ecx),%ecx
|
||||
jnz L007inner
|
||||
movl %ecx,%ebx
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
movd 36(%esp,%ebx,4),%mm6
|
||||
paddq %mm2,%mm3
|
||||
paddq %mm6,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
leal 1(%edx),%edx
|
||||
cmpl %ebx,%edx
|
||||
jle L006outer
|
||||
emms
|
||||
jmp L008common_tail
|
||||
.align 4,0x90
|
||||
L004non_sse2:
|
||||
movl 8(%esp),%esi
|
||||
leal 1(%ebx),%ebp
|
||||
movl 12(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
movl %esi,%edx
|
||||
andl $1,%ebp
|
||||
subl %edi,%edx
|
||||
leal 4(%edi,%ebx,4),%eax
|
||||
orl %edx,%ebp
|
||||
movl (%edi),%edi
|
||||
jz L009bn_sqr_mont
|
||||
movl %eax,28(%esp)
|
||||
movl (%esi),%eax
|
||||
xorl %edx,%edx
|
||||
.align 4,0x90
|
||||
L010mull:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %eax,%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
movl (%esi,%ecx,4),%eax
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl L010mull
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
movl 20(%esp),%edi
|
||||
addl %ebp,%eax
|
||||
movl 16(%esp),%esi
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
movl %eax,32(%esp,%ebx,4)
|
||||
xorl %ecx,%ecx
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
movl (%esi),%eax
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
incl %ecx
|
||||
jmp L0112ndmadd
|
||||
.align 4,0x90
|
||||
L0121stmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl L0121stmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
movl %ebp,32(%esp,%ebx,4)
|
||||
adcl $0,%ecx
|
||||
movl (%esi),%eax
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
.align 4,0x90
|
||||
L0112ndmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl L0112ndmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
xorl %eax,%eax
|
||||
movl 12(%esp),%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
leal 4(%ecx),%ecx
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl 28(%esp),%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je L008common_tail
|
||||
movl (%ecx),%edi
|
||||
movl 8(%esp),%esi
|
||||
movl %ecx,12(%esp)
|
||||
xorl %ecx,%ecx
|
||||
xorl %edx,%edx
|
||||
movl (%esi),%eax
|
||||
jmp L0121stmadd
|
||||
.align 4,0x90
|
||||
L009bn_sqr_mont:
|
||||
movl %ebx,(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %edi,%eax
|
||||
mull %edi
|
||||
movl %eax,32(%esp)
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
incl %ecx
|
||||
.align 4,0x90
|
||||
L013sqr:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
shrl $31,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %eax,%ebx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl L013sqr
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
imull 32(%esp),%edi
|
||||
shrl $31,%eax
|
||||
movl %ebp,32(%esp,%ecx,4)
|
||||
leal (%eax,%edx,2),%ebp
|
||||
movl (%esi),%eax
|
||||
shrl $31,%edx
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
movl %edx,40(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl %ecx,%ebx
|
||||
adcl $0,%edx
|
||||
movl 4(%esi),%eax
|
||||
movl $1,%ecx
|
||||
.align 4,0x90
|
||||
L0143rdmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl 4(%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 36(%esp,%ecx,4),%ebp
|
||||
leal 2(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl L0143rdmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
movl 12(%esp),%ecx
|
||||
xorl %eax,%eax
|
||||
movl 8(%esp),%esi
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl %ebx,%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je L008common_tail
|
||||
movl 4(%esi,%ecx,4),%edi
|
||||
leal 1(%ecx),%ecx
|
||||
movl %edi,%eax
|
||||
movl %ecx,12(%esp)
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,32(%esp,%ecx,4)
|
||||
xorl %ebp,%ebp
|
||||
cmpl %ebx,%ecx
|
||||
leal 1(%ecx),%ecx
|
||||
je L015sqrlast
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
.align 4,0x90
|
||||
L016sqradd:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal (%eax,%eax,1),%ebp
|
||||
adcl $0,%edx
|
||||
shrl $31,%eax
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%eax
|
||||
addl %ebx,%ebp
|
||||
adcl $0,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %eax,%ebx
|
||||
jle L016sqradd
|
||||
movl %edx,%ebp
|
||||
addl %edx,%edx
|
||||
shrl $31,%ebp
|
||||
addl %ebx,%edx
|
||||
adcl $0,%ebp
|
||||
L015sqrlast:
|
||||
movl 20(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
imull 32(%esp),%edi
|
||||
addl 32(%esp,%ecx,4),%edx
|
||||
movl (%esi),%eax
|
||||
adcl $0,%ebp
|
||||
movl %edx,32(%esp,%ecx,4)
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
leal -1(%ecx),%ebx
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
movl 4(%esi),%eax
|
||||
jmp L0143rdmadd
|
||||
.align 4,0x90
|
||||
L008common_tail:
|
||||
movl 16(%esp),%ebp
|
||||
movl 4(%esp),%edi
|
||||
leal 32(%esp),%esi
|
||||
movl (%esi),%eax
|
||||
movl %ebx,%ecx
|
||||
xorl %edx,%edx
|
||||
.align 4,0x90
|
||||
L017sub:
|
||||
sbbl (%ebp,%edx,4),%eax
|
||||
movl %eax,(%edi,%edx,4)
|
||||
decl %ecx
|
||||
movl 4(%esi,%edx,4),%eax
|
||||
leal 1(%edx),%edx
|
||||
jge L017sub
|
||||
sbbl $0,%eax
|
||||
movl $-1,%edx
|
||||
xorl %eax,%edx
|
||||
jmp L018copy
|
||||
.align 4,0x90
|
||||
L018copy:
|
||||
movl 32(%esp,%ebx,4),%esi
|
||||
movl (%edi,%ebx,4),%ebp
|
||||
movl %ecx,32(%esp,%ebx,4)
|
||||
andl %eax,%esi
|
||||
andl %edx,%ebp
|
||||
orl %esi,%ebp
|
||||
movl %ebp,(%edi,%ebx,4)
|
||||
decl %ebx
|
||||
jge L018copy
|
||||
movl 24(%esp),%esp
|
||||
movl $1,%eax
|
||||
L000just_leave:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
|
||||
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
|
||||
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
.section __IMPORT,__pointers,non_lazy_symbol_pointers
|
||||
L_OPENSSL_ia32cap_P$non_lazy_ptr:
|
||||
.indirect_symbol _OPENSSL_ia32cap_P
|
||||
.long 0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
168
third-party/boringssl/apple-x86/crypto/test/trampoline-x86-apple.S
vendored
Normal file
168
third-party/boringssl/apple-x86/crypto/test/trampoline-x86-apple.S
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
.text
|
||||
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
L_abi_test_trampoline_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 24(%esp),%ecx
|
||||
movl (%ecx),%esi
|
||||
movl 4(%ecx),%edi
|
||||
movl 8(%ecx),%ebx
|
||||
movl 12(%ecx),%ebp
|
||||
subl $44,%esp
|
||||
movl 72(%esp),%eax
|
||||
xorl %ecx,%ecx
|
||||
L000loop:
|
||||
cmpl 76(%esp),%ecx
|
||||
jae L001loop_done
|
||||
movl (%eax,%ecx,4),%edx
|
||||
movl %edx,(%esp,%ecx,4)
|
||||
addl $1,%ecx
|
||||
jmp L000loop
|
||||
L001loop_done:
|
||||
call *64(%esp)
|
||||
addl $44,%esp
|
||||
movl 24(%esp),%ecx
|
||||
movl %esi,(%ecx)
|
||||
movl %edi,4(%ecx)
|
||||
movl %ebx,8(%ecx)
|
||||
movl %ebp,12(%ecx)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _abi_test_get_and_clear_direction_flag
|
||||
.private_extern _abi_test_get_and_clear_direction_flag
|
||||
.align 4
|
||||
_abi_test_get_and_clear_direction_flag:
|
||||
L_abi_test_get_and_clear_direction_flag_begin:
|
||||
pushfl
|
||||
popl %eax
|
||||
andl $1024,%eax
|
||||
shrl $10,%eax
|
||||
cld
|
||||
ret
|
||||
.globl _abi_test_set_direction_flag
|
||||
.private_extern _abi_test_set_direction_flag
|
||||
.align 4
|
||||
_abi_test_set_direction_flag:
|
||||
L_abi_test_set_direction_flag_begin:
|
||||
std
|
||||
ret
|
||||
.globl _abi_test_clobber_eax
|
||||
.private_extern _abi_test_clobber_eax
|
||||
.align 4
|
||||
_abi_test_clobber_eax:
|
||||
L_abi_test_clobber_eax_begin:
|
||||
xorl %eax,%eax
|
||||
ret
|
||||
.globl _abi_test_clobber_ebx
|
||||
.private_extern _abi_test_clobber_ebx
|
||||
.align 4
|
||||
_abi_test_clobber_ebx:
|
||||
L_abi_test_clobber_ebx_begin:
|
||||
xorl %ebx,%ebx
|
||||
ret
|
||||
.globl _abi_test_clobber_ecx
|
||||
.private_extern _abi_test_clobber_ecx
|
||||
.align 4
|
||||
_abi_test_clobber_ecx:
|
||||
L_abi_test_clobber_ecx_begin:
|
||||
xorl %ecx,%ecx
|
||||
ret
|
||||
.globl _abi_test_clobber_edx
|
||||
.private_extern _abi_test_clobber_edx
|
||||
.align 4
|
||||
_abi_test_clobber_edx:
|
||||
L_abi_test_clobber_edx_begin:
|
||||
xorl %edx,%edx
|
||||
ret
|
||||
.globl _abi_test_clobber_edi
|
||||
.private_extern _abi_test_clobber_edi
|
||||
.align 4
|
||||
_abi_test_clobber_edi:
|
||||
L_abi_test_clobber_edi_begin:
|
||||
xorl %edi,%edi
|
||||
ret
|
||||
.globl _abi_test_clobber_esi
|
||||
.private_extern _abi_test_clobber_esi
|
||||
.align 4
|
||||
_abi_test_clobber_esi:
|
||||
L_abi_test_clobber_esi_begin:
|
||||
xorl %esi,%esi
|
||||
ret
|
||||
.globl _abi_test_clobber_ebp
|
||||
.private_extern _abi_test_clobber_ebp
|
||||
.align 4
|
||||
_abi_test_clobber_ebp:
|
||||
L_abi_test_clobber_ebp_begin:
|
||||
xorl %ebp,%ebp
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm0
|
||||
.private_extern _abi_test_clobber_xmm0
|
||||
.align 4
|
||||
_abi_test_clobber_xmm0:
|
||||
L_abi_test_clobber_xmm0_begin:
|
||||
pxor %xmm0,%xmm0
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm1
|
||||
.private_extern _abi_test_clobber_xmm1
|
||||
.align 4
|
||||
_abi_test_clobber_xmm1:
|
||||
L_abi_test_clobber_xmm1_begin:
|
||||
pxor %xmm1,%xmm1
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm2
|
||||
.private_extern _abi_test_clobber_xmm2
|
||||
.align 4
|
||||
_abi_test_clobber_xmm2:
|
||||
L_abi_test_clobber_xmm2_begin:
|
||||
pxor %xmm2,%xmm2
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm3
|
||||
.private_extern _abi_test_clobber_xmm3
|
||||
.align 4
|
||||
_abi_test_clobber_xmm3:
|
||||
L_abi_test_clobber_xmm3_begin:
|
||||
pxor %xmm3,%xmm3
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm4
|
||||
.private_extern _abi_test_clobber_xmm4
|
||||
.align 4
|
||||
_abi_test_clobber_xmm4:
|
||||
L_abi_test_clobber_xmm4_begin:
|
||||
pxor %xmm4,%xmm4
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm5
|
||||
.private_extern _abi_test_clobber_xmm5
|
||||
.align 4
|
||||
_abi_test_clobber_xmm5:
|
||||
L_abi_test_clobber_xmm5_begin:
|
||||
pxor %xmm5,%xmm5
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm6
|
||||
.private_extern _abi_test_clobber_xmm6
|
||||
.align 4
|
||||
_abi_test_clobber_xmm6:
|
||||
L_abi_test_clobber_xmm6_begin:
|
||||
pxor %xmm6,%xmm6
|
||||
ret
|
||||
.globl _abi_test_clobber_xmm7
|
||||
.private_extern _abi_test_clobber_xmm7
|
||||
.align 4
|
||||
_abi_test_clobber_xmm7:
|
||||
L_abi_test_clobber_xmm7_begin:
|
||||
pxor %xmm7,%xmm7
|
||||
ret
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
|
||||
@ -1,20 +1,12 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$zero:
|
||||
.long 0,0,0,0
|
||||
@ -44,18 +36,14 @@ L$incz:
|
||||
L$sixteen:
|
||||
.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
|
||||
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.globl _ChaCha20_ctr32
|
||||
.private_extern _ChaCha20_ctr32
|
||||
.text
|
||||
.globl _ChaCha20_ctr32_nohw
|
||||
.private_extern _ChaCha20_ctr32_nohw
|
||||
|
||||
.p2align 6
|
||||
_ChaCha20_ctr32:
|
||||
|
||||
cmpq $0,%rdx
|
||||
je L$no_data
|
||||
movq _OPENSSL_ia32cap_P+4(%rip),%r10
|
||||
testl $512,%r10d
|
||||
jnz L$ChaCha20_ssse3
|
||||
_ChaCha20_ctr32_nohw:
|
||||
|
||||
_CET_ENDBR
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
@ -324,20 +312,18 @@ L$done:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$no_data:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _ChaCha20_ctr32_ssse3
|
||||
.private_extern _ChaCha20_ctr32_ssse3
|
||||
|
||||
.p2align 5
|
||||
ChaCha20_ssse3:
|
||||
L$ChaCha20_ssse3:
|
||||
_ChaCha20_ctr32_ssse3:
|
||||
|
||||
_CET_ENDBR
|
||||
movq %rsp,%r9
|
||||
|
||||
cmpq $128,%rdx
|
||||
ja L$ChaCha20_4x
|
||||
|
||||
L$do_sse3_after_all:
|
||||
subq $64+8,%rsp
|
||||
movdqa L$sigma(%rip),%xmm0
|
||||
movdqu (%rcx),%xmm1
|
||||
@ -461,28 +447,19 @@ L$done_ssse3:
|
||||
leaq (%r9),%rsp
|
||||
|
||||
L$ssse3_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _ChaCha20_ctr32_ssse3_4x
|
||||
.private_extern _ChaCha20_ctr32_ssse3_4x
|
||||
|
||||
.p2align 5
|
||||
ChaCha20_4x:
|
||||
L$ChaCha20_4x:
|
||||
_ChaCha20_ctr32_ssse3_4x:
|
||||
|
||||
_CET_ENDBR
|
||||
movq %rsp,%r9
|
||||
|
||||
movq %r10,%r11
|
||||
shrq $32,%r10
|
||||
testq $32,%r10
|
||||
jnz L$ChaCha20_8x
|
||||
cmpq $192,%rdx
|
||||
ja L$proceed4x
|
||||
|
||||
andq $71303168,%r11
|
||||
cmpq $4194304,%r11
|
||||
je L$do_sse3_after_all
|
||||
|
||||
L$proceed4x:
|
||||
subq $0x140+8,%rsp
|
||||
movdqa L$sigma(%rip),%xmm11
|
||||
movdqu (%rcx),%xmm15
|
||||
@ -1013,14 +990,16 @@ L$done4x:
|
||||
leaq (%r9),%rsp
|
||||
|
||||
L$4x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _ChaCha20_ctr32_avx2
|
||||
.private_extern _ChaCha20_ctr32_avx2
|
||||
|
||||
.p2align 5
|
||||
ChaCha20_8x:
|
||||
L$ChaCha20_8x:
|
||||
_ChaCha20_ctr32_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
movq %rsp,%r9
|
||||
|
||||
subq $0x280+8,%rsp
|
||||
@ -1619,7 +1598,7 @@ L$done8x:
|
||||
leaq (%r9),%rsp
|
||||
|
||||
L$8x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,17 +1,10 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.data
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.section __DATA,__const
|
||||
|
||||
.p2align 4
|
||||
one:
|
||||
@ -69,7 +62,7 @@ GFMUL:
|
||||
vpxor %xmm4,%xmm3,%xmm2
|
||||
|
||||
vpxor %xmm5,%xmm2,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aesgcmsiv_htable_init
|
||||
@ -78,6 +71,7 @@ GFMUL:
|
||||
.p2align 4
|
||||
_aesgcmsiv_htable_init:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqa (%rsi),%xmm0
|
||||
vmovdqa %xmm0,%xmm1
|
||||
vmovdqa %xmm0,(%rdi)
|
||||
@ -95,7 +89,7 @@ _aesgcmsiv_htable_init:
|
||||
vmovdqa %xmm0,96(%rdi)
|
||||
call GFMUL
|
||||
vmovdqa %xmm0,112(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aesgcmsiv_htable6_init
|
||||
@ -104,6 +98,7 @@ _aesgcmsiv_htable_init:
|
||||
.p2align 4
|
||||
_aesgcmsiv_htable6_init:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqa (%rsi),%xmm0
|
||||
vmovdqa %xmm0,%xmm1
|
||||
vmovdqa %xmm0,(%rdi)
|
||||
@ -117,7 +112,7 @@ _aesgcmsiv_htable6_init:
|
||||
vmovdqa %xmm0,64(%rdi)
|
||||
call GFMUL
|
||||
vmovdqa %xmm0,80(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aesgcmsiv_htable_polyval
|
||||
@ -126,9 +121,10 @@ _aesgcmsiv_htable6_init:
|
||||
.p2align 4
|
||||
_aesgcmsiv_htable_polyval:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %rdx,%rdx
|
||||
jnz L$htable_polyval_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$htable_polyval_start:
|
||||
vzeroall
|
||||
@ -334,7 +330,7 @@ L$htable_polyval_out:
|
||||
|
||||
vmovdqu %xmm1,(%rcx)
|
||||
vzeroupper
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aesgcmsiv_polyval_horner
|
||||
@ -343,9 +339,10 @@ L$htable_polyval_out:
|
||||
.p2align 4
|
||||
_aesgcmsiv_polyval_horner:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %rcx,%rcx
|
||||
jnz L$polyval_horner_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$polyval_horner_start:
|
||||
|
||||
@ -367,7 +364,7 @@ L$polyval_horner_loop:
|
||||
|
||||
|
||||
vmovdqa %xmm0,(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes128gcmsiv_aes_ks
|
||||
@ -376,6 +373,7 @@ L$polyval_horner_loop:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_aes_ks:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqu (%rdi),%xmm1
|
||||
vmovdqa %xmm1,(%rsi)
|
||||
|
||||
@ -423,7 +421,7 @@ L$ks128_loop:
|
||||
vpxor %xmm3,%xmm1,%xmm1
|
||||
vpxor %xmm2,%xmm1,%xmm1
|
||||
vmovdqa %xmm1,32(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes256gcmsiv_aes_ks
|
||||
@ -432,6 +430,7 @@ L$ks128_loop:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_aes_ks:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqu (%rdi),%xmm1
|
||||
vmovdqu 16(%rdi),%xmm3
|
||||
vmovdqa %xmm1,(%rsi)
|
||||
@ -471,7 +470,7 @@ L$ks256_loop:
|
||||
vpxor %xmm4,%xmm1,%xmm1
|
||||
vpxor %xmm2,%xmm1,%xmm1
|
||||
vmovdqa %xmm1,32(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
.globl _aes128gcmsiv_aes_ks_enc_x1
|
||||
.private_extern _aes128gcmsiv_aes_ks_enc_x1
|
||||
@ -479,6 +478,7 @@ L$ks256_loop:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_aes_ks_enc_x1:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqa (%rcx),%xmm1
|
||||
vmovdqa 0(%rdi),%xmm4
|
||||
|
||||
@ -612,7 +612,7 @@ _aes128gcmsiv_aes_ks_enc_x1:
|
||||
|
||||
|
||||
vmovdqa %xmm4,0(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes128gcmsiv_kdf
|
||||
@ -621,6 +621,7 @@ _aes128gcmsiv_aes_ks_enc_x1:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_kdf:
|
||||
|
||||
_CET_ENDBR
|
||||
|
||||
|
||||
|
||||
@ -705,7 +706,7 @@ _aes128gcmsiv_kdf:
|
||||
vmovdqa %xmm10,16(%rsi)
|
||||
vmovdqa %xmm11,32(%rsi)
|
||||
vmovdqa %xmm12,48(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes128gcmsiv_enc_msg_x4
|
||||
@ -714,9 +715,10 @@ _aes128gcmsiv_kdf:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_enc_msg_x4:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %r8,%r8
|
||||
jnz L$128_enc_msg_x4_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$128_enc_msg_x4_start:
|
||||
pushq %r12
|
||||
@ -880,7 +882,7 @@ L$128_enc_msg_x4_out:
|
||||
|
||||
popq %r12
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes128gcmsiv_enc_msg_x8
|
||||
@ -889,9 +891,10 @@ L$128_enc_msg_x4_out:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_enc_msg_x8:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %r8,%r8
|
||||
jnz L$128_enc_msg_x8_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$128_enc_msg_x8_start:
|
||||
pushq %r12
|
||||
@ -1135,7 +1138,7 @@ L$128_enc_msg_x8_out:
|
||||
|
||||
popq %r12
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes128gcmsiv_dec
|
||||
@ -1144,21 +1147,23 @@ L$128_enc_msg_x8_out:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_dec:
|
||||
|
||||
_CET_ENDBR
|
||||
testq $~15,%r9
|
||||
jnz L$128_dec_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$128_dec_start:
|
||||
vzeroupper
|
||||
vmovdqa (%rdx),%xmm0
|
||||
|
||||
|
||||
vmovdqu 16(%rdx),%xmm15
|
||||
vpor OR_MASK(%rip),%xmm15,%xmm15
|
||||
movq %rdx,%rax
|
||||
|
||||
leaq 32(%rax),%rax
|
||||
leaq 32(%rcx),%rcx
|
||||
|
||||
|
||||
vmovdqu (%rdi,%r9,1),%xmm15
|
||||
vpor OR_MASK(%rip),%xmm15,%xmm15
|
||||
andq $~15,%r9
|
||||
|
||||
|
||||
@ -1627,7 +1632,7 @@ L$128_dec_loop2:
|
||||
|
||||
L$128_dec_out:
|
||||
vmovdqu %xmm0,(%rdx)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes128gcmsiv_ecb_enc_block
|
||||
@ -1636,6 +1641,7 @@ L$128_dec_out:
|
||||
.p2align 4
|
||||
_aes128gcmsiv_ecb_enc_block:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqa (%rdi),%xmm1
|
||||
|
||||
vpxor (%rdx),%xmm1,%xmm1
|
||||
@ -1652,7 +1658,7 @@ _aes128gcmsiv_ecb_enc_block:
|
||||
|
||||
vmovdqa %xmm1,(%rsi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes256gcmsiv_aes_ks_enc_x1
|
||||
@ -1661,6 +1667,7 @@ _aes128gcmsiv_ecb_enc_block:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_aes_ks_enc_x1:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqa con1(%rip),%xmm0
|
||||
vmovdqa mask(%rip),%xmm15
|
||||
vmovdqa (%rdi),%xmm8
|
||||
@ -1835,7 +1842,7 @@ _aes256gcmsiv_aes_ks_enc_x1:
|
||||
vmovdqu %xmm1,224(%rdx)
|
||||
|
||||
vmovdqa %xmm8,(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes256gcmsiv_ecb_enc_block
|
||||
@ -1844,6 +1851,7 @@ _aes256gcmsiv_aes_ks_enc_x1:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_ecb_enc_block:
|
||||
|
||||
_CET_ENDBR
|
||||
vmovdqa (%rdi),%xmm1
|
||||
vpxor (%rdx),%xmm1,%xmm1
|
||||
vaesenc 16(%rdx),%xmm1,%xmm1
|
||||
@ -1861,7 +1869,7 @@ _aes256gcmsiv_ecb_enc_block:
|
||||
vaesenc 208(%rdx),%xmm1,%xmm1
|
||||
vaesenclast 224(%rdx),%xmm1,%xmm1
|
||||
vmovdqa %xmm1,(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes256gcmsiv_enc_msg_x4
|
||||
@ -1870,9 +1878,10 @@ _aes256gcmsiv_ecb_enc_block:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_enc_msg_x4:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %r8,%r8
|
||||
jnz L$256_enc_msg_x4_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$256_enc_msg_x4_start:
|
||||
movq %r8,%r10
|
||||
@ -2062,7 +2071,7 @@ L$256_enc_msg_x4_loop2:
|
||||
jne L$256_enc_msg_x4_loop2
|
||||
|
||||
L$256_enc_msg_x4_out:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes256gcmsiv_enc_msg_x8
|
||||
@ -2071,9 +2080,10 @@ L$256_enc_msg_x4_out:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_enc_msg_x8:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %r8,%r8
|
||||
jnz L$256_enc_msg_x8_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$256_enc_msg_x8_start:
|
||||
|
||||
@ -2350,7 +2360,7 @@ L$256_enc_msg_x8_loop2:
|
||||
jnz L$256_enc_msg_x8_loop2
|
||||
|
||||
L$256_enc_msg_x8_out:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2360,21 +2370,23 @@ L$256_enc_msg_x8_out:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_dec:
|
||||
|
||||
_CET_ENDBR
|
||||
testq $~15,%r9
|
||||
jnz L$256_dec_start
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$256_dec_start:
|
||||
vzeroupper
|
||||
vmovdqa (%rdx),%xmm0
|
||||
|
||||
|
||||
vmovdqu 16(%rdx),%xmm15
|
||||
vpor OR_MASK(%rip),%xmm15,%xmm15
|
||||
movq %rdx,%rax
|
||||
|
||||
leaq 32(%rax),%rax
|
||||
leaq 32(%rcx),%rcx
|
||||
|
||||
|
||||
vmovdqu (%rdi,%r9,1),%xmm15
|
||||
vpor OR_MASK(%rip),%xmm15,%xmm15
|
||||
andq $~15,%r9
|
||||
|
||||
|
||||
@ -2911,7 +2923,7 @@ L$256_dec_loop2:
|
||||
|
||||
L$256_dec_out:
|
||||
vmovdqu %xmm0,(%rdx)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes256gcmsiv_kdf
|
||||
@ -2920,6 +2932,7 @@ L$256_dec_out:
|
||||
.p2align 4
|
||||
_aes256gcmsiv_kdf:
|
||||
|
||||
_CET_ENDBR
|
||||
|
||||
|
||||
|
||||
@ -3062,7 +3075,7 @@ _aes256gcmsiv_kdf:
|
||||
vmovdqa %xmm11,48(%rsi)
|
||||
vmovdqa %xmm12,64(%rsi)
|
||||
vmovdqa %xmm13,80(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -221,7 +214,7 @@ L$resume_ctr32:
|
||||
movbeq 0(%r14),%r12
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 160-128(%rcx),%xmm1
|
||||
cmpl $11,%ebp
|
||||
cmpl $11,%r10d
|
||||
jb L$enc_tail
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
@ -305,6 +298,9 @@ L$enc_tail:
|
||||
vpaddb %xmm2,%xmm1,%xmm0
|
||||
movq %r13,112+8(%rsp)
|
||||
leaq 96(%rdi),%rdi
|
||||
|
||||
prefetcht0 512(%rdi)
|
||||
prefetcht0 576(%rdi)
|
||||
vaesenclast %xmm5,%xmm11,%xmm11
|
||||
vpaddb %xmm2,%xmm0,%xmm5
|
||||
movq %r12,120+8(%rsp)
|
||||
@ -317,7 +313,7 @@ L$enc_tail:
|
||||
vaesenclast %xmm3,%xmm14,%xmm14
|
||||
vpaddb %xmm2,%xmm7,%xmm3
|
||||
|
||||
addq $0x60,%r10
|
||||
addq $0x60,%rax
|
||||
subq $0x6,%rdx
|
||||
jc L$6x_done
|
||||
|
||||
@ -340,7 +336,7 @@ L$6x_done:
|
||||
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||
vpxor %xmm4,%xmm8,%xmm8
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aesni_gcm_decrypt
|
||||
@ -349,41 +345,50 @@ L$6x_done:
|
||||
.p2align 5
|
||||
_aesni_gcm_decrypt:
|
||||
|
||||
xorq %r10,%r10
|
||||
|
||||
_CET_ENDBR
|
||||
xorq %rax,%rax
|
||||
|
||||
|
||||
|
||||
cmpq $0x60,%rdx
|
||||
jb L$gcm_dec_abort
|
||||
|
||||
leaq (%rsp),%rax
|
||||
pushq %rbp
|
||||
|
||||
|
||||
movq %rsp,%rbp
|
||||
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
|
||||
pushq %r12
|
||||
|
||||
|
||||
pushq %r13
|
||||
|
||||
|
||||
pushq %r14
|
||||
|
||||
|
||||
pushq %r15
|
||||
|
||||
|
||||
vzeroupper
|
||||
|
||||
movq 16(%rbp),%r12
|
||||
vmovdqu (%r8),%xmm1
|
||||
addq $-128,%rsp
|
||||
movl 12(%r8),%ebx
|
||||
leaq L$bswap_mask(%rip),%r11
|
||||
leaq -128(%rcx),%r14
|
||||
movq $0xf80,%r15
|
||||
vmovdqu (%r9),%xmm8
|
||||
vmovdqu (%r12),%xmm8
|
||||
andq $-128,%rsp
|
||||
vmovdqu (%r11),%xmm0
|
||||
leaq 128(%rcx),%rcx
|
||||
leaq 32+32(%r9),%r9
|
||||
movl 240-128(%rcx),%ebp
|
||||
leaq 32(%r9),%r9
|
||||
movl 240-128(%rcx),%r10d
|
||||
vpshufb %xmm0,%xmm8,%xmm8
|
||||
|
||||
andq %r15,%r14
|
||||
@ -396,7 +401,7 @@ _aesni_gcm_decrypt:
|
||||
L$dec_no_key_aliasing:
|
||||
|
||||
vmovdqu 80(%rdi),%xmm7
|
||||
leaq (%rdi),%r14
|
||||
movq %rdi,%r14
|
||||
vmovdqu 64(%rdi),%xmm4
|
||||
|
||||
|
||||
@ -409,7 +414,7 @@ L$dec_no_key_aliasing:
|
||||
|
||||
vmovdqu 48(%rdi),%xmm5
|
||||
shrq $4,%rdx
|
||||
xorq %r10,%r10
|
||||
xorq %rax,%rax
|
||||
vmovdqu 32(%rdi),%xmm6
|
||||
vpshufb %xmm0,%xmm7,%xmm7
|
||||
vmovdqu 16(%rdi),%xmm2
|
||||
@ -427,6 +432,7 @@ L$dec_no_key_aliasing:
|
||||
|
||||
call _aesni_ctr32_ghash_6x
|
||||
|
||||
movq 16(%rbp),%r12
|
||||
vmovups %xmm9,-96(%rsi)
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
@ -435,26 +441,26 @@ L$dec_no_key_aliasing:
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
|
||||
vpshufb (%r11),%xmm8,%xmm8
|
||||
vmovdqu %xmm8,-64(%r9)
|
||||
vmovdqu %xmm8,(%r12)
|
||||
|
||||
vzeroupper
|
||||
movq -48(%rax),%r15
|
||||
leaq -40(%rbp),%rsp
|
||||
|
||||
movq -40(%rax),%r14
|
||||
popq %r15
|
||||
|
||||
movq -32(%rax),%r13
|
||||
popq %r14
|
||||
|
||||
movq -24(%rax),%r12
|
||||
popq %r13
|
||||
|
||||
movq -16(%rax),%rbp
|
||||
popq %r12
|
||||
|
||||
movq -8(%rax),%rbx
|
||||
popq %rbx
|
||||
|
||||
leaq (%rax),%rsp
|
||||
popq %rbp
|
||||
|
||||
L$gcm_dec_abort:
|
||||
movq %r10,%rax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
@ -463,7 +469,7 @@ _aesni_ctr32_6x:
|
||||
|
||||
vmovdqu 0-128(%rcx),%xmm4
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
leaq -1(%rbp),%r13
|
||||
leaq -1(%r10),%r13
|
||||
vmovups 16-128(%rcx),%xmm15
|
||||
leaq 32-128(%rcx),%r12
|
||||
vpxor %xmm4,%xmm1,%xmm9
|
||||
@ -524,7 +530,7 @@ L$oop_ctr32:
|
||||
vmovups %xmm14,80(%rsi)
|
||||
leaq 96(%rsi),%rsi
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
.p2align 5
|
||||
L$handle_ctr32_2:
|
||||
vpshufb %xmm0,%xmm1,%xmm6
|
||||
@ -556,11 +562,13 @@ L$handle_ctr32_2:
|
||||
.p2align 5
|
||||
_aesni_gcm_encrypt:
|
||||
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+2(%rip)
|
||||
#endif
|
||||
xorq %r10,%r10
|
||||
xorq %rax,%rax
|
||||
|
||||
|
||||
|
||||
@ -568,20 +576,26 @@ _aesni_gcm_encrypt:
|
||||
cmpq $288,%rdx
|
||||
jb L$gcm_enc_abort
|
||||
|
||||
leaq (%rsp),%rax
|
||||
pushq %rbp
|
||||
|
||||
|
||||
movq %rsp,%rbp
|
||||
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
|
||||
pushq %r12
|
||||
|
||||
|
||||
pushq %r13
|
||||
|
||||
|
||||
pushq %r14
|
||||
|
||||
|
||||
pushq %r15
|
||||
|
||||
|
||||
vzeroupper
|
||||
|
||||
vmovdqu (%r8),%xmm1
|
||||
@ -593,7 +607,7 @@ _aesni_gcm_encrypt:
|
||||
leaq 128(%rcx),%rcx
|
||||
vmovdqu (%r11),%xmm0
|
||||
andq $-128,%rsp
|
||||
movl 240-128(%rcx),%ebp
|
||||
movl 240-128(%rcx),%r10d
|
||||
|
||||
andq %r15,%r14
|
||||
andq %rsp,%r15
|
||||
@ -604,7 +618,7 @@ _aesni_gcm_encrypt:
|
||||
subq %r15,%rsp
|
||||
L$enc_no_key_aliasing:
|
||||
|
||||
leaq (%rsi),%r14
|
||||
movq %rsi,%r14
|
||||
|
||||
|
||||
|
||||
@ -632,10 +646,11 @@ L$enc_no_key_aliasing:
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
|
||||
vmovdqu (%r9),%xmm8
|
||||
leaq 32+32(%r9),%r9
|
||||
movq 16(%rbp),%r12
|
||||
leaq 32(%r9),%r9
|
||||
vmovdqu (%r12),%xmm8
|
||||
subq $12,%rdx
|
||||
movq $192,%r10
|
||||
movq $192,%rax
|
||||
vpshufb %xmm0,%xmm8,%xmm8
|
||||
|
||||
call _aesni_ctr32_ghash_6x
|
||||
@ -811,29 +826,31 @@ L$enc_no_key_aliasing:
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||
vpxor %xmm7,%xmm2,%xmm2
|
||||
vpxor %xmm2,%xmm8,%xmm8
|
||||
movq 16(%rbp),%r12
|
||||
vpshufb (%r11),%xmm8,%xmm8
|
||||
vmovdqu %xmm8,-64(%r9)
|
||||
vmovdqu %xmm8,(%r12)
|
||||
|
||||
vzeroupper
|
||||
movq -48(%rax),%r15
|
||||
leaq -40(%rbp),%rsp
|
||||
|
||||
movq -40(%rax),%r14
|
||||
popq %r15
|
||||
|
||||
movq -32(%rax),%r13
|
||||
popq %r14
|
||||
|
||||
movq -24(%rax),%r12
|
||||
popq %r13
|
||||
|
||||
movq -16(%rax),%rbp
|
||||
popq %r12
|
||||
|
||||
movq -8(%rax),%rbx
|
||||
popq %rbx
|
||||
|
||||
leaq (%rax),%rsp
|
||||
popq %rbp
|
||||
|
||||
L$gcm_enc_abort:
|
||||
movq %r10,%rax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$bswap_mask:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
@ -847,4 +864,5 @@ L$one_lsb:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.p2align 6
|
||||
.text
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
@ -19,6 +12,7 @@
|
||||
.p2align 4
|
||||
_aes_hw_encrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+1(%rip)
|
||||
@ -40,7 +34,7 @@ L$oop_enc1_1:
|
||||
pxor %xmm1,%xmm1
|
||||
movups %xmm2,(%rsi)
|
||||
pxor %xmm2,%xmm2
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -50,6 +44,7 @@ L$oop_enc1_1:
|
||||
.p2align 4
|
||||
_aes_hw_decrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
movups (%rdi),%xmm2
|
||||
movl 240(%rdx),%eax
|
||||
movups (%rdx),%xmm0
|
||||
@ -67,7 +62,7 @@ L$oop_dec1_2:
|
||||
pxor %xmm1,%xmm1
|
||||
movups %xmm2,(%rsi)
|
||||
pxor %xmm2,%xmm2
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -98,7 +93,7 @@ L$enc_loop2:
|
||||
.byte 102,15,56,220,217
|
||||
.byte 102,15,56,221,208
|
||||
.byte 102,15,56,221,216
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -129,7 +124,7 @@ L$dec_loop2:
|
||||
.byte 102,15,56,222,217
|
||||
.byte 102,15,56,223,208
|
||||
.byte 102,15,56,223,216
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -165,7 +160,7 @@ L$enc_loop3:
|
||||
.byte 102,15,56,221,208
|
||||
.byte 102,15,56,221,216
|
||||
.byte 102,15,56,221,224
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -201,7 +196,7 @@ L$dec_loop3:
|
||||
.byte 102,15,56,223,208
|
||||
.byte 102,15,56,223,216
|
||||
.byte 102,15,56,223,224
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -243,7 +238,7 @@ L$enc_loop4:
|
||||
.byte 102,15,56,221,216
|
||||
.byte 102,15,56,221,224
|
||||
.byte 102,15,56,221,232
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -285,7 +280,7 @@ L$dec_loop4:
|
||||
.byte 102,15,56,223,216
|
||||
.byte 102,15,56,223,224
|
||||
.byte 102,15,56,223,232
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -341,7 +336,7 @@ L$enc_loop6_enter:
|
||||
.byte 102,15,56,221,232
|
||||
.byte 102,15,56,221,240
|
||||
.byte 102,15,56,221,248
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -397,7 +392,7 @@ L$dec_loop6_enter:
|
||||
.byte 102,15,56,223,232
|
||||
.byte 102,15,56,223,240
|
||||
.byte 102,15,56,223,248
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -463,7 +458,7 @@ L$enc_loop8_enter:
|
||||
.byte 102,15,56,221,248
|
||||
.byte 102,68,15,56,221,192
|
||||
.byte 102,68,15,56,221,200
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -529,7 +524,7 @@ L$dec_loop8_enter:
|
||||
.byte 102,15,56,223,248
|
||||
.byte 102,68,15,56,223,192
|
||||
.byte 102,68,15,56,223,200
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes_hw_ecb_encrypt
|
||||
@ -538,6 +533,7 @@ L$dec_loop8_enter:
|
||||
.p2align 4
|
||||
_aes_hw_ecb_encrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
andq $-16,%rdx
|
||||
jz L$ecb_ret
|
||||
|
||||
@ -874,7 +870,7 @@ L$ecb_dec_six:
|
||||
L$ecb_ret:
|
||||
xorps %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes_hw_ctr32_encrypt_blocks
|
||||
@ -883,6 +879,7 @@ L$ecb_ret:
|
||||
.p2align 4
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,_BORINGSSL_function_hit(%rip)
|
||||
#endif
|
||||
@ -972,10 +969,7 @@ L$ctr32_bulk:
|
||||
leaq 7(%r8),%r9
|
||||
movl %r10d,96+12(%rsp)
|
||||
bswapl %r9d
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r10
|
||||
movl 4(%r10),%r10d
|
||||
xorl %ebp,%r9d
|
||||
andl $71303168,%r10d
|
||||
movl %r9d,112+12(%rsp)
|
||||
|
||||
movups 16(%rcx),%xmm1
|
||||
@ -986,104 +980,10 @@ L$ctr32_bulk:
|
||||
cmpq $8,%rdx
|
||||
jb L$ctr32_tail
|
||||
|
||||
subq $6,%rdx
|
||||
cmpl $4194304,%r10d
|
||||
je L$ctr32_6x
|
||||
|
||||
leaq 128(%rcx),%rcx
|
||||
subq $2,%rdx
|
||||
subq $8,%rdx
|
||||
jmp L$ctr32_loop8
|
||||
|
||||
.p2align 4
|
||||
L$ctr32_6x:
|
||||
shll $4,%eax
|
||||
movl $48,%r10d
|
||||
bswapl %ebp
|
||||
leaq 32(%rcx,%rax,1),%rcx
|
||||
subq %rax,%r10
|
||||
jmp L$ctr32_loop6
|
||||
|
||||
.p2align 4
|
||||
L$ctr32_loop6:
|
||||
addl $6,%r8d
|
||||
movups -48(%rcx,%r10,1),%xmm0
|
||||
.byte 102,15,56,220,209
|
||||
movl %r8d,%eax
|
||||
xorl %ebp,%eax
|
||||
.byte 102,15,56,220,217
|
||||
.byte 0x0f,0x38,0xf1,0x44,0x24,12
|
||||
leal 1(%r8),%eax
|
||||
.byte 102,15,56,220,225
|
||||
xorl %ebp,%eax
|
||||
.byte 0x0f,0x38,0xf1,0x44,0x24,28
|
||||
.byte 102,15,56,220,233
|
||||
leal 2(%r8),%eax
|
||||
xorl %ebp,%eax
|
||||
.byte 102,15,56,220,241
|
||||
.byte 0x0f,0x38,0xf1,0x44,0x24,44
|
||||
leal 3(%r8),%eax
|
||||
.byte 102,15,56,220,249
|
||||
movups -32(%rcx,%r10,1),%xmm1
|
||||
xorl %ebp,%eax
|
||||
|
||||
.byte 102,15,56,220,208
|
||||
.byte 0x0f,0x38,0xf1,0x44,0x24,60
|
||||
leal 4(%r8),%eax
|
||||
.byte 102,15,56,220,216
|
||||
xorl %ebp,%eax
|
||||
.byte 0x0f,0x38,0xf1,0x44,0x24,76
|
||||
.byte 102,15,56,220,224
|
||||
leal 5(%r8),%eax
|
||||
xorl %ebp,%eax
|
||||
.byte 102,15,56,220,232
|
||||
.byte 0x0f,0x38,0xf1,0x44,0x24,92
|
||||
movq %r10,%rax
|
||||
.byte 102,15,56,220,240
|
||||
.byte 102,15,56,220,248
|
||||
movups -16(%rcx,%r10,1),%xmm0
|
||||
|
||||
call L$enc_loop6
|
||||
|
||||
movdqu (%rdi),%xmm8
|
||||
movdqu 16(%rdi),%xmm9
|
||||
movdqu 32(%rdi),%xmm10
|
||||
movdqu 48(%rdi),%xmm11
|
||||
movdqu 64(%rdi),%xmm12
|
||||
movdqu 80(%rdi),%xmm13
|
||||
leaq 96(%rdi),%rdi
|
||||
movups -64(%rcx,%r10,1),%xmm1
|
||||
pxor %xmm2,%xmm8
|
||||
movaps 0(%rsp),%xmm2
|
||||
pxor %xmm3,%xmm9
|
||||
movaps 16(%rsp),%xmm3
|
||||
pxor %xmm4,%xmm10
|
||||
movaps 32(%rsp),%xmm4
|
||||
pxor %xmm5,%xmm11
|
||||
movaps 48(%rsp),%xmm5
|
||||
pxor %xmm6,%xmm12
|
||||
movaps 64(%rsp),%xmm6
|
||||
pxor %xmm7,%xmm13
|
||||
movaps 80(%rsp),%xmm7
|
||||
movdqu %xmm8,(%rsi)
|
||||
movdqu %xmm9,16(%rsi)
|
||||
movdqu %xmm10,32(%rsi)
|
||||
movdqu %xmm11,48(%rsi)
|
||||
movdqu %xmm12,64(%rsi)
|
||||
movdqu %xmm13,80(%rsi)
|
||||
leaq 96(%rsi),%rsi
|
||||
|
||||
subq $6,%rdx
|
||||
jnc L$ctr32_loop6
|
||||
|
||||
addq $6,%rdx
|
||||
jz L$ctr32_done
|
||||
|
||||
leal -48(%r10),%eax
|
||||
leaq -80(%rcx,%r10,1),%rcx
|
||||
negl %eax
|
||||
shrl $4,%eax
|
||||
jmp L$ctr32_tail
|
||||
|
||||
.p2align 5
|
||||
L$ctr32_loop8:
|
||||
addl $8,%r8d
|
||||
@ -1260,6 +1160,8 @@ L$ctr32_enc_done:
|
||||
pxor %xmm0,%xmm13
|
||||
movdqu 80(%rdi),%xmm15
|
||||
pxor %xmm0,%xmm14
|
||||
prefetcht0 448(%rdi)
|
||||
prefetcht0 512(%rdi)
|
||||
pxor %xmm0,%xmm15
|
||||
.byte 102,15,56,220,209
|
||||
.byte 102,15,56,220,217
|
||||
@ -1457,7 +1359,7 @@ L$ctr32_done:
|
||||
leaq (%r11),%rsp
|
||||
|
||||
L$ctr32_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes_hw_cbc_encrypt
|
||||
@ -1466,6 +1368,7 @@ L$ctr32_epilogue:
|
||||
.p2align 4
|
||||
_aes_hw_cbc_encrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %rdx,%rdx
|
||||
jz L$cbc_ret
|
||||
|
||||
@ -1582,16 +1485,10 @@ L$cbc_decrypt_bulk:
|
||||
movdqa %xmm5,%xmm14
|
||||
movdqu 80(%rdi),%xmm7
|
||||
movdqa %xmm6,%xmm15
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r9
|
||||
movl 4(%r9),%r9d
|
||||
cmpq $0x70,%rdx
|
||||
jbe L$cbc_dec_six_or_seven
|
||||
|
||||
andl $71303168,%r9d
|
||||
subq $0x50,%rdx
|
||||
cmpl $4194304,%r9d
|
||||
je L$cbc_dec_loop6_enter
|
||||
subq $0x20,%rdx
|
||||
subq $0x70,%rdx
|
||||
leaq 112(%rcx),%rcx
|
||||
jmp L$cbc_dec_loop8_enter
|
||||
.p2align 4
|
||||
@ -1862,51 +1759,6 @@ L$cbc_dec_seven:
|
||||
pxor %xmm9,%xmm9
|
||||
jmp L$cbc_dec_tail_collected
|
||||
|
||||
.p2align 4
|
||||
L$cbc_dec_loop6:
|
||||
movups %xmm7,(%rsi)
|
||||
leaq 16(%rsi),%rsi
|
||||
movdqu 0(%rdi),%xmm2
|
||||
movdqu 16(%rdi),%xmm3
|
||||
movdqa %xmm2,%xmm11
|
||||
movdqu 32(%rdi),%xmm4
|
||||
movdqa %xmm3,%xmm12
|
||||
movdqu 48(%rdi),%xmm5
|
||||
movdqa %xmm4,%xmm13
|
||||
movdqu 64(%rdi),%xmm6
|
||||
movdqa %xmm5,%xmm14
|
||||
movdqu 80(%rdi),%xmm7
|
||||
movdqa %xmm6,%xmm15
|
||||
L$cbc_dec_loop6_enter:
|
||||
leaq 96(%rdi),%rdi
|
||||
movdqa %xmm7,%xmm8
|
||||
|
||||
call _aesni_decrypt6
|
||||
|
||||
pxor %xmm10,%xmm2
|
||||
movdqa %xmm8,%xmm10
|
||||
pxor %xmm11,%xmm3
|
||||
movdqu %xmm2,(%rsi)
|
||||
pxor %xmm12,%xmm4
|
||||
movdqu %xmm3,16(%rsi)
|
||||
pxor %xmm13,%xmm5
|
||||
movdqu %xmm4,32(%rsi)
|
||||
pxor %xmm14,%xmm6
|
||||
movq %rbp,%rcx
|
||||
movdqu %xmm5,48(%rsi)
|
||||
pxor %xmm15,%xmm7
|
||||
movl %r10d,%eax
|
||||
movdqu %xmm6,64(%rsi)
|
||||
leaq 80(%rsi),%rsi
|
||||
subq $0x60,%rdx
|
||||
ja L$cbc_dec_loop6
|
||||
|
||||
movdqa %xmm7,%xmm2
|
||||
addq $0x50,%rdx
|
||||
jle L$cbc_dec_clear_tail_collected
|
||||
movups %xmm7,(%rsi)
|
||||
leaq 16(%rsi),%rsi
|
||||
|
||||
L$cbc_dec_tail:
|
||||
movups (%rdi),%xmm2
|
||||
subq $0x10,%rdx
|
||||
@ -2050,7 +1902,7 @@ L$cbc_dec_ret:
|
||||
leaq (%r11),%rsp
|
||||
|
||||
L$cbc_ret:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes_hw_set_decrypt_key
|
||||
@ -2059,6 +1911,7 @@ L$cbc_ret:
|
||||
.p2align 4
|
||||
_aes_hw_set_decrypt_key:
|
||||
|
||||
_CET_ENDBR
|
||||
.byte 0x48,0x83,0xEC,0x08
|
||||
|
||||
call __aesni_set_encrypt_key
|
||||
@ -2094,7 +1947,7 @@ L$dec_key_inverse:
|
||||
L$dec_key_ret:
|
||||
addq $8,%rsp
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_set_decrypt_key:
|
||||
|
||||
@ -2105,6 +1958,7 @@ L$SEH_end_set_decrypt_key:
|
||||
_aes_hw_set_encrypt_key:
|
||||
__aesni_set_encrypt_key:
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,_BORINGSSL_function_hit+3(%rip)
|
||||
#endif
|
||||
@ -2404,7 +2258,7 @@ L$enc_key_ret:
|
||||
pxor %xmm5,%xmm5
|
||||
addq $8,%rsp
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_set_encrypt_key:
|
||||
|
||||
@ -2419,7 +2273,7 @@ L$key_expansion_128_cold:
|
||||
xorps %xmm4,%xmm0
|
||||
shufps $255,%xmm1,%xmm1
|
||||
xorps %xmm1,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L$key_expansion_192a:
|
||||
@ -2439,7 +2293,7 @@ L$key_expansion_192b_warm:
|
||||
pxor %xmm1,%xmm0
|
||||
pshufd $255,%xmm0,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L$key_expansion_192b:
|
||||
@ -2462,7 +2316,7 @@ L$key_expansion_256a_cold:
|
||||
xorps %xmm4,%xmm0
|
||||
shufps $255,%xmm1,%xmm1
|
||||
xorps %xmm1,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L$key_expansion_256b:
|
||||
@ -2475,9 +2329,10 @@ L$key_expansion_256b:
|
||||
xorps %xmm4,%xmm2
|
||||
shufps $170,%xmm1,%xmm1
|
||||
xorps %xmm1,%xmm2
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$bswap_mask:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
@ -2500,4 +2355,5 @@ L$key_rcon1b:
|
||||
|
||||
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.p2align 6
|
||||
.text
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -23,7 +16,8 @@
|
||||
.p2align 4
|
||||
_gcm_gmult_ssse3:
|
||||
|
||||
L$gmult_seh_begin:
|
||||
|
||||
_CET_ENDBR
|
||||
movdqu (%rdi),%xmm0
|
||||
movdqa L$reverse_bytes(%rip),%xmm10
|
||||
movdqa L$low4_mask(%rip),%xmm2
|
||||
@ -198,8 +192,8 @@ L$oop_row_3:
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
.byte 0xf3,0xc3
|
||||
L$gmult_seh_end:
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
@ -212,8 +206,9 @@ L$gmult_seh_end:
|
||||
.private_extern _gcm_ghash_ssse3
|
||||
.p2align 4
|
||||
_gcm_ghash_ssse3:
|
||||
L$ghash_seh_begin:
|
||||
|
||||
|
||||
_CET_ENDBR
|
||||
movdqu (%rdi),%xmm0
|
||||
movdqa L$reverse_bytes(%rip),%xmm10
|
||||
movdqa L$low4_mask(%rip),%xmm11
|
||||
@ -410,11 +405,12 @@ L$oop_row_6:
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
.byte 0xf3,0xc3
|
||||
L$ghash_seh_end:
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 4
|
||||
|
||||
|
||||
@ -423,4 +419,5 @@ L$reverse_bytes:
|
||||
|
||||
L$low4_mask:
|
||||
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
|
||||
.text
|
||||
#endif
|
||||
@ -1,24 +1,18 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
.globl _gcm_init_clmul
|
||||
.private_extern _gcm_init_clmul
|
||||
|
||||
.p2align 4
|
||||
_gcm_init_clmul:
|
||||
|
||||
|
||||
_CET_ENDBR
|
||||
L$_init_clmul:
|
||||
movdqu (%rsi),%xmm2
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
@ -169,7 +163,8 @@ L$_init_clmul:
|
||||
movdqu %xmm0,64(%rdi)
|
||||
.byte 102,15,58,15,227,8
|
||||
movdqu %xmm4,80(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.globl _gcm_gmult_clmul
|
||||
@ -178,6 +173,7 @@ L$_init_clmul:
|
||||
.p2align 4
|
||||
_gcm_gmult_clmul:
|
||||
|
||||
_CET_ENDBR
|
||||
L$_gmult_clmul:
|
||||
movdqu (%rdi),%xmm0
|
||||
movdqa L$bswap_mask(%rip),%xmm5
|
||||
@ -223,7 +219,7 @@ L$_gmult_clmul:
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _gcm_ghash_clmul
|
||||
@ -232,6 +228,8 @@ L$_gmult_clmul:
|
||||
.p2align 5
|
||||
_gcm_ghash_clmul:
|
||||
|
||||
|
||||
_CET_ENDBR
|
||||
L$_ghash_clmul:
|
||||
movdqa L$bswap_mask(%rip),%xmm10
|
||||
|
||||
@ -244,15 +242,9 @@ L$_ghash_clmul:
|
||||
jz L$odd_tail
|
||||
|
||||
movdqu 16(%rsi),%xmm6
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rax
|
||||
movl 4(%rax),%eax
|
||||
cmpq $0x30,%rcx
|
||||
jb L$skip4x
|
||||
|
||||
andl $71303168,%eax
|
||||
cmpl $4194304,%eax
|
||||
je L$skip4x
|
||||
|
||||
subq $0x30,%rcx
|
||||
movq $0xA040608020C0E000,%rax
|
||||
movdqu 48(%rsi),%xmm14
|
||||
@ -610,7 +602,8 @@ L$odd_tail:
|
||||
L$done:
|
||||
.byte 102,65,15,56,0,194
|
||||
movdqu %xmm0,(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.globl _gcm_init_avx
|
||||
@ -619,6 +612,7 @@ L$done:
|
||||
.p2align 5
|
||||
_gcm_init_avx:
|
||||
|
||||
_CET_ENDBR
|
||||
vzeroupper
|
||||
|
||||
vmovdqu (%rsi),%xmm2
|
||||
@ -720,7 +714,8 @@ L$init_start_avx:
|
||||
vmovdqu %xmm5,-16(%rdi)
|
||||
|
||||
vzeroupper
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.globl _gcm_gmult_avx
|
||||
@ -729,6 +724,7 @@ L$init_start_avx:
|
||||
.p2align 5
|
||||
_gcm_gmult_avx:
|
||||
|
||||
_CET_ENDBR
|
||||
jmp L$_gmult_clmul
|
||||
|
||||
|
||||
@ -738,6 +734,7 @@ _gcm_gmult_avx:
|
||||
.p2align 5
|
||||
_gcm_ghash_avx:
|
||||
|
||||
_CET_ENDBR
|
||||
vzeroupper
|
||||
|
||||
vmovdqu (%rdi),%xmm10
|
||||
@ -1108,9 +1105,11 @@ L$tail_no_xor_avx:
|
||||
vpshufb %xmm13,%xmm10,%xmm10
|
||||
vmovdqu %xmm10,(%rdi)
|
||||
vzeroupper
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$bswap_mask:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
@ -1118,10 +1117,9 @@ L$0x1c2_polynomial:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
||||
L$7_mask:
|
||||
.long 7,0,7,0
|
||||
L$7_mask_poly:
|
||||
.long 7,0,450,0
|
||||
.p2align 6
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.p2align 6
|
||||
.text
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
.p2align 4
|
||||
|
||||
@ -19,6 +12,7 @@
|
||||
|
||||
_md5_block_asm_data_order:
|
||||
|
||||
_CET_ENDBR
|
||||
pushq %rbp
|
||||
|
||||
pushq %rbx
|
||||
@ -690,7 +684,7 @@ L$end:
|
||||
addq $40,%rsp
|
||||
|
||||
L$epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,20 +1,14 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$poly:
|
||||
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
|
||||
@ -33,6 +27,7 @@ L$ord:
|
||||
.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
|
||||
L$ordK:
|
||||
.quad 0xccd1c8aaee00bc4f
|
||||
.text
|
||||
|
||||
|
||||
|
||||
@ -42,6 +37,7 @@ L$ordK:
|
||||
.p2align 5
|
||||
_ecp_nistz256_neg:
|
||||
|
||||
_CET_ENDBR
|
||||
pushq %r12
|
||||
|
||||
pushq %r13
|
||||
@ -87,7 +83,7 @@ L$neg_body:
|
||||
leaq 16(%rsp),%rsp
|
||||
|
||||
L$neg_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -102,6 +98,7 @@ L$neg_epilogue:
|
||||
.p2align 5
|
||||
_ecp_nistz256_ord_mul_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -414,7 +411,7 @@ L$ord_mul_body:
|
||||
leaq 48(%rsp),%rsp
|
||||
|
||||
L$ord_mul_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -430,6 +427,7 @@ L$ord_mul_epilogue:
|
||||
.p2align 5
|
||||
_ecp_nistz256_ord_sqr_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -712,7 +710,7 @@ L$oop_ord_sqr:
|
||||
leaq 48(%rsp),%rsp
|
||||
|
||||
L$ord_sqr_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -948,7 +946,7 @@ L$ord_mulx_body:
|
||||
leaq 48(%rsp),%rsp
|
||||
|
||||
L$ord_mulx_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1156,7 +1154,7 @@ L$oop_ord_sqrx:
|
||||
leaq 48(%rsp),%rsp
|
||||
|
||||
L$ord_sqrx_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1171,6 +1169,7 @@ L$ord_sqrx_epilogue:
|
||||
.p2align 5
|
||||
_ecp_nistz256_mul_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -1227,7 +1226,7 @@ L$mul_mont_done:
|
||||
leaq 48(%rsp),%rsp
|
||||
|
||||
L$mul_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1445,7 +1444,7 @@ __ecp_nistz256_mul_montq:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1462,6 +1461,7 @@ __ecp_nistz256_mul_montq:
|
||||
.p2align 5
|
||||
_ecp_nistz256_sqr_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -1513,7 +1513,7 @@ L$sqr_mont_done:
|
||||
leaq 48(%rsp),%rsp
|
||||
|
||||
L$sqr_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1677,7 +1677,7 @@ __ecp_nistz256_sqr_montq:
|
||||
movq %r14,16(%rdi)
|
||||
movq %r15,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1845,7 +1845,7 @@ __ecp_nistz256_mul_montx:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1975,7 +1975,7 @@ __ecp_nistz256_sqr_montx:
|
||||
movq %r14,16(%rdi)
|
||||
movq %r15,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1986,6 +1986,7 @@ __ecp_nistz256_sqr_montx:
|
||||
.p2align 5
|
||||
_ecp_nistz256_select_w5:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rax
|
||||
movq 8(%rax),%rax
|
||||
testl $32,%eax
|
||||
@ -2040,7 +2041,7 @@ L$select_loop_sse_w5:
|
||||
movdqu %xmm5,48(%rdi)
|
||||
movdqu %xmm6,64(%rdi)
|
||||
movdqu %xmm7,80(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_ecp_nistz256_select_w5:
|
||||
|
||||
@ -2053,6 +2054,7 @@ L$SEH_end_ecp_nistz256_select_w5:
|
||||
.p2align 5
|
||||
_ecp_nistz256_select_w7:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rax
|
||||
movq 8(%rax),%rax
|
||||
testl $32,%eax
|
||||
@ -2096,7 +2098,7 @@ L$select_loop_sse_w7:
|
||||
movdqu %xmm3,16(%rdi)
|
||||
movdqu %xmm4,32(%rdi)
|
||||
movdqu %xmm5,48(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_ecp_nistz256_select_w7:
|
||||
|
||||
@ -2159,7 +2161,7 @@ L$select_loop_avx2_w5:
|
||||
vmovdqu %ymm3,32(%rdi)
|
||||
vmovdqu %ymm4,64(%rdi)
|
||||
vzeroupper
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_ecp_nistz256_avx2_select_w5:
|
||||
|
||||
@ -2173,6 +2175,7 @@ L$SEH_end_ecp_nistz256_avx2_select_w5:
|
||||
_ecp_nistz256_avx2_select_w7:
|
||||
|
||||
L$avx2_select_w7:
|
||||
_CET_ENDBR
|
||||
vzeroupper
|
||||
vmovdqa L$Three(%rip),%ymm0
|
||||
|
||||
@ -2240,7 +2243,7 @@ L$select_loop_avx2_w7:
|
||||
vmovdqu %ymm2,0(%rdi)
|
||||
vmovdqu %ymm3,32(%rdi)
|
||||
vzeroupper
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_ecp_nistz256_avx2_select_w7:
|
||||
|
||||
@ -2274,7 +2277,7 @@ __ecp_nistz256_add_toq:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2307,7 +2310,7 @@ __ecp_nistz256_sub_fromq:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2336,7 +2339,7 @@ __ecp_nistz256_subq:
|
||||
cmovnzq %rcx,%r8
|
||||
cmovnzq %r10,%r9
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2370,7 +2373,7 @@ __ecp_nistz256_mul_by_2q:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _ecp_nistz256_point_double
|
||||
@ -2379,6 +2382,7 @@ __ecp_nistz256_mul_by_2q:
|
||||
.p2align 5
|
||||
_ecp_nistz256_point_double:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -2598,7 +2602,7 @@ L$point_double_shortcutq:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$point_doubleq_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _ecp_nistz256_point_add
|
||||
@ -2607,6 +2611,7 @@ L$point_doubleq_epilogue:
|
||||
.p2align 5
|
||||
_ecp_nistz256_point_add:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -3029,7 +3034,7 @@ L$add_doneq:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$point_addq_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _ecp_nistz256_point_add_affine
|
||||
@ -3038,6 +3043,7 @@ L$point_addq_epilogue:
|
||||
.p2align 5
|
||||
_ecp_nistz256_point_add_affine:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rcx
|
||||
movq 8(%rcx),%rcx
|
||||
andl $0x80100,%ecx
|
||||
@ -3357,7 +3363,7 @@ L$add_affineq_body:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$add_affineq_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -3391,7 +3397,7 @@ __ecp_nistz256_add_tox:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -3426,7 +3432,7 @@ __ecp_nistz256_sub_fromx:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -3457,7 +3463,7 @@ __ecp_nistz256_subx:
|
||||
cmovcq %rcx,%r8
|
||||
cmovcq %r10,%r9
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -3492,7 +3498,7 @@ __ecp_nistz256_mul_by_2x:
|
||||
movq %r8,16(%rdi)
|
||||
movq %r9,24(%rdi)
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -3714,7 +3720,7 @@ L$point_double_shortcutx:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$point_doublex_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -4139,7 +4145,7 @@ L$add_donex:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$point_addx_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -4461,7 +4467,7 @@ L$add_affinex_body:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$add_affinex_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -20,6 +13,7 @@
|
||||
.p2align 5
|
||||
_beeu_mod_inverse_vartime:
|
||||
|
||||
_CET_ENDBR
|
||||
pushq %rbp
|
||||
|
||||
pushq %r12
|
||||
@ -321,7 +315,7 @@ L$beeu_finish:
|
||||
|
||||
popq %rbp
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -22,12 +15,13 @@
|
||||
.p2align 4
|
||||
_CRYPTO_rdrand:
|
||||
|
||||
_CET_ENDBR
|
||||
xorq %rax,%rax
|
||||
.byte 72,15,199,242
|
||||
|
||||
adcq %rax,%rax
|
||||
movq %rdx,0(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -41,6 +35,7 @@ _CRYPTO_rdrand:
|
||||
.p2align 4
|
||||
_CRYPTO_rdrand_multiple8_buf:
|
||||
|
||||
_CET_ENDBR
|
||||
testq %rsi,%rsi
|
||||
jz L$out
|
||||
movq $8,%rdx
|
||||
@ -53,10 +48,10 @@ L$loop:
|
||||
jnz L$loop
|
||||
L$out:
|
||||
movq $1,%rax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
L$err:
|
||||
xorq %rax,%rax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
.globl _rsaz_1024_sqr_avx2
|
||||
@ -19,6 +12,7 @@
|
||||
.p2align 6
|
||||
_rsaz_1024_sqr_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq (%rsp),%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -664,7 +658,7 @@ L$OOP_REDUCE_1024:
|
||||
leaq (%rax),%rsp
|
||||
|
||||
L$sqr_1024_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _rsaz_1024_mul_avx2
|
||||
@ -673,6 +667,7 @@ L$sqr_1024_epilogue:
|
||||
.p2align 6
|
||||
_rsaz_1024_mul_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
leaq (%rsp),%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -1220,7 +1215,7 @@ L$oop_mul_1024:
|
||||
leaq (%rax),%rsp
|
||||
|
||||
L$mul_1024_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _rsaz_1024_red2norm_avx2
|
||||
@ -1229,6 +1224,7 @@ L$mul_1024_epilogue:
|
||||
.p2align 5
|
||||
_rsaz_1024_red2norm_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
subq $-128,%rsi
|
||||
xorq %rax,%rax
|
||||
movq -128(%rsi),%r8
|
||||
@ -1419,7 +1415,7 @@ _rsaz_1024_red2norm_avx2:
|
||||
adcq $0,%r11
|
||||
movq %rax,120(%rdi)
|
||||
movq %r11,%rax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1429,6 +1425,7 @@ _rsaz_1024_red2norm_avx2:
|
||||
.p2align 5
|
||||
_rsaz_1024_norm2red_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
subq $-128,%rdi
|
||||
movq (%rsi),%r8
|
||||
movl $0x1fffffff,%eax
|
||||
@ -1580,7 +1577,7 @@ _rsaz_1024_norm2red_avx2:
|
||||
movq %r8,168(%rdi)
|
||||
movq %r8,176(%rdi)
|
||||
movq %r8,184(%rdi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _rsaz_1024_scatter5_avx2
|
||||
@ -1589,6 +1586,7 @@ _rsaz_1024_norm2red_avx2:
|
||||
.p2align 5
|
||||
_rsaz_1024_scatter5_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
vzeroupper
|
||||
vmovdqu L$scatter_permd(%rip),%ymm5
|
||||
shll $4,%edx
|
||||
@ -1607,7 +1605,7 @@ L$oop_scatter_1024:
|
||||
jnz L$oop_scatter_1024
|
||||
|
||||
vzeroupper
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1617,6 +1615,7 @@ L$oop_scatter_1024:
|
||||
.p2align 5
|
||||
_rsaz_1024_gather5_avx2:
|
||||
|
||||
_CET_ENDBR
|
||||
vzeroupper
|
||||
movq %rsp,%r11
|
||||
|
||||
@ -1729,10 +1728,11 @@ L$oop_gather_1024:
|
||||
vzeroupper
|
||||
leaq (%r11),%rsp
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
L$SEH_end_rsaz_1024_gather5:
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$and_mask:
|
||||
.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
|
||||
@ -1745,4 +1745,5 @@ L$inc:
|
||||
.long 2,2,2,2, 3,3,3,3
|
||||
.long 4,4,4,4, 4,4,4,4
|
||||
.p2align 6
|
||||
.text
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,36 +1,18 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
.globl _sha256_block_data_order
|
||||
.private_extern _sha256_block_data_order
|
||||
.globl _sha256_block_data_order_nohw
|
||||
.private_extern _sha256_block_data_order_nohw
|
||||
|
||||
.p2align 4
|
||||
_sha256_block_data_order:
|
||||
_sha256_block_data_order_nohw:
|
||||
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r11
|
||||
movl 0(%r11),%r9d
|
||||
movl 4(%r11),%r10d
|
||||
movl 8(%r11),%r11d
|
||||
andl $1073741824,%r9d
|
||||
andl $268435968,%r10d
|
||||
orl %r9d,%r10d
|
||||
cmpl $1342177792,%r10d
|
||||
je L$avx_shortcut
|
||||
testl $512,%r10d
|
||||
jnz L$ssse3_shortcut
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -1734,9 +1716,10 @@ L$rounds_16_xx:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
|
||||
K256:
|
||||
@ -1780,11 +1763,225 @@ K256:
|
||||
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
|
||||
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
|
||||
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.text
|
||||
.globl _sha256_block_data_order_hw
|
||||
.private_extern _sha256_block_data_order_hw
|
||||
|
||||
.p2align 6
|
||||
sha256_block_data_order_ssse3:
|
||||
_sha256_block_data_order_hw:
|
||||
|
||||
L$ssse3_shortcut:
|
||||
_CET_ENDBR
|
||||
leaq K256+128(%rip),%rcx
|
||||
movdqu (%rdi),%xmm1
|
||||
movdqu 16(%rdi),%xmm2
|
||||
movdqa 512-128(%rcx),%xmm7
|
||||
|
||||
pshufd $0x1b,%xmm1,%xmm0
|
||||
pshufd $0xb1,%xmm1,%xmm1
|
||||
pshufd $0x1b,%xmm2,%xmm2
|
||||
movdqa %xmm7,%xmm8
|
||||
.byte 102,15,58,15,202,8
|
||||
punpcklqdq %xmm0,%xmm2
|
||||
jmp L$oop_shaext
|
||||
|
||||
.p2align 4
|
||||
L$oop_shaext:
|
||||
movdqu (%rsi),%xmm3
|
||||
movdqu 16(%rsi),%xmm4
|
||||
movdqu 32(%rsi),%xmm5
|
||||
.byte 102,15,56,0,223
|
||||
movdqu 48(%rsi),%xmm6
|
||||
|
||||
movdqa 0-128(%rcx),%xmm0
|
||||
paddd %xmm3,%xmm0
|
||||
.byte 102,15,56,0,231
|
||||
movdqa %xmm2,%xmm10
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
nop
|
||||
movdqa %xmm1,%xmm9
|
||||
.byte 15,56,203,202
|
||||
|
||||
movdqa 32-128(%rcx),%xmm0
|
||||
paddd %xmm4,%xmm0
|
||||
.byte 102,15,56,0,239
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
leaq 64(%rsi),%rsi
|
||||
.byte 15,56,204,220
|
||||
.byte 15,56,203,202
|
||||
|
||||
movdqa 64-128(%rcx),%xmm0
|
||||
paddd %xmm5,%xmm0
|
||||
.byte 102,15,56,0,247
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm6,%xmm7
|
||||
.byte 102,15,58,15,253,4
|
||||
nop
|
||||
paddd %xmm7,%xmm3
|
||||
.byte 15,56,204,229
|
||||
.byte 15,56,203,202
|
||||
|
||||
movdqa 96-128(%rcx),%xmm0
|
||||
paddd %xmm6,%xmm0
|
||||
.byte 15,56,205,222
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm3,%xmm7
|
||||
.byte 102,15,58,15,254,4
|
||||
nop
|
||||
paddd %xmm7,%xmm4
|
||||
.byte 15,56,204,238
|
||||
.byte 15,56,203,202
|
||||
movdqa 128-128(%rcx),%xmm0
|
||||
paddd %xmm3,%xmm0
|
||||
.byte 15,56,205,227
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm4,%xmm7
|
||||
.byte 102,15,58,15,251,4
|
||||
nop
|
||||
paddd %xmm7,%xmm5
|
||||
.byte 15,56,204,243
|
||||
.byte 15,56,203,202
|
||||
movdqa 160-128(%rcx),%xmm0
|
||||
paddd %xmm4,%xmm0
|
||||
.byte 15,56,205,236
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm5,%xmm7
|
||||
.byte 102,15,58,15,252,4
|
||||
nop
|
||||
paddd %xmm7,%xmm6
|
||||
.byte 15,56,204,220
|
||||
.byte 15,56,203,202
|
||||
movdqa 192-128(%rcx),%xmm0
|
||||
paddd %xmm5,%xmm0
|
||||
.byte 15,56,205,245
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm6,%xmm7
|
||||
.byte 102,15,58,15,253,4
|
||||
nop
|
||||
paddd %xmm7,%xmm3
|
||||
.byte 15,56,204,229
|
||||
.byte 15,56,203,202
|
||||
movdqa 224-128(%rcx),%xmm0
|
||||
paddd %xmm6,%xmm0
|
||||
.byte 15,56,205,222
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm3,%xmm7
|
||||
.byte 102,15,58,15,254,4
|
||||
nop
|
||||
paddd %xmm7,%xmm4
|
||||
.byte 15,56,204,238
|
||||
.byte 15,56,203,202
|
||||
movdqa 256-128(%rcx),%xmm0
|
||||
paddd %xmm3,%xmm0
|
||||
.byte 15,56,205,227
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm4,%xmm7
|
||||
.byte 102,15,58,15,251,4
|
||||
nop
|
||||
paddd %xmm7,%xmm5
|
||||
.byte 15,56,204,243
|
||||
.byte 15,56,203,202
|
||||
movdqa 288-128(%rcx),%xmm0
|
||||
paddd %xmm4,%xmm0
|
||||
.byte 15,56,205,236
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm5,%xmm7
|
||||
.byte 102,15,58,15,252,4
|
||||
nop
|
||||
paddd %xmm7,%xmm6
|
||||
.byte 15,56,204,220
|
||||
.byte 15,56,203,202
|
||||
movdqa 320-128(%rcx),%xmm0
|
||||
paddd %xmm5,%xmm0
|
||||
.byte 15,56,205,245
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm6,%xmm7
|
||||
.byte 102,15,58,15,253,4
|
||||
nop
|
||||
paddd %xmm7,%xmm3
|
||||
.byte 15,56,204,229
|
||||
.byte 15,56,203,202
|
||||
movdqa 352-128(%rcx),%xmm0
|
||||
paddd %xmm6,%xmm0
|
||||
.byte 15,56,205,222
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm3,%xmm7
|
||||
.byte 102,15,58,15,254,4
|
||||
nop
|
||||
paddd %xmm7,%xmm4
|
||||
.byte 15,56,204,238
|
||||
.byte 15,56,203,202
|
||||
movdqa 384-128(%rcx),%xmm0
|
||||
paddd %xmm3,%xmm0
|
||||
.byte 15,56,205,227
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm4,%xmm7
|
||||
.byte 102,15,58,15,251,4
|
||||
nop
|
||||
paddd %xmm7,%xmm5
|
||||
.byte 15,56,204,243
|
||||
.byte 15,56,203,202
|
||||
movdqa 416-128(%rcx),%xmm0
|
||||
paddd %xmm4,%xmm0
|
||||
.byte 15,56,205,236
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
movdqa %xmm5,%xmm7
|
||||
.byte 102,15,58,15,252,4
|
||||
.byte 15,56,203,202
|
||||
paddd %xmm7,%xmm6
|
||||
|
||||
movdqa 448-128(%rcx),%xmm0
|
||||
paddd %xmm5,%xmm0
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
.byte 15,56,205,245
|
||||
movdqa %xmm8,%xmm7
|
||||
.byte 15,56,203,202
|
||||
|
||||
movdqa 480-128(%rcx),%xmm0
|
||||
paddd %xmm6,%xmm0
|
||||
nop
|
||||
.byte 15,56,203,209
|
||||
pshufd $0x0e,%xmm0,%xmm0
|
||||
decq %rdx
|
||||
nop
|
||||
.byte 15,56,203,202
|
||||
|
||||
paddd %xmm10,%xmm2
|
||||
paddd %xmm9,%xmm1
|
||||
jnz L$oop_shaext
|
||||
|
||||
pshufd $0xb1,%xmm2,%xmm2
|
||||
pshufd $0x1b,%xmm1,%xmm7
|
||||
pshufd $0xb1,%xmm1,%xmm1
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
.byte 102,15,58,15,215,8
|
||||
|
||||
movdqu %xmm1,(%rdi)
|
||||
movdqu %xmm2,16(%rdi)
|
||||
ret
|
||||
|
||||
|
||||
.globl _sha256_block_data_order_ssse3
|
||||
.private_extern _sha256_block_data_order_ssse3
|
||||
|
||||
.p2align 6
|
||||
_sha256_block_data_order_ssse3:
|
||||
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -2890,14 +3087,16 @@ L$ssse3_00_47:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$epilogue_ssse3:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _sha256_block_data_order_avx
|
||||
.private_extern _sha256_block_data_order_avx
|
||||
|
||||
.p2align 6
|
||||
sha256_block_data_order_avx:
|
||||
_sha256_block_data_order_avx:
|
||||
|
||||
L$avx_shortcut:
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -3965,7 +4164,7 @@ L$avx_00_47:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$epilogue_avx:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,34 +1,18 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
.globl _sha512_block_data_order
|
||||
.private_extern _sha512_block_data_order
|
||||
.globl _sha512_block_data_order_nohw
|
||||
.private_extern _sha512_block_data_order_nohw
|
||||
|
||||
.p2align 4
|
||||
_sha512_block_data_order:
|
||||
_sha512_block_data_order_nohw:
|
||||
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r11
|
||||
movl 0(%r11),%r9d
|
||||
movl 4(%r11),%r10d
|
||||
movl 8(%r11),%r11d
|
||||
andl $1073741824,%r9d
|
||||
andl $268435968,%r10d
|
||||
orl %r9d,%r10d
|
||||
cmpl $1342177792,%r10d
|
||||
je L$avx_shortcut
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -1732,9 +1716,10 @@ L$rounds_16_xx:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
|
||||
K512:
|
||||
@ -1822,11 +1807,14 @@ K512:
|
||||
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
|
||||
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
|
||||
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.text
|
||||
.globl _sha512_block_data_order_avx
|
||||
.private_extern _sha512_block_data_order_avx
|
||||
|
||||
.p2align 6
|
||||
sha512_block_data_order_avx:
|
||||
_sha512_block_data_order_avx:
|
||||
|
||||
L$avx_shortcut:
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
pushq %rbx
|
||||
@ -2984,7 +2972,7 @@ L$avx_00_47:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$epilogue_avx:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -111,7 +104,7 @@ L$enc_entry:
|
||||
movdqa 64(%r11,%r10,1),%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -286,7 +279,7 @@ L$enc2x_entry:
|
||||
pxor %xmm12,%xmm6
|
||||
.byte 102,15,56,0,193
|
||||
.byte 102,15,56,0,241
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -394,7 +387,7 @@ L$dec_entry:
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,194
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -572,7 +565,7 @@ L$schedule_mangle_last_dec:
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -601,7 +594,7 @@ _vpaes_schedule_192_smear:
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm6,%xmm0
|
||||
movhlps %xmm1,%xmm6
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -679,7 +672,7 @@ _vpaes_schedule_low_round:
|
||||
|
||||
pxor %xmm7,%xmm0
|
||||
movdqa %xmm0,%xmm7
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -705,7 +698,7 @@ _vpaes_schedule_transform:
|
||||
movdqa 16(%r11),%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
pxor %xmm2,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -799,7 +792,7 @@ L$schedule_mangle_both:
|
||||
addq $-16,%r8
|
||||
andq $0x30,%r8
|
||||
movdqu %xmm3,(%rdx)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -812,6 +805,7 @@ L$schedule_mangle_both:
|
||||
.p2align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+5(%rip)
|
||||
@ -826,7 +820,7 @@ _vpaes_set_encrypt_key:
|
||||
movl $0x30,%r8d
|
||||
call _vpaes_schedule_core
|
||||
xorl %eax,%eax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -836,6 +830,7 @@ _vpaes_set_encrypt_key:
|
||||
.p2align 4
|
||||
_vpaes_set_decrypt_key:
|
||||
|
||||
_CET_ENDBR
|
||||
movl %esi,%eax
|
||||
shrl $5,%eax
|
||||
addl $5,%eax
|
||||
@ -850,7 +845,7 @@ _vpaes_set_decrypt_key:
|
||||
xorl $32,%r8d
|
||||
call _vpaes_schedule_core
|
||||
xorl %eax,%eax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -860,6 +855,7 @@ _vpaes_set_decrypt_key:
|
||||
.p2align 4
|
||||
_vpaes_encrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+4(%rip)
|
||||
@ -868,7 +864,7 @@ _vpaes_encrypt:
|
||||
call _vpaes_preheat
|
||||
call _vpaes_encrypt_core
|
||||
movdqu %xmm0,(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -878,11 +874,12 @@ _vpaes_encrypt:
|
||||
.p2align 4
|
||||
_vpaes_decrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
movdqu (%rdi),%xmm0
|
||||
call _vpaes_preheat
|
||||
call _vpaes_decrypt_core
|
||||
movdqu %xmm0,(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _vpaes_cbc_encrypt
|
||||
@ -891,6 +888,7 @@ _vpaes_decrypt:
|
||||
.p2align 4
|
||||
_vpaes_cbc_encrypt:
|
||||
|
||||
_CET_ENDBR
|
||||
xchgq %rcx,%rdx
|
||||
subq $16,%rcx
|
||||
jc L$cbc_abort
|
||||
@ -925,7 +923,7 @@ L$cbc_dec_loop:
|
||||
L$cbc_done:
|
||||
movdqu %xmm6,(%r8)
|
||||
L$cbc_abort:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _vpaes_ctr32_encrypt_blocks
|
||||
@ -934,6 +932,7 @@ L$cbc_abort:
|
||||
.p2align 4
|
||||
_vpaes_ctr32_encrypt_blocks:
|
||||
|
||||
_CET_ENDBR
|
||||
|
||||
xchgq %rcx,%rdx
|
||||
testq %rcx,%rcx
|
||||
@ -988,7 +987,7 @@ L$ctr32_loop:
|
||||
|
||||
L$ctr32_done:
|
||||
L$ctr32_abort:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1009,7 +1008,7 @@ _vpaes_preheat:
|
||||
movdqa 64(%r10),%xmm12
|
||||
movdqa 80(%r10),%xmm15
|
||||
movdqa 96(%r10),%xmm14
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1018,6 +1017,7 @@ _vpaes_preheat:
|
||||
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
_vpaes_consts:
|
||||
L$k_inv:
|
||||
@ -1127,4 +1127,5 @@ L$ctr_add_two:
|
||||
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
|
||||
.p2align 6
|
||||
|
||||
.text
|
||||
#endif
|
||||
@ -1,43 +1,21 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
|
||||
.globl _bn_mul_mont
|
||||
.private_extern _bn_mul_mont
|
||||
.globl _bn_mul_mont_nohw
|
||||
.private_extern _bn_mul_mont_nohw
|
||||
|
||||
.p2align 4
|
||||
_bn_mul_mont:
|
||||
_bn_mul_mont_nohw:
|
||||
|
||||
_CET_ENDBR
|
||||
movl %r9d,%r9d
|
||||
movq %rsp,%rax
|
||||
|
||||
testl $3,%r9d
|
||||
jnz L$mul_enter
|
||||
cmpl $8,%r9d
|
||||
jb L$mul_enter
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r11
|
||||
movl 8(%r11),%r11d
|
||||
cmpq %rsi,%rdx
|
||||
jne L$mul4x_enter
|
||||
testl $7,%r9d
|
||||
jz L$sqr8x_enter
|
||||
jmp L$mul4x_enter
|
||||
|
||||
.p2align 4
|
||||
L$mul_enter:
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
@ -267,20 +245,19 @@ L$copy:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$mul_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _bn_mul4x_mont
|
||||
.private_extern _bn_mul4x_mont
|
||||
|
||||
.p2align 4
|
||||
bn_mul4x_mont:
|
||||
_bn_mul4x_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
movl %r9d,%r9d
|
||||
movq %rsp,%rax
|
||||
|
||||
L$mul4x_enter:
|
||||
andl $0x80100,%r11d
|
||||
cmpl $0x80100,%r11d
|
||||
je L$mulx4x_enter
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
@ -701,19 +678,22 @@ L$copy4x:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$mul4x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.globl _bn_sqr8x_mont
|
||||
.private_extern _bn_sqr8x_mont
|
||||
|
||||
.p2align 5
|
||||
bn_sqr8x_mont:
|
||||
_bn_sqr8x_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
movl %r9d,%r9d
|
||||
movq %rsp,%rax
|
||||
|
||||
L$sqr8x_enter:
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
@ -788,11 +768,8 @@ L$sqr8x_body:
|
||||
pxor %xmm0,%xmm0
|
||||
.byte 102,72,15,110,207
|
||||
.byte 102,73,15,110,218
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%rax
|
||||
movl 8(%rax),%eax
|
||||
andl $0x80100,%eax
|
||||
cmpl $0x80100,%eax
|
||||
jne L$sqr8x_nox
|
||||
testq %rdx,%rdx
|
||||
jz L$sqr8x_nox
|
||||
|
||||
call _bn_sqrx8x_internal
|
||||
|
||||
@ -892,16 +869,18 @@ L$sqr8x_cond_copy:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$sqr8x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _bn_mulx4x_mont
|
||||
.private_extern _bn_mulx4x_mont
|
||||
|
||||
.p2align 5
|
||||
bn_mulx4x_mont:
|
||||
_bn_mulx4x_mont:
|
||||
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
L$mulx4x_enter:
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
@ -1248,7 +1227,7 @@ L$mulx4x_cond_copy:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$mulx4x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -21,6 +14,7 @@
|
||||
.p2align 6
|
||||
_bn_mul_mont_gather5:
|
||||
|
||||
_CET_ENDBR
|
||||
movl %r9d,%r9d
|
||||
movq %rsp,%rax
|
||||
|
||||
@ -206,6 +200,7 @@ L$mul_body:
|
||||
por %xmm2,%xmm0
|
||||
por %xmm3,%xmm1
|
||||
por %xmm1,%xmm0
|
||||
|
||||
pshufd $0x4e,%xmm0,%xmm1
|
||||
por %xmm1,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
@ -329,6 +324,7 @@ L$outer:
|
||||
por %xmm2,%xmm4
|
||||
por %xmm3,%xmm5
|
||||
por %xmm5,%xmm4
|
||||
|
||||
pshufd $0x4e,%xmm4,%xmm0
|
||||
por %xmm4,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
@ -453,7 +449,7 @@ L$copy:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$mul_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -558,7 +554,7 @@ L$mul4x_body:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$mul4x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -697,6 +693,7 @@ mul4x_internal:
|
||||
por %xmm2,%xmm0
|
||||
por %xmm3,%xmm1
|
||||
por %xmm1,%xmm0
|
||||
|
||||
pshufd $0x4e,%xmm0,%xmm1
|
||||
por %xmm1,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
@ -904,6 +901,7 @@ L$outer4x:
|
||||
por %xmm2,%xmm4
|
||||
por %xmm3,%xmm5
|
||||
por %xmm5,%xmm4
|
||||
|
||||
pshufd $0x4e,%xmm4,%xmm0
|
||||
por %xmm4,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
@ -1095,6 +1093,7 @@ L$inner4x:
|
||||
.p2align 5
|
||||
_bn_power5:
|
||||
|
||||
_CET_ENDBR
|
||||
movq %rsp,%rax
|
||||
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r11
|
||||
@ -1222,7 +1221,7 @@ L$power5_body:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$power5_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -1234,6 +1233,7 @@ L$power5_epilogue:
|
||||
_bn_sqr8x_internal:
|
||||
__bn_sqr8x_internal:
|
||||
|
||||
_CET_ENDBR
|
||||
|
||||
|
||||
|
||||
@ -2007,7 +2007,7 @@ L$8x_no_tail:
|
||||
|
||||
cmpq %rdx,%rdi
|
||||
jb L$8x_reduction_loop
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2063,188 +2063,7 @@ L$sqr4x_sub_entry:
|
||||
|
||||
movq %r9,%r10
|
||||
negq %r9
|
||||
.byte 0xf3,0xc3
|
||||
|
||||
|
||||
.globl _bn_from_montgomery
|
||||
.private_extern _bn_from_montgomery
|
||||
|
||||
.p2align 5
|
||||
_bn_from_montgomery:
|
||||
|
||||
testl $7,%r9d
|
||||
jz bn_from_mont8x
|
||||
xorl %eax,%eax
|
||||
.byte 0xf3,0xc3
|
||||
|
||||
|
||||
|
||||
|
||||
.p2align 5
|
||||
bn_from_mont8x:
|
||||
|
||||
.byte 0x67
|
||||
movq %rsp,%rax
|
||||
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
|
||||
pushq %r12
|
||||
|
||||
pushq %r13
|
||||
|
||||
pushq %r14
|
||||
|
||||
pushq %r15
|
||||
|
||||
L$from_prologue:
|
||||
|
||||
shll $3,%r9d
|
||||
leaq (%r9,%r9,2),%r10
|
||||
negq %r9
|
||||
movq (%r8),%r8
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
leaq -320(%rsp,%r9,2),%r11
|
||||
movq %rsp,%rbp
|
||||
subq %rdi,%r11
|
||||
andq $4095,%r11
|
||||
cmpq %r11,%r10
|
||||
jb L$from_sp_alt
|
||||
subq %r11,%rbp
|
||||
leaq -320(%rbp,%r9,2),%rbp
|
||||
jmp L$from_sp_done
|
||||
|
||||
.p2align 5
|
||||
L$from_sp_alt:
|
||||
leaq 4096-320(,%r9,2),%r10
|
||||
leaq -320(%rbp,%r9,2),%rbp
|
||||
subq %r10,%r11
|
||||
movq $0,%r10
|
||||
cmovcq %r10,%r11
|
||||
subq %r11,%rbp
|
||||
L$from_sp_done:
|
||||
andq $-64,%rbp
|
||||
movq %rsp,%r11
|
||||
subq %rbp,%r11
|
||||
andq $-4096,%r11
|
||||
leaq (%r11,%rbp,1),%rsp
|
||||
movq (%rsp),%r10
|
||||
cmpq %rbp,%rsp
|
||||
ja L$from_page_walk
|
||||
jmp L$from_page_walk_done
|
||||
|
||||
L$from_page_walk:
|
||||
leaq -4096(%rsp),%rsp
|
||||
movq (%rsp),%r10
|
||||
cmpq %rbp,%rsp
|
||||
ja L$from_page_walk
|
||||
L$from_page_walk_done:
|
||||
|
||||
movq %r9,%r10
|
||||
negq %r9
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
movq %r8,32(%rsp)
|
||||
movq %rax,40(%rsp)
|
||||
|
||||
L$from_body:
|
||||
movq %r9,%r11
|
||||
leaq 48(%rsp),%rax
|
||||
pxor %xmm0,%xmm0
|
||||
jmp L$mul_by_1
|
||||
|
||||
.p2align 5
|
||||
L$mul_by_1:
|
||||
movdqu (%rsi),%xmm1
|
||||
movdqu 16(%rsi),%xmm2
|
||||
movdqu 32(%rsi),%xmm3
|
||||
movdqa %xmm0,(%rax,%r9,1)
|
||||
movdqu 48(%rsi),%xmm4
|
||||
movdqa %xmm0,16(%rax,%r9,1)
|
||||
.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
|
||||
movdqa %xmm1,(%rax)
|
||||
movdqa %xmm0,32(%rax,%r9,1)
|
||||
movdqa %xmm2,16(%rax)
|
||||
movdqa %xmm0,48(%rax,%r9,1)
|
||||
movdqa %xmm3,32(%rax)
|
||||
movdqa %xmm4,48(%rax)
|
||||
leaq 64(%rax),%rax
|
||||
subq $64,%r11
|
||||
jnz L$mul_by_1
|
||||
|
||||
.byte 102,72,15,110,207
|
||||
.byte 102,72,15,110,209
|
||||
.byte 0x67
|
||||
movq %rcx,%rbp
|
||||
.byte 102,73,15,110,218
|
||||
leaq _OPENSSL_ia32cap_P(%rip),%r11
|
||||
movl 8(%r11),%r11d
|
||||
andl $0x80108,%r11d
|
||||
cmpl $0x80108,%r11d
|
||||
jne L$from_mont_nox
|
||||
|
||||
leaq (%rax,%r9,1),%rdi
|
||||
call __bn_sqrx8x_reduction
|
||||
call __bn_postx4x_internal
|
||||
|
||||
pxor %xmm0,%xmm0
|
||||
leaq 48(%rsp),%rax
|
||||
jmp L$from_mont_zero
|
||||
|
||||
.p2align 5
|
||||
L$from_mont_nox:
|
||||
call __bn_sqr8x_reduction
|
||||
call __bn_post4x_internal
|
||||
|
||||
pxor %xmm0,%xmm0
|
||||
leaq 48(%rsp),%rax
|
||||
jmp L$from_mont_zero
|
||||
|
||||
.p2align 5
|
||||
L$from_mont_zero:
|
||||
movq 40(%rsp),%rsi
|
||||
|
||||
movdqa %xmm0,0(%rax)
|
||||
movdqa %xmm0,16(%rax)
|
||||
movdqa %xmm0,32(%rax)
|
||||
movdqa %xmm0,48(%rax)
|
||||
leaq 64(%rax),%rax
|
||||
subq $32,%r9
|
||||
jnz L$from_mont_zero
|
||||
|
||||
movq $1,%rax
|
||||
movq -48(%rsi),%r15
|
||||
|
||||
movq -40(%rsi),%r14
|
||||
|
||||
movq -32(%rsi),%r13
|
||||
|
||||
movq -24(%rsi),%r12
|
||||
|
||||
movq -16(%rsi),%rbp
|
||||
|
||||
movq -8(%rsi),%rbx
|
||||
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$from_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2354,7 +2173,7 @@ L$mulx4x_body:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$mulx4x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2501,6 +2320,7 @@ mulx4x_internal:
|
||||
por %xmm2,%xmm0
|
||||
por %xmm3,%xmm1
|
||||
pxor %xmm1,%xmm0
|
||||
|
||||
pshufd $0x4e,%xmm0,%xmm1
|
||||
por %xmm1,%xmm0
|
||||
leaq 256(%rdi),%rdi
|
||||
@ -2651,6 +2471,7 @@ L$mulx4x_outer:
|
||||
por %xmm2,%xmm4
|
||||
por %xmm3,%xmm5
|
||||
por %xmm5,%xmm4
|
||||
|
||||
pshufd $0x4e,%xmm4,%xmm0
|
||||
por %xmm4,%xmm0
|
||||
leaq 256(%rdi),%rdi
|
||||
@ -2913,7 +2734,7 @@ L$powerx5_body:
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$powerx5_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -2925,6 +2746,7 @@ L$powerx5_epilogue:
|
||||
_bn_sqrx8x_internal:
|
||||
__bn_sqrx8x_internal:
|
||||
|
||||
_CET_ENDBR
|
||||
|
||||
|
||||
|
||||
@ -3535,7 +3357,7 @@ L$sqrx8x_no_tail:
|
||||
leaq 64(%rdi,%rcx,1),%rdi
|
||||
cmpq 8+8(%rsp),%r8
|
||||
jb L$sqrx8x_reduction_loop
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.p2align 5
|
||||
@ -3588,7 +3410,7 @@ L$sqrx4x_sub_entry:
|
||||
|
||||
negq %r9
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _bn_scatter5
|
||||
@ -3597,8 +3419,18 @@ L$sqrx4x_sub_entry:
|
||||
.p2align 4
|
||||
_bn_scatter5:
|
||||
|
||||
_CET_ENDBR
|
||||
cmpl $0,%esi
|
||||
jz L$scatter_epilogue
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
leaq (%rdx,%rcx,8),%rdx
|
||||
L$scatter:
|
||||
movq (%rdi),%rax
|
||||
@ -3608,7 +3440,7 @@ L$scatter:
|
||||
subl $1,%esi
|
||||
jnz L$scatter
|
||||
L$scatter_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -3619,6 +3451,7 @@ L$scatter_epilogue:
|
||||
_bn_gather5:
|
||||
|
||||
L$SEH_begin_bn_gather5:
|
||||
_CET_ENDBR
|
||||
|
||||
.byte 0x4c,0x8d,0x14,0x24
|
||||
|
||||
@ -3767,6 +3600,7 @@ L$gather:
|
||||
por %xmm3,%xmm5
|
||||
por %xmm5,%xmm4
|
||||
leaq 256(%r11),%r11
|
||||
|
||||
pshufd $0x4e,%xmm4,%xmm0
|
||||
por %xmm4,%xmm0
|
||||
movq %xmm0,(%rdi)
|
||||
@ -3776,13 +3610,15 @@ L$gather:
|
||||
|
||||
leaq (%r10),%rsp
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
L$SEH_end_bn_gather5:
|
||||
|
||||
|
||||
.section __DATA,__const
|
||||
.p2align 6
|
||||
L$inc:
|
||||
.long 0,0, 1,1
|
||||
.long 2,2, 2,2
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.text
|
||||
#endif
|
||||
@ -1,16 +1,9 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
|
||||
.text
|
||||
|
||||
|
||||
@ -25,9 +18,10 @@
|
||||
.private_extern _abi_test_trampoline
|
||||
.p2align 4
|
||||
_abi_test_trampoline:
|
||||
L$abi_test_trampoline_seh_begin:
|
||||
|
||||
|
||||
_CET_ENDBR
|
||||
|
||||
|
||||
|
||||
|
||||
@ -38,27 +32,26 @@ L$abi_test_trampoline_seh_begin:
|
||||
|
||||
subq $120,%rsp
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_alloc:
|
||||
|
||||
movq %r8,48(%rsp)
|
||||
movq %rbx,64(%rsp)
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_rbx:
|
||||
|
||||
movq %rbp,72(%rsp)
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_rbp:
|
||||
|
||||
movq %r12,80(%rsp)
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_r12:
|
||||
|
||||
movq %r13,88(%rsp)
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_r13:
|
||||
|
||||
movq %r14,96(%rsp)
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_r14:
|
||||
|
||||
movq %r15,104(%rsp)
|
||||
|
||||
L$abi_test_trampoline_seh_prolog_r15:
|
||||
L$abi_test_trampoline_seh_prolog_end:
|
||||
|
||||
movq 0(%rsi),%rbx
|
||||
movq 8(%rsi),%rbp
|
||||
movq 16(%rsi),%r12
|
||||
@ -180,257 +173,288 @@ L$call_done:
|
||||
|
||||
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
L$abi_test_trampoline_seh_end:
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rax
|
||||
.private_extern _abi_test_clobber_rax
|
||||
.p2align 4
|
||||
_abi_test_clobber_rax:
|
||||
_CET_ENDBR
|
||||
xorq %rax,%rax
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rbx
|
||||
.private_extern _abi_test_clobber_rbx
|
||||
.p2align 4
|
||||
_abi_test_clobber_rbx:
|
||||
_CET_ENDBR
|
||||
xorq %rbx,%rbx
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rcx
|
||||
.private_extern _abi_test_clobber_rcx
|
||||
.p2align 4
|
||||
_abi_test_clobber_rcx:
|
||||
_CET_ENDBR
|
||||
xorq %rcx,%rcx
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rdx
|
||||
.private_extern _abi_test_clobber_rdx
|
||||
.p2align 4
|
||||
_abi_test_clobber_rdx:
|
||||
_CET_ENDBR
|
||||
xorq %rdx,%rdx
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rdi
|
||||
.private_extern _abi_test_clobber_rdi
|
||||
.p2align 4
|
||||
_abi_test_clobber_rdi:
|
||||
_CET_ENDBR
|
||||
xorq %rdi,%rdi
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rsi
|
||||
.private_extern _abi_test_clobber_rsi
|
||||
.p2align 4
|
||||
_abi_test_clobber_rsi:
|
||||
_CET_ENDBR
|
||||
xorq %rsi,%rsi
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_rbp
|
||||
.private_extern _abi_test_clobber_rbp
|
||||
.p2align 4
|
||||
_abi_test_clobber_rbp:
|
||||
_CET_ENDBR
|
||||
xorq %rbp,%rbp
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r8
|
||||
.private_extern _abi_test_clobber_r8
|
||||
.p2align 4
|
||||
_abi_test_clobber_r8:
|
||||
_CET_ENDBR
|
||||
xorq %r8,%r8
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r9
|
||||
.private_extern _abi_test_clobber_r9
|
||||
.p2align 4
|
||||
_abi_test_clobber_r9:
|
||||
_CET_ENDBR
|
||||
xorq %r9,%r9
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r10
|
||||
.private_extern _abi_test_clobber_r10
|
||||
.p2align 4
|
||||
_abi_test_clobber_r10:
|
||||
_CET_ENDBR
|
||||
xorq %r10,%r10
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r11
|
||||
.private_extern _abi_test_clobber_r11
|
||||
.p2align 4
|
||||
_abi_test_clobber_r11:
|
||||
_CET_ENDBR
|
||||
xorq %r11,%r11
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r12
|
||||
.private_extern _abi_test_clobber_r12
|
||||
.p2align 4
|
||||
_abi_test_clobber_r12:
|
||||
_CET_ENDBR
|
||||
xorq %r12,%r12
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r13
|
||||
.private_extern _abi_test_clobber_r13
|
||||
.p2align 4
|
||||
_abi_test_clobber_r13:
|
||||
_CET_ENDBR
|
||||
xorq %r13,%r13
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r14
|
||||
.private_extern _abi_test_clobber_r14
|
||||
.p2align 4
|
||||
_abi_test_clobber_r14:
|
||||
_CET_ENDBR
|
||||
xorq %r14,%r14
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r15
|
||||
.private_extern _abi_test_clobber_r15
|
||||
.p2align 4
|
||||
_abi_test_clobber_r15:
|
||||
_CET_ENDBR
|
||||
xorq %r15,%r15
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm0
|
||||
.private_extern _abi_test_clobber_xmm0
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm0:
|
||||
_CET_ENDBR
|
||||
pxor %xmm0,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm1
|
||||
.private_extern _abi_test_clobber_xmm1
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm1:
|
||||
_CET_ENDBR
|
||||
pxor %xmm1,%xmm1
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm2
|
||||
.private_extern _abi_test_clobber_xmm2
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm2:
|
||||
_CET_ENDBR
|
||||
pxor %xmm2,%xmm2
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm3
|
||||
.private_extern _abi_test_clobber_xmm3
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm3:
|
||||
_CET_ENDBR
|
||||
pxor %xmm3,%xmm3
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm4
|
||||
.private_extern _abi_test_clobber_xmm4
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm4:
|
||||
_CET_ENDBR
|
||||
pxor %xmm4,%xmm4
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm5
|
||||
.private_extern _abi_test_clobber_xmm5
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm5:
|
||||
_CET_ENDBR
|
||||
pxor %xmm5,%xmm5
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm6
|
||||
.private_extern _abi_test_clobber_xmm6
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm6:
|
||||
_CET_ENDBR
|
||||
pxor %xmm6,%xmm6
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm7
|
||||
.private_extern _abi_test_clobber_xmm7
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm7:
|
||||
_CET_ENDBR
|
||||
pxor %xmm7,%xmm7
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm8
|
||||
.private_extern _abi_test_clobber_xmm8
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm8:
|
||||
_CET_ENDBR
|
||||
pxor %xmm8,%xmm8
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm9
|
||||
.private_extern _abi_test_clobber_xmm9
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm9:
|
||||
_CET_ENDBR
|
||||
pxor %xmm9,%xmm9
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm10
|
||||
.private_extern _abi_test_clobber_xmm10
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm10:
|
||||
_CET_ENDBR
|
||||
pxor %xmm10,%xmm10
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm11
|
||||
.private_extern _abi_test_clobber_xmm11
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm11:
|
||||
_CET_ENDBR
|
||||
pxor %xmm11,%xmm11
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm12
|
||||
.private_extern _abi_test_clobber_xmm12
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm12:
|
||||
_CET_ENDBR
|
||||
pxor %xmm12,%xmm12
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm13
|
||||
.private_extern _abi_test_clobber_xmm13
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm13:
|
||||
_CET_ENDBR
|
||||
pxor %xmm13,%xmm13
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm14
|
||||
.private_extern _abi_test_clobber_xmm14
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm14:
|
||||
_CET_ENDBR
|
||||
pxor %xmm14,%xmm14
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_xmm15
|
||||
.private_extern _abi_test_clobber_xmm15
|
||||
.p2align 4
|
||||
_abi_test_clobber_xmm15:
|
||||
_CET_ENDBR
|
||||
pxor %xmm15,%xmm15
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -441,18 +465,19 @@ _abi_test_clobber_xmm15:
|
||||
.p2align 4
|
||||
_abi_test_bad_unwind_wrong_register:
|
||||
|
||||
L$abi_test_bad_unwind_wrong_register_seh_begin:
|
||||
|
||||
_CET_ENDBR
|
||||
pushq %r12
|
||||
|
||||
L$abi_test_bad_unwind_wrong_register_seh_push_r13:
|
||||
|
||||
|
||||
|
||||
|
||||
nop
|
||||
popq %r12
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
L$abi_test_bad_unwind_wrong_register_seh_end:
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
@ -465,10 +490,11 @@ L$abi_test_bad_unwind_wrong_register_seh_end:
|
||||
.p2align 4
|
||||
_abi_test_bad_unwind_temporary:
|
||||
|
||||
L$abi_test_bad_unwind_temporary_seh_begin:
|
||||
|
||||
_CET_ENDBR
|
||||
pushq %r12
|
||||
|
||||
L$abi_test_bad_unwind_temporary_seh_push_r12:
|
||||
|
||||
|
||||
movq %r12,%rax
|
||||
incq %rax
|
||||
@ -481,8 +507,8 @@ L$abi_test_bad_unwind_temporary_seh_push_r12:
|
||||
|
||||
popq %r12
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
L$abi_test_bad_unwind_temporary_seh_end:
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
@ -493,12 +519,13 @@ L$abi_test_bad_unwind_temporary_seh_end:
|
||||
.globl _abi_test_get_and_clear_direction_flag
|
||||
.private_extern _abi_test_get_and_clear_direction_flag
|
||||
_abi_test_get_and_clear_direction_flag:
|
||||
_CET_ENDBR
|
||||
pushfq
|
||||
popq %rax
|
||||
andq $0x400,%rax
|
||||
shrq $10,%rax
|
||||
cld
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
|
||||
|
||||
@ -507,7 +534,8 @@ _abi_test_get_and_clear_direction_flag:
|
||||
.globl _abi_test_set_direction_flag
|
||||
.private_extern _abi_test_set_direction_flag
|
||||
_abi_test_set_direction_flag:
|
||||
_CET_ENDBR
|
||||
std
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
|
||||
#endif
|
||||
1585
third-party/boringssl/err_data.c
vendored
1585
third-party/boringssl/err_data.c
vendored
File diff suppressed because it is too large
Load Diff
@ -1,246 +0,0 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq Ldone_v8 //is x3 zero?
|
||||
Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
@ -1,376 +0,0 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
|
||||
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
|
||||
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
Lstack_args_loop:
|
||||
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne Lstack_args_loop
|
||||
|
||||
Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi Larg_r3
|
||||
beq Larg_r2
|
||||
cmp r3, #1
|
||||
bhi Larg_r1
|
||||
beq Larg_r0
|
||||
b Largs_done
|
||||
|
||||
Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r0
|
||||
.private_extern _abi_test_clobber_r0
|
||||
.align 4
|
||||
_abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r1
|
||||
.private_extern _abi_test_clobber_r1
|
||||
.align 4
|
||||
_abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r2
|
||||
.private_extern _abi_test_clobber_r2
|
||||
.align 4
|
||||
_abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r3
|
||||
.private_extern _abi_test_clobber_r3
|
||||
.align 4
|
||||
_abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r4
|
||||
.private_extern _abi_test_clobber_r4
|
||||
.align 4
|
||||
_abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r5
|
||||
.private_extern _abi_test_clobber_r5
|
||||
.align 4
|
||||
_abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r6
|
||||
.private_extern _abi_test_clobber_r6
|
||||
.align 4
|
||||
_abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r7
|
||||
.private_extern _abi_test_clobber_r7
|
||||
.align 4
|
||||
_abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r8
|
||||
.private_extern _abi_test_clobber_r8
|
||||
.align 4
|
||||
_abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r9
|
||||
.private_extern _abi_test_clobber_r9
|
||||
.align 4
|
||||
_abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r10
|
||||
.private_extern _abi_test_clobber_r10
|
||||
.align 4
|
||||
_abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r11
|
||||
.private_extern _abi_test_clobber_r11
|
||||
.align 4
|
||||
_abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r12
|
||||
.private_extern _abi_test_clobber_r12
|
||||
.align 4
|
||||
_abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d0
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d1
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d2
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d3
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d4
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d5
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d6
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d7
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d8
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d9
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d10
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d11
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d12
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d13
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d14
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d15
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
1968
third-party/boringssl/linux-aarch64/crypto/chacha/chacha-armv8-linux.S
vendored
Normal file
1968
third-party/boringssl/linux-aarch64/crypto/chacha/chacha-armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3009
third-party/boringssl/linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-linux.S
vendored
Normal file
3009
third-party/boringssl/linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
791
third-party/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-armv8-linux.S
vendored
Normal file
791
third-party/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-armv8-linux.S
vendored
Normal file
@ -0,0 +1,791 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.section .rodata
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
cmp x0,#0
|
||||
b.eq .Lenc_key_abort
|
||||
cmp x2,#0
|
||||
b.eq .Lenc_key_abort
|
||||
mov x3,#-2
|
||||
cmp w1,#128
|
||||
b.lt .Lenc_key_abort
|
||||
cmp w1,#256
|
||||
b.gt .Lenc_key_abort
|
||||
tst w1,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
adrp x3,.Lrcon
|
||||
add x3,x3,:lo12:.Lrcon
|
||||
cmp w1,#192
|
||||
|
||||
eor v0.16b,v0.16b,v0.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
mov w1,#8 // reuse w1
|
||||
ld1 {v1.4s,v2.4s},[x3],#32
|
||||
|
||||
b.lt .Loop128
|
||||
b.eq .L192
|
||||
b .L256
|
||||
|
||||
.align 4
|
||||
.Loop128:
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
b.ne .Loop128
|
||||
|
||||
ld1 {v1.4s},[x3]
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2]
|
||||
add x2,x2,#0x50
|
||||
|
||||
mov w12,#10
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L192:
|
||||
ld1 {v4.8b},[x0],#8
|
||||
movi v6.16b,#8 // borrow v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
sub v2.16b,v2.16b,v6.16b // adjust the mask
|
||||
|
||||
.Loop192:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.8b},[x2],#8
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
|
||||
dup v5.4s,v3.s[3]
|
||||
eor v5.16b,v5.16b,v4.16b
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
ext v4.16b,v0.16b,v4.16b,#12
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.ne .Loop192
|
||||
|
||||
mov w12,#12
|
||||
add x2,x2,#0x20
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L256:
|
||||
ld1 {v4.16b},[x0]
|
||||
mov w1,#7
|
||||
mov w12,#14
|
||||
st1 {v3.4s},[x2],#16
|
||||
|
||||
.Loop256:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.eq .Ldone
|
||||
|
||||
dup v6.4s,v3.s[3] // just splat
|
||||
ext v5.16b,v0.16b,v4.16b,#12
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
b .Loop256
|
||||
|
||||
.Ldone:
|
||||
str w12,[x2]
|
||||
mov x3,#0
|
||||
|
||||
.Lenc_key_abort:
|
||||
mov x0,x3 // return value
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
|
||||
|
||||
.globl aes_hw_set_decrypt_key
|
||||
.hidden aes_hw_set_decrypt_key
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl .Lenc_key
|
||||
|
||||
cmp x0,#0
|
||||
b.ne .Ldec_key_abort
|
||||
|
||||
sub x2,x2,#240 // restore original x2
|
||||
mov x4,#-16
|
||||
add x0,x2,x12,lsl#4 // end of key schedule
|
||||
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_imc:
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
aesimc v0.16b,v0.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
cmp x0,x2
|
||||
b.hi .Loop_imc
|
||||
|
||||
ld1 {v0.4s},[x2]
|
||||
aesimc v0.16b,v0.16b
|
||||
st1 {v0.4s},[x0]
|
||||
|
||||
eor x0,x0,x0 // return value
|
||||
.Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_enc:
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aese v2.16b,v1.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt .Loop_enc
|
||||
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aese v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
.size aes_hw_encrypt,.-aes_hw_encrypt
|
||||
.globl aes_hw_decrypt
|
||||
.hidden aes_hw_decrypt
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_dec:
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aesd v2.16b,v1.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt .Loop_dec
|
||||
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aesd v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
.size aes_hw_decrypt,.-aes_hw_decrypt
|
||||
.globl aes_hw_cbc_encrypt
|
||||
.hidden aes_hw_cbc_encrypt
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
mov x8,#16
|
||||
b.lo .Lcbc_abort
|
||||
csel x8,xzr,x8,eq
|
||||
|
||||
cmp w5,#0 // en- or decrypting?
|
||||
ldr w5,[x3,#240]
|
||||
and x2,x2,#-16
|
||||
ld1 {v6.16b},[x4]
|
||||
ld1 {v0.16b},[x0],x8
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#6
|
||||
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v18.4s,v19.4s},[x7],#32
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
b.eq .Lcbc_dec
|
||||
|
||||
cmp w5,#2
|
||||
eor v0.16b,v0.16b,v6.16b
|
||||
eor v5.16b,v16.16b,v7.16b
|
||||
b.eq .Lcbc_enc128
|
||||
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
add x7,x3,#16
|
||||
add x6,x3,#16*4
|
||||
add x12,x3,#16*5
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
add x14,x3,#16*6
|
||||
add x3,x3,#16*7
|
||||
b .Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
.Loop_cbc_enc:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
.Lenter_cbc_enc:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x6]
|
||||
cmp w5,#4
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x12]
|
||||
b.eq .Lcbc_enc192
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x14]
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x3]
|
||||
nop
|
||||
|
||||
.Lcbc_enc192:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs .Loop_cbc_enc
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
|
||||
.align 5
|
||||
.Lcbc_enc128:
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
b .Lenter_cbc_enc128
|
||||
.Loop_cbc_enc128:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
.Lenter_cbc_enc128:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs .Loop_cbc_enc128
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
.align 5
|
||||
.Lcbc_dec:
|
||||
ld1 {v18.16b},[x0],#16
|
||||
subs x2,x2,#32 // bias
|
||||
add w6,w5,#2
|
||||
orr v3.16b,v0.16b,v0.16b
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
b.lo .Lcbc_dec_tail
|
||||
|
||||
orr v1.16b,v18.16b,v18.16b
|
||||
ld1 {v18.16b},[x0],#16
|
||||
orr v2.16b,v0.16b,v0.16b
|
||||
orr v3.16b,v1.16b,v1.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
|
||||
.Loop3x_cbc_dec:
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Loop3x_cbc_dec
|
||||
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v4.16b,v6.16b,v7.16b
|
||||
subs x2,x2,#0x30
|
||||
eor v5.16b,v2.16b,v7.16b
|
||||
csel x6,x2,x6,lo // x6, w6, is zero at this point
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
add x0,x0,x6 // x0 is adjusted in such way that
|
||||
// at exit from the loop v1.16b-v18.16b
|
||||
// are loaded with last "words"
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
mov x7,x3
|
||||
aesd v0.16b,v20.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
aesd v0.16b,v21.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
aesd v0.16b,v22.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
aesd v0.16b,v23.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
add w6,w5,#2
|
||||
eor v4.16b,v4.16b,v0.16b
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v18.16b,v18.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v4.16b},[x1],#16
|
||||
orr v0.16b,v2.16b,v2.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
orr v1.16b,v3.16b,v3.16b
|
||||
st1 {v18.16b},[x1],#16
|
||||
orr v18.16b,v19.16b,v19.16b
|
||||
b.hs .Loop3x_cbc_dec
|
||||
|
||||
cmn x2,#0x30
|
||||
b.eq .Lcbc_done
|
||||
nop
|
||||
|
||||
.Lcbc_dec_tail:
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Lcbc_dec_tail
|
||||
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
cmn x2,#0x20
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v7.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
b.eq .Lcbc_dec_one
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v17.16b,v17.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
st1 {v17.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
|
||||
.Lcbc_dec_one:
|
||||
eor v5.16b,v5.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
|
||||
.Lcbc_done:
|
||||
st1 {v6.16b},[x4]
|
||||
.Lcbc_abort:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
|
||||
.globl aes_hw_ctr32_encrypt_blocks
|
||||
.hidden aes_hw_ctr32_encrypt_blocks
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
||||
ldr w8, [x4, #12]
|
||||
ld1 {v0.4s},[x4]
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#4
|
||||
mov x12,#16
|
||||
cmp x2,#2
|
||||
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
csel x12,xzr,x12,lo
|
||||
|
||||
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
|
||||
// affected by silicon errata #1742098 [0] and #1655431 [1],
|
||||
// respectively, where the second instruction of an aese/aesmc
|
||||
// instruction pair may execute twice if an interrupt is taken right
|
||||
// after the first instruction consumes an input register of which a
|
||||
// single 32-bit lane has been updated the last time it was modified.
|
||||
//
|
||||
// This function uses a counter in one 32-bit lane. The vmov lines
|
||||
// could write to v1.16b and v18.16b directly, but that trips this bugs.
|
||||
// We write to v6.16b and copy to the final register as a workaround.
|
||||
//
|
||||
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
|
||||
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
|
||||
#ifndef __AARCH64EB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
add w10, w8, #1
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v6.s[3],w10
|
||||
add w8, w8, #2
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
b.ls .Lctr32_tail
|
||||
rev w12, w8
|
||||
mov v6.s[3],w12
|
||||
sub x2,x2,#3 // bias
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
.Loop3x_ctr32:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Loop3x_ctr32
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v4.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
add w9,w8,#1
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
rev w9,w9
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
add w10,w8,#2
|
||||
aese v17.16b,v20.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
add w8,w8,#3
|
||||
aese v4.16b,v21.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
// Note the logic to update v0.16b, v1.16b, and v1.16b is written to work
|
||||
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
|
||||
// 32-bit mode. See the comment above.
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
mov v6.s[3], w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
mov v6.s[3], w10
|
||||
rev w12,w8
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
mov v6.s[3], w12
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
aese v17.16b,v23.16b
|
||||
|
||||
eor v2.16b,v2.16b,v4.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
st1 {v2.16b},[x1],#16
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
mov w6,w5
|
||||
st1 {v3.16b},[x1],#16
|
||||
eor v19.16b,v19.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v19.16b},[x1],#16
|
||||
b.hs .Loop3x_ctr32
|
||||
|
||||
adds x2,x2,#3
|
||||
b.eq .Lctr32_done
|
||||
cmp x2,#1
|
||||
mov x12,#16
|
||||
csel x12,xzr,x12,eq
|
||||
|
||||
.Lctr32_tail:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Lctr32_tail
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],x12
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v20.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v3.16b},[x0]
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v21.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v22.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
aese v0.16b,v23.16b
|
||||
aese v1.16b,v23.16b
|
||||
|
||||
cmp x2,#1
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
eor v3.16b,v3.16b,v1.16b
|
||||
st1 {v2.16b},[x1],#16
|
||||
b.eq .Lctr32_done
|
||||
st1 {v3.16b},[x1]
|
||||
|
||||
.Lctr32_done:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
1555
third-party/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-linux.S
vendored
Normal file
1555
third-party/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1425
third-party/boringssl/linux-aarch64/crypto/fipsmodule/armv8-mont-linux.S
vendored
Normal file
1425
third-party/boringssl/linux-aarch64/crypto/fipsmodule/armv8-mont-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
89
third-party/boringssl/linux-aarch64/crypto/fipsmodule/bn-armv8-linux.S
vendored
Normal file
89
third-party/boringssl/linux-aarch64/crypto/fipsmodule/bn-armv8-linux.S
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
// size_t num);
|
||||
.type bn_add_words, %function
|
||||
.globl bn_add_words
|
||||
.hidden bn_add_words
|
||||
.align 4
|
||||
bn_add_words:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
# Clear the carry flag.
|
||||
cmn xzr, xzr
|
||||
|
||||
# aarch64 can load two registers at a time, so we do two loop iterations at
|
||||
# at a time. Split x3 = 2 * x8 + x3. This allows loop
|
||||
# operations to use CBNZ without clobbering the carry flag.
|
||||
lsr x8, x3, #1
|
||||
and x3, x3, #1
|
||||
|
||||
cbz x8, .Ladd_tail
|
||||
.Ladd_loop:
|
||||
ldp x4, x5, [x1], #16
|
||||
ldp x6, x7, [x2], #16
|
||||
sub x8, x8, #1
|
||||
adcs x4, x4, x6
|
||||
adcs x5, x5, x7
|
||||
stp x4, x5, [x0], #16
|
||||
cbnz x8, .Ladd_loop
|
||||
|
||||
.Ladd_tail:
|
||||
cbz x3, .Ladd_exit
|
||||
ldr x4, [x1], #8
|
||||
ldr x6, [x2], #8
|
||||
adcs x4, x4, x6
|
||||
str x4, [x0], #8
|
||||
|
||||
.Ladd_exit:
|
||||
cset x0, cs
|
||||
ret
|
||||
.size bn_add_words,.-bn_add_words
|
||||
|
||||
// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
// size_t num);
|
||||
.type bn_sub_words, %function
|
||||
.globl bn_sub_words
|
||||
.hidden bn_sub_words
|
||||
.align 4
|
||||
bn_sub_words:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
|
||||
# so we want C = 1 here.
|
||||
cmp xzr, xzr
|
||||
|
||||
# aarch64 can load two registers at a time, so we do two loop iterations at
|
||||
# at a time. Split x3 = 2 * x8 + x3. This allows loop
|
||||
# operations to use CBNZ without clobbering the carry flag.
|
||||
lsr x8, x3, #1
|
||||
and x3, x3, #1
|
||||
|
||||
cbz x8, .Lsub_tail
|
||||
.Lsub_loop:
|
||||
ldp x4, x5, [x1], #16
|
||||
ldp x6, x7, [x2], #16
|
||||
sub x8, x8, #1
|
||||
sbcs x4, x4, x6
|
||||
sbcs x5, x5, x7
|
||||
stp x4, x5, [x0], #16
|
||||
cbnz x8, .Lsub_loop
|
||||
|
||||
.Lsub_tail:
|
||||
cbz x3, .Lsub_exit
|
||||
ldr x4, [x1], #8
|
||||
ldr x6, [x2], #8
|
||||
sbcs x4, x4, x6
|
||||
str x4, [x0], #8
|
||||
|
||||
.Lsub_exit:
|
||||
cset x0, cc
|
||||
ret
|
||||
.size bn_sub_words,.-bn_sub_words
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
335
third-party/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8-linux.S
vendored
Normal file
335
third-party/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8-linux.S
vendored
Normal file
@ -0,0 +1,335 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
shl v19.2d, v19.2d, #57 // 0xc2.0
|
||||
ext v3.16b, v17.16b, v17.16b, #8
|
||||
ushr v18.2d, v19.2d, #63
|
||||
dup v17.4s, v17.s[1]
|
||||
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
|
||||
ushr v18.2d, v3.2d, #63
|
||||
sshr v17.4s, v17.4s, #31 // broadcast carry bit
|
||||
and v18.16b, v18.16b, v16.16b
|
||||
shl v3.2d, v3.2d, #1
|
||||
ext v18.16b, v18.16b, v18.16b, #8
|
||||
and v16.16b, v16.16b, v17.16b
|
||||
orr v3.16b, v3.16b, v18.16b // H<<<=1
|
||||
eor v5.16b, v3.16b, v16.16b // twisted H
|
||||
st1 {v5.2d}, [x0] // store Htable[0]
|
||||
ret
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v3.16b, v3.16b // byteswap Xi
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
mov x3, #16
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v0.16b, v0.16b // byteswap Xi
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
.Loop_neon:
|
||||
ld1 {v3.16b}, [x2], #16 // load inp
|
||||
rev64 v3.16b, v3.16b // byteswap inp
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
|
||||
|
||||
.Lgmult_neon:
|
||||
// Split the input into v3 and v4. (The upper halves are unused,
|
||||
// so it is okay to leave them alone.)
|
||||
ins v4.d[0], v3.d[1]
|
||||
ext v16.8b, v5.8b, v5.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v0.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v0.8h, v5.8b, v0.8b // E = A*B1
|
||||
ext v17.8b, v5.8b, v5.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v5.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v5.8b, v5.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v0.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v0.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v0.8h, v5.8b, v0.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v0.16b // N = I + J
|
||||
pmull v19.8h, v5.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v0.8h, v5.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v0.16b, v0.16b, v16.16b
|
||||
eor v0.16b, v0.16b, v18.16b
|
||||
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
|
||||
ext v16.8b, v7.8b, v7.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v1.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v1.8h, v7.8b, v1.8b // E = A*B1
|
||||
ext v17.8b, v7.8b, v7.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v7.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v7.8b, v7.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v1.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v1.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v1.8h, v7.8b, v1.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v1.16b // N = I + J
|
||||
pmull v19.8h, v7.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v1.8h, v7.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v1.16b, v1.16b, v16.16b
|
||||
eor v1.16b, v1.16b, v18.16b
|
||||
ext v16.8b, v6.8b, v6.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v4.8b // F = A1*B
|
||||
ext v2.8b, v4.8b, v4.8b, #1 // B1
|
||||
pmull v2.8h, v6.8b, v2.8b // E = A*B1
|
||||
ext v17.8b, v6.8b, v6.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v4.8b // H = A2*B
|
||||
ext v19.8b, v4.8b, v4.8b, #2 // B2
|
||||
pmull v19.8h, v6.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v6.8b, v6.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v2.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v4.8b // J = A3*B
|
||||
ext v2.8b, v4.8b, v4.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v2.8h, v6.8b, v2.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v4.8b, v4.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v2.16b // N = I + J
|
||||
pmull v19.8h, v6.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v2.8h, v6.8b, v4.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v2.16b, v2.16b, v16.16b
|
||||
eor v2.16b, v2.16b, v18.16b
|
||||
ext v16.16b, v0.16b, v2.16b, #8
|
||||
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
|
||||
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
|
||||
// This is a no-op due to the ins instruction below.
|
||||
// ins v2.d[0], v1.d[1]
|
||||
|
||||
// equivalent of reduction_avx from ghash-x86_64.pl
|
||||
shl v17.2d, v0.2d, #57 // 1st phase
|
||||
shl v18.2d, v0.2d, #62
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
shl v17.2d, v0.2d, #63
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
// Note Xm contains {Xl.d[1], Xh.d[0]}.
|
||||
eor v18.16b, v18.16b, v1.16b
|
||||
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
|
||||
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
|
||||
|
||||
ushr v18.2d, v0.2d, #1 // 2nd phase
|
||||
eor v2.16b, v2.16b,v0.16b
|
||||
eor v0.16b, v0.16b,v18.16b //
|
||||
ushr v18.2d, v18.2d, #6
|
||||
ushr v0.2d, v0.2d, #1 //
|
||||
eor v0.16b, v0.16b, v2.16b //
|
||||
eor v0.16b, v0.16b, v18.16b //
|
||||
|
||||
subs x3, x3, #16
|
||||
bne .Loop_neon
|
||||
|
||||
rev64 v0.16b, v0.16b // byteswap Xi and write
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st1 {v0.16b}, [x0]
|
||||
|
||||
ret
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
.Lmasks:
|
||||
.quad 0x0000ffffffffffff // k48
|
||||
.quad 0x00000000ffffffff // k32
|
||||
.quad 0x000000000000ffff // k16
|
||||
.quad 0x0000000000000000 // k0
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
565
third-party/boringssl/linux-aarch64/crypto/fipsmodule/ghashv8-armv8-linux.S
vendored
Normal file
565
third-party/boringssl/linux-aarch64/crypto/fipsmodule/ghashv8-armv8-linux.S
vendored
Normal file
@ -0,0 +1,565 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
|
||||
//calculate H^3 and H^4
|
||||
pmull v0.1q,v20.1d, v22.1d
|
||||
pmull v5.1q,v22.1d,v22.1d
|
||||
pmull2 v2.1q,v20.2d, v22.2d
|
||||
pmull2 v7.1q,v22.2d,v22.2d
|
||||
pmull v1.1q,v16.1d,v17.1d
|
||||
pmull v6.1q,v17.1d,v17.1d
|
||||
|
||||
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
ext v17.16b,v5.16b,v7.16b,#8
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v16.16b
|
||||
eor v4.16b,v5.16b,v7.16b
|
||||
eor v6.16b,v6.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
eor v6.16b,v6.16b,v4.16b
|
||||
pmull v4.1q,v5.1d,v19.1d
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v7.d[0],v6.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ins v6.d[1],v5.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v4.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
ext v4.16b,v5.16b,v5.16b,#8
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v5.1q,v5.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v4.16b,v4.16b,v7.16b
|
||||
eor v20.16b, v0.16b,v18.16b //H^3
|
||||
eor v22.16b,v5.16b,v4.16b //H^4
|
||||
|
||||
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
|
||||
ext v17.16b,v22.16b,v22.16b,#8
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
|
||||
ret
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
.globl gcm_gmult_v8
|
||||
.hidden gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
.globl gcm_ghash_v8
|
||||
.hidden gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cmp x3,#64
|
||||
b.hs .Lgcm_ghash_v8_4x
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo .Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
.Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq .Ldone_v8 //is x3 zero?
|
||||
.Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
.Ldone_v8:
|
||||
#ifndef __AARCH64EB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
//------------------------------------------------------------------------
// gcm_ghash_v8_4x — 4-blocks-at-a-time GHASH inner loop (Crypto Extensions).
//
// In:  x0 = Xi        (current hash value; loaded "[rotated]", stored back)
// In:  x1 = Htable    (twisted H, H^2 at [x1], then H^3, H^4 — see loads below)
// In:  x2 = inp       (input blocks, consumed in 64-byte groups)
// In:  x3 = len       (byte count; decremented by 64 per main-loop pass)
//
// Uses PMULL/PMULL2 polynomial multiplies with Karatsuba-style middle-term
// combination, then a two-phase reduction by the GHASH polynomial constant
// (v19 = 0xe1 shifted left 57 — "0xc2.0" per the comment below).
// Tail handles 1–3 remaining blocks via .Lone/.Ltwo/.Lthree.
//------------------------------------------------------------------------
.type	gcm_ghash_v8_4x,%function
.align	4
gcm_ghash_v8_4x:
.Lgcm_ghash_v8_4x:
	ld1	{v0.2d},[x0]		//load [rotated] Xi
	ld1	{v20.2d,v21.2d,v22.2d},[x1],#48	//load twisted H, ..., H^2
	movi	v19.16b,#0xe1
	ld1	{v26.2d,v27.2d,v28.2d},[x1]	//load twisted H^3, ..., H^4
	shl	v19.2d,v19.2d,#57	//compose 0xc2.0 constant

	ld1	{v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
#ifndef	__AARCH64EB__
	// Byte-swap input on little-endian targets (GHASH is big-endian).
	rev64	v0.16b,v0.16b
	rev64	v5.16b,v5.16b
	rev64	v6.16b,v6.16b
	rev64	v7.16b,v7.16b
	rev64	v4.16b,v4.16b
#endif
	ext	v25.16b,v7.16b,v7.16b,#8
	ext	v24.16b,v6.16b,v6.16b,#8
	ext	v23.16b,v5.16b,v5.16b,#8

	pmull	v29.1q,v20.1d,v25.1d	//H·Ii+3
	eor	v7.16b,v7.16b,v25.16b
	pmull2	v31.1q,v20.2d,v25.2d
	pmull	v30.1q,v21.1d,v7.1d

	pmull	v16.1q,v22.1d,v24.1d	//H^2·Ii+2
	eor	v6.16b,v6.16b,v24.16b
	pmull2	v24.1q,v22.2d,v24.2d
	pmull2	v6.1q,v21.2d,v6.2d

	eor	v29.16b,v29.16b,v16.16b
	eor	v31.16b,v31.16b,v24.16b
	eor	v30.16b,v30.16b,v6.16b

	pmull	v7.1q,v26.1d,v23.1d	//H^3·Ii+1
	eor	v5.16b,v5.16b,v23.16b
	pmull2	v23.1q,v26.2d,v23.2d
	pmull	v5.1q,v27.1d,v5.1d

	eor	v29.16b,v29.16b,v7.16b
	eor	v31.16b,v31.16b,v23.16b
	eor	v30.16b,v30.16b,v5.16b

	subs	x3,x3,#128		//64 consumed + 64 needed for next pass
	b.lo	.Ltail4x

	b	.Loop4x

.align	4
.Loop4x:
	// Main loop: fold Xi into block Ii, multiply by H^4, and overlap the
	// H..H^3 multiplies of the next three blocks with the reduction.
	eor	v16.16b,v4.16b,v0.16b
	ld1	{v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
	ext	v3.16b,v16.16b,v16.16b,#8
#ifndef	__AARCH64EB__
	rev64	v5.16b,v5.16b
	rev64	v6.16b,v6.16b
	rev64	v7.16b,v7.16b
	rev64	v4.16b,v4.16b
#endif

	pmull	v0.1q,v28.1d,v3.1d	//H^4·(Xi+Ii)
	eor	v16.16b,v16.16b,v3.16b
	pmull2	v2.1q,v28.2d,v3.2d
	ext	v25.16b,v7.16b,v7.16b,#8
	pmull2	v1.1q,v27.2d,v16.2d

	eor	v0.16b,v0.16b,v29.16b
	eor	v2.16b,v2.16b,v31.16b
	ext	v24.16b,v6.16b,v6.16b,#8
	eor	v1.16b,v1.16b,v30.16b
	ext	v23.16b,v5.16b,v5.16b,#8

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	pmull	v29.1q,v20.1d,v25.1d	//H·Ii+3
	eor	v7.16b,v7.16b,v25.16b
	eor	v1.16b,v1.16b,v17.16b
	pmull2	v31.1q,v20.2d,v25.2d
	eor	v1.16b,v1.16b,v18.16b
	pmull	v30.1q,v21.1d,v7.1d

	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction
	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	pmull	v16.1q,v22.1d,v24.1d	//H^2·Ii+2
	eor	v6.16b,v6.16b,v24.16b
	pmull2	v24.1q,v22.2d,v24.2d
	eor	v0.16b,v1.16b,v18.16b
	pmull2	v6.1q,v21.2d,v6.2d

	eor	v29.16b,v29.16b,v16.16b
	eor	v31.16b,v31.16b,v24.16b
	eor	v30.16b,v30.16b,v6.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	pmull	v7.1q,v26.1d,v23.1d	//H^3·Ii+1
	eor	v5.16b,v5.16b,v23.16b
	eor	v18.16b,v18.16b,v2.16b
	pmull2	v23.1q,v26.2d,v23.2d
	pmull	v5.1q,v27.1d,v5.1d

	eor	v0.16b,v0.16b,v18.16b
	eor	v29.16b,v29.16b,v7.16b
	eor	v31.16b,v31.16b,v23.16b
	ext	v0.16b,v0.16b,v0.16b,#8
	eor	v30.16b,v30.16b,v5.16b

	subs	x3,x3,#64
	b.hs	.Loop4x

.Ltail4x:
	// Finish the multiply that is already in flight for the last 4 blocks.
	eor	v16.16b,v4.16b,v0.16b
	ext	v3.16b,v16.16b,v16.16b,#8

	pmull	v0.1q,v28.1d,v3.1d	//H^4·(Xi+Ii)
	eor	v16.16b,v16.16b,v3.16b
	pmull2	v2.1q,v28.2d,v3.2d
	pmull2	v1.1q,v27.2d,v16.2d

	eor	v0.16b,v0.16b,v29.16b
	eor	v2.16b,v2.16b,v31.16b
	eor	v1.16b,v1.16b,v30.16b

	adds	x3,x3,#64		//undo the extra bias from the #128 subs
	b.eq	.Ldone4x

	cmp	x3,#32			//dispatch on 1, 2 or 3 remaining blocks
	b.lo	.Lone
	b.eq	.Ltwo
.Lthree:
	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v4.2d,v5.2d,v6.2d},[x2]
	eor	v1.16b,v1.16b,v18.16b
#ifndef	__AARCH64EB__
	rev64	v5.16b,v5.16b
	rev64	v6.16b,v6.16b
	rev64	v4.16b,v4.16b
#endif

	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction
	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	ext	v24.16b,v6.16b,v6.16b,#8
	ext	v23.16b,v5.16b,v5.16b,#8
	eor	v0.16b,v1.16b,v18.16b

	pmull	v29.1q,v20.1d,v24.1d	//H·Ii+2
	eor	v6.16b,v6.16b,v24.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	pmull2	v31.1q,v20.2d,v24.2d
	pmull	v30.1q,v21.1d,v6.1d
	eor	v0.16b,v0.16b,v18.16b
	pmull	v7.1q,v22.1d,v23.1d	//H^2·Ii+1
	eor	v5.16b,v5.16b,v23.16b
	ext	v0.16b,v0.16b,v0.16b,#8

	pmull2	v23.1q,v22.2d,v23.2d
	eor	v16.16b,v4.16b,v0.16b
	pmull2	v5.1q,v21.2d,v5.2d
	ext	v3.16b,v16.16b,v16.16b,#8

	eor	v29.16b,v29.16b,v7.16b
	eor	v31.16b,v31.16b,v23.16b
	eor	v30.16b,v30.16b,v5.16b

	pmull	v0.1q,v26.1d,v3.1d	//H^3·(Xi+Ii)
	eor	v16.16b,v16.16b,v3.16b
	pmull2	v2.1q,v26.2d,v3.2d
	pmull	v1.1q,v27.1d,v16.1d

	eor	v0.16b,v0.16b,v29.16b
	eor	v2.16b,v2.16b,v31.16b
	eor	v1.16b,v1.16b,v30.16b
	b	.Ldone4x

.align	4
.Ltwo:
	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v4.2d,v5.2d},[x2]
	eor	v1.16b,v1.16b,v18.16b
#ifndef	__AARCH64EB__
	rev64	v5.16b,v5.16b
	rev64	v4.16b,v4.16b
#endif

	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction
	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	ext	v23.16b,v5.16b,v5.16b,#8
	eor	v0.16b,v1.16b,v18.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b
	ext	v0.16b,v0.16b,v0.16b,#8

	pmull	v29.1q,v20.1d,v23.1d	//H·Ii+1
	eor	v5.16b,v5.16b,v23.16b

	eor	v16.16b,v4.16b,v0.16b
	ext	v3.16b,v16.16b,v16.16b,#8

	pmull2	v31.1q,v20.2d,v23.2d
	pmull	v30.1q,v21.1d,v5.1d

	pmull	v0.1q,v22.1d,v3.1d	//H^2·(Xi+Ii)
	eor	v16.16b,v16.16b,v3.16b
	pmull2	v2.1q,v22.2d,v3.2d
	pmull2	v1.1q,v21.2d,v16.2d

	eor	v0.16b,v0.16b,v29.16b
	eor	v2.16b,v2.16b,v31.16b
	eor	v1.16b,v1.16b,v30.16b
	b	.Ldone4x

.align	4
.Lone:
	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v4.2d},[x2]
	eor	v1.16b,v1.16b,v18.16b
#ifndef	__AARCH64EB__
	rev64	v4.16b,v4.16b
#endif

	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction
	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	eor	v0.16b,v1.16b,v18.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b
	ext	v0.16b,v0.16b,v0.16b,#8

	eor	v16.16b,v4.16b,v0.16b
	ext	v3.16b,v16.16b,v16.16b,#8

	pmull	v0.1q,v20.1d,v3.1d
	eor	v16.16b,v16.16b,v3.16b
	pmull2	v2.1q,v20.2d,v3.2d
	pmull	v1.1q,v21.1d,v16.1d

.Ldone4x:
	// Final Karatsuba recombination + reduction of the last product.
	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	eor	v1.16b,v1.16b,v18.16b

	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction
	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	eor	v0.16b,v1.16b,v18.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b
	ext	v0.16b,v0.16b,v0.16b,#8

#ifndef	__AARCH64EB__
	rev64	v0.16b,v0.16b
#endif
	st1	{v0.2d},[x0]		//write out Xi

	ret
.size	gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
1726
third-party/boringssl/linux-aarch64/crypto/fipsmodule/p256-armv8-asm-linux.S
vendored
Normal file
1726
third-party/boringssl/linux-aarch64/crypto/fipsmodule/p256-armv8-asm-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
309
third-party/boringssl/linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-linux.S
vendored
Normal file
309
third-party/boringssl/linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-linux.S
vendored
Normal file
@ -0,0 +1,309 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
#include "openssl/arm_arch.h"
|
||||
|
||||
.text
|
||||
//------------------------------------------------------------------------
// beeu_mod_inverse_vartime — Binary Extended Euclidean Algorithm (BEEU),
// variable-time modular inverse over 256-bit values.
//
// In:  x0 = out   (4x64-bit result buffer; address is saved on the stack)
// In:  x1 = a     (value to invert, 4x64-bit limbs)
// In:  x2 = n     (modulus, 4x64-bit limbs)
// Out: x0 = 1 on success (out = a^-1 mod n), 0 if gcd(a,n) != 1.
//
// NOTE(review): variable-time by design — the loop/branch structure depends
// on the values, so callers must only use this on public inputs.
//------------------------------------------------------------------------
.globl	beeu_mod_inverse_vartime
.hidden	beeu_mod_inverse_vartime
.type	beeu_mod_inverse_vartime, %function
.align	4
beeu_mod_inverse_vartime:
	// Reserve enough space for 14 8-byte registers on the stack
	// in the first stp call for x29, x30.
	// Then store the remaining callee-saved registers.
	//
	// | x29 | x30 | x19 | x20 | ... | x27 | x28 |  x0 |  x2 |
	// ^                                                     ^
	// sp  <------------------- 112 bytes ----------------> old sp
	// x29 (FP)
	//
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-112]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	stp	x0,x2,[sp,#96]		//save out and n pointers for the epilogue

	// B = b3..b0 := a
	ldp	x25,x26,[x1]
	ldp	x27,x28,[x1,#16]

	// n3..n0 := n
	// Note: the value of input params are changed in the following.
	ldp	x0,x1,[x2]
	ldp	x2,x30,[x2,#16]

	// A = a3..a0 := n
	mov	x21, x0
	mov	x22, x1
	mov	x23, x2
	mov	x24, x30

	// X = x4..x0 := 1
	mov	x3, #1
	eor	x4, x4, x4
	eor	x5, x5, x5
	eor	x6, x6, x6
	eor	x7, x7, x7

	// Y = y4..y0 := 0
	eor	x8, x8, x8
	eor	x9, x9, x9
	eor	x10, x10, x10
	eor	x11, x11, x11
	eor	x12, x12, x12

.Lbeeu_loop:
	// if B == 0, jump to .Lbeeu_loop_end
	orr	x14, x25, x26
	orr	x14, x14, x27

	// reverse the bit order of x25. This is needed for clz after this macro
	rbit	x15, x25

	orr	x14, x14, x28
	cbz	x14,.Lbeeu_loop_end


	// 0 < B < |n|,
	// 0 < A <= |n|,
	// (1) X*a == B (mod |n|),
	// (2) (-1)*Y*a == A (mod |n|)

	// Now divide B by the maximum possible power of two in the
	// integers, and divide X by the same value mod |n|.
	// When we're done, (1) still holds.

	// shift := number of trailing 0s in x25
	// ( = number of leading 0s in x15; see the "rbit" instruction in TEST_B_ZERO)
	clz	x13, x15

	// If there is no shift, goto shift_A_Y
	cbz	x13, .Lbeeu_shift_A_Y

	// Shift B right by "x13" bits
	neg	x14, x13
	lsr	x25, x25, x13
	lsl	x15, x26, x14

	lsr	x26, x26, x13
	lsl	x19, x27, x14

	orr	x25, x25, x15

	lsr	x27, x27, x13
	lsl	x20, x28, x14

	orr	x26, x26, x19

	lsr	x28, x28, x13

	orr	x27, x27, x20


	// Shift X right by "x13" bits, adding n whenever X becomes odd.
	// x13--;
	// x14 := 0; needed in the addition to the most significant word in SHIFT1
	eor	x14, x14, x14
.Lbeeu_shift_loop_X:
	tbz	x3, #0, .Lshift1_0	//if X is even, skip the +n step
	adds	x3, x3, x0
	adcs	x4, x4, x1
	adcs	x5, x5, x2
	adcs	x6, x6, x30
	adc	x7, x7, x14
.Lshift1_0:
	// var0 := [var1|var0]<64..1>;
	// i.e. concatenate var1 and var0,
	//      extract bits <64..1> from the resulting 128-bit value
	//      and put them in var0
	extr	x3, x4, x3, #1
	extr	x4, x5, x4, #1
	extr	x5, x6, x5, #1
	extr	x6, x7, x6, #1
	lsr	x7, x7, #1

	subs	x13, x13, #1
	bne	.Lbeeu_shift_loop_X

	// Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl
	// with the following differences:
	// - "x13" is set directly to the number of trailing 0s in B
	//   (using rbit and clz instructions)
	// - The loop is only used to call SHIFT1(X)
	//   and x13 is decreased while executing the X loop.
	// - SHIFT256(B, x13) is performed before right-shifting X; they are independent

.Lbeeu_shift_A_Y:
	// Same for A and Y.
	// Afterwards, (2) still holds.
	// Reverse the bit order of x21
	// x13 := number of trailing 0s in x21 (= number of leading 0s in x15)
	rbit	x15, x21
	clz	x13, x15

	// If there is no shift, goto |B-A|, X+Y update
	cbz	x13, .Lbeeu_update_B_X_or_A_Y

	// Shift A right by "x13" bits
	neg	x14, x13
	lsr	x21, x21, x13
	lsl	x15, x22, x14

	lsr	x22, x22, x13
	lsl	x19, x23, x14

	orr	x21, x21, x15

	lsr	x23, x23, x13
	lsl	x20, x24, x14

	orr	x22, x22, x19

	lsr	x24, x24, x13

	orr	x23, x23, x20


	// Shift Y right by "x13" bits, adding n whenever Y becomes odd.
	// x13--;
	// x14 := 0; needed in the addition to the most significant word in SHIFT1
	eor	x14, x14, x14
.Lbeeu_shift_loop_Y:
	tbz	x8, #0, .Lshift1_1	//if Y is even, skip the +n step
	adds	x8, x8, x0
	adcs	x9, x9, x1
	adcs	x10, x10, x2
	adcs	x11, x11, x30
	adc	x12, x12, x14
.Lshift1_1:
	// var0 := [var1|var0]<64..1>;
	// i.e. concatenate var1 and var0,
	//      extract bits <64..1> from the resulting 128-bit value
	//      and put them in var0
	extr	x8, x9, x8, #1
	extr	x9, x10, x9, #1
	extr	x10, x11, x10, #1
	extr	x11, x12, x11, #1
	lsr	x12, x12, #1

	subs	x13, x13, #1
	bne	.Lbeeu_shift_loop_Y

.Lbeeu_update_B_X_or_A_Y:
	// Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow)
	// Note: this is a case of unsigned arithmetic, where T fits in 4 64-bit words
	//       without taking a sign bit if generated. The lack of a carry would
	//       indicate a negative result. See, for example,
	//       https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes
	subs	x14, x25, x21
	sbcs	x15, x26, x22
	sbcs	x19, x27, x23
	sbcs	x20, x28, x24
	bcs	.Lbeeu_B_greater_than_A

	// Else A > B =>
	// A := A - B; Y := Y + X; goto beginning of the loop
	subs	x21, x21, x25
	sbcs	x22, x22, x26
	sbcs	x23, x23, x27
	sbcs	x24, x24, x28

	adds	x8, x8, x3
	adcs	x9, x9, x4
	adcs	x10, x10, x5
	adcs	x11, x11, x6
	adc	x12, x12, x7
	b	.Lbeeu_loop

.Lbeeu_B_greater_than_A:
	// Continue with B > A =>
	// B := B - A; X := X + Y; goto beginning of the loop
	mov	x25, x14
	mov	x26, x15
	mov	x27, x19
	mov	x28, x20

	adds	x3, x3, x8
	adcs	x4, x4, x9
	adcs	x5, x5, x10
	adcs	x6, x6, x11
	adc	x7, x7, x12
	b	.Lbeeu_loop

.Lbeeu_loop_end:
	// The Euclid's algorithm loop ends when A == gcd(a,n);
	// this would be 1, when a and n are co-prime (i.e. do not have a common factor).
	// Since (-1)*Y*a == A (mod |n|), Y>0
	// then out = -Y mod n

	// Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|)
	// Is A-1 == 0?
	// If not, fail.
	sub	x14, x21, #1
	orr	x14, x14, x22
	orr	x14, x14, x23
	orr	x14, x14, x24
	cbnz	x14, .Lbeeu_err

	// If Y>n ==> Y:=Y-n
.Lbeeu_reduction_loop:
	// x_i := y_i - n_i (X is no longer needed, use it as temp)
	// (x14 = 0 from above)
	subs	x3, x8, x0
	sbcs	x4, x9, x1
	sbcs	x5, x10, x2
	sbcs	x6, x11, x30
	sbcs	x7, x12, x14

	// If result is non-negative (i.e., cs = carry set = no borrow),
	// y_i := x_i; goto reduce again
	// else
	// y_i := y_i; continue
	csel	x8, x3, x8, cs
	csel	x9, x4, x9, cs
	csel	x10, x5, x10, cs
	csel	x11, x6, x11, cs
	csel	x12, x7, x12, cs
	bcs	.Lbeeu_reduction_loop

	// Now Y < n (Y cannot be equal to n, since the inverse cannot be 0)
	// out = -Y = n-Y
	subs	x8, x0, x8
	sbcs	x9, x1, x9
	sbcs	x10, x2, x10
	sbcs	x11, x30, x11

	// Save Y in output (out (x0) was saved on the stack)
	ldr	x3, [sp,#96]
	stp	x8, x9, [x3]
	stp	x10, x11, [x3,#16]
	// return 1 (success)
	mov	x0, #1
	b	.Lbeeu_finish

.Lbeeu_err:
	// return 0 (error)
	eor	x0, x0, x0

.Lbeeu_finish:
	// Restore callee-saved registers, except x0, x2
	add	sp,x29,#0
	ldp	x19,x20,[sp,#16]
	ldp	x21,x22,[sp,#32]
	ldp	x23,x24,[sp,#48]
	ldp	x25,x26,[sp,#64]
	ldp	x27,x28,[sp,#80]
	ldp	x29,x30,[sp],#112

	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	beeu_mod_inverse_vartime,.-beeu_mod_inverse_vartime
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
1218
third-party/boringssl/linux-aarch64/crypto/fipsmodule/sha1-armv8-linux.S
vendored
Normal file
1218
third-party/boringssl/linux-aarch64/crypto/fipsmodule/sha1-armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1193
third-party/boringssl/linux-aarch64/crypto/fipsmodule/sha256-armv8-linux.S
vendored
Normal file
1193
third-party/boringssl/linux-aarch64/crypto/fipsmodule/sha256-armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1596
third-party/boringssl/linux-aarch64/crypto/fipsmodule/sha512-armv8-linux.S
vendored
Normal file
1596
third-party/boringssl/linux-aarch64/crypto/fipsmodule/sha512-armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1224
third-party/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8-linux.S
vendored
Normal file
1224
third-party/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
750
third-party/boringssl/linux-aarch64/crypto/test/trampoline-armv8-linux.S
vendored
Normal file
750
third-party/boringssl/linux-aarch64/crypto/test/trampoline-armv8-linux.S
vendored
Normal file
@ -0,0 +1,750 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
// with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
// the result of |func|. The |unwind| argument is unused.
|
||||
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
// const uint64_t *argv, size_t argc,
|
||||
// uint64_t unwind);
|
||||
//------------------------------------------------------------------------
// abi_test_trampoline(func=x0, state=x1, argv=x2, argc=x3, unwind=x4)
// Loads callee-saved registers from |state|, calls |func| with up to eight
// 64-bit arguments from |argv|, then writes the (possibly clobbered)
// callee-saved registers back into |state| so the C-side ABI tester can
// detect violations. Returns |func|'s result in x0; |unwind| is unused.
//------------------------------------------------------------------------
.type	abi_test_trampoline, %function
.globl	abi_test_trampoline
.hidden	abi_test_trampoline
.align	4
abi_test_trampoline:
.Labi_test_trampoline_begin:
	AARCH64_SIGN_LINK_REGISTER
	// Stack layout (low to high addresses)
	//   x29,x30 (16 bytes)
	//   d8-d15 (64 bytes)
	//   x19-x28 (80 bytes)
	//   x1 (8 bytes)
	//   padding (8 bytes)
	stp	x29, x30, [sp, #-176]!	//176 keeps sp 16-byte aligned
	mov	x29, sp

	// Saved callee-saved registers and |state|.
	stp	d8, d9, [sp, #16]
	stp	d10, d11, [sp, #32]
	stp	d12, d13, [sp, #48]
	stp	d14, d15, [sp, #64]
	stp	x19, x20, [sp, #80]
	stp	x21, x22, [sp, #96]
	stp	x23, x24, [sp, #112]
	stp	x25, x26, [sp, #128]
	stp	x27, x28, [sp, #144]
	str	x1, [sp, #160]

	// Load registers from |state|, with the exception of x29. x29 is the
	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
	// mandate that x29 always point to a frame. iOS64 does so, which means
	// we cannot fill x29 with entropy without violating ABI rules
	// ourselves. x29 is tested separately below.
	ldp	d8, d9, [x1], #16
	ldp	d10, d11, [x1], #16
	ldp	d12, d13, [x1], #16
	ldp	d14, d15, [x1], #16
	ldp	x19, x20, [x1], #16
	ldp	x21, x22, [x1], #16
	ldp	x23, x24, [x1], #16
	ldp	x25, x26, [x1], #16
	ldp	x27, x28, [x1], #16

	// Move parameters into temporary registers.
	mov	x9, x0			//x9 = func; x0-x7 become call arguments
	mov	x10, x2			//x10 = argv cursor
	mov	x11, x3			//x11 = remaining argc

	// Load parameters into registers.
	cbz	x11, .Largs_done
	ldr	x0, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x1, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x2, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x3, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x4, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x5, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x6, [x10], #8
	subs	x11, x11, #1
	b.eq	.Largs_done
	ldr	x7, [x10], #8

.Largs_done:
	blr	x9

	// Reload |state| and store registers.
	ldr	x1, [sp, #160]
	stp	d8, d9, [x1], #16
	stp	d10, d11, [x1], #16
	stp	d12, d13, [x1], #16
	stp	d14, d15, [x1], #16
	stp	x19, x20, [x1], #16
	stp	x21, x22, [x1], #16
	stp	x23, x24, [x1], #16
	stp	x25, x26, [x1], #16
	stp	x27, x28, [x1], #16

	// |func| is required to preserve x29, the frame pointer. We cannot load
	// random values into x29 (see comment above), so compare it against the
	// expected value and zero the field of |state| if corrupted.
	mov	x9, sp
	cmp	x29, x9
	b.eq	.Lx29_ok
	str	xzr, [x1]

.Lx29_ok:
	// Restore callee-saved registers.
	ldp	d8, d9, [sp, #16]
	ldp	d10, d11, [sp, #32]
	ldp	d12, d13, [sp, #48]
	ldp	d14, d15, [sp, #64]
	ldp	x19, x20, [sp, #80]
	ldp	x21, x22, [sp, #96]
	ldp	x23, x24, [sp, #112]
	ldp	x25, x26, [sp, #128]
	ldp	x27, x28, [sp, #144]

	ldp	x29, x30, [sp], #176
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	abi_test_trampoline,.-abi_test_trampoline
|
||||
//------------------------------------------------------------------------
// abi_test_clobber_* — generated ABI-test stubs. Each function zeroes one
// register (mov xN,xzr / fmov dN,xzr) and returns, letting the test harness
// verify which registers a call may clobber. x18 has no stub (AAPCS64
// platform register — presumably deliberately skipped; see generator
// script), and x30 is the link register.
//------------------------------------------------------------------------
.type	abi_test_clobber_x0, %function
.globl	abi_test_clobber_x0
.hidden	abi_test_clobber_x0
.align	4
abi_test_clobber_x0:
	AARCH64_VALID_CALL_TARGET
	mov	x0, xzr
	ret
.size	abi_test_clobber_x0,.-abi_test_clobber_x0
.type	abi_test_clobber_x1, %function
.globl	abi_test_clobber_x1
.hidden	abi_test_clobber_x1
.align	4
abi_test_clobber_x1:
	AARCH64_VALID_CALL_TARGET
	mov	x1, xzr
	ret
.size	abi_test_clobber_x1,.-abi_test_clobber_x1
.type	abi_test_clobber_x2, %function
.globl	abi_test_clobber_x2
.hidden	abi_test_clobber_x2
.align	4
abi_test_clobber_x2:
	AARCH64_VALID_CALL_TARGET
	mov	x2, xzr
	ret
.size	abi_test_clobber_x2,.-abi_test_clobber_x2
.type	abi_test_clobber_x3, %function
.globl	abi_test_clobber_x3
.hidden	abi_test_clobber_x3
.align	4
abi_test_clobber_x3:
	AARCH64_VALID_CALL_TARGET
	mov	x3, xzr
	ret
.size	abi_test_clobber_x3,.-abi_test_clobber_x3
.type	abi_test_clobber_x4, %function
.globl	abi_test_clobber_x4
.hidden	abi_test_clobber_x4
.align	4
abi_test_clobber_x4:
	AARCH64_VALID_CALL_TARGET
	mov	x4, xzr
	ret
.size	abi_test_clobber_x4,.-abi_test_clobber_x4
.type	abi_test_clobber_x5, %function
.globl	abi_test_clobber_x5
.hidden	abi_test_clobber_x5
.align	4
abi_test_clobber_x5:
	AARCH64_VALID_CALL_TARGET
	mov	x5, xzr
	ret
.size	abi_test_clobber_x5,.-abi_test_clobber_x5
.type	abi_test_clobber_x6, %function
.globl	abi_test_clobber_x6
.hidden	abi_test_clobber_x6
.align	4
abi_test_clobber_x6:
	AARCH64_VALID_CALL_TARGET
	mov	x6, xzr
	ret
.size	abi_test_clobber_x6,.-abi_test_clobber_x6
.type	abi_test_clobber_x7, %function
.globl	abi_test_clobber_x7
.hidden	abi_test_clobber_x7
.align	4
abi_test_clobber_x7:
	AARCH64_VALID_CALL_TARGET
	mov	x7, xzr
	ret
.size	abi_test_clobber_x7,.-abi_test_clobber_x7
.type	abi_test_clobber_x8, %function
.globl	abi_test_clobber_x8
.hidden	abi_test_clobber_x8
.align	4
abi_test_clobber_x8:
	AARCH64_VALID_CALL_TARGET
	mov	x8, xzr
	ret
.size	abi_test_clobber_x8,.-abi_test_clobber_x8
.type	abi_test_clobber_x9, %function
.globl	abi_test_clobber_x9
.hidden	abi_test_clobber_x9
.align	4
abi_test_clobber_x9:
	AARCH64_VALID_CALL_TARGET
	mov	x9, xzr
	ret
.size	abi_test_clobber_x9,.-abi_test_clobber_x9
.type	abi_test_clobber_x10, %function
.globl	abi_test_clobber_x10
.hidden	abi_test_clobber_x10
.align	4
abi_test_clobber_x10:
	AARCH64_VALID_CALL_TARGET
	mov	x10, xzr
	ret
.size	abi_test_clobber_x10,.-abi_test_clobber_x10
.type	abi_test_clobber_x11, %function
.globl	abi_test_clobber_x11
.hidden	abi_test_clobber_x11
.align	4
abi_test_clobber_x11:
	AARCH64_VALID_CALL_TARGET
	mov	x11, xzr
	ret
.size	abi_test_clobber_x11,.-abi_test_clobber_x11
.type	abi_test_clobber_x12, %function
.globl	abi_test_clobber_x12
.hidden	abi_test_clobber_x12
.align	4
abi_test_clobber_x12:
	AARCH64_VALID_CALL_TARGET
	mov	x12, xzr
	ret
.size	abi_test_clobber_x12,.-abi_test_clobber_x12
.type	abi_test_clobber_x13, %function
.globl	abi_test_clobber_x13
.hidden	abi_test_clobber_x13
.align	4
abi_test_clobber_x13:
	AARCH64_VALID_CALL_TARGET
	mov	x13, xzr
	ret
.size	abi_test_clobber_x13,.-abi_test_clobber_x13
.type	abi_test_clobber_x14, %function
.globl	abi_test_clobber_x14
.hidden	abi_test_clobber_x14
.align	4
abi_test_clobber_x14:
	AARCH64_VALID_CALL_TARGET
	mov	x14, xzr
	ret
.size	abi_test_clobber_x14,.-abi_test_clobber_x14
.type	abi_test_clobber_x15, %function
.globl	abi_test_clobber_x15
.hidden	abi_test_clobber_x15
.align	4
abi_test_clobber_x15:
	AARCH64_VALID_CALL_TARGET
	mov	x15, xzr
	ret
.size	abi_test_clobber_x15,.-abi_test_clobber_x15
.type	abi_test_clobber_x16, %function
.globl	abi_test_clobber_x16
.hidden	abi_test_clobber_x16
.align	4
abi_test_clobber_x16:
	AARCH64_VALID_CALL_TARGET
	mov	x16, xzr
	ret
.size	abi_test_clobber_x16,.-abi_test_clobber_x16
.type	abi_test_clobber_x17, %function
.globl	abi_test_clobber_x17
.hidden	abi_test_clobber_x17
.align	4
abi_test_clobber_x17:
	AARCH64_VALID_CALL_TARGET
	mov	x17, xzr
	ret
.size	abi_test_clobber_x17,.-abi_test_clobber_x17
// x18 intentionally absent (platform register under AAPCS64).
.type	abi_test_clobber_x19, %function
.globl	abi_test_clobber_x19
.hidden	abi_test_clobber_x19
.align	4
abi_test_clobber_x19:
	AARCH64_VALID_CALL_TARGET
	mov	x19, xzr
	ret
.size	abi_test_clobber_x19,.-abi_test_clobber_x19
.type	abi_test_clobber_x20, %function
.globl	abi_test_clobber_x20
.hidden	abi_test_clobber_x20
.align	4
abi_test_clobber_x20:
	AARCH64_VALID_CALL_TARGET
	mov	x20, xzr
	ret
.size	abi_test_clobber_x20,.-abi_test_clobber_x20
.type	abi_test_clobber_x21, %function
.globl	abi_test_clobber_x21
.hidden	abi_test_clobber_x21
.align	4
abi_test_clobber_x21:
	AARCH64_VALID_CALL_TARGET
	mov	x21, xzr
	ret
.size	abi_test_clobber_x21,.-abi_test_clobber_x21
.type	abi_test_clobber_x22, %function
.globl	abi_test_clobber_x22
.hidden	abi_test_clobber_x22
.align	4
abi_test_clobber_x22:
	AARCH64_VALID_CALL_TARGET
	mov	x22, xzr
	ret
.size	abi_test_clobber_x22,.-abi_test_clobber_x22
.type	abi_test_clobber_x23, %function
.globl	abi_test_clobber_x23
.hidden	abi_test_clobber_x23
.align	4
abi_test_clobber_x23:
	AARCH64_VALID_CALL_TARGET
	mov	x23, xzr
	ret
.size	abi_test_clobber_x23,.-abi_test_clobber_x23
.type	abi_test_clobber_x24, %function
.globl	abi_test_clobber_x24
.hidden	abi_test_clobber_x24
.align	4
abi_test_clobber_x24:
	AARCH64_VALID_CALL_TARGET
	mov	x24, xzr
	ret
.size	abi_test_clobber_x24,.-abi_test_clobber_x24
.type	abi_test_clobber_x25, %function
.globl	abi_test_clobber_x25
.hidden	abi_test_clobber_x25
.align	4
abi_test_clobber_x25:
	AARCH64_VALID_CALL_TARGET
	mov	x25, xzr
	ret
.size	abi_test_clobber_x25,.-abi_test_clobber_x25
.type	abi_test_clobber_x26, %function
.globl	abi_test_clobber_x26
.hidden	abi_test_clobber_x26
.align	4
abi_test_clobber_x26:
	AARCH64_VALID_CALL_TARGET
	mov	x26, xzr
	ret
.size	abi_test_clobber_x26,.-abi_test_clobber_x26
.type	abi_test_clobber_x27, %function
.globl	abi_test_clobber_x27
.hidden	abi_test_clobber_x27
.align	4
abi_test_clobber_x27:
	AARCH64_VALID_CALL_TARGET
	mov	x27, xzr
	ret
.size	abi_test_clobber_x27,.-abi_test_clobber_x27
.type	abi_test_clobber_x28, %function
.globl	abi_test_clobber_x28
.hidden	abi_test_clobber_x28
.align	4
abi_test_clobber_x28:
	AARCH64_VALID_CALL_TARGET
	mov	x28, xzr
	ret
.size	abi_test_clobber_x28,.-abi_test_clobber_x28
.type	abi_test_clobber_x29, %function
.globl	abi_test_clobber_x29
.hidden	abi_test_clobber_x29
.align	4
abi_test_clobber_x29:
	AARCH64_VALID_CALL_TARGET
	mov	x29, xzr		//deliberately corrupts the frame pointer
	ret
.size	abi_test_clobber_x29,.-abi_test_clobber_x29
// FP/SIMD stubs: zero one d register each (low 64 bits of v0-v7).
.type	abi_test_clobber_d0, %function
.globl	abi_test_clobber_d0
.hidden	abi_test_clobber_d0
.align	4
abi_test_clobber_d0:
	AARCH64_VALID_CALL_TARGET
	fmov	d0, xzr
	ret
.size	abi_test_clobber_d0,.-abi_test_clobber_d0
.type	abi_test_clobber_d1, %function
.globl	abi_test_clobber_d1
.hidden	abi_test_clobber_d1
.align	4
abi_test_clobber_d1:
	AARCH64_VALID_CALL_TARGET
	fmov	d1, xzr
	ret
.size	abi_test_clobber_d1,.-abi_test_clobber_d1
.type	abi_test_clobber_d2, %function
.globl	abi_test_clobber_d2
.hidden	abi_test_clobber_d2
.align	4
abi_test_clobber_d2:
	AARCH64_VALID_CALL_TARGET
	fmov	d2, xzr
	ret
.size	abi_test_clobber_d2,.-abi_test_clobber_d2
.type	abi_test_clobber_d3, %function
.globl	abi_test_clobber_d3
.hidden	abi_test_clobber_d3
.align	4
abi_test_clobber_d3:
	AARCH64_VALID_CALL_TARGET
	fmov	d3, xzr
	ret
.size	abi_test_clobber_d3,.-abi_test_clobber_d3
.type	abi_test_clobber_d4, %function
.globl	abi_test_clobber_d4
.hidden	abi_test_clobber_d4
.align	4
abi_test_clobber_d4:
	AARCH64_VALID_CALL_TARGET
	fmov	d4, xzr
	ret
.size	abi_test_clobber_d4,.-abi_test_clobber_d4
.type	abi_test_clobber_d5, %function
.globl	abi_test_clobber_d5
.hidden	abi_test_clobber_d5
.align	4
abi_test_clobber_d5:
	AARCH64_VALID_CALL_TARGET
	fmov	d5, xzr
	ret
.size	abi_test_clobber_d5,.-abi_test_clobber_d5
.type	abi_test_clobber_d6, %function
.globl	abi_test_clobber_d6
.hidden	abi_test_clobber_d6
.align	4
abi_test_clobber_d6:
	AARCH64_VALID_CALL_TARGET
	fmov	d6, xzr
	ret
.size	abi_test_clobber_d6,.-abi_test_clobber_d6
.type	abi_test_clobber_d7, %function
.globl	abi_test_clobber_d7
.hidden	abi_test_clobber_d7
.align	4
abi_test_clobber_d7:
	AARCH64_VALID_CALL_TARGET
	fmov	d7, xzr
	ret
.size	abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
.type abi_test_clobber_d16, %function
|
||||
.globl abi_test_clobber_d16
|
||||
.hidden abi_test_clobber_d16
|
||||
.align 4
|
||||
abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d16,.-abi_test_clobber_d16
|
||||
.type abi_test_clobber_d17, %function
|
||||
.globl abi_test_clobber_d17
|
||||
.hidden abi_test_clobber_d17
|
||||
.align 4
|
||||
abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d17,.-abi_test_clobber_d17
|
||||
.type abi_test_clobber_d18, %function
|
||||
.globl abi_test_clobber_d18
|
||||
.hidden abi_test_clobber_d18
|
||||
.align 4
|
||||
abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d18,.-abi_test_clobber_d18
|
||||
.type abi_test_clobber_d19, %function
|
||||
.globl abi_test_clobber_d19
|
||||
.hidden abi_test_clobber_d19
|
||||
.align 4
|
||||
abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d19,.-abi_test_clobber_d19
|
||||
.type abi_test_clobber_d20, %function
|
||||
.globl abi_test_clobber_d20
|
||||
.hidden abi_test_clobber_d20
|
||||
.align 4
|
||||
abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d20,.-abi_test_clobber_d20
|
||||
.type abi_test_clobber_d21, %function
|
||||
.globl abi_test_clobber_d21
|
||||
.hidden abi_test_clobber_d21
|
||||
.align 4
|
||||
abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d21,.-abi_test_clobber_d21
|
||||
.type abi_test_clobber_d22, %function
|
||||
.globl abi_test_clobber_d22
|
||||
.hidden abi_test_clobber_d22
|
||||
.align 4
|
||||
abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d22,.-abi_test_clobber_d22
|
||||
.type abi_test_clobber_d23, %function
|
||||
.globl abi_test_clobber_d23
|
||||
.hidden abi_test_clobber_d23
|
||||
.align 4
|
||||
abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d23,.-abi_test_clobber_d23
|
||||
.type abi_test_clobber_d24, %function
|
||||
.globl abi_test_clobber_d24
|
||||
.hidden abi_test_clobber_d24
|
||||
.align 4
|
||||
abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d24,.-abi_test_clobber_d24
|
||||
.type abi_test_clobber_d25, %function
|
||||
.globl abi_test_clobber_d25
|
||||
.hidden abi_test_clobber_d25
|
||||
.align 4
|
||||
abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d25,.-abi_test_clobber_d25
|
||||
.type abi_test_clobber_d26, %function
|
||||
.globl abi_test_clobber_d26
|
||||
.hidden abi_test_clobber_d26
|
||||
.align 4
|
||||
abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d26,.-abi_test_clobber_d26
|
||||
.type abi_test_clobber_d27, %function
|
||||
.globl abi_test_clobber_d27
|
||||
.hidden abi_test_clobber_d27
|
||||
.align 4
|
||||
abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d27,.-abi_test_clobber_d27
|
||||
.type abi_test_clobber_d28, %function
|
||||
.globl abi_test_clobber_d28
|
||||
.hidden abi_test_clobber_d28
|
||||
.align 4
|
||||
abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d28,.-abi_test_clobber_d28
|
||||
.type abi_test_clobber_d29, %function
|
||||
.globl abi_test_clobber_d29
|
||||
.hidden abi_test_clobber_d29
|
||||
.align 4
|
||||
abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d29,.-abi_test_clobber_d29
|
||||
.type abi_test_clobber_d30, %function
|
||||
.globl abi_test_clobber_d30
|
||||
.hidden abi_test_clobber_d30
|
||||
.align 4
|
||||
abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d30,.-abi_test_clobber_d30
|
||||
.type abi_test_clobber_d31, %function
|
||||
.globl abi_test_clobber_d31
|
||||
.hidden abi_test_clobber_d31
|
||||
.align 4
|
||||
abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d31,.-abi_test_clobber_d31
|
||||
.type abi_test_clobber_v8_upper, %function
|
||||
.globl abi_test_clobber_v8_upper
|
||||
.hidden abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
|
||||
.type abi_test_clobber_v9_upper, %function
|
||||
.globl abi_test_clobber_v9_upper
|
||||
.hidden abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
|
||||
.type abi_test_clobber_v10_upper, %function
|
||||
.globl abi_test_clobber_v10_upper
|
||||
.hidden abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
|
||||
.type abi_test_clobber_v11_upper, %function
|
||||
.globl abi_test_clobber_v11_upper
|
||||
.hidden abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
|
||||
.type abi_test_clobber_v12_upper, %function
|
||||
.globl abi_test_clobber_v12_upper
|
||||
.hidden abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
|
||||
.type abi_test_clobber_v13_upper, %function
|
||||
.globl abi_test_clobber_v13_upper
|
||||
.hidden abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
|
||||
.type abi_test_clobber_v14_upper, %function
|
||||
.globl abi_test_clobber_v14_upper
|
||||
.hidden abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
|
||||
.type abi_test_clobber_v15_upper, %function
|
||||
.globl abi_test_clobber_v15_upper
|
||||
.hidden abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
|
||||
@ -1,22 +1,14 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
@ -33,53 +25,21 @@
|
||||
#endif
|
||||
|
||||
.align 5
|
||||
Lsigma:
|
||||
.Lsigma:
|
||||
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral
|
||||
Lone:
|
||||
.Lone:
|
||||
.long 1,0,0,0
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-LChaCha20_ctr32
|
||||
#else
|
||||
.word -1
|
||||
#endif
|
||||
|
||||
.globl _ChaCha20_ctr32
|
||||
.private_extern _ChaCha20_ctr32
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _ChaCha20_ctr32
|
||||
#endif
|
||||
.globl ChaCha20_ctr32_nohw
|
||||
.hidden ChaCha20_ctr32_nohw
|
||||
.type ChaCha20_ctr32_nohw,%function
|
||||
.align 5
|
||||
_ChaCha20_ctr32:
|
||||
LChaCha20_ctr32:
|
||||
ChaCha20_ctr32_nohw:
|
||||
ldr r12,[sp,#0] @ pull pointer to counter and nonce
|
||||
stmdb sp!,{r0,r1,r2,r4-r11,lr}
|
||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||
sub r14,pc,#16 @ _ChaCha20_ctr32
|
||||
#else
|
||||
adr r14,LChaCha20_ctr32
|
||||
#endif
|
||||
cmp r2,#0 @ len==0?
|
||||
#ifdef __thumb2__
|
||||
itt eq
|
||||
#endif
|
||||
addeq sp,sp,#4*3
|
||||
beq Lno_data
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
cmp r2,#192 @ test len
|
||||
bls Lshort
|
||||
ldr r4,[r14,#-32]
|
||||
ldr r4,[r14,r4]
|
||||
# ifdef __APPLE__
|
||||
ldr r4,[r4]
|
||||
# endif
|
||||
tst r4,#ARMV7_NEON
|
||||
bne LChaCha20_neon
|
||||
Lshort:
|
||||
#endif
|
||||
adr r14,.Lsigma
|
||||
ldmia r12,{r4,r5,r6,r7} @ load counter and nonce
|
||||
sub sp,sp,#4*(16) @ off-load area
|
||||
sub r14,r14,#64 @ Lsigma
|
||||
stmdb sp!,{r4,r5,r6,r7} @ copy counter and nonce
|
||||
ldmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} @ load key
|
||||
ldmia r14,{r0,r1,r2,r3} @ load sigma
|
||||
@ -87,25 +47,25 @@ Lshort:
|
||||
stmdb sp!,{r0,r1,r2,r3} @ copy sigma
|
||||
str r10,[sp,#4*(16+10)] @ off-load "rx"
|
||||
str r11,[sp,#4*(16+11)] @ off-load "rx"
|
||||
b Loop_outer_enter
|
||||
b .Loop_outer_enter
|
||||
|
||||
.align 4
|
||||
Loop_outer:
|
||||
.Loop_outer:
|
||||
ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material
|
||||
str r11,[sp,#4*(32+2)] @ save len
|
||||
str r12, [sp,#4*(32+1)] @ save inp
|
||||
str r14, [sp,#4*(32+0)] @ save out
|
||||
Loop_outer_enter:
|
||||
.Loop_outer_enter:
|
||||
ldr r11, [sp,#4*(15)]
|
||||
ldr r12,[sp,#4*(12)] @ modulo-scheduled load
|
||||
ldr r10, [sp,#4*(13)]
|
||||
ldr r14,[sp,#4*(14)]
|
||||
str r11, [sp,#4*(16+15)]
|
||||
mov r11,#10
|
||||
b Loop
|
||||
b .Loop
|
||||
|
||||
.align 4
|
||||
Loop:
|
||||
.Loop:
|
||||
subs r11,r11,#1
|
||||
add r0,r0,r4
|
||||
mov r12,r12,ror#16
|
||||
@ -215,7 +175,7 @@ Loop:
|
||||
mov r4,r4,ror#25
|
||||
eor r7,r7,r8,ror#25
|
||||
eor r4,r4,r9,ror#25
|
||||
bne Loop
|
||||
bne .Loop
|
||||
|
||||
ldr r11,[sp,#4*(32+2)] @ load len
|
||||
|
||||
@ -240,12 +200,12 @@ Loop:
|
||||
ldr r8,[sp,#4*(0)] @ load key material
|
||||
ldr r9,[sp,#4*(1)]
|
||||
|
||||
#if __ARM_ARCH__>=6 || !defined(__ARMEB__)
|
||||
# if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH>=6 || !defined(__ARMEB__)
|
||||
# if __ARM_ARCH<7
|
||||
orr r10,r12,r14
|
||||
tst r10,#3 @ are input and output aligned?
|
||||
ldr r10,[sp,#4*(2)]
|
||||
bne Lunaligned
|
||||
bne .Lunaligned
|
||||
cmp r11,#64 @ restore flags
|
||||
# else
|
||||
ldr r10,[sp,#4*(2)]
|
||||
@ -267,7 +227,7 @@ Loop:
|
||||
# endif
|
||||
ldrhs r10,[r12,#-8]
|
||||
ldrhs r11,[r12,#-4]
|
||||
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
|
||||
# if __ARM_ARCH>=6 && defined(__ARMEB__)
|
||||
rev r0,r0
|
||||
rev r1,r1
|
||||
rev r2,r2
|
||||
@ -304,7 +264,7 @@ Loop:
|
||||
# endif
|
||||
ldrhs r10,[r12,#-8]
|
||||
ldrhs r11,[r12,#-4]
|
||||
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
|
||||
# if __ARM_ARCH>=6 && defined(__ARMEB__)
|
||||
rev r4,r4
|
||||
rev r5,r5
|
||||
rev r6,r6
|
||||
@ -349,7 +309,7 @@ Loop:
|
||||
# endif
|
||||
ldrhs r10,[r12,#-8]
|
||||
ldrhs r11,[r12,#-4]
|
||||
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
|
||||
# if __ARM_ARCH>=6 && defined(__ARMEB__)
|
||||
rev r0,r0
|
||||
rev r1,r1
|
||||
rev r2,r2
|
||||
@ -391,7 +351,7 @@ Loop:
|
||||
# endif
|
||||
ldrhs r10,[r12,#-8]
|
||||
ldrhs r11,[r12,#-4]
|
||||
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
|
||||
# if __ARM_ARCH>=6 && defined(__ARMEB__)
|
||||
rev r4,r4
|
||||
rev r5,r5
|
||||
rev r6,r6
|
||||
@ -419,18 +379,18 @@ Loop:
|
||||
subhs r11,r8,#64 @ len-=64
|
||||
str r6,[r14,#-8]
|
||||
str r7,[r14,#-4]
|
||||
bhi Loop_outer
|
||||
bhi .Loop_outer
|
||||
|
||||
beq Ldone
|
||||
# if __ARM_ARCH__<7
|
||||
b Ltail
|
||||
beq .Ldone
|
||||
# if __ARM_ARCH<7
|
||||
b .Ltail
|
||||
|
||||
.align 4
|
||||
Lunaligned:@ unaligned endian-neutral path
|
||||
.Lunaligned:@ unaligned endian-neutral path
|
||||
cmp r11,#64 @ restore flags
|
||||
# endif
|
||||
#endif
|
||||
#if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH<7
|
||||
ldr r11,[sp,#4*(3)]
|
||||
add r0,r0,r8 @ accumulate key material
|
||||
add r1,r1,r9
|
||||
@ -786,42 +746,40 @@ Lunaligned:@ unaligned endian-neutral path
|
||||
it hs
|
||||
# endif
|
||||
subhs r11,r8,#64 @ len-=64
|
||||
bhi Loop_outer
|
||||
bhi .Loop_outer
|
||||
|
||||
beq Ldone
|
||||
beq .Ldone
|
||||
#endif
|
||||
|
||||
Ltail:
|
||||
.Ltail:
|
||||
ldr r12,[sp,#4*(32+1)] @ load inp
|
||||
add r9,sp,#4*(0)
|
||||
ldr r14,[sp,#4*(32+0)] @ load out
|
||||
|
||||
Loop_tail:
|
||||
.Loop_tail:
|
||||
ldrb r10,[r9],#1 @ read buffer on stack
|
||||
ldrb r11,[r12],#1 @ read input
|
||||
subs r8,r8,#1
|
||||
eor r11,r11,r10
|
||||
strb r11,[r14],#1 @ store output
|
||||
bne Loop_tail
|
||||
bne .Loop_tail
|
||||
|
||||
Ldone:
|
||||
.Ldone:
|
||||
add sp,sp,#4*(32+3)
|
||||
Lno_data:
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
|
||||
.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func ChaCha20_neon
|
||||
#endif
|
||||
.globl ChaCha20_ctr32_neon
|
||||
.hidden ChaCha20_ctr32_neon
|
||||
.type ChaCha20_ctr32_neon,%function
|
||||
.align 5
|
||||
ChaCha20_neon:
|
||||
ChaCha20_ctr32_neon:
|
||||
ldr r12,[sp,#0] @ pull pointer to counter and nonce
|
||||
stmdb sp!,{r0,r1,r2,r4-r11,lr}
|
||||
LChaCha20_neon:
|
||||
adr r14,Lsigma
|
||||
adr r14,.Lsigma
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI spec says so
|
||||
stmdb sp!,{r0,r1,r2,r3}
|
||||
|
||||
@ -848,13 +806,13 @@ LChaCha20_neon:
|
||||
vmov q8,q0
|
||||
vmov q5,q1
|
||||
vmov q9,q1
|
||||
b Loop_neon_enter
|
||||
b .Loop_neon_enter
|
||||
|
||||
.align 4
|
||||
Loop_neon_outer:
|
||||
.Loop_neon_outer:
|
||||
ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material
|
||||
cmp r11,#64*2 @ if len<=64*2
|
||||
bls Lbreak_neon @ switch to integer-only
|
||||
bls .Lbreak_neon @ switch to integer-only
|
||||
vmov q4,q0
|
||||
str r11,[sp,#4*(32+2)] @ save len
|
||||
vmov q8,q0
|
||||
@ -862,7 +820,7 @@ Loop_neon_outer:
|
||||
vmov q5,q1
|
||||
str r14, [sp,#4*(32+0)] @ save out
|
||||
vmov q9,q1
|
||||
Loop_neon_enter:
|
||||
.Loop_neon_enter:
|
||||
ldr r11, [sp,#4*(15)]
|
||||
vadd.i32 q7,q3,q12 @ counter+1
|
||||
ldr r12,[sp,#4*(12)] @ modulo-scheduled load
|
||||
@ -874,10 +832,10 @@ Loop_neon_enter:
|
||||
str r11, [sp,#4*(16+15)]
|
||||
mov r11,#10
|
||||
add r12,r12,#3 @ counter+3
|
||||
b Loop_neon
|
||||
b .Loop_neon
|
||||
|
||||
.align 4
|
||||
Loop_neon:
|
||||
.Loop_neon:
|
||||
subs r11,r11,#1
|
||||
vadd.i32 q0,q0,q1
|
||||
add r0,r0,r4
|
||||
@ -1095,7 +1053,7 @@ Loop_neon:
|
||||
eor r7,r7,r8,ror#25
|
||||
vext.8 q11,q11,q11,#4
|
||||
eor r4,r4,r9,ror#25
|
||||
bne Loop_neon
|
||||
bne .Loop_neon
|
||||
|
||||
add r11,sp,#32
|
||||
vld1.32 {q12,q13},[sp] @ load key material
|
||||
@ -1136,7 +1094,7 @@ Loop_neon:
|
||||
vadd.i32 q11,q11,q15
|
||||
|
||||
cmp r11,#64*4
|
||||
blo Ltail_neon
|
||||
blo .Ltail_neon
|
||||
|
||||
vld1.8 {q12,q13},[r12]! @ load input
|
||||
mov r11,sp
|
||||
@ -1290,12 +1248,12 @@ Loop_neon:
|
||||
sub r11,r8,#64*4 @ len-=64*4
|
||||
str r6,[r14,#-8]
|
||||
str r7,[r14,#-4]
|
||||
bhi Loop_neon_outer
|
||||
bhi .Loop_neon_outer
|
||||
|
||||
b Ldone_neon
|
||||
b .Ldone_neon
|
||||
|
||||
.align 4
|
||||
Lbreak_neon:
|
||||
.Lbreak_neon:
|
||||
@ harmonize NEON and integer-only stack frames: load data
|
||||
@ from NEON frame, but save to integer-only one; distance
|
||||
@ between the two is 4*(32+4+16-32)=4*(20).
|
||||
@ -1321,25 +1279,25 @@ Lbreak_neon:
|
||||
add sp,sp,#4*(20) @ switch frame
|
||||
vst1.32 {q2,q3},[r11]
|
||||
mov r11,#10
|
||||
b Loop @ go integer-only
|
||||
b .Loop @ go integer-only
|
||||
|
||||
.align 4
|
||||
Ltail_neon:
|
||||
.Ltail_neon:
|
||||
cmp r11,#64*3
|
||||
bhs L192_or_more_neon
|
||||
bhs .L192_or_more_neon
|
||||
cmp r11,#64*2
|
||||
bhs L128_or_more_neon
|
||||
bhs .L128_or_more_neon
|
||||
cmp r11,#64*1
|
||||
bhs L64_or_more_neon
|
||||
bhs .L64_or_more_neon
|
||||
|
||||
add r8,sp,#4*(8)
|
||||
vst1.8 {q0,q1},[sp]
|
||||
add r10,sp,#4*(0)
|
||||
vst1.8 {q2,q3},[r8]
|
||||
b Loop_tail_neon
|
||||
b .Loop_tail_neon
|
||||
|
||||
.align 4
|
||||
L64_or_more_neon:
|
||||
.L64_or_more_neon:
|
||||
vld1.8 {q12,q13},[r12]!
|
||||
vld1.8 {q14,q15},[r12]!
|
||||
veor q0,q0,q12
|
||||
@ -1349,17 +1307,17 @@ L64_or_more_neon:
|
||||
vst1.8 {q0,q1},[r14]!
|
||||
vst1.8 {q2,q3},[r14]!
|
||||
|
||||
beq Ldone_neon
|
||||
beq .Ldone_neon
|
||||
|
||||
add r8,sp,#4*(8)
|
||||
vst1.8 {q4,q5},[sp]
|
||||
add r10,sp,#4*(0)
|
||||
vst1.8 {q6,q7},[r8]
|
||||
sub r11,r11,#64*1 @ len-=64*1
|
||||
b Loop_tail_neon
|
||||
b .Loop_tail_neon
|
||||
|
||||
.align 4
|
||||
L128_or_more_neon:
|
||||
.L128_or_more_neon:
|
||||
vld1.8 {q12,q13},[r12]!
|
||||
vld1.8 {q14,q15},[r12]!
|
||||
veor q0,q0,q12
|
||||
@ -1378,17 +1336,17 @@ L128_or_more_neon:
|
||||
vst1.8 {q4,q5},[r14]!
|
||||
vst1.8 {q6,q7},[r14]!
|
||||
|
||||
beq Ldone_neon
|
||||
beq .Ldone_neon
|
||||
|
||||
add r8,sp,#4*(8)
|
||||
vst1.8 {q8,q9},[sp]
|
||||
add r10,sp,#4*(0)
|
||||
vst1.8 {q10,q11},[r8]
|
||||
sub r11,r11,#64*2 @ len-=64*2
|
||||
b Loop_tail_neon
|
||||
b .Loop_tail_neon
|
||||
|
||||
.align 4
|
||||
L192_or_more_neon:
|
||||
.L192_or_more_neon:
|
||||
vld1.8 {q12,q13},[r12]!
|
||||
vld1.8 {q14,q15},[r12]!
|
||||
veor q0,q0,q12
|
||||
@ -1416,7 +1374,7 @@ L192_or_more_neon:
|
||||
vst1.8 {q8,q9},[r14]!
|
||||
vst1.8 {q10,q11},[r14]!
|
||||
|
||||
beq Ldone_neon
|
||||
beq .Ldone_neon
|
||||
|
||||
ldmia sp,{r8,r9,r10,r11} @ load key material
|
||||
add r0,r0,r8 @ accumulate key material
|
||||
@ -1475,24 +1433,19 @@ L192_or_more_neon:
|
||||
add r10,sp,#4*(0)
|
||||
sub r11,r11,#64*3 @ len-=64*3
|
||||
|
||||
Loop_tail_neon:
|
||||
.Loop_tail_neon:
|
||||
ldrb r8,[r10],#1 @ read buffer on stack
|
||||
ldrb r9,[r12],#1 @ read input
|
||||
subs r11,r11,#1
|
||||
eor r8,r8,r9
|
||||
strb r8,[r14],#1 @ store output
|
||||
bne Loop_tail_neon
|
||||
bne .Loop_tail_neon
|
||||
|
||||
Ldone_neon:
|
||||
.Ldone_neon:
|
||||
add sp,sp,#4*(32+4)
|
||||
vldmia sp,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
add sp,sp,#4*(16+3)
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,55 +1,45 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
|
||||
.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.align 5
|
||||
Lrcon:
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl _aes_hw_set_encrypt_key
|
||||
.private_extern _aes_hw_set_encrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_set_encrypt_key
|
||||
#endif
|
||||
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
mov r3,#-1
|
||||
cmp r0,#0
|
||||
beq Lenc_key_abort
|
||||
beq .Lenc_key_abort
|
||||
cmp r2,#0
|
||||
beq Lenc_key_abort
|
||||
beq .Lenc_key_abort
|
||||
mov r3,#-2
|
||||
cmp r1,#128
|
||||
blt Lenc_key_abort
|
||||
blt .Lenc_key_abort
|
||||
cmp r1,#256
|
||||
bgt Lenc_key_abort
|
||||
bgt .Lenc_key_abort
|
||||
tst r1,#0x3f
|
||||
bne Lenc_key_abort
|
||||
bne .Lenc_key_abort
|
||||
|
||||
adr r3,Lrcon
|
||||
adr r3,.Lrcon
|
||||
cmp r1,#192
|
||||
|
||||
veor q0,q0,q0
|
||||
@ -57,12 +47,12 @@ Lenc_key:
|
||||
mov r1,#8 @ reuse r1
|
||||
vld1.32 {q1,q2},[r3]!
|
||||
|
||||
blt Loop128
|
||||
beq L192
|
||||
b L256
|
||||
blt .Loop128
|
||||
beq .L192
|
||||
b .L256
|
||||
|
||||
.align 4
|
||||
Loop128:
|
||||
.Loop128:
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
@ -78,7 +68,7 @@ Loop128:
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
bne Loop128
|
||||
bne .Loop128
|
||||
|
||||
vld1.32 {q1},[r3]
|
||||
|
||||
@ -114,16 +104,16 @@ Loop128:
|
||||
add r2,r2,#0x50
|
||||
|
||||
mov r12,#10
|
||||
b Ldone
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
L192:
|
||||
.L192:
|
||||
vld1.8 {d16},[r0]!
|
||||
vmov.i8 q10,#8 @ borrow q10
|
||||
vst1.32 {q3},[r2]!
|
||||
vsub.i8 q2,q2,q10 @ adjust the mask
|
||||
|
||||
Loop192:
|
||||
.Loop192:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
@ -146,20 +136,20 @@ Loop192:
|
||||
veor q3,q3,q10
|
||||
veor q8,q8,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
bne Loop192
|
||||
bne .Loop192
|
||||
|
||||
mov r12,#12
|
||||
add r2,r2,#0x20
|
||||
b Ldone
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
L256:
|
||||
.L256:
|
||||
vld1.8 {q8},[r0]
|
||||
mov r1,#7
|
||||
mov r12,#14
|
||||
vst1.32 {q3},[r2]!
|
||||
|
||||
Loop256:
|
||||
.Loop256:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
@ -176,7 +166,7 @@ Loop256:
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
beq Ldone
|
||||
beq .Ldone
|
||||
|
||||
vdup.32 q10,d7[1]
|
||||
vext.8 q9,q0,q8,#12
|
||||
@ -189,30 +179,28 @@ Loop256:
|
||||
veor q8,q8,q9
|
||||
|
||||
veor q8,q8,q10
|
||||
b Loop256
|
||||
b .Loop256
|
||||
|
||||
Ldone:
|
||||
.Ldone:
|
||||
str r12,[r2]
|
||||
mov r3,#0
|
||||
|
||||
Lenc_key_abort:
|
||||
.Lenc_key_abort:
|
||||
mov r0,r3 @ return value
|
||||
|
||||
bx lr
|
||||
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
|
||||
|
||||
|
||||
.globl _aes_hw_set_decrypt_key
|
||||
.private_extern _aes_hw_set_decrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_set_decrypt_key
|
||||
#endif
|
||||
.globl aes_hw_set_decrypt_key
|
||||
.hidden aes_hw_set_decrypt_key
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
aes_hw_set_decrypt_key:
|
||||
stmdb sp!,{r4,lr}
|
||||
bl Lenc_key
|
||||
bl .Lenc_key
|
||||
|
||||
cmp r0,#0
|
||||
bne Ldec_key_abort
|
||||
bne .Ldec_key_abort
|
||||
|
||||
sub r2,r2,#240 @ restore original r2
|
||||
mov r4,#-16
|
||||
@ -223,7 +211,7 @@ _aes_hw_set_decrypt_key:
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
|
||||
Loop_imc:
|
||||
.Loop_imc:
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
@ -231,30 +219,29 @@ Loop_imc:
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
cmp r0,r2
|
||||
bhi Loop_imc
|
||||
bhi .Loop_imc
|
||||
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
vst1.32 {q0},[r0]
|
||||
|
||||
eor r0,r0,r0 @ return value
|
||||
Ldec_key_abort:
|
||||
.Ldec_key_abort:
|
||||
ldmia sp!,{r4,pc}
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
.private_extern _aes_hw_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_encrypt
|
||||
#endif
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
Loop_enc:
|
||||
.Loop_enc:
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
@ -262,7 +249,7 @@ Loop_enc:
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt Loop_enc
|
||||
bgt .Loop_enc
|
||||
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
@ -272,21 +259,20 @@ Loop_enc:
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
|
||||
.globl _aes_hw_decrypt
|
||||
.private_extern _aes_hw_decrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_decrypt
|
||||
#endif
|
||||
.size aes_hw_encrypt,.-aes_hw_encrypt
|
||||
.globl aes_hw_decrypt
|
||||
.hidden aes_hw_decrypt
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
Loop_dec:
|
||||
.Loop_dec:
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
@ -294,7 +280,7 @@ Loop_dec:
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt Loop_dec
|
||||
bgt .Loop_dec
|
||||
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
@ -304,21 +290,19 @@ Loop_dec:
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
|
||||
.globl _aes_hw_cbc_encrypt
|
||||
.private_extern _aes_hw_cbc_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_cbc_encrypt
|
||||
#endif
|
||||
.size aes_hw_decrypt,.-aes_hw_decrypt
|
||||
.globl aes_hw_cbc_encrypt
|
||||
.hidden aes_hw_cbc_encrypt
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
aes_hw_cbc_encrypt:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load remaining args
|
||||
subs r2,r2,#16
|
||||
mov r8,#16
|
||||
blo Lcbc_abort
|
||||
blo .Lcbc_abort
|
||||
moveq r8,#0
|
||||
|
||||
cmp r5,#0 @ en- or decrypting?
|
||||
@ -338,12 +322,12 @@ _aes_hw_cbc_encrypt:
|
||||
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
beq Lcbc_dec
|
||||
beq .Lcbc_dec
|
||||
|
||||
cmp r5,#2
|
||||
veor q0,q0,q6
|
||||
veor q5,q8,q7
|
||||
beq Lcbc_enc128
|
||||
beq .Lcbc_enc128
|
||||
|
||||
vld1.32 {q2,q3},[r7]
|
||||
add r7,r3,#16
|
||||
@ -353,14 +337,14 @@ _aes_hw_cbc_encrypt:
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
add r14,r3,#16*6
|
||||
add r3,r3,#16*7
|
||||
b Lenter_cbc_enc
|
||||
b .Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
Loop_cbc_enc:
|
||||
.Loop_cbc_enc:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
Lenter_cbc_enc:
|
||||
.Lenter_cbc_enc:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
@ -370,7 +354,7 @@ Lenter_cbc_enc:
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r12]
|
||||
beq Lcbc_enc192
|
||||
beq .Lcbc_enc192
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ -380,7 +364,7 @@ Lenter_cbc_enc:
|
||||
vld1.32 {q9},[r3]
|
||||
nop
|
||||
|
||||
Lcbc_enc192:
|
||||
.Lcbc_enc192:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
@ -402,22 +386,22 @@ Lcbc_enc192:
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs Loop_cbc_enc
|
||||
bhs .Loop_cbc_enc
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b Lcbc_done
|
||||
b .Lcbc_done
|
||||
|
||||
.align 5
|
||||
Lcbc_enc128:
|
||||
.Lcbc_enc128:
|
||||
vld1.32 {q2,q3},[r7]
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
b Lenter_cbc_enc128
|
||||
Loop_cbc_enc128:
|
||||
b .Lenter_cbc_enc128
|
||||
.Loop_cbc_enc128:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
Lenter_cbc_enc128:
|
||||
.Lenter_cbc_enc128:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
@ -440,19 +424,19 @@ Lenter_cbc_enc128:
|
||||
veor q8,q8,q5
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs Loop_cbc_enc128
|
||||
bhs .Loop_cbc_enc128
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b Lcbc_done
|
||||
b .Lcbc_done
|
||||
.align 5
|
||||
Lcbc_dec:
|
||||
.Lcbc_dec:
|
||||
vld1.8 {q10},[r0]!
|
||||
subs r2,r2,#32 @ bias
|
||||
add r6,r5,#2
|
||||
vorr q3,q0,q0
|
||||
vorr q1,q0,q0
|
||||
vorr q11,q10,q10
|
||||
blo Lcbc_dec_tail
|
||||
blo .Lcbc_dec_tail
|
||||
|
||||
vorr q1,q10,q10
|
||||
vld1.8 {q10},[r0]!
|
||||
@ -460,7 +444,7 @@ Lcbc_dec:
|
||||
vorr q3,q1,q1
|
||||
vorr q11,q10,q10
|
||||
|
||||
Loop3x_cbc_dec:
|
||||
.Loop3x_cbc_dec:
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
@ -476,7 +460,7 @@ Loop3x_cbc_dec:
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Loop3x_cbc_dec
|
||||
bgt .Loop3x_cbc_dec
|
||||
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
@ -536,13 +520,13 @@ Loop3x_cbc_dec:
|
||||
vorr q1,q3,q3
|
||||
vst1.8 {q10},[r1]!
|
||||
vorr q10,q11,q11
|
||||
bhs Loop3x_cbc_dec
|
||||
bhs .Loop3x_cbc_dec
|
||||
|
||||
cmn r2,#0x30
|
||||
beq Lcbc_done
|
||||
beq .Lcbc_done
|
||||
nop
|
||||
|
||||
Lcbc_dec_tail:
|
||||
.Lcbc_dec_tail:
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
@ -554,7 +538,7 @@ Lcbc_dec_tail:
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Lcbc_dec_tail
|
||||
bgt .Lcbc_dec_tail
|
||||
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
@ -581,32 +565,30 @@ Lcbc_dec_tail:
|
||||
veor q9,q3,q7
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
beq Lcbc_dec_one
|
||||
beq .Lcbc_dec_one
|
||||
veor q5,q5,q1
|
||||
veor q9,q9,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
vst1.8 {q9},[r1]!
|
||||
b Lcbc_done
|
||||
b .Lcbc_done
|
||||
|
||||
Lcbc_dec_one:
|
||||
.Lcbc_dec_one:
|
||||
veor q5,q5,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
|
||||
Lcbc_done:
|
||||
.Lcbc_done:
|
||||
vst1.8 {q6},[r4]
|
||||
Lcbc_abort:
|
||||
.Lcbc_abort:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,pc}
|
||||
|
||||
.globl _aes_hw_ctr32_encrypt_blocks
|
||||
.private_extern _aes_hw_ctr32_encrypt_blocks
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
|
||||
.globl aes_hw_ctr32_encrypt_blocks
|
||||
.hidden aes_hw_ctr32_encrypt_blocks
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
@ -628,24 +610,38 @@ _aes_hw_ctr32_encrypt_blocks:
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
movlo r12,#0
|
||||
|
||||
@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
|
||||
@ affected by silicon errata #1742098 [0] and #1655431 [1],
|
||||
@ respectively, where the second instruction of an aese/aesmc
|
||||
@ instruction pair may execute twice if an interrupt is taken right
|
||||
@ after the first instruction consumes an input register of which a
|
||||
@ single 32-bit lane has been updated the last time it was modified.
|
||||
@
|
||||
@ This function uses a counter in one 32-bit lane. The
|
||||
@ could write to q1 and q10 directly, but that trips this bugs.
|
||||
@ We write to q6 and copy to the final register as a workaround.
|
||||
@
|
||||
@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
|
||||
@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
|
||||
#ifndef __ARMEB__
|
||||
rev r8, r8
|
||||
#endif
|
||||
vorr q1,q0,q0
|
||||
add r10, r8, #1
|
||||
vorr q10,q0,q0
|
||||
add r8, r8, #2
|
||||
vorr q6,q0,q0
|
||||
rev r10, r10
|
||||
vmov.32 d3[1],r10
|
||||
bls Lctr32_tail
|
||||
vmov.32 d13[1],r10
|
||||
add r8, r8, #2
|
||||
vorr q1,q6,q6
|
||||
bls .Lctr32_tail
|
||||
rev r12, r8
|
||||
vmov.32 d13[1],r12
|
||||
sub r2,r2,#3 @ bias
|
||||
vmov.32 d21[1],r12
|
||||
b Loop3x_ctr32
|
||||
vorr q10,q6,q6
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
Loop3x_ctr32:
|
||||
.Loop3x_ctr32:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
@ -661,18 +657,18 @@ Loop3x_ctr32:
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Loop3x_ctr32
|
||||
bgt .Loop3x_ctr32
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
|
||||
vld1.8 {q2},[r0]!
|
||||
vorr q0,q6,q6
|
||||
add r9,r8,#1
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
vorr q1,q6,q6
|
||||
rev r9,r9
|
||||
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
|
||||
@ -681,8 +677,6 @@ Loop3x_ctr32:
|
||||
mov r7,r3
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
|
||||
vorr q10,q6,q6
|
||||
add r9,r8,#1
|
||||
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
|
||||
@ -697,21 +691,26 @@ Loop3x_ctr32:
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
@ Note the logic to update q0, q1, and q1 is written to work
|
||||
@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
|
||||
@ 32-bit mode. See the comment above.
|
||||
veor q11,q11,q7
|
||||
rev r9,r9
|
||||
vmov.32 d13[1], r9
|
||||
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d1[1], r9
|
||||
vorr q0,q6,q6
|
||||
rev r10,r10
|
||||
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
vmov.32 d13[1], r10
|
||||
rev r12,r8
|
||||
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vmov.32 d3[1], r10
|
||||
rev r12,r8
|
||||
vorr q1,q6,q6
|
||||
vmov.32 d13[1], r12
|
||||
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d21[1], r12
|
||||
vorr q10,q6,q6
|
||||
subs r2,r2,#3
|
||||
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
|
||||
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
|
||||
@ -726,15 +725,15 @@ Loop3x_ctr32:
|
||||
veor q11,q11,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q11},[r1]!
|
||||
bhs Loop3x_ctr32
|
||||
bhs .Loop3x_ctr32
|
||||
|
||||
adds r2,r2,#3
|
||||
beq Lctr32_done
|
||||
beq .Lctr32_done
|
||||
cmp r2,#1
|
||||
mov r12,#16
|
||||
moveq r12,#0
|
||||
|
||||
Lctr32_tail:
|
||||
.Lctr32_tail:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
@ -746,7 +745,7 @@ Lctr32_tail:
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Lctr32_tail
|
||||
bgt .Lctr32_tail
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ -779,12 +778,12 @@ Lctr32_tail:
|
||||
veor q2,q2,q0
|
||||
veor q3,q3,q1
|
||||
vst1.8 {q2},[r1]!
|
||||
beq Lctr32_done
|
||||
beq .Lctr32_done
|
||||
vst1.8 {q3},[r1]
|
||||
|
||||
Lctr32_done:
|
||||
.Lctr32_done:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
|
||||
|
||||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,22 +1,14 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
@ -26,40 +18,14 @@
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-Lbn_mul_mont
|
||||
#endif
|
||||
|
||||
.globl _bn_mul_mont
|
||||
.private_extern _bn_mul_mont
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bn_mul_mont
|
||||
#endif
|
||||
.globl bn_mul_mont_nohw
|
||||
.hidden bn_mul_mont_nohw
|
||||
.type bn_mul_mont_nohw,%function
|
||||
|
||||
.align 5
|
||||
_bn_mul_mont:
|
||||
Lbn_mul_mont:
|
||||
bn_mul_mont_nohw:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
tst ip,#7
|
||||
bne Lialu
|
||||
adr r0,Lbn_mul_mont
|
||||
ldr r2,LOPENSSL_armcap
|
||||
ldr r0,[r0,r2]
|
||||
#ifdef __APPLE__
|
||||
ldr r0,[r0]
|
||||
#endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
ldmia sp, {r0,r2}
|
||||
beq Lialu
|
||||
add sp,sp,#8
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
@ -67,7 +33,7 @@ Lialu:
|
||||
#endif
|
||||
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt Labrt
|
||||
blt .Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
@ -92,7 +58,7 @@ Lialu:
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
mov r4,sp
|
||||
|
||||
L1st:
|
||||
.L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
@ -104,7 +70,7 @@ L1st:
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne L1st
|
||||
bne .L1st
|
||||
|
||||
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
@ -115,7 +81,7 @@ L1st:
|
||||
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
Louter:
|
||||
.Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
@ -133,7 +99,7 @@ Louter:
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
Linner:
|
||||
.Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
@ -147,7 +113,7 @@ Linner:
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne Linner
|
||||
bne .Linner
|
||||
|
||||
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
@ -165,7 +131,7 @@ Linner:
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne Louter
|
||||
bne .Louter
|
||||
|
||||
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
@ -176,17 +142,17 @@ Linner:
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
subs r7,r7,r7 @ "clear" carry flag
|
||||
Lsub: ldr r7,[r4],#4
|
||||
.Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lsub
|
||||
bne .Lsub
|
||||
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
Lcopy: ldr r7,[r4] @ conditional copy
|
||||
.Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
@ -195,29 +161,29 @@ Lcopy: ldr r7,[r4] @ conditional copy
|
||||
movcc r5,r7
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lcopy
|
||||
bne .Lcopy
|
||||
|
||||
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
.Labrt:
|
||||
#if __ARM_ARCH>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func bn_mul8x_mont_neon
|
||||
#endif
|
||||
.globl bn_mul8x_mont_neon
|
||||
.hidden bn_mul8x_mont_neon
|
||||
.type bn_mul8x_mont_neon,%function
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
@ -227,7 +193,7 @@ bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
|
||||
cmp r5,#8
|
||||
bhi LNEON_8n
|
||||
bhi .LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
@ -280,10 +246,10 @@ bn_mul8x_mont_neon:
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b LNEON_outer8
|
||||
b .LNEON_outer8
|
||||
|
||||
.align 4
|
||||
LNEON_outer8:
|
||||
.LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
@ -328,7 +294,7 @@ LNEON_outer8:
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne LNEON_outer8
|
||||
bne .LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
@ -339,10 +305,10 @@ LNEON_outer8:
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b LNEON_tail_entry
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_8n:
|
||||
.LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
@ -358,23 +324,23 @@ LNEON_8n:
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
LNEON_8n_init:
|
||||
.LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne LNEON_8n_init
|
||||
bne .LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b LNEON_8n_outer
|
||||
b .LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
LNEON_8n_outer:
|
||||
.LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
@ -629,10 +595,10 @@ LNEON_8n_outer:
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b LNEON_8n_inner
|
||||
b .LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
LNEON_8n_inner:
|
||||
.LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
@ -816,7 +782,7 @@ LNEON_8n_inner:
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne LNEON_8n_inner
|
||||
bne .LNEON_8n_inner
|
||||
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
@ -833,7 +799,7 @@ LNEON_8n_inner:
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne LNEON_8n_outer
|
||||
bne .LNEON_8n_outer
|
||||
|
||||
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
@ -846,10 +812,10 @@ LNEON_8n_inner:
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b LNEON_tail_entry
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_tail:
|
||||
.LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
@ -859,7 +825,7 @@ LNEON_tail:
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
LNEON_tail_entry:
|
||||
.LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
@ -905,14 +871,14 @@ LNEON_tail_entry:
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne LNEON_tail
|
||||
bne .LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
LNEON_sub:
|
||||
.LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
@ -921,7 +887,7 @@ LNEON_sub:
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_sub
|
||||
bne .LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
@ -933,7 +899,7 @@ LNEON_sub:
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
LNEON_copy_n_zap:
|
||||
.LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
@ -960,23 +926,14 @@ LNEON_copy_n_zap:
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_copy_n_zap
|
||||
bne .LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
|
||||
|
||||
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
|
||||
#endif
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,17 +1,9 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
@
|
||||
@ Licensed under the OpenSSL license (the "License"). You may not use
|
||||
@ -75,7 +67,6 @@
|
||||
# define VFP_ABI_FRAME 0
|
||||
# define BSAES_ASM_EXTENDED_KEY
|
||||
# define XTS_CHAIN_TWEAK
|
||||
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
|
||||
# define __ARM_MAX_ARCH__ 7
|
||||
#endif
|
||||
|
||||
@ -84,8 +75,8 @@
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.text
|
||||
.syntax unified @ ARMv7-capable assembler is expected to handle this
|
||||
@ -96,20 +87,18 @@
|
||||
# undef __thumb2__
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bsaes_decrypt8
|
||||
#endif
|
||||
.type _bsaes_decrypt8,%function
|
||||
.align 4
|
||||
_bsaes_decrypt8:
|
||||
adr r6,.
|
||||
vldmia r4!, {q9} @ round 0 key
|
||||
#if defined(__thumb2__) || defined(__APPLE__)
|
||||
adr r6,LM0ISR
|
||||
adr r6,.LM0ISR
|
||||
#else
|
||||
add r6,r6,#LM0ISR-_bsaes_decrypt8
|
||||
add r6,r6,#.LM0ISR-_bsaes_decrypt8
|
||||
#endif
|
||||
|
||||
vldmia r6!, {q8} @ LM0ISR
|
||||
vldmia r6!, {q8} @ .LM0ISR
|
||||
veor q10, q0, q9 @ xor with round0 key
|
||||
veor q11, q1, q9
|
||||
vtbl.8 d0, {q10}, d16
|
||||
@ -134,8 +123,8 @@ _bsaes_decrypt8:
|
||||
vtbl.8 d13, {q10}, d17
|
||||
vtbl.8 d14, {q11}, d16
|
||||
vtbl.8 d15, {q11}, d17
|
||||
vmov.i8 q8,#0x55 @ compose LBS0
|
||||
vmov.i8 q9,#0x33 @ compose LBS1
|
||||
vmov.i8 q8,#0x55 @ compose .LBS0
|
||||
vmov.i8 q9,#0x33 @ compose .LBS1
|
||||
vshr.u64 q10, q6, #1
|
||||
vshr.u64 q11, q4, #1
|
||||
veor q10, q10, q7
|
||||
@ -160,7 +149,7 @@ _bsaes_decrypt8:
|
||||
vshl.u64 q11, q11, #1
|
||||
veor q2, q2, q10
|
||||
veor q0, q0, q11
|
||||
vmov.i8 q8,#0x0f @ compose LBS2
|
||||
vmov.i8 q8,#0x0f @ compose .LBS2
|
||||
vshr.u64 q10, q5, #2
|
||||
vshr.u64 q11, q4, #2
|
||||
veor q10, q10, q7
|
||||
@ -210,9 +199,9 @@ _bsaes_decrypt8:
|
||||
veor q1, q1, q10
|
||||
veor q0, q0, q11
|
||||
sub r5,r5,#1
|
||||
b Ldec_sbox
|
||||
b .Ldec_sbox
|
||||
.align 4
|
||||
Ldec_loop:
|
||||
.Ldec_loop:
|
||||
vldmia r4!, {q8,q9,q10,q11}
|
||||
veor q8, q8, q0
|
||||
veor q9, q9, q1
|
||||
@ -242,7 +231,7 @@ Ldec_loop:
|
||||
vtbl.8 d13, {q10}, d25
|
||||
vtbl.8 d14, {q11}, d24
|
||||
vtbl.8 d15, {q11}, d25
|
||||
Ldec_sbox:
|
||||
.Ldec_sbox:
|
||||
veor q1, q1, q4
|
||||
veor q3, q3, q4
|
||||
|
||||
@ -391,7 +380,7 @@ Ldec_sbox:
|
||||
veor q4, q4, q0
|
||||
veor q7, q7, q3
|
||||
subs r5,r5,#1
|
||||
bcc Ldec_done
|
||||
bcc .Ldec_done
|
||||
@ multiplication by 0x05-0x00-0x04-0x00
|
||||
vext.8 q8, q0, q0, #8
|
||||
vext.8 q14, q3, q3, #8
|
||||
@ -470,16 +459,16 @@ Ldec_sbox:
|
||||
veor q2, q3, q10
|
||||
vmov q3, q11
|
||||
@ vmov q5, q9
|
||||
vldmia r6, {q12} @ LISR
|
||||
vldmia r6, {q12} @ .LISR
|
||||
ite eq @ Thumb2 thing, sanity check in ARM
|
||||
addeq r6,r6,#0x10
|
||||
bne Ldec_loop
|
||||
vldmia r6, {q12} @ LISRM0
|
||||
b Ldec_loop
|
||||
bne .Ldec_loop
|
||||
vldmia r6, {q12} @ .LISRM0
|
||||
b .Ldec_loop
|
||||
.align 4
|
||||
Ldec_done:
|
||||
vmov.i8 q8,#0x55 @ compose LBS0
|
||||
vmov.i8 q9,#0x33 @ compose LBS1
|
||||
.Ldec_done:
|
||||
vmov.i8 q8,#0x55 @ compose .LBS0
|
||||
vmov.i8 q9,#0x33 @ compose .LBS1
|
||||
vshr.u64 q10, q3, #1
|
||||
vshr.u64 q11, q2, #1
|
||||
veor q10, q10, q5
|
||||
@ -504,7 +493,7 @@ Ldec_done:
|
||||
vshl.u64 q11, q11, #1
|
||||
veor q6, q6, q10
|
||||
veor q0, q0, q11
|
||||
vmov.i8 q8,#0x0f @ compose LBS2
|
||||
vmov.i8 q8,#0x0f @ compose .LBS2
|
||||
vshr.u64 q10, q7, #2
|
||||
vshr.u64 q11, q2, #2
|
||||
veor q10, q10, q5
|
||||
@ -563,46 +552,44 @@ Ldec_done:
|
||||
veor q0, q0, q8
|
||||
veor q1, q1, q8
|
||||
bx lr
|
||||
.size _bsaes_decrypt8,.-_bsaes_decrypt8
|
||||
|
||||
|
||||
|
||||
.type _bsaes_const,%object
|
||||
.align 6
|
||||
_bsaes_const:
|
||||
LM0ISR:@ InvShiftRows constants
|
||||
.LM0ISR:@ InvShiftRows constants
|
||||
.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
|
||||
LISR:
|
||||
.LISR:
|
||||
.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
|
||||
LISRM0:
|
||||
.LISRM0:
|
||||
.quad 0x01040b0e0205080f, 0x0306090c00070a0d
|
||||
LM0SR:@ ShiftRows constants
|
||||
.LM0SR:@ ShiftRows constants
|
||||
.quad 0x0a0e02060f03070b, 0x0004080c05090d01
|
||||
LSR:
|
||||
.LSR:
|
||||
.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
|
||||
LSRM0:
|
||||
.LSRM0:
|
||||
.quad 0x0304090e00050a0f, 0x01060b0c0207080d
|
||||
LM0:
|
||||
.LM0:
|
||||
.quad 0x02060a0e03070b0f, 0x0004080c0105090d
|
||||
LREVM0SR:
|
||||
.LREVM0SR:
|
||||
.quad 0x090d01050c000408, 0x03070b0f060a0e02
|
||||
.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 6
|
||||
.size _bsaes_const,.-_bsaes_const
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bsaes_encrypt8
|
||||
#endif
|
||||
.type _bsaes_encrypt8,%function
|
||||
.align 4
|
||||
_bsaes_encrypt8:
|
||||
adr r6,.
|
||||
vldmia r4!, {q9} @ round 0 key
|
||||
#if defined(__thumb2__) || defined(__APPLE__)
|
||||
adr r6,LM0SR
|
||||
adr r6,.LM0SR
|
||||
#else
|
||||
sub r6,r6,#_bsaes_encrypt8-LM0SR
|
||||
sub r6,r6,#_bsaes_encrypt8-.LM0SR
|
||||
#endif
|
||||
|
||||
vldmia r6!, {q8} @ LM0SR
|
||||
vldmia r6!, {q8} @ .LM0SR
|
||||
_bsaes_encrypt8_alt:
|
||||
veor q10, q0, q9 @ xor with round0 key
|
||||
veor q11, q1, q9
|
||||
@ -629,8 +616,8 @@ _bsaes_encrypt8_alt:
|
||||
vtbl.8 d14, {q11}, d16
|
||||
vtbl.8 d15, {q11}, d17
|
||||
_bsaes_encrypt8_bitslice:
|
||||
vmov.i8 q8,#0x55 @ compose LBS0
|
||||
vmov.i8 q9,#0x33 @ compose LBS1
|
||||
vmov.i8 q8,#0x55 @ compose .LBS0
|
||||
vmov.i8 q9,#0x33 @ compose .LBS1
|
||||
vshr.u64 q10, q6, #1
|
||||
vshr.u64 q11, q4, #1
|
||||
veor q10, q10, q7
|
||||
@ -655,7 +642,7 @@ _bsaes_encrypt8_bitslice:
|
||||
vshl.u64 q11, q11, #1
|
||||
veor q2, q2, q10
|
||||
veor q0, q0, q11
|
||||
vmov.i8 q8,#0x0f @ compose LBS2
|
||||
vmov.i8 q8,#0x0f @ compose .LBS2
|
||||
vshr.u64 q10, q5, #2
|
||||
vshr.u64 q11, q4, #2
|
||||
veor q10, q10, q7
|
||||
@ -705,9 +692,9 @@ _bsaes_encrypt8_bitslice:
|
||||
veor q1, q1, q10
|
||||
veor q0, q0, q11
|
||||
sub r5,r5,#1
|
||||
b Lenc_sbox
|
||||
b .Lenc_sbox
|
||||
.align 4
|
||||
Lenc_loop:
|
||||
.Lenc_loop:
|
||||
vldmia r4!, {q8,q9,q10,q11}
|
||||
veor q8, q8, q0
|
||||
veor q9, q9, q1
|
||||
@ -737,7 +724,7 @@ Lenc_loop:
|
||||
vtbl.8 d13, {q10}, d25
|
||||
vtbl.8 d14, {q11}, d24
|
||||
vtbl.8 d15, {q11}, d25
|
||||
Lenc_sbox:
|
||||
.Lenc_sbox:
|
||||
veor q2, q2, q1
|
||||
veor q5, q5, q6
|
||||
veor q3, q3, q0
|
||||
@ -885,7 +872,7 @@ Lenc_sbox:
|
||||
|
||||
veor q6, q6, q3
|
||||
subs r5,r5,#1
|
||||
bcc Lenc_done
|
||||
bcc .Lenc_done
|
||||
vext.8 q8, q0, q0, #12 @ x0 <<< 32
|
||||
vext.8 q9, q1, q1, #12
|
||||
veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
|
||||
@ -932,16 +919,16 @@ Lenc_sbox:
|
||||
@ vmov q4, q8
|
||||
veor q2, q2, q10
|
||||
@ vmov q5, q9
|
||||
vldmia r6, {q12} @ LSR
|
||||
vldmia r6, {q12} @ .LSR
|
||||
ite eq @ Thumb2 thing, samity check in ARM
|
||||
addeq r6,r6,#0x10
|
||||
bne Lenc_loop
|
||||
vldmia r6, {q12} @ LSRM0
|
||||
b Lenc_loop
|
||||
bne .Lenc_loop
|
||||
vldmia r6, {q12} @ .LSRM0
|
||||
b .Lenc_loop
|
||||
.align 4
|
||||
Lenc_done:
|
||||
vmov.i8 q8,#0x55 @ compose LBS0
|
||||
vmov.i8 q9,#0x33 @ compose LBS1
|
||||
.Lenc_done:
|
||||
vmov.i8 q8,#0x55 @ compose .LBS0
|
||||
vmov.i8 q9,#0x33 @ compose .LBS1
|
||||
vshr.u64 q10, q2, #1
|
||||
vshr.u64 q11, q3, #1
|
||||
veor q10, q10, q5
|
||||
@ -966,7 +953,7 @@ Lenc_done:
|
||||
vshl.u64 q11, q11, #1
|
||||
veor q4, q4, q10
|
||||
veor q0, q0, q11
|
||||
vmov.i8 q8,#0x0f @ compose LBS2
|
||||
vmov.i8 q8,#0x0f @ compose .LBS2
|
||||
vshr.u64 q10, q7, #2
|
||||
vshr.u64 q11, q3, #2
|
||||
veor q10, q10, q5
|
||||
@ -1025,18 +1012,16 @@ Lenc_done:
|
||||
veor q0, q0, q8
|
||||
veor q1, q1, q8
|
||||
bx lr
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bsaes_key_convert
|
||||
#endif
|
||||
.size _bsaes_encrypt8,.-_bsaes_encrypt8
|
||||
.type _bsaes_key_convert,%function
|
||||
.align 4
|
||||
_bsaes_key_convert:
|
||||
adr r6,.
|
||||
vld1.8 {q7}, [r4]! @ load round 0 key
|
||||
#if defined(__thumb2__) || defined(__APPLE__)
|
||||
adr r6,LM0
|
||||
adr r6,.LM0
|
||||
#else
|
||||
sub r6,r6,#_bsaes_key_convert-LM0
|
||||
sub r6,r6,#_bsaes_key_convert-.LM0
|
||||
#endif
|
||||
vld1.8 {q15}, [r4]! @ load round 1 key
|
||||
|
||||
@ -1046,7 +1031,7 @@ _bsaes_key_convert:
|
||||
vmov.i8 q11, #0x08
|
||||
vmov.i8 q12, #0x10
|
||||
vmov.i8 q13, #0x20
|
||||
vldmia r6, {q14} @ LM0
|
||||
vldmia r6, {q14} @ .LM0
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev32.8 q7, q7
|
||||
@ -1054,10 +1039,10 @@ _bsaes_key_convert:
|
||||
#endif
|
||||
sub r5,r5,#1
|
||||
vstmia r12!, {q7} @ save round 0 key
|
||||
b Lkey_loop
|
||||
b .Lkey_loop
|
||||
|
||||
.align 4
|
||||
Lkey_loop:
|
||||
.Lkey_loop:
|
||||
vtbl.8 d14,{q15},d28
|
||||
vtbl.8 d15,{q15},d29
|
||||
vmov.i8 q6, #0x40
|
||||
@ -1081,19 +1066,17 @@ Lkey_loop:
|
||||
#endif
|
||||
subs r5,r5,#1
|
||||
vstmia r12!,{q0,q1,q2,q3,q4,q5,q6,q7} @ write bit-sliced round key
|
||||
bne Lkey_loop
|
||||
bne .Lkey_loop
|
||||
|
||||
vmov.i8 q7,#0x63 @ compose L63
|
||||
vmov.i8 q7,#0x63 @ compose .L63
|
||||
@ don't save last round key
|
||||
bx lr
|
||||
|
||||
.globl _bsaes_cbc_encrypt
|
||||
.private_extern _bsaes_cbc_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bsaes_cbc_encrypt
|
||||
#endif
|
||||
.size _bsaes_key_convert,.-_bsaes_key_convert
|
||||
.globl bsaes_cbc_encrypt
|
||||
.hidden bsaes_cbc_encrypt
|
||||
.type bsaes_cbc_encrypt,%function
|
||||
.align 5
|
||||
_bsaes_cbc_encrypt:
|
||||
bsaes_cbc_encrypt:
|
||||
@ In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for
|
||||
@ short inputs. We patch this out, using bsaes for all input sizes.
|
||||
|
||||
@ -1144,12 +1127,12 @@ _bsaes_cbc_encrypt:
|
||||
#endif
|
||||
|
||||
vld1.8 {q15}, [r8] @ load IV
|
||||
b Lcbc_dec_loop
|
||||
b .Lcbc_dec_loop
|
||||
|
||||
.align 4
|
||||
Lcbc_dec_loop:
|
||||
.Lcbc_dec_loop:
|
||||
subs r2, r2, #0x8
|
||||
bmi Lcbc_dec_loop_finish
|
||||
bmi .Lcbc_dec_loop_finish
|
||||
|
||||
vld1.8 {q0,q1}, [r0]! @ load input
|
||||
vld1.8 {q2,q3}, [r0]!
|
||||
@ -1187,11 +1170,11 @@ Lcbc_dec_loop:
|
||||
vst1.8 {q3}, [r1]!
|
||||
vst1.8 {q5}, [r1]!
|
||||
|
||||
b Lcbc_dec_loop
|
||||
b .Lcbc_dec_loop
|
||||
|
||||
Lcbc_dec_loop_finish:
|
||||
.Lcbc_dec_loop_finish:
|
||||
adds r2, r2, #8
|
||||
beq Lcbc_dec_done
|
||||
beq .Lcbc_dec_done
|
||||
|
||||
@ Set up most parameters for the _bsaes_decrypt8 call.
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
@ -1204,19 +1187,19 @@ Lcbc_dec_loop_finish:
|
||||
|
||||
vld1.8 {q0}, [r0]! @ load input
|
||||
cmp r2, #2
|
||||
blo Lcbc_dec_one
|
||||
blo .Lcbc_dec_one
|
||||
vld1.8 {q1}, [r0]!
|
||||
beq Lcbc_dec_two
|
||||
beq .Lcbc_dec_two
|
||||
vld1.8 {q2}, [r0]!
|
||||
cmp r2, #4
|
||||
blo Lcbc_dec_three
|
||||
blo .Lcbc_dec_three
|
||||
vld1.8 {q3}, [r0]!
|
||||
beq Lcbc_dec_four
|
||||
beq .Lcbc_dec_four
|
||||
vld1.8 {q4}, [r0]!
|
||||
cmp r2, #6
|
||||
blo Lcbc_dec_five
|
||||
blo .Lcbc_dec_five
|
||||
vld1.8 {q5}, [r0]!
|
||||
beq Lcbc_dec_six
|
||||
beq .Lcbc_dec_six
|
||||
vld1.8 {q6}, [r0]!
|
||||
sub r0, r0, #0x70
|
||||
|
||||
@ -1240,9 +1223,9 @@ Lcbc_dec_loop_finish:
|
||||
vst1.8 {q2}, [r1]!
|
||||
vst1.8 {q7}, [r1]!
|
||||
vst1.8 {q3}, [r1]!
|
||||
b Lcbc_dec_done
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
Lcbc_dec_six:
|
||||
.Lcbc_dec_six:
|
||||
sub r0, r0, #0x60
|
||||
bl _bsaes_decrypt8
|
||||
vldmia r9,{q14} @ reload IV
|
||||
@ -1261,9 +1244,9 @@ Lcbc_dec_six:
|
||||
vst1.8 {q4}, [r1]!
|
||||
vst1.8 {q2}, [r1]!
|
||||
vst1.8 {q7}, [r1]!
|
||||
b Lcbc_dec_done
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
Lcbc_dec_five:
|
||||
.Lcbc_dec_five:
|
||||
sub r0, r0, #0x50
|
||||
bl _bsaes_decrypt8
|
||||
vldmia r9, {q14} @ reload IV
|
||||
@ -1279,9 +1262,9 @@ Lcbc_dec_five:
|
||||
vst1.8 {q6}, [r1]!
|
||||
vst1.8 {q4}, [r1]!
|
||||
vst1.8 {q2}, [r1]!
|
||||
b Lcbc_dec_done
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
Lcbc_dec_four:
|
||||
.Lcbc_dec_four:
|
||||
sub r0, r0, #0x40
|
||||
bl _bsaes_decrypt8
|
||||
vldmia r9, {q14} @ reload IV
|
||||
@ -1295,9 +1278,9 @@ Lcbc_dec_four:
|
||||
vst1.8 {q0,q1}, [r1]! @ write output
|
||||
vst1.8 {q6}, [r1]!
|
||||
vst1.8 {q4}, [r1]!
|
||||
b Lcbc_dec_done
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
Lcbc_dec_three:
|
||||
.Lcbc_dec_three:
|
||||
sub r0, r0, #0x30
|
||||
bl _bsaes_decrypt8
|
||||
vldmia r9, {q14} @ reload IV
|
||||
@ -1308,9 +1291,9 @@ Lcbc_dec_three:
|
||||
veor q6, q6, q9
|
||||
vst1.8 {q0,q1}, [r1]! @ write output
|
||||
vst1.8 {q6}, [r1]!
|
||||
b Lcbc_dec_done
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
Lcbc_dec_two:
|
||||
.Lcbc_dec_two:
|
||||
sub r0, r0, #0x20
|
||||
bl _bsaes_decrypt8
|
||||
vldmia r9, {q14} @ reload IV
|
||||
@ -1319,9 +1302,9 @@ Lcbc_dec_two:
|
||||
vld1.8 {q15}, [r0]! @ reload input
|
||||
veor q1, q1, q8
|
||||
vst1.8 {q0,q1}, [r1]! @ write output
|
||||
b Lcbc_dec_done
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
Lcbc_dec_one:
|
||||
.Lcbc_dec_one:
|
||||
sub r0, r0, #0x10
|
||||
bl _bsaes_decrypt8
|
||||
vldmia r9, {q14} @ reload IV
|
||||
@ -1329,14 +1312,14 @@ Lcbc_dec_one:
|
||||
veor q0, q0, q14 @ ^= IV
|
||||
vst1.8 {q0}, [r1]! @ write output
|
||||
|
||||
Lcbc_dec_done:
|
||||
.Lcbc_dec_done:
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
vmov.i32 q0, #0
|
||||
vmov.i32 q1, #0
|
||||
Lcbc_dec_bzero:@ wipe key schedule [if any]
|
||||
.Lcbc_dec_bzero:@ wipe key schedule [if any]
|
||||
vstmia sp!, {q0,q1}
|
||||
cmp sp, r9
|
||||
bne Lcbc_dec_bzero
|
||||
bne .Lcbc_dec_bzero
|
||||
#endif
|
||||
|
||||
mov sp, r9
|
||||
@ -1344,14 +1327,12 @@ Lcbc_dec_bzero:@ wipe key schedule [if any]
|
||||
vst1.8 {q15}, [r8] @ return IV
|
||||
VFP_ABI_POP
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc}
|
||||
|
||||
.globl _bsaes_ctr32_encrypt_blocks
|
||||
.private_extern _bsaes_ctr32_encrypt_blocks
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bsaes_ctr32_encrypt_blocks
|
||||
#endif
|
||||
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
|
||||
.globl bsaes_ctr32_encrypt_blocks
|
||||
.hidden bsaes_ctr32_encrypt_blocks
|
||||
.type bsaes_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
_bsaes_ctr32_encrypt_blocks:
|
||||
bsaes_ctr32_encrypt_blocks:
|
||||
@ In OpenSSL, short inputs fall back to aes_nohw_* here. We patch this
|
||||
@ out to retain a constant-time implementation.
|
||||
mov ip, sp
|
||||
@ -1377,10 +1358,10 @@ _bsaes_ctr32_encrypt_blocks:
|
||||
|
||||
vld1.8 {q0}, [r8] @ load counter
|
||||
#ifdef __APPLE__
|
||||
mov r8, #:lower16:(LREVM0SR-LM0)
|
||||
mov r8, #:lower16:(.LREVM0SR-.LM0)
|
||||
add r8, r6, r8
|
||||
#else
|
||||
add r8, r6, #LREVM0SR-LM0 @ borrow r8
|
||||
add r8, r6, #.LREVM0SR-.LM0 @ borrow r8
|
||||
#endif
|
||||
vldmia sp, {q4} @ load round0 key
|
||||
#else
|
||||
@ -1400,7 +1381,7 @@ _bsaes_ctr32_encrypt_blocks:
|
||||
.align 2
|
||||
add r12, r3, #248
|
||||
vld1.8 {q0}, [r8] @ load counter
|
||||
adrl r8, LREVM0SR @ borrow r8
|
||||
adrl r8, .LREVM0SR @ borrow r8
|
||||
vldmia r12, {q4} @ load round0 key
|
||||
sub sp, #0x10 @ place for adjusted round0 key
|
||||
#endif
|
||||
@ -1412,10 +1393,10 @@ _bsaes_ctr32_encrypt_blocks:
|
||||
vrev32.8 q4,q4
|
||||
vadd.u32 q9,q8,q8 @ compose 2<<96
|
||||
vstmia sp, {q4} @ save adjusted round0 key
|
||||
b Lctr_enc_loop
|
||||
b .Lctr_enc_loop
|
||||
|
||||
.align 4
|
||||
Lctr_enc_loop:
|
||||
.Lctr_enc_loop:
|
||||
vadd.u32 q10, q8, q9 @ compose 3<<96
|
||||
vadd.u32 q1, q0, q8 @ +1
|
||||
vadd.u32 q2, q0, q9 @ +2
|
||||
@ -1435,20 +1416,20 @@ Lctr_enc_loop:
|
||||
#else
|
||||
add r4, r3, #264
|
||||
#endif
|
||||
vldmia r8, {q8} @ LREVM0SR
|
||||
vldmia r8, {q8} @ .LREVM0SR
|
||||
mov r5, r10 @ pass rounds
|
||||
vstmia r9, {q10} @ save next counter
|
||||
#ifdef __APPLE__
|
||||
mov r6, #:lower16:(LREVM0SR-LSR)
|
||||
mov r6, #:lower16:(.LREVM0SR-.LSR)
|
||||
sub r6, r8, r6
|
||||
#else
|
||||
sub r6, r8, #LREVM0SR-LSR @ pass constants
|
||||
sub r6, r8, #.LREVM0SR-.LSR @ pass constants
|
||||
#endif
|
||||
|
||||
bl _bsaes_encrypt8_alt
|
||||
|
||||
subs r2, r2, #8
|
||||
blo Lctr_enc_loop_done
|
||||
blo .Lctr_enc_loop_done
|
||||
|
||||
vld1.8 {q8,q9}, [r0]! @ load input
|
||||
vld1.8 {q10,q11}, [r0]!
|
||||
@ -1475,51 +1456,51 @@ Lctr_enc_loop:
|
||||
vst1.8 {q5}, [r1]!
|
||||
vldmia r9, {q0} @ load counter
|
||||
|
||||
bne Lctr_enc_loop
|
||||
b Lctr_enc_done
|
||||
bne .Lctr_enc_loop
|
||||
b .Lctr_enc_done
|
||||
|
||||
.align 4
|
||||
Lctr_enc_loop_done:
|
||||
.Lctr_enc_loop_done:
|
||||
add r2, r2, #8
|
||||
vld1.8 {q8}, [r0]! @ load input
|
||||
veor q0, q8
|
||||
vst1.8 {q0}, [r1]! @ write output
|
||||
cmp r2, #2
|
||||
blo Lctr_enc_done
|
||||
blo .Lctr_enc_done
|
||||
vld1.8 {q9}, [r0]!
|
||||
veor q1, q9
|
||||
vst1.8 {q1}, [r1]!
|
||||
beq Lctr_enc_done
|
||||
beq .Lctr_enc_done
|
||||
vld1.8 {q10}, [r0]!
|
||||
veor q4, q10
|
||||
vst1.8 {q4}, [r1]!
|
||||
cmp r2, #4
|
||||
blo Lctr_enc_done
|
||||
blo .Lctr_enc_done
|
||||
vld1.8 {q11}, [r0]!
|
||||
veor q6, q11
|
||||
vst1.8 {q6}, [r1]!
|
||||
beq Lctr_enc_done
|
||||
beq .Lctr_enc_done
|
||||
vld1.8 {q12}, [r0]!
|
||||
veor q3, q12
|
||||
vst1.8 {q3}, [r1]!
|
||||
cmp r2, #6
|
||||
blo Lctr_enc_done
|
||||
blo .Lctr_enc_done
|
||||
vld1.8 {q13}, [r0]!
|
||||
veor q7, q13
|
||||
vst1.8 {q7}, [r1]!
|
||||
beq Lctr_enc_done
|
||||
beq .Lctr_enc_done
|
||||
vld1.8 {q14}, [r0]
|
||||
veor q2, q14
|
||||
vst1.8 {q2}, [r1]!
|
||||
|
||||
Lctr_enc_done:
|
||||
.Lctr_enc_done:
|
||||
vmov.i32 q0, #0
|
||||
vmov.i32 q1, #0
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
Lctr_enc_bzero:@ wipe key schedule [if any]
|
||||
.Lctr_enc_bzero:@ wipe key schedule [if any]
|
||||
vstmia sp!, {q0,q1}
|
||||
cmp sp, r9
|
||||
bne Lctr_enc_bzero
|
||||
bne .Lctr_enc_bzero
|
||||
#else
|
||||
vstmia sp, {q0,q1}
|
||||
#endif
|
||||
@ -1531,6 +1512,6 @@ Lctr_enc_bzero:@ wipe key schedule [if any]
|
||||
|
||||
@ OpenSSL contains aes_nohw_* fallback code here. We patch this
|
||||
@ out to retain a constant-time implementation.
|
||||
|
||||
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,23 +1,15 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
|
||||
@ instructions are in aesv8-armx.pl.)
|
||||
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
@ -31,16 +23,14 @@
|
||||
.code 32
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
|
||||
|
||||
.globl _gcm_init_neon
|
||||
.private_extern _gcm_init_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_neon
|
||||
#endif
|
||||
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
@ -56,15 +46,13 @@ _gcm_init_neon:
|
||||
vstmia r0,{q3}
|
||||
|
||||
bx lr @ bx lr
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
|
||||
.globl _gcm_gmult_neon
|
||||
.private_extern _gcm_gmult_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_neon
|
||||
#endif
|
||||
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
@ -76,16 +64,14 @@ _gcm_gmult_neon:
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
mov r3,#16
|
||||
b Lgmult_neon
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
|
||||
.globl _gcm_ghash_neon
|
||||
.private_extern _gcm_ghash_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_neon
|
||||
#endif
|
||||
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
gcm_ghash_neon:
|
||||
vld1.64 d1,[r0]! @ load Xi
|
||||
vld1.64 d0,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
@ -97,14 +83,14 @@ _gcm_ghash_neon:
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
|
||||
Loop_neon:
|
||||
.Loop_neon:
|
||||
vld1.64 d7,[r2]! @ load inp
|
||||
vld1.64 d6,[r2]!
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
veor q3,q0 @ inp^=Xi
|
||||
Lgmult_neon:
|
||||
.Lgmult_neon:
|
||||
vext.8 d16, d26, d26, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d0, d6, d6, #1 @ B1
|
||||
@ -240,7 +226,7 @@ Lgmult_neon:
|
||||
veor q0,q0,q10 @
|
||||
|
||||
subs r3,#16
|
||||
bne Loop_neon
|
||||
bne .Loop_neon
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
@ -250,9 +236,9 @@ Lgmult_neon:
|
||||
vst1.64 d0,[r0]
|
||||
|
||||
bx lr @ bx lr
|
||||
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,30 +1,22 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_v8
|
||||
#endif
|
||||
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
vld1.64 {q9},[r1] @ load input H
|
||||
vmov.i8 q11,#0xe1
|
||||
vshl.i64 q11,q11,#57 @ 0xc2.0
|
||||
@ -67,17 +59,15 @@ _gcm_init_v8:
|
||||
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
|
||||
veor q9,q9,q14
|
||||
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
|
||||
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
|
||||
|
||||
vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
|
||||
bx lr
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_v8
|
||||
#endif
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
.globl gcm_gmult_v8
|
||||
.hidden gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
vld1.64 {q9},[r0] @ load Xi
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q12,q13},[r1] @ load twisted H, ...
|
||||
@ -114,14 +104,13 @@ _gcm_gmult_v8:
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
bx lr
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_v8
|
||||
#endif
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
.globl gcm_ghash_v8
|
||||
.hidden gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
vld1.64 {q0},[r0] @ load [rotated] Xi
|
||||
@ "[rotated]" means that
|
||||
@ -151,7 +140,7 @@ _gcm_ghash_v8:
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q3,q8,q8,#8 @ rotate I[0]
|
||||
blo Lodd_tail_v8 @ r3 was less than 32
|
||||
blo .Lodd_tail_v8 @ r3 was less than 32
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
@ -161,10 +150,10 @@ _gcm_ghash_v8:
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
b Loop_mod2x_v8
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
Loop_mod2x_v8:
|
||||
.Loop_mod2x_v8:
|
||||
vext.8 q10,q3,q3,#8
|
||||
subs r3,r3,#32 @ is there more data?
|
||||
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
|
||||
@ -208,14 +197,14 @@ Loop_mod2x_v8:
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
veor q3,q3,q0
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
|
||||
veor q2,q2,q10
|
||||
vext.8 q3,q8,q8,#8 @ re-construct q3
|
||||
adds r3,r3,#32 @ re-construct r3
|
||||
veor q0,q0,q2 @ re-construct q0
|
||||
beq Ldone_v8 @ is r3 zero?
|
||||
Lodd_tail_v8:
|
||||
beq .Ldone_v8 @ is r3 zero?
|
||||
.Lodd_tail_v8:
|
||||
vext.8 q10,q0,q0,#8
|
||||
veor q3,q3,q0 @ inp^=Xi
|
||||
veor q9,q8,q10 @ q9 is rotated inp^Xi
|
||||
@ -240,7 +229,7 @@ Lodd_tail_v8:
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
Ldone_v8:
|
||||
.Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
@ -249,8 +238,9 @@ Ldone_v8:
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
bx lr
|
||||
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,17 +1,9 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
@ -22,39 +14,24 @@
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
.globl _sha1_block_data_order
|
||||
.private_extern _sha1_block_data_order
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _sha1_block_data_order
|
||||
#endif
|
||||
.globl sha1_block_data_order_nohw
|
||||
.hidden sha1_block_data_order_nohw
|
||||
.type sha1_block_data_order_nohw,%function
|
||||
|
||||
.align 5
|
||||
_sha1_block_data_order:
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
Lsha1_block:
|
||||
adr r3,Lsha1_block
|
||||
ldr r12,LOPENSSL_armcap
|
||||
ldr r12,[r3,r12] @ OPENSSL_armcap_P
|
||||
#ifdef __APPLE__
|
||||
ldr r12,[r12]
|
||||
#endif
|
||||
tst r12,#ARMV8_SHA1
|
||||
bne LARMv8
|
||||
tst r12,#ARMV7_NEON
|
||||
bne LNEON
|
||||
#endif
|
||||
sha1_block_data_order_nohw:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||
ldmia r0,{r3,r4,r5,r6,r7}
|
||||
Lloop:
|
||||
ldr r8,LK_00_19
|
||||
.Lloop:
|
||||
ldr r8,.LK_00_19
|
||||
mov r14,sp
|
||||
sub sp,sp,#15*4
|
||||
mov r5,r5,ror#30
|
||||
mov r6,r6,ror#30
|
||||
mov r7,r7,ror#30 @ [6]
|
||||
L_00_15:
|
||||
#if __ARM_ARCH__<7
|
||||
.L_00_15:
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
@ -79,7 +56,7 @@ L_00_15:
|
||||
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r7,r7,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
@ -104,7 +81,7 @@ L_00_15:
|
||||
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r6,r6,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
@ -129,7 +106,7 @@ L_00_15:
|
||||
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r5,r5,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
@ -154,7 +131,7 @@ L_00_15:
|
||||
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r4,r4,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
@ -185,9 +162,9 @@ L_00_15:
|
||||
#else
|
||||
teq r14,sp
|
||||
#endif
|
||||
bne L_00_15 @ [((11+4)*5+2)*3]
|
||||
bne .L_00_15 @ [((11+4)*5+2)*3]
|
||||
sub sp,sp,#25*4
|
||||
#if __ARM_ARCH__<7
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
@ -281,9 +258,9 @@ L_00_15:
|
||||
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
|
||||
add r3,r3,r10 @ E+=F_00_19(B,C,D)
|
||||
|
||||
ldr r8,LK_20_39 @ [+15+16*4]
|
||||
ldr r8,.LK_20_39 @ [+15+16*4]
|
||||
cmn sp,#0 @ [+3], clear carry to denote 20_39
|
||||
L_20_39_or_60_79:
|
||||
.L_20_39_or_60_79:
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
@ -370,12 +347,12 @@ L_20_39_or_60_79:
|
||||
#else
|
||||
teq r14,sp @ preserve carry
|
||||
#endif
|
||||
bne L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
|
||||
bcs L_done @ [+((12+3)*5+2)*4], spare 300 bytes
|
||||
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
|
||||
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
|
||||
|
||||
ldr r8,LK_40_59
|
||||
ldr r8,.LK_40_59
|
||||
sub sp,sp,#20*4 @ [+2]
|
||||
L_40_59:
|
||||
.L_40_59:
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
@ -467,13 +444,13 @@ L_40_59:
|
||||
#else
|
||||
teq r14,sp
|
||||
#endif
|
||||
bne L_40_59 @ [+((12+5)*5+2)*4]
|
||||
bne .L_40_59 @ [+((12+5)*5+2)*4]
|
||||
|
||||
ldr r8,LK_60_79
|
||||
ldr r8,.LK_60_79
|
||||
sub sp,sp,#20*4
|
||||
cmp sp,#0 @ set carry to denote 60_79
|
||||
b L_20_39_or_60_79 @ [+4], spare 300 bytes
|
||||
L_done:
|
||||
b .L_20_39_or_60_79 @ [+4], spare 300 bytes
|
||||
.L_done:
|
||||
add sp,sp,#80*4 @ "deallocate" stack frame
|
||||
ldmia r0,{r8,r9,r10,r11,r12}
|
||||
add r3,r8,r3
|
||||
@ -483,9 +460,9 @@ L_done:
|
||||
add r7,r12,r7,ror#2
|
||||
stmia r0,{r3,r4,r5,r6,r7}
|
||||
teq r1,r2
|
||||
bne Lloop @ [+18], total 1307
|
||||
bne .Lloop @ [+18], total 1307
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
#if __ARM_ARCH>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
@ -493,37 +470,32 @@ L_done:
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw
|
||||
|
||||
.align 5
|
||||
LK_00_19:.word 0x5a827999
|
||||
LK_20_39:.word 0x6ed9eba1
|
||||
LK_40_59:.word 0x8f1bbcdc
|
||||
LK_60_79:.word 0xca62c1d6
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-Lsha1_block
|
||||
#endif
|
||||
.LK_00_19:.word 0x5a827999
|
||||
.LK_20_39:.word 0x6ed9eba1
|
||||
.LK_40_59:.word 0x8f1bbcdc
|
||||
.LK_60_79:.word 0xca62c1d6
|
||||
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 5
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func sha1_block_data_order_neon
|
||||
#endif
|
||||
.globl sha1_block_data_order_neon
|
||||
.hidden sha1_block_data_order_neon
|
||||
.type sha1_block_data_order_neon,%function
|
||||
.align 4
|
||||
sha1_block_data_order_neon:
|
||||
LNEON:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||
@ dmb @ errata #451034 on early Cortex A8
|
||||
@ vstmdb sp!,{d8-d15} @ ABI specification says so
|
||||
mov r14,sp
|
||||
sub r12,sp,#64
|
||||
adr r8,LK_00_19
|
||||
adr r8,.LK_00_19
|
||||
bic r12,r12,#15 @ align for 128-bit stores
|
||||
|
||||
ldmia r0,{r3,r4,r5,r6,r7} @ load context
|
||||
@ -545,7 +517,7 @@ LNEON:
|
||||
vst1.32 {q10},[r12,:128]!
|
||||
ldr r9,[sp] @ big RAW stall
|
||||
|
||||
Loop_neon:
|
||||
.Loop_neon:
|
||||
vext.8 q8,q0,q1,#8
|
||||
bic r10,r6,r4
|
||||
add r7,r7,r9
|
||||
@ -1358,11 +1330,11 @@ Loop_neon:
|
||||
stmia r0,{r3,r4,r5,r6,r7}
|
||||
itt ne
|
||||
addne r12,sp,#3*16
|
||||
bne Loop_neon
|
||||
bne .Loop_neon
|
||||
|
||||
@ vldmia sp!,{d8-d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
|
||||
|
||||
.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
@ -1372,16 +1344,15 @@ Loop_neon:
|
||||
# define INST(a,b,c,d) .byte a,b,c,d|0x10
|
||||
# endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func sha1_block_data_order_armv8
|
||||
#endif
|
||||
.globl sha1_block_data_order_hw
|
||||
.hidden sha1_block_data_order_hw
|
||||
.type sha1_block_data_order_hw,%function
|
||||
.align 5
|
||||
sha1_block_data_order_armv8:
|
||||
LARMv8:
|
||||
sha1_block_data_order_hw:
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
|
||||
veor q1,q1,q1
|
||||
adr r3,LK_00_19
|
||||
adr r3,.LK_00_19
|
||||
vld1.32 {q0},[r0]!
|
||||
vld1.32 {d2[0]},[r0]
|
||||
sub r0,r0,#16
|
||||
@ -1390,7 +1361,7 @@ LARMv8:
|
||||
vld1.32 {d20[],d21[]},[r3,:32]!
|
||||
vld1.32 {d22[],d23[]},[r3,:32]
|
||||
|
||||
Loop_v8:
|
||||
.Loop_v8:
|
||||
vld1.8 {q4,q5},[r1]!
|
||||
vld1.8 {q6,q7},[r1]!
|
||||
vrev32.8 q4,q4
|
||||
@ -1498,21 +1469,13 @@ Loop_v8:
|
||||
|
||||
vadd.i32 q1,q1,q2
|
||||
vadd.i32 q0,q0,q14
|
||||
bne Loop_v8
|
||||
bne .Loop_v8
|
||||
|
||||
vst1.32 {q0},[r0]!
|
||||
vst1.32 {d2[0]},[r0]
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
bx lr @ bx lr
|
||||
|
||||
.size sha1_block_data_order_hw,.-sha1_block_data_order_hw
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,17 +1,9 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
@
|
||||
@ Licensed under the OpenSSL license (the "License"). You may not use
|
||||
@ -60,14 +52,14 @@
|
||||
#ifndef __KERNEL__
|
||||
# include <openssl/arm_arch.h>
|
||||
#else
|
||||
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
|
||||
# define __ARM_ARCH __LINUX_ARM_ARCH__
|
||||
# define __ARM_MAX_ARCH__ 7
|
||||
#endif
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
|
||||
@ instructions are manually-encoded. (See unsha256.)
|
||||
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
@ -77,7 +69,7 @@
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
|
||||
.type K256,%object
|
||||
.align 5
|
||||
K256:
|
||||
.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
@ -96,51 +88,28 @@ K256:
|
||||
.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
.size K256,.-K256
|
||||
.word 0 @ terminator
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-Lsha256_block_data_order
|
||||
#endif
|
||||
.align 5
|
||||
|
||||
.globl _sha256_block_data_order
|
||||
.private_extern _sha256_block_data_order
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _sha256_block_data_order
|
||||
#endif
|
||||
_sha256_block_data_order:
|
||||
Lsha256_block_data_order:
|
||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||
sub r3,pc,#8 @ _sha256_block_data_order
|
||||
#else
|
||||
adr r3,Lsha256_block_data_order
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
ldr r12,LOPENSSL_armcap
|
||||
ldr r12,[r3,r12] @ OPENSSL_armcap_P
|
||||
#ifdef __APPLE__
|
||||
ldr r12,[r12]
|
||||
#endif
|
||||
tst r12,#ARMV8_SHA256
|
||||
bne LARMv8
|
||||
tst r12,#ARMV7_NEON
|
||||
bne LNEON
|
||||
#endif
|
||||
.globl sha256_block_data_order_nohw
|
||||
.hidden sha256_block_data_order_nohw
|
||||
.type sha256_block_data_order_nohw,%function
|
||||
sha256_block_data_order_nohw:
|
||||
add r2,r1,r2,lsl#6 @ len to point at the end of inp
|
||||
stmdb sp!,{r0,r1,r2,r4-r11,lr}
|
||||
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
sub r14,r3,#256+32 @ K256
|
||||
adr r14,K256
|
||||
sub sp,sp,#16*4 @ alloca(X[16])
|
||||
Loop:
|
||||
# if __ARM_ARCH__>=7
|
||||
.Loop:
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
# endif
|
||||
eor r3,r5,r6 @ magic
|
||||
eor r12,r12,r12
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 0
|
||||
# if 0==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -181,7 +150,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 0<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -198,7 +167,7 @@ Loop:
|
||||
eor r3,r3,r5 @ Maj(a,b,c)
|
||||
add r11,r11,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r11,r11,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 1
|
||||
# if 1==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -239,7 +208,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 1<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -256,7 +225,7 @@ Loop:
|
||||
eor r12,r12,r4 @ Maj(a,b,c)
|
||||
add r10,r10,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r10,r10,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 2
|
||||
# if 2==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -297,7 +266,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 2<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -314,7 +283,7 @@ Loop:
|
||||
eor r3,r3,r11 @ Maj(a,b,c)
|
||||
add r9,r9,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r9,r9,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 3
|
||||
# if 3==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -355,7 +324,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 3<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -372,7 +341,7 @@ Loop:
|
||||
eor r12,r12,r10 @ Maj(a,b,c)
|
||||
add r8,r8,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r8,r8,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 4
|
||||
# if 4==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -413,7 +382,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 4<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -430,7 +399,7 @@ Loop:
|
||||
eor r3,r3,r9 @ Maj(a,b,c)
|
||||
add r7,r7,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r7,r7,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 5
|
||||
# if 5==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -471,7 +440,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 5<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -488,7 +457,7 @@ Loop:
|
||||
eor r12,r12,r8 @ Maj(a,b,c)
|
||||
add r6,r6,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r6,r6,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 6
|
||||
# if 6==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -529,7 +498,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 6<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -546,7 +515,7 @@ Loop:
|
||||
eor r3,r3,r7 @ Maj(a,b,c)
|
||||
add r5,r5,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r5,r5,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 7
|
||||
# if 7==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -587,7 +556,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 7<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -604,7 +573,7 @@ Loop:
|
||||
eor r12,r12,r6 @ Maj(a,b,c)
|
||||
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r4,r4,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 8
|
||||
# if 8==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -645,7 +614,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 8<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -662,7 +631,7 @@ Loop:
|
||||
eor r3,r3,r5 @ Maj(a,b,c)
|
||||
add r11,r11,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r11,r11,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 9
|
||||
# if 9==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -703,7 +672,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 9<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -720,7 +689,7 @@ Loop:
|
||||
eor r12,r12,r4 @ Maj(a,b,c)
|
||||
add r10,r10,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r10,r10,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 10
|
||||
# if 10==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -761,7 +730,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 10<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -778,7 +747,7 @@ Loop:
|
||||
eor r3,r3,r11 @ Maj(a,b,c)
|
||||
add r9,r9,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r9,r9,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 11
|
||||
# if 11==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -819,7 +788,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 11<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -836,7 +805,7 @@ Loop:
|
||||
eor r12,r12,r10 @ Maj(a,b,c)
|
||||
add r8,r8,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r8,r8,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 12
|
||||
# if 12==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -877,7 +846,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 12<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -894,7 +863,7 @@ Loop:
|
||||
eor r3,r3,r9 @ Maj(a,b,c)
|
||||
add r7,r7,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r7,r7,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 13
|
||||
# if 13==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -935,7 +904,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 13<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -952,7 +921,7 @@ Loop:
|
||||
eor r12,r12,r8 @ Maj(a,b,c)
|
||||
add r6,r6,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r6,r6,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 14
|
||||
# if 14==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -993,7 +962,7 @@ Loop:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 14<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1010,7 +979,7 @@ Loop:
|
||||
eor r3,r3,r7 @ Maj(a,b,c)
|
||||
add r5,r5,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r5,r5,r3 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
@ ldr r2,[r1],#4 @ 15
|
||||
# if 15==15
|
||||
str r1,[sp,#17*4] @ make room for r1
|
||||
@ -1051,7 +1020,7 @@ Loop:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 15<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1068,7 +1037,7 @@ Loop:
|
||||
eor r12,r12,r6 @ Maj(a,b,c)
|
||||
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r4,r4,r12 @ h+=Maj(a,b,c)
|
||||
Lrounds_16_xx:
|
||||
.Lrounds_16_xx:
|
||||
@ ldr r2,[sp,#1*4] @ 16
|
||||
@ ldr r1,[sp,#14*4]
|
||||
mov r0,r2,ror#7
|
||||
@ -1101,7 +1070,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 16<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1150,7 +1119,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 17<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1199,7 +1168,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 18<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1248,7 +1217,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 19<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1297,7 +1266,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 20<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1346,7 +1315,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 21<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1395,7 +1364,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 22<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1444,7 +1413,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 23<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1493,7 +1462,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 24<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1542,7 +1511,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 25<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1591,7 +1560,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 26<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1640,7 +1609,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 27<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1689,7 +1658,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 28<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1738,7 +1707,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 29<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1787,7 +1756,7 @@ Lrounds_16_xx:
|
||||
cmp r12,#0xf2 @ done?
|
||||
#endif
|
||||
#if 30<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1836,7 +1805,7 @@ Lrounds_16_xx:
|
||||
cmp r3,#0xf2 @ done?
|
||||
#endif
|
||||
#if 31<15
|
||||
# if __ARM_ARCH__>=7
|
||||
# if __ARM_ARCH>=7
|
||||
ldr r2,[r1],#4 @ prefetch
|
||||
# else
|
||||
ldrb r2,[r1,#3]
|
||||
@ -1853,11 +1822,11 @@ Lrounds_16_xx:
|
||||
eor r12,r12,r6 @ Maj(a,b,c)
|
||||
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
|
||||
@ add r4,r4,r12 @ h+=Maj(a,b,c)
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
ite eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
ldreq r3,[sp,#16*4] @ pull ctx
|
||||
bne Lrounds_16_xx
|
||||
bne .Lrounds_16_xx
|
||||
|
||||
add r4,r4,r12 @ h+=Maj(a,b,c) from the past
|
||||
ldr r0,[r3,#0]
|
||||
@ -1881,10 +1850,10 @@ Lrounds_16_xx:
|
||||
stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
cmp r1,r12
|
||||
sub r14,r14,#256 @ rewind Ktbl
|
||||
bne Loop
|
||||
bne .Loop
|
||||
|
||||
add sp,sp,#19*4 @ destroy frame
|
||||
#if __ARM_ARCH__>=5
|
||||
#if __ARM_ARCH>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
@ -1892,24 +1861,43 @@ Lrounds_16_xx:
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
|
||||
|
||||
.globl _sha256_block_data_order_neon
|
||||
.private_extern _sha256_block_data_order_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _sha256_block_data_order_neon
|
||||
.LK256_shortcut_neon:
|
||||
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
|
||||
#if defined(__thumb2__)
|
||||
.word K256-(.LK256_add_neon+4)
|
||||
#else
|
||||
.word K256-(.LK256_add_neon+8)
|
||||
#endif
|
||||
|
||||
.globl sha256_block_data_order_neon
|
||||
.hidden sha256_block_data_order_neon
|
||||
.type sha256_block_data_order_neon,%function
|
||||
.align 5
|
||||
.skip 16
|
||||
_sha256_block_data_order_neon:
|
||||
LNEON:
|
||||
sha256_block_data_order_neon:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
|
||||
sub r11,sp,#16*4+16
|
||||
adr r14,K256
|
||||
|
||||
@ K256 is just at the boundary of being easily referenced by an ADR from
|
||||
@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
|
||||
@ not fit. By moving code around, we could make it fit, but this is too
|
||||
@ fragile. For simplicity, just load the offset from
|
||||
@ .LK256_shortcut_neon.
|
||||
@
|
||||
@ TODO(davidben): adrl would avoid a load, but clang-assembler does not
|
||||
@ support it. We might be able to emulate it with a macro, but Android's
|
||||
@ did not work when I tried it.
|
||||
@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
|
||||
ldr r14,.LK256_shortcut_neon
|
||||
.LK256_add_neon:
|
||||
add r14,pc,r14
|
||||
|
||||
bic r11,r11,#15 @ align for 128-bit stores
|
||||
mov r12,sp
|
||||
mov sp,r11 @ alloca
|
||||
@ -1946,10 +1934,10 @@ LNEON:
|
||||
ldr r2,[sp,#0]
|
||||
eor r12,r12,r12
|
||||
eor r3,r5,r6
|
||||
b L_00_48
|
||||
b .L_00_48
|
||||
|
||||
.align 4
|
||||
L_00_48:
|
||||
.L_00_48:
|
||||
vext.8 q8,q0,q1,#4
|
||||
add r11,r11,r2
|
||||
eor r2,r9,r10
|
||||
@ -2345,7 +2333,7 @@ L_00_48:
|
||||
teq r2,#0 @ check for K256 terminator
|
||||
ldr r2,[sp,#0]
|
||||
sub r1,r1,#64
|
||||
bne L_00_48
|
||||
bne .L_00_48
|
||||
|
||||
ldr r1,[sp,#68]
|
||||
ldr r0,[sp,#72]
|
||||
@ -2678,10 +2666,10 @@ L_00_48:
|
||||
ldreq sp,[sp,#76] @ restore original sp
|
||||
itt ne
|
||||
eorne r3,r5,r6
|
||||
bne L_00_48
|
||||
bne .L_00_48
|
||||
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
|
||||
|
||||
.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
|
||||
@ -2691,19 +2679,32 @@ L_00_48:
|
||||
# define INST(a,b,c,d) .byte a,b,c,d
|
||||
# endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func sha256_block_data_order_armv8
|
||||
.LK256_shortcut_hw:
|
||||
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
|
||||
#if defined(__thumb2__)
|
||||
.word K256-(.LK256_add_hw+4)
|
||||
#else
|
||||
.word K256-(.LK256_add_hw+8)
|
||||
#endif
|
||||
|
||||
.globl sha256_block_data_order_hw
|
||||
.hidden sha256_block_data_order_hw
|
||||
.type sha256_block_data_order_hw,%function
|
||||
.align 5
|
||||
sha256_block_data_order_armv8:
|
||||
LARMv8:
|
||||
sha256_block_data_order_hw:
|
||||
@ K256 is too far to reference from one ADR command in Thumb mode. In
|
||||
@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
|
||||
@ boundary. For simplicity, just load the offset from .LK256_shortcut_hw.
|
||||
ldr r3,.LK256_shortcut_hw
|
||||
.LK256_add_hw:
|
||||
add r3,pc,r3
|
||||
|
||||
vld1.32 {q0,q1},[r0]
|
||||
sub r3,r3,#256+32
|
||||
add r2,r1,r2,lsl#6 @ len to point at the end of inp
|
||||
b Loop_v8
|
||||
b .Loop_v8
|
||||
|
||||
.align 4
|
||||
Loop_v8:
|
||||
.Loop_v8:
|
||||
vld1.8 {q8,q9},[r1]!
|
||||
vld1.8 {q10,q11},[r1]!
|
||||
vld1.32 {q12},[r3]!
|
||||
@ -2825,22 +2826,14 @@ Loop_v8:
|
||||
vadd.i32 q0,q0,q14
|
||||
vadd.i32 q1,q1,q15
|
||||
it ne
|
||||
bne Loop_v8
|
||||
bne .Loop_v8
|
||||
|
||||
vst1.32 {q0,q1},[r0]
|
||||
|
||||
bx lr @ bx lr
|
||||
|
||||
.size sha256_block_data_order_hw,.-sha256_block_data_order_hw
|
||||
#endif
|
||||
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,17 +1,9 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
@
|
||||
@ Licensed under the OpenSSL license (the "License"). You may not use
|
||||
@ -71,7 +63,6 @@
|
||||
# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
|
||||
# define VFP_ABI_POP vldmia sp!,{d8-d15}
|
||||
#else
|
||||
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
|
||||
# define __ARM_MAX_ARCH__ 7
|
||||
# define VFP_ABI_PUSH
|
||||
# define VFP_ABI_POP
|
||||
@ -79,7 +70,7 @@
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
|
||||
.arch armv7-a
|
||||
|
||||
#ifdef __ARMEL__
|
||||
# define LO 0
|
||||
@ -100,7 +91,7 @@
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
|
||||
.type K512,%object
|
||||
.align 5
|
||||
K512:
|
||||
WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
|
||||
@ -143,39 +134,15 @@ K512:
|
||||
WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
|
||||
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
|
||||
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
|
||||
.size K512,.-K512
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-Lsha512_block_data_order
|
||||
.skip 32-4
|
||||
#else
|
||||
.skip 32
|
||||
#endif
|
||||
|
||||
.globl _sha512_block_data_order
|
||||
.private_extern _sha512_block_data_order
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _sha512_block_data_order
|
||||
#endif
|
||||
_sha512_block_data_order:
|
||||
Lsha512_block_data_order:
|
||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||
sub r3,pc,#8 @ _sha512_block_data_order
|
||||
#else
|
||||
adr r3,Lsha512_block_data_order
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
ldr r12,LOPENSSL_armcap
|
||||
ldr r12,[r3,r12] @ OPENSSL_armcap_P
|
||||
#ifdef __APPLE__
|
||||
ldr r12,[r12]
|
||||
#endif
|
||||
tst r12,#ARMV7_NEON
|
||||
bne LNEON
|
||||
#endif
|
||||
.globl sha512_block_data_order_nohw
|
||||
.hidden sha512_block_data_order_nohw
|
||||
.type sha512_block_data_order_nohw,%function
|
||||
sha512_block_data_order_nohw:
|
||||
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
sub r14,r3,#672 @ K512
|
||||
adr r14,K512
|
||||
sub sp,sp,#9*8
|
||||
|
||||
ldr r7,[r0,#32+LO]
|
||||
@ -184,7 +151,7 @@ Lsha512_block_data_order:
|
||||
ldr r10, [r0,#48+HI]
|
||||
ldr r11, [r0,#56+LO]
|
||||
ldr r12, [r0,#56+HI]
|
||||
Loop:
|
||||
.Loop:
|
||||
str r9, [sp,#48+0]
|
||||
str r10, [sp,#48+4]
|
||||
str r11, [sp,#56+0]
|
||||
@ -208,8 +175,8 @@ Loop:
|
||||
str r3,[sp,#40+0]
|
||||
str r4,[sp,#40+4]
|
||||
|
||||
L00_15:
|
||||
#if __ARM_ARCH__<7
|
||||
.L00_15:
|
||||
#if __ARM_ARCH<7
|
||||
ldrb r3,[r1,#7]
|
||||
ldrb r9, [r1,#6]
|
||||
ldrb r10, [r1,#5]
|
||||
@ -286,7 +253,7 @@ L00_15:
|
||||
teq r9,#148
|
||||
|
||||
ldr r12,[sp,#16+0] @ c.lo
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
it eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
orreq r14,r14,#1
|
||||
@ -324,11 +291,11 @@ L00_15:
|
||||
tst r14,#1
|
||||
add r14,r14,#8
|
||||
tst r14,#1
|
||||
beq L00_15
|
||||
beq .L00_15
|
||||
ldr r9,[sp,#184+0]
|
||||
ldr r10,[sp,#184+4]
|
||||
bic r14,r14,#1
|
||||
L16_79:
|
||||
.L16_79:
|
||||
@ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
|
||||
@ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
|
||||
@ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
|
||||
@ -426,7 +393,7 @@ L16_79:
|
||||
teq r9,#23
|
||||
|
||||
ldr r12,[sp,#16+0] @ c.lo
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
it eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
orreq r14,r14,#1
|
||||
@ -463,12 +430,12 @@ L16_79:
|
||||
adc r6,r6,r4 @ h += T
|
||||
tst r14,#1
|
||||
add r14,r14,#8
|
||||
#if __ARM_ARCH__>=7
|
||||
#if __ARM_ARCH>=7
|
||||
ittt eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
ldreq r9,[sp,#184+0]
|
||||
ldreq r10,[sp,#184+4]
|
||||
beq L16_79
|
||||
beq .L16_79
|
||||
bic r14,r14,#1
|
||||
|
||||
ldr r3,[sp,#8+0]
|
||||
@ -539,10 +506,10 @@ L16_79:
|
||||
sub r14,r14,#640
|
||||
|
||||
teq r1,r2
|
||||
bne Loop
|
||||
bne .Loop
|
||||
|
||||
add sp,sp,#8*9 @ destroy frame
|
||||
#if __ARM_ARCH__>=5
|
||||
#if __ARM_ARCH>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
@ -550,25 +517,22 @@ L16_79:
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
|
||||
|
||||
.globl _sha512_block_data_order_neon
|
||||
.private_extern _sha512_block_data_order_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _sha512_block_data_order_neon
|
||||
#endif
|
||||
.globl sha512_block_data_order_neon
|
||||
.hidden sha512_block_data_order_neon
|
||||
.type sha512_block_data_order_neon,%function
|
||||
.align 4
|
||||
_sha512_block_data_order_neon:
|
||||
LNEON:
|
||||
sha512_block_data_order_neon:
|
||||
dmb @ errata #451034 on early Cortex A8
|
||||
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
||||
adr r3,K512
|
||||
VFP_ABI_PUSH
|
||||
vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
|
||||
Loop_neon:
|
||||
.Loop_neon:
|
||||
vshr.u64 d24,d20,#14 @ 0
|
||||
#if 0<16
|
||||
vld1.64 {d0},[r1]! @ handles unaligned
|
||||
@ -1162,7 +1126,7 @@ Loop_neon:
|
||||
vadd.i64 d30,d27
|
||||
@ vadd.i64 d16,d30
|
||||
mov r12,#4
|
||||
L16_79_neon:
|
||||
.L16_79_neon:
|
||||
subs r12,#1
|
||||
vshr.u64 q12,q7,#19
|
||||
vshr.u64 q13,q7,#61
|
||||
@ -1868,7 +1832,7 @@ L16_79_neon:
|
||||
vadd.i64 d20,d27
|
||||
vadd.i64 d30,d27
|
||||
@ vadd.i64 d16,d30
|
||||
bne L16_79_neon
|
||||
bne .L16_79_neon
|
||||
|
||||
vadd.i64 d16,d30 @ h+=Maj from the past
|
||||
vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp
|
||||
@ -1879,21 +1843,13 @@ L16_79_neon:
|
||||
vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context
|
||||
teq r1,r2
|
||||
sub r3,#640 @ rewind K512
|
||||
bne Loop_neon
|
||||
bne .Loop_neon
|
||||
|
||||
VFP_ABI_POP
|
||||
bx lr @ .word 0xe12fff1e
|
||||
|
||||
.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
|
||||
#endif
|
||||
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
@ -1,21 +1,13 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
.syntax unified
|
||||
|
||||
|
||||
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
@ -25,20 +17,20 @@
|
||||
|
||||
.text
|
||||
|
||||
|
||||
.type _vpaes_consts,%object
|
||||
.align 7 @ totally strategic alignment
|
||||
_vpaes_consts:
|
||||
Lk_mc_forward:@ mc_forward
|
||||
.Lk_mc_forward:@ mc_forward
|
||||
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
|
||||
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
|
||||
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
|
||||
.quad 0x000302010C0F0E0D, 0x080B0A0904070605
|
||||
Lk_mc_backward:@ mc_backward
|
||||
.Lk_mc_backward:@ mc_backward
|
||||
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
|
||||
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
|
||||
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
|
||||
.quad 0x0A09080B06050407, 0x020100030E0D0C0F
|
||||
Lk_sr:@ sr
|
||||
.Lk_sr:@ sr
|
||||
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
|
||||
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
|
||||
.quad 0x0F060D040B020900, 0x070E050C030A0108
|
||||
@ -47,42 +39,40 @@ Lk_sr:@ sr
|
||||
@
|
||||
@ "Hot" constants
|
||||
@
|
||||
Lk_inv:@ inv, inva
|
||||
.Lk_inv:@ inv, inva
|
||||
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
|
||||
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
|
||||
Lk_ipt:@ input transform (lo, hi)
|
||||
.Lk_ipt:@ input transform (lo, hi)
|
||||
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
|
||||
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
|
||||
Lk_sbo:@ sbou, sbot
|
||||
.Lk_sbo:@ sbou, sbot
|
||||
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
|
||||
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
|
||||
Lk_sb1:@ sb1u, sb1t
|
||||
.Lk_sb1:@ sb1u, sb1t
|
||||
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
|
||||
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
|
||||
Lk_sb2:@ sb2u, sb2t
|
||||
.Lk_sb2:@ sb2u, sb2t
|
||||
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
|
||||
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
|
||||
|
||||
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,55,32,78,69,79,78,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
|
||||
.align 2
|
||||
|
||||
.size _vpaes_consts,.-_vpaes_consts
|
||||
.align 6
|
||||
@@
|
||||
@@ _aes_preheat
|
||||
@@
|
||||
@@ Fills q9-q15 as specified below.
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_preheat
|
||||
#endif
|
||||
.type _vpaes_preheat,%function
|
||||
.align 4
|
||||
_vpaes_preheat:
|
||||
adr r10, Lk_inv
|
||||
vmov.i8 q9, #0x0f @ Lk_s0F
|
||||
vld1.64 {q10,q11}, [r10]! @ Lk_inv
|
||||
add r10, r10, #64 @ Skip Lk_ipt, Lk_sbo
|
||||
vld1.64 {q12,q13}, [r10]! @ Lk_sb1
|
||||
vld1.64 {q14,q15}, [r10] @ Lk_sb2
|
||||
adr r10, .Lk_inv
|
||||
vmov.i8 q9, #0x0f @ .Lk_s0F
|
||||
vld1.64 {q10,q11}, [r10]! @ .Lk_inv
|
||||
add r10, r10, #64 @ Skip .Lk_ipt, .Lk_sbo
|
||||
vld1.64 {q12,q13}, [r10]! @ .Lk_sb1
|
||||
vld1.64 {q14,q15}, [r10] @ .Lk_sb2
|
||||
bx lr
|
||||
|
||||
@@
|
||||
@ -100,18 +90,16 @@ _vpaes_preheat:
|
||||
@@ Preserves q6-q8 so you get some local vectors
|
||||
@@
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_encrypt_core
|
||||
#endif
|
||||
.type _vpaes_encrypt_core,%function
|
||||
.align 4
|
||||
_vpaes_encrypt_core:
|
||||
mov r9, r2
|
||||
ldr r8, [r2,#240] @ pull rounds
|
||||
adr r11, Lk_ipt
|
||||
adr r11, .Lk_ipt
|
||||
@ vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||
@ vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi
|
||||
vld1.64 {q2, q3}, [r11]
|
||||
adr r11, Lk_mc_forward+16
|
||||
adr r11, .Lk_mc_forward+16
|
||||
vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 # round0 key
|
||||
vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1
|
||||
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0
|
||||
@ -125,15 +113,15 @@ _vpaes_encrypt_core:
|
||||
@ .Lenc_entry ends with a bnz instruction which is normally paired with
|
||||
@ subs in .Lenc_loop.
|
||||
tst r8, r8
|
||||
b Lenc_entry
|
||||
b .Lenc_entry
|
||||
|
||||
.align 4
|
||||
Lenc_loop:
|
||||
.Lenc_loop:
|
||||
@ middle of middle round
|
||||
add r10, r11, #0x40
|
||||
vtbl.8 d8, {q13}, d4 @ vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
|
||||
vtbl.8 d9, {q13}, d5
|
||||
vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[]
|
||||
vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
|
||||
vtbl.8 d0, {q12}, d6 @ vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
|
||||
vtbl.8 d1, {q12}, d7
|
||||
veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
|
||||
@ -142,7 +130,7 @@ Lenc_loop:
|
||||
veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A
|
||||
vtbl.8 d4, {q14}, d6 @ vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
|
||||
vtbl.8 d5, {q14}, d7
|
||||
vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[]
|
||||
vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
|
||||
vtbl.8 d6, {q0}, d2 @ vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
|
||||
vtbl.8 d7, {q0}, d3
|
||||
veor q2, q2, q5 @ vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
|
||||
@ -159,7 +147,7 @@ Lenc_loop:
|
||||
veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D
|
||||
subs r8, r8, #1 @ nr--
|
||||
|
||||
Lenc_entry:
|
||||
.Lenc_entry:
|
||||
@ top of round
|
||||
vand q1, q0, q9 @ vpand %xmm0, %xmm9, %xmm1 # 0 = k
|
||||
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i
|
||||
@ -179,19 +167,19 @@ Lenc_entry:
|
||||
veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io
|
||||
veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
|
||||
vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5
|
||||
bne Lenc_loop
|
||||
bne .Lenc_loop
|
||||
|
||||
@ middle of last round
|
||||
add r10, r11, #0x80
|
||||
|
||||
adr r11, Lk_sbo
|
||||
adr r11, .Lk_sbo
|
||||
@ Read to q1 instead of q4, so the vtbl.8 instruction below does not
|
||||
@ overlap table and destination registers.
|
||||
vld1.64 {q1}, [r11]! @ vmovdqa -0x60(%r10), %xmm4 # 3 : sbou
|
||||
vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot Lk_sbo+16
|
||||
vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
|
||||
vtbl.8 d8, {q1}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
|
||||
vtbl.8 d9, {q1}, d5
|
||||
vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[]
|
||||
vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
|
||||
@ Write to q2 instead of q0 below, to avoid overlapping table and
|
||||
@ destination registers.
|
||||
vtbl.8 d4, {q0}, d6 @ vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
|
||||
@ -202,15 +190,13 @@ Lenc_entry:
|
||||
vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0
|
||||
vtbl.8 d1, {q2}, d3
|
||||
bx lr
|
||||
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
|
||||
|
||||
|
||||
.globl _vpaes_encrypt
|
||||
.private_extern _vpaes_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_encrypt
|
||||
#endif
|
||||
.globl vpaes_encrypt
|
||||
.hidden vpaes_encrypt
|
||||
.type vpaes_encrypt,%function
|
||||
.align 4
|
||||
_vpaes_encrypt:
|
||||
vpaes_encrypt:
|
||||
@ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack
|
||||
@ alignment.
|
||||
stmdb sp!, {r7,r8,r9,r10,r11,lr}
|
||||
@ -224,33 +210,33 @@ _vpaes_encrypt:
|
||||
|
||||
vldmia sp!, {d8,d9,d10,d11}
|
||||
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
|
||||
|
||||
.size vpaes_encrypt,.-vpaes_encrypt
|
||||
|
||||
@
|
||||
@ Decryption stuff
|
||||
@
|
||||
|
||||
.type _vpaes_decrypt_consts,%object
|
||||
.align 4
|
||||
_vpaes_decrypt_consts:
|
||||
Lk_dipt:@ decryption input transform
|
||||
.Lk_dipt:@ decryption input transform
|
||||
.quad 0x0F505B040B545F00, 0x154A411E114E451A
|
||||
.quad 0x86E383E660056500, 0x12771772F491F194
|
||||
Lk_dsbo:@ decryption sbox final output
|
||||
.Lk_dsbo:@ decryption sbox final output
|
||||
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
|
||||
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
|
||||
Lk_dsb9:@ decryption sbox output *9*u, *9*t
|
||||
.Lk_dsb9:@ decryption sbox output *9*u, *9*t
|
||||
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
|
||||
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
|
||||
Lk_dsbd:@ decryption sbox output *D*u, *D*t
|
||||
.Lk_dsbd:@ decryption sbox output *D*u, *D*t
|
||||
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
|
||||
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
|
||||
Lk_dsbb:@ decryption sbox output *B*u, *B*t
|
||||
.Lk_dsbb:@ decryption sbox output *B*u, *B*t
|
||||
.quad 0xD022649296B44200, 0x602646F6B0F2D404
|
||||
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
|
||||
Lk_dsbe:@ decryption sbox output *E*u, *E*t
|
||||
.Lk_dsbe:@ decryption sbox output *E*u, *E*t
|
||||
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
|
||||
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
|
||||
|
||||
.size _vpaes_decrypt_consts,.-_vpaes_decrypt_consts
|
||||
|
||||
@@
|
||||
@@ Decryption core
|
||||
@ -259,9 +245,7 @@ Lk_dsbe:@ decryption sbox output *E*u, *E*t
|
||||
@@ the values from _vpaes_preheat. q9-q11 must still be set from
|
||||
@@ _vpaes_preheat.
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_decrypt_core
|
||||
#endif
|
||||
.type _vpaes_decrypt_core,%function
|
||||
.align 4
|
||||
_vpaes_decrypt_core:
|
||||
mov r9, r2
|
||||
@ -275,22 +259,22 @@ _vpaes_decrypt_core:
|
||||
@ q12-q15, registers normally use for preloaded constants. This is fine
|
||||
@ because decryption doesn't use those constants. The values are
|
||||
@ constant, so this does not interfere with potential 2x optimizations.
|
||||
adr r7, Lk_dipt
|
||||
adr r7, .Lk_dipt
|
||||
|
||||
vld1.64 {q12,q13}, [r7] @ vmovdqa Lk_dipt(%rip), %xmm2 # iptlo
|
||||
vld1.64 {q12,q13}, [r7] @ vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||
lsl r11, r8, #4 @ mov %rax, %r11; shl $4, %r11
|
||||
eor r11, r11, #0x30 @ xor $0x30, %r11
|
||||
adr r10, Lk_sr
|
||||
adr r10, .Lk_sr
|
||||
and r11, r11, #0x30 @ and $0x30, %r11
|
||||
add r11, r11, r10
|
||||
adr r10, Lk_mc_forward+48
|
||||
adr r10, .Lk_mc_forward+48
|
||||
|
||||
vld1.64 {q4}, [r9]! @ vmovdqu (%r9), %xmm4 # round0 key
|
||||
vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1
|
||||
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0
|
||||
vtbl.8 d4, {q12}, d2 @ vpshufb %xmm1, %xmm2, %xmm2
|
||||
vtbl.8 d5, {q12}, d3
|
||||
vld1.64 {q5}, [r10] @ vmovdqa Lk_mc_forward+48(%rip), %xmm5
|
||||
vld1.64 {q5}, [r10] @ vmovdqa .Lk_mc_forward+48(%rip), %xmm5
|
||||
@ vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi
|
||||
vtbl.8 d0, {q13}, d0 @ vpshufb %xmm0, %xmm1, %xmm0
|
||||
vtbl.8 d1, {q13}, d1
|
||||
@ -300,17 +284,17 @@ _vpaes_decrypt_core:
|
||||
@ .Ldec_entry ends with a bnz instruction which is normally paired with
|
||||
@ subs in .Ldec_loop.
|
||||
tst r8, r8
|
||||
b Ldec_entry
|
||||
b .Ldec_entry
|
||||
|
||||
.align 4
|
||||
Ldec_loop:
|
||||
.Ldec_loop:
|
||||
@
|
||||
@ Inverse mix columns
|
||||
@
|
||||
|
||||
@ We load .Lk_dsb* into q12-q15 on-demand. See the comment at the top of
|
||||
@ the function.
|
||||
adr r10, Lk_dsb9
|
||||
adr r10, .Lk_dsb9
|
||||
vld1.64 {q12,q13}, [r10]! @ vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
|
||||
@ vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
|
||||
@ Load sbd* ahead of time.
|
||||
@ -378,7 +362,7 @@ Ldec_loop:
|
||||
veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
|
||||
subs r8, r8, #1 @ sub $1,%rax # nr--
|
||||
|
||||
Ldec_entry:
|
||||
.Ldec_entry:
|
||||
@ top of round
|
||||
vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 # 0 = k
|
||||
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i
|
||||
@ -398,11 +382,11 @@ Ldec_entry:
|
||||
veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io
|
||||
veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
|
||||
vld1.64 {q0}, [r9]! @ vmovdqu (%r9), %xmm0
|
||||
bne Ldec_loop
|
||||
bne .Ldec_loop
|
||||
|
||||
@ middle of last round
|
||||
|
||||
adr r10, Lk_dsbo
|
||||
adr r10, .Lk_dsbo
|
||||
|
||||
@ Write to q1 rather than q4 to avoid overlapping table and destination.
|
||||
vld1.64 {q1}, [r10]! @ vmovdqa 0x60(%r10), %xmm4 # 3 : sbou
|
||||
@ -412,7 +396,7 @@ Ldec_entry:
|
||||
vld1.64 {q2}, [r10] @ vmovdqa 0x70(%r10), %xmm1 # 0 : sbot
|
||||
vtbl.8 d2, {q2}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t
|
||||
vtbl.8 d3, {q2}, d7
|
||||
vld1.64 {q2}, [r11] @ vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160
|
||||
vld1.64 {q2}, [r11] @ vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
|
||||
veor q4, q4, q0 @ vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k
|
||||
@ Write to q1 rather than q0 so the table and destination registers
|
||||
@ below do not overlap.
|
||||
@ -420,15 +404,13 @@ Ldec_entry:
|
||||
vtbl.8 d0, {q1}, d4 @ vpshufb %xmm2, %xmm0, %xmm0
|
||||
vtbl.8 d1, {q1}, d5
|
||||
bx lr
|
||||
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
|
||||
|
||||
|
||||
.globl _vpaes_decrypt
|
||||
.private_extern _vpaes_decrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_decrypt
|
||||
#endif
|
||||
.globl vpaes_decrypt
|
||||
.hidden vpaes_decrypt
|
||||
.type vpaes_decrypt,%function
|
||||
.align 4
|
||||
_vpaes_decrypt:
|
||||
vpaes_decrypt:
|
||||
@ _vpaes_decrypt_core uses r7-r11.
|
||||
stmdb sp!, {r7,r8,r9,r10,r11,lr}
|
||||
@ _vpaes_decrypt_core uses q4-q5 (d8-d11), which are callee-saved.
|
||||
@ -441,7 +423,7 @@ _vpaes_decrypt:
|
||||
|
||||
vldmia sp!, {d8,d9,d10,d11}
|
||||
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
|
||||
|
||||
.size vpaes_decrypt,.-vpaes_decrypt
|
||||
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
||||
@@ @@
|
||||
@@ AES key schedule @@
|
||||
@ -460,50 +442,46 @@ _vpaes_decrypt:
|
||||
@
|
||||
@ Key schedule constants
|
||||
@
|
||||
|
||||
.type _vpaes_key_consts,%object
|
||||
.align 4
|
||||
_vpaes_key_consts:
|
||||
Lk_dksd:@ decryption key schedule: invskew x*D
|
||||
.Lk_dksd:@ decryption key schedule: invskew x*D
|
||||
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
|
||||
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
|
||||
Lk_dksb:@ decryption key schedule: invskew x*B
|
||||
.Lk_dksb:@ decryption key schedule: invskew x*B
|
||||
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
|
||||
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
|
||||
Lk_dkse:@ decryption key schedule: invskew x*E + 0x63
|
||||
.Lk_dkse:@ decryption key schedule: invskew x*E + 0x63
|
||||
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
|
||||
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
|
||||
Lk_dks9:@ decryption key schedule: invskew x*9
|
||||
.Lk_dks9:@ decryption key schedule: invskew x*9
|
||||
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
|
||||
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
|
||||
|
||||
Lk_rcon:@ rcon
|
||||
.Lk_rcon:@ rcon
|
||||
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
|
||||
|
||||
Lk_opt:@ output transform
|
||||
.Lk_opt:@ output transform
|
||||
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
|
||||
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
|
||||
Lk_deskew:@ deskew tables: inverts the sbox's "skew"
|
||||
.Lk_deskew:@ deskew tables: inverts the sbox's "skew"
|
||||
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
|
||||
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
|
||||
.size _vpaes_key_consts,.-_vpaes_key_consts
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_key_preheat
|
||||
#endif
|
||||
.type _vpaes_key_preheat,%function
|
||||
.align 4
|
||||
_vpaes_key_preheat:
|
||||
adr r11, Lk_rcon
|
||||
vmov.i8 q12, #0x5b @ Lk_s63
|
||||
adr r10, Lk_inv @ Must be aligned to 8 mod 16.
|
||||
vmov.i8 q9, #0x0f @ Lk_s0F
|
||||
vld1.64 {q10,q11}, [r10] @ Lk_inv
|
||||
vld1.64 {q8}, [r11] @ Lk_rcon
|
||||
adr r11, .Lk_rcon
|
||||
vmov.i8 q12, #0x5b @ .Lk_s63
|
||||
adr r10, .Lk_inv @ Must be aligned to 8 mod 16.
|
||||
vmov.i8 q9, #0x0f @ .Lk_s0F
|
||||
vld1.64 {q10,q11}, [r10] @ .Lk_inv
|
||||
vld1.64 {q8}, [r11] @ .Lk_rcon
|
||||
bx lr
|
||||
.size _vpaes_key_preheat,.-_vpaes_key_preheat
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_schedule_core
|
||||
#endif
|
||||
.type _vpaes_schedule_core,%function
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
@ We only need to save lr, but ARM requires an 8-byte stack alignment,
|
||||
@ -512,7 +490,7 @@ _vpaes_schedule_core:
|
||||
|
||||
bl _vpaes_key_preheat @ load the tables
|
||||
|
||||
adr r11, Lk_ipt @ Must be aligned to 8 mod 16.
|
||||
adr r11, .Lk_ipt @ Must be aligned to 8 mod 16.
|
||||
vld1.64 {q0}, [r0]! @ vmovdqu (%rdi), %xmm0 # load key (unaligned)
|
||||
|
||||
@ input transform
|
||||
@ -520,18 +498,18 @@ _vpaes_schedule_core:
|
||||
@ overlap table and destination.
|
||||
vmov q4, q0 @ vmovdqa %xmm0, %xmm3
|
||||
bl _vpaes_schedule_transform
|
||||
adr r10, Lk_sr @ Must be aligned to 8 mod 16.
|
||||
adr r10, .Lk_sr @ Must be aligned to 8 mod 16.
|
||||
vmov q7, q0 @ vmovdqa %xmm0, %xmm7
|
||||
|
||||
add r8, r8, r10
|
||||
tst r3, r3
|
||||
bne Lschedule_am_decrypting
|
||||
bne .Lschedule_am_decrypting
|
||||
|
||||
@ encrypting, output zeroth round key after transform
|
||||
vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx)
|
||||
b Lschedule_go
|
||||
b .Lschedule_go
|
||||
|
||||
Lschedule_am_decrypting:
|
||||
.Lschedule_am_decrypting:
|
||||
@ decrypting, output zeroth round key after shiftrows
|
||||
vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1
|
||||
vtbl.8 d6, {q4}, d2 @ vpshufb %xmm1, %xmm3, %xmm3
|
||||
@ -539,10 +517,10 @@ Lschedule_am_decrypting:
|
||||
vst1.64 {q3}, [r2] @ vmovdqu %xmm3, (%rdx)
|
||||
eor r8, r8, #0x30 @ xor $0x30, %r8
|
||||
|
||||
Lschedule_go:
|
||||
.Lschedule_go:
|
||||
cmp r1, #192 @ cmp $192, %esi
|
||||
bhi Lschedule_256
|
||||
beq Lschedule_192
|
||||
bhi .Lschedule_256
|
||||
beq .Lschedule_192
|
||||
@ 128: fall though
|
||||
|
||||
@@
|
||||
@ -553,15 +531,15 @@ Lschedule_go:
|
||||
@@ This schedule is really simple, because all its parts
|
||||
@@ are accomplished by the subroutines.
|
||||
@@
|
||||
Lschedule_128:
|
||||
.Lschedule_128:
|
||||
mov r0, #10 @ mov $10, %esi
|
||||
|
||||
Loop_schedule_128:
|
||||
.Loop_schedule_128:
|
||||
bl _vpaes_schedule_round
|
||||
subs r0, r0, #1 @ dec %esi
|
||||
beq Lschedule_mangle_last
|
||||
beq .Lschedule_mangle_last
|
||||
bl _vpaes_schedule_mangle @ write output
|
||||
b Loop_schedule_128
|
||||
b .Loop_schedule_128
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_192
|
||||
@ -579,7 +557,7 @@ Loop_schedule_128:
|
||||
@@ keys.
|
||||
@@
|
||||
.align 4
|
||||
Lschedule_192:
|
||||
.Lschedule_192:
|
||||
sub r0, r0, #8
|
||||
vld1.64 {q0}, [r0] @ vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
|
||||
bl _vpaes_schedule_transform @ input transform
|
||||
@ -588,7 +566,7 @@ Lschedule_192:
|
||||
@ vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros
|
||||
mov r0, #4 @ mov $4, %esi
|
||||
|
||||
Loop_schedule_192:
|
||||
.Loop_schedule_192:
|
||||
bl _vpaes_schedule_round
|
||||
vext.8 q0, q6, q0, #8 @ vpalignr $8,%xmm6,%xmm0,%xmm0
|
||||
bl _vpaes_schedule_mangle @ save key n
|
||||
@ -596,10 +574,10 @@ Loop_schedule_192:
|
||||
bl _vpaes_schedule_mangle @ save key n+1
|
||||
bl _vpaes_schedule_round
|
||||
subs r0, r0, #1 @ dec %esi
|
||||
beq Lschedule_mangle_last
|
||||
beq .Lschedule_mangle_last
|
||||
bl _vpaes_schedule_mangle @ save key n+2
|
||||
bl _vpaes_schedule_192_smear
|
||||
b Loop_schedule_192
|
||||
b .Loop_schedule_192
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_256
|
||||
@ -612,19 +590,19 @@ Loop_schedule_192:
|
||||
@@ high side's, except no rcon and no rotation.
|
||||
@@
|
||||
.align 4
|
||||
Lschedule_256:
|
||||
.Lschedule_256:
|
||||
vld1.64 {q0}, [r0] @ vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
|
||||
bl _vpaes_schedule_transform @ input transform
|
||||
mov r0, #7 @ mov $7, %esi
|
||||
|
||||
Loop_schedule_256:
|
||||
.Loop_schedule_256:
|
||||
bl _vpaes_schedule_mangle @ output low result
|
||||
vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6
|
||||
|
||||
@ high round
|
||||
bl _vpaes_schedule_round
|
||||
subs r0, r0, #1 @ dec %esi
|
||||
beq Lschedule_mangle_last
|
||||
beq .Lschedule_mangle_last
|
||||
bl _vpaes_schedule_mangle
|
||||
|
||||
@ low round. swap xmm7 and xmm6
|
||||
@ -635,7 +613,7 @@ Loop_schedule_256:
|
||||
bl _vpaes_schedule_low_round
|
||||
vmov q7, q5 @ vmovdqa %xmm5, %xmm7
|
||||
|
||||
b Loop_schedule_256
|
||||
b .Loop_schedule_256
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_mangle_last
|
||||
@ -648,23 +626,23 @@ Loop_schedule_256:
|
||||
@@ Always called right before return... jumps to cleanup and exits
|
||||
@@
|
||||
.align 4
|
||||
Lschedule_mangle_last:
|
||||
.Lschedule_mangle_last:
|
||||
@ schedule last round key from xmm0
|
||||
adr r11, Lk_deskew @ lea Lk_deskew(%rip),%r11 # prepare to deskew
|
||||
adr r11, .Lk_deskew @ lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
||||
tst r3, r3
|
||||
bne Lschedule_mangle_last_dec
|
||||
bne .Lschedule_mangle_last_dec
|
||||
|
||||
@ encrypting
|
||||
vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10),%xmm1
|
||||
adr r11, Lk_opt @ lea Lk_opt(%rip), %r11 # prepare to output transform
|
||||
adr r11, .Lk_opt @ lea .Lk_opt(%rip), %r11 # prepare to output transform
|
||||
add r2, r2, #32 @ add $32, %rdx
|
||||
vmov q2, q0
|
||||
vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0 # output permute
|
||||
vtbl.8 d1, {q2}, d3
|
||||
|
||||
Lschedule_mangle_last_dec:
|
||||
.Lschedule_mangle_last_dec:
|
||||
sub r2, r2, #16 @ add $-16, %rdx
|
||||
veor q0, q0, q12 @ vpxor Lk_s63(%rip), %xmm0, %xmm0
|
||||
veor q0, q0, q12 @ vpxor .Lk_s63(%rip), %xmm0, %xmm0
|
||||
bl _vpaes_schedule_transform @ output transform
|
||||
vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx) # save last key
|
||||
|
||||
@ -678,7 +656,7 @@ Lschedule_mangle_last_dec:
|
||||
veor q6, q6, q6 @ vpxor %xmm6, %xmm6, %xmm6
|
||||
veor q7, q7, q7 @ vpxor %xmm7, %xmm7, %xmm7
|
||||
ldmia sp!, {r3,pc} @ return
|
||||
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_192_smear
|
||||
@ -693,9 +671,7 @@ Lschedule_mangle_last_dec:
|
||||
@@ q6: b+c+d b+c 0 0
|
||||
@@ q0: b+c+d b+c b a
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_schedule_192_smear
|
||||
#endif
|
||||
.type _vpaes_schedule_192_smear,%function
|
||||
.align 4
|
||||
_vpaes_schedule_192_smear:
|
||||
vmov.i8 q1, #0
|
||||
@ -708,7 +684,7 @@ _vpaes_schedule_192_smear:
|
||||
vmov q0, q6 @ vmovdqa %xmm6, %xmm0
|
||||
vmov d12, d2 @ vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros
|
||||
bx lr
|
||||
|
||||
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_round
|
||||
@ -728,9 +704,7 @@ _vpaes_schedule_192_smear:
|
||||
@@ Returns results in q7 = q0.
|
||||
@@ Clobbers q1-q4, r11.
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_schedule_round
|
||||
#endif
|
||||
.type _vpaes_schedule_round,%function
|
||||
.align 4
|
||||
_vpaes_schedule_round:
|
||||
@ extract rcon from xmm8
|
||||
@ -749,7 +723,7 @@ _vpaes_schedule_round:
|
||||
_vpaes_schedule_low_round:
|
||||
@ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12.
|
||||
@ We pin other values in _vpaes_key_preheat, so load them now.
|
||||
adr r11, Lk_sb1
|
||||
adr r11, .Lk_sb1
|
||||
vld1.64 {q14,q15}, [r11]
|
||||
|
||||
@ smear xmm7
|
||||
@ -769,7 +743,7 @@ _vpaes_schedule_low_round:
|
||||
veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
|
||||
vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
|
||||
vtbl.8 d9, {q10}, d3
|
||||
veor q7, q7, q12 @ vpxor Lk_s63(%rip), %xmm7, %xmm7
|
||||
veor q7, q7, q12 @ vpxor .Lk_s63(%rip), %xmm7, %xmm7
|
||||
vtbl.8 d6, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak
|
||||
vtbl.8 d7, {q10}, d7
|
||||
veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
|
||||
@ -787,7 +761,7 @@ _vpaes_schedule_low_round:
|
||||
veor q0, q1, q7 @ vpxor %xmm7, %xmm1, %xmm0
|
||||
veor q7, q1, q7 @ vmovdqa %xmm0, %xmm7
|
||||
bx lr
|
||||
|
||||
.size _vpaes_schedule_round,.-_vpaes_schedule_round
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_transform
|
||||
@ -798,9 +772,7 @@ _vpaes_schedule_low_round:
|
||||
@@ Output in q0
|
||||
@@ Clobbers q1, q2, q14, q15
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_schedule_transform
|
||||
#endif
|
||||
.type _vpaes_schedule_transform,%function
|
||||
.align 4
|
||||
_vpaes_schedule_transform:
|
||||
vld1.64 {q14,q15}, [r11] @ vmovdqa (%r11), %xmm2 # lo
|
||||
@ -813,7 +785,7 @@ _vpaes_schedule_transform:
|
||||
vtbl.8 d1, {q15}, d1
|
||||
veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0
|
||||
bx lr
|
||||
|
||||
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
|
||||
|
||||
@@
|
||||
@@ .aes_schedule_mangle
|
||||
@ -838,20 +810,18 @@ _vpaes_schedule_transform:
|
||||
@@ Preserves q0
|
||||
@@ Clobbers q1-q5
|
||||
@@
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_schedule_mangle
|
||||
#endif
|
||||
.type _vpaes_schedule_mangle,%function
|
||||
.align 4
|
||||
_vpaes_schedule_mangle:
|
||||
tst r3, r3
|
||||
vmov q4, q0 @ vmovdqa %xmm0, %xmm4 # save xmm0 for later
|
||||
adr r11, Lk_mc_forward @ Must be aligned to 8 mod 16.
|
||||
vld1.64 {q5}, [r11] @ vmovdqa Lk_mc_forward(%rip),%xmm5
|
||||
bne Lschedule_mangle_dec
|
||||
adr r11, .Lk_mc_forward @ Must be aligned to 8 mod 16.
|
||||
vld1.64 {q5}, [r11] @ vmovdqa .Lk_mc_forward(%rip),%xmm5
|
||||
bne .Lschedule_mangle_dec
|
||||
|
||||
@ encrypting
|
||||
@ Write to q2 so we do not overlap table and destination below.
|
||||
veor q2, q0, q12 @ vpxor Lk_s63(%rip), %xmm0, %xmm4
|
||||
veor q2, q0, q12 @ vpxor .Lk_s63(%rip), %xmm0, %xmm4
|
||||
add r2, r2, #16 @ add $16, %rdx
|
||||
vtbl.8 d8, {q2}, d10 @ vpshufb %xmm5, %xmm4, %xmm4
|
||||
vtbl.8 d9, {q2}, d11
|
||||
@ -863,11 +833,11 @@ _vpaes_schedule_mangle:
|
||||
vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1
|
||||
veor q3, q3, q4 @ vpxor %xmm4, %xmm3, %xmm3
|
||||
|
||||
b Lschedule_mangle_both
|
||||
b .Lschedule_mangle_both
|
||||
.align 4
|
||||
Lschedule_mangle_dec:
|
||||
.Lschedule_mangle_dec:
|
||||
@ inverse mix columns
|
||||
adr r11, Lk_dksd @ lea Lk_dksd(%rip),%r11
|
||||
adr r11, .Lk_dksd @ lea .Lk_dksd(%rip),%r11
|
||||
vshr.u8 q1, q4, #4 @ vpsrlb $4, %xmm4, %xmm1 # 1 = hi
|
||||
vand q4, q4, q9 @ vpand %xmm9, %xmm4, %xmm4 # 4 = lo
|
||||
|
||||
@ -921,7 +891,7 @@ Lschedule_mangle_dec:
|
||||
|
||||
sub r2, r2, #16 @ add $-16, %rdx
|
||||
|
||||
Lschedule_mangle_both:
|
||||
.Lschedule_mangle_both:
|
||||
@ Write to q2 so table and destination do not overlap.
|
||||
vtbl.8 d4, {q3}, d2 @ vpshufb %xmm1, %xmm3, %xmm3
|
||||
vtbl.8 d5, {q3}, d3
|
||||
@ -929,15 +899,13 @@ Lschedule_mangle_both:
|
||||
and r8, r8, #~(1<<6) @ and $0x30, %r8
|
||||
vst1.64 {q2}, [r2] @ vmovdqu %xmm3, (%rdx)
|
||||
bx lr
|
||||
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
|
||||
|
||||
|
||||
.globl _vpaes_set_encrypt_key
|
||||
.private_extern _vpaes_set_encrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_set_encrypt_key
|
||||
#endif
|
||||
.globl vpaes_set_encrypt_key
|
||||
.hidden vpaes_set_encrypt_key
|
||||
.type vpaes_set_encrypt_key,%function
|
||||
.align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
vpaes_set_encrypt_key:
|
||||
stmdb sp!, {r7,r8,r9,r10,r11, lr}
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
@ -952,15 +920,13 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
|
||||
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
|
||||
|
||||
|
||||
.globl _vpaes_set_decrypt_key
|
||||
.private_extern _vpaes_set_decrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_set_decrypt_key
|
||||
#endif
|
||||
.globl vpaes_set_decrypt_key
|
||||
.hidden vpaes_set_decrypt_key
|
||||
.type vpaes_set_decrypt_key,%function
|
||||
.align 4
|
||||
_vpaes_set_decrypt_key:
|
||||
vpaes_set_decrypt_key:
|
||||
stmdb sp!, {r7,r8,r9,r10,r11, lr}
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
@ -979,10 +945,10 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
|
||||
|
||||
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
|
||||
|
||||
@ Additional constants for converting to bsaes.
|
||||
|
||||
.type _vpaes_convert_consts,%object
|
||||
.align 4
|
||||
_vpaes_convert_consts:
|
||||
@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear
|
||||
@ -1016,25 +982,23 @@ _vpaes_convert_consts:
|
||||
@ table[1] |= skew(opt(i<<4)) << (i*8)
|
||||
@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[0]))
|
||||
@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[1]))
|
||||
Lk_opt_then_skew:
|
||||
.Lk_opt_then_skew:
|
||||
.quad 0x9cb8436798bc4763, 0x6440bb9f6044bf9b
|
||||
.quad 0x1f30062936192f00, 0xb49bad829db284ab
|
||||
|
||||
@ .Lk_decrypt_transform is a permutation which performs an 8-bit left-rotation
|
||||
@ followed by a byte-swap on each 32-bit word of a vector. E.g., 0x11223344
|
||||
@ becomes 0x22334411 and then 0x11443322.
|
||||
Lk_decrypt_transform:
|
||||
.Lk_decrypt_transform:
|
||||
.quad 0x0704050603000102, 0x0f0c0d0e0b08090a
|
||||
|
||||
.size _vpaes_convert_consts,.-_vpaes_convert_consts
|
||||
|
||||
@ void vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes);
|
||||
.globl _vpaes_encrypt_key_to_bsaes
|
||||
.private_extern _vpaes_encrypt_key_to_bsaes
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_encrypt_key_to_bsaes
|
||||
#endif
|
||||
.globl vpaes_encrypt_key_to_bsaes
|
||||
.hidden vpaes_encrypt_key_to_bsaes
|
||||
.type vpaes_encrypt_key_to_bsaes,%function
|
||||
.align 4
|
||||
_vpaes_encrypt_key_to_bsaes:
|
||||
vpaes_encrypt_key_to_bsaes:
|
||||
stmdb sp!, {r11, lr}
|
||||
|
||||
@ See _vpaes_schedule_core for the key schedule logic. In particular,
|
||||
@ -1050,13 +1014,13 @@ _vpaes_encrypt_key_to_bsaes:
|
||||
@ cost of extra REV and VREV32 operations in little-endian ARM.
|
||||
|
||||
vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform
|
||||
adr r2, Lk_mc_forward @ Must be aligned to 8 mod 16.
|
||||
add r3, r2, 0x90 @ Lk_sr+0x10-Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
|
||||
adr r2, .Lk_mc_forward @ Must be aligned to 8 mod 16.
|
||||
add r3, r2, 0x90 @ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
|
||||
|
||||
vld1.64 {q12}, [r2]
|
||||
vmov.i8 q10, #0x5b @ Lk_s63 from vpaes-x86_64
|
||||
adr r11, Lk_opt @ Must be aligned to 8 mod 16.
|
||||
vmov.i8 q11, #0x63 @ LK_s63 without Lk_ipt applied
|
||||
vmov.i8 q10, #0x5b @ .Lk_s63 from vpaes-x86_64
|
||||
adr r11, .Lk_opt @ Must be aligned to 8 mod 16.
|
||||
vmov.i8 q11, #0x63 @ .LK_s63 without .Lk_ipt applied
|
||||
|
||||
@ vpaes stores one fewer round count than bsaes, but the number of keys
|
||||
@ is the same.
|
||||
@ -1074,7 +1038,7 @@ _vpaes_encrypt_key_to_bsaes:
|
||||
@ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied,
|
||||
@ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63,
|
||||
@ multiplies by the circulant 0,1,1,1, then applies ShiftRows.
|
||||
Loop_enc_key_to_bsaes:
|
||||
.Loop_enc_key_to_bsaes:
|
||||
vld1.64 {q0}, [r1]!
|
||||
|
||||
@ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle
|
||||
@ -1089,7 +1053,7 @@ Loop_enc_key_to_bsaes:
|
||||
|
||||
@ Handle the last key differently.
|
||||
subs r2, r2, #1
|
||||
beq Loop_enc_key_to_bsaes_last
|
||||
beq .Loop_enc_key_to_bsaes_last
|
||||
|
||||
@ Multiply by the circulant. This is its own inverse.
|
||||
vtbl.8 d2, {q0}, d24
|
||||
@ -1107,9 +1071,9 @@ Loop_enc_key_to_bsaes:
|
||||
bl _vpaes_schedule_transform
|
||||
vrev32.8 q0, q0
|
||||
vst1.64 {q0}, [r0]!
|
||||
b Loop_enc_key_to_bsaes
|
||||
b .Loop_enc_key_to_bsaes
|
||||
|
||||
Loop_enc_key_to_bsaes_last:
|
||||
.Loop_enc_key_to_bsaes_last:
|
||||
@ The final key does not have a basis transform (note
|
||||
@ .Lschedule_mangle_last inverts the original transform). It only XORs
|
||||
@ 0x63 and applies ShiftRows. The latter was already inverted in the
|
||||
@ -1125,16 +1089,14 @@ Loop_enc_key_to_bsaes_last:
|
||||
veor q2, q2, q2
|
||||
|
||||
ldmia sp!, {r11, pc} @ return
|
||||
|
||||
.size vpaes_encrypt_key_to_bsaes,.-vpaes_encrypt_key_to_bsaes
|
||||
|
||||
@ void vpaes_decrypt_key_to_bsaes(AES_KEY *vpaes, const AES_KEY *bsaes);
|
||||
.globl _vpaes_decrypt_key_to_bsaes
|
||||
.private_extern _vpaes_decrypt_key_to_bsaes
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_decrypt_key_to_bsaes
|
||||
#endif
|
||||
.globl vpaes_decrypt_key_to_bsaes
|
||||
.hidden vpaes_decrypt_key_to_bsaes
|
||||
.type vpaes_decrypt_key_to_bsaes,%function
|
||||
.align 4
|
||||
_vpaes_decrypt_key_to_bsaes:
|
||||
vpaes_decrypt_key_to_bsaes:
|
||||
stmdb sp!, {r11, lr}
|
||||
|
||||
@ See _vpaes_schedule_core for the key schedule logic. Note vpaes
|
||||
@ -1151,9 +1113,9 @@ _vpaes_decrypt_key_to_bsaes:
|
||||
@ byteswapped, as a convenience for (unsupported) big-endian ARM, at the
|
||||
@ cost of extra REV and VREV32 operations in little-endian ARM.
|
||||
|
||||
adr r2, Lk_decrypt_transform
|
||||
adr r3, Lk_sr+0x30
|
||||
adr r11, Lk_opt_then_skew @ Input to _vpaes_schedule_transform.
|
||||
adr r2, .Lk_decrypt_transform
|
||||
adr r3, .Lk_sr+0x30
|
||||
adr r11, .Lk_opt_then_skew @ Input to _vpaes_schedule_transform.
|
||||
vld1.64 {q12}, [r2] @ Reuse q12 from encryption.
|
||||
vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform
|
||||
|
||||
@ -1173,7 +1135,7 @@ _vpaes_decrypt_key_to_bsaes:
|
||||
@ See _vpaes_schedule_mangle for the transform on the middle keys. Note
|
||||
@ it simultaneously inverts MixColumns and the S-box affine transform.
|
||||
@ See .Lk_dksd through .Lk_dks9.
|
||||
Loop_dec_key_to_bsaes:
|
||||
.Loop_dec_key_to_bsaes:
|
||||
vld1.64 {q0}, [r1]!
|
||||
|
||||
@ Invert the ShiftRows step (see .Lschedule_mangle_both). Note going
|
||||
@ -1188,7 +1150,7 @@ Loop_dec_key_to_bsaes:
|
||||
|
||||
@ Handle the last key differently.
|
||||
subs r2, r2, #1
|
||||
beq Loop_dec_key_to_bsaes_last
|
||||
beq .Loop_dec_key_to_bsaes_last
|
||||
|
||||
@ Undo the basis change and reapply the S-box affine transform.
|
||||
bl _vpaes_schedule_transform
|
||||
@ -1201,9 +1163,9 @@ Loop_dec_key_to_bsaes:
|
||||
vtbl.8 d3, {q0}, d25
|
||||
|
||||
vst1.64 {q1}, [r0]!
|
||||
b Loop_dec_key_to_bsaes
|
||||
b .Loop_dec_key_to_bsaes
|
||||
|
||||
Loop_dec_key_to_bsaes_last:
|
||||
.Loop_dec_key_to_bsaes_last:
|
||||
@ The final key only inverts ShiftRows (already done in the loop). See
|
||||
@ .Lschedule_am_decrypting. Its basis is not transformed.
|
||||
vrev32.8 q0, q0
|
||||
@ -1215,14 +1177,12 @@ Loop_dec_key_to_bsaes_last:
|
||||
veor q2, q2, q2
|
||||
|
||||
ldmia sp!, {r11, pc} @ return
|
||||
|
||||
.globl _vpaes_ctr32_encrypt_blocks
|
||||
.private_extern _vpaes_ctr32_encrypt_blocks
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _vpaes_ctr32_encrypt_blocks
|
||||
#endif
|
||||
.size vpaes_decrypt_key_to_bsaes,.-vpaes_decrypt_key_to_bsaes
|
||||
.globl vpaes_ctr32_encrypt_blocks
|
||||
.hidden vpaes_ctr32_encrypt_blocks
|
||||
.type vpaes_ctr32_encrypt_blocks,%function
|
||||
.align 4
|
||||
_vpaes_ctr32_encrypt_blocks:
|
||||
vpaes_ctr32_encrypt_blocks:
|
||||
mov ip, sp
|
||||
stmdb sp!, {r7,r8,r9,r10,r11, lr}
|
||||
@ This function uses q4-q7 (d8-d15), which are callee-saved.
|
||||
@ -1231,7 +1191,7 @@ _vpaes_ctr32_encrypt_blocks:
|
||||
cmp r2, #0
|
||||
@ r8 is passed on the stack.
|
||||
ldr r8, [ip]
|
||||
beq Lctr32_done
|
||||
beq .Lctr32_done
|
||||
|
||||
@ _vpaes_encrypt_core expects the key in r2, so swap r2 and r3.
|
||||
mov r9, r3
|
||||
@ -1245,9 +1205,9 @@ _vpaes_ctr32_encrypt_blocks:
|
||||
bl _vpaes_preheat
|
||||
rev r7, r7 @ The counter is big-endian.
|
||||
|
||||
Lctr32_loop:
|
||||
.Lctr32_loop:
|
||||
vmov q0, q7
|
||||
vld1.8 {q6}, [r0]! @ Load input ahead of time
|
||||
vld1.8 {q6}, [r0]! @ .Load input ahead of time
|
||||
bl _vpaes_encrypt_core
|
||||
veor q0, q0, q6 @ XOR input and result
|
||||
vst1.8 {q0}, [r1]!
|
||||
@ -1256,10 +1216,10 @@ Lctr32_loop:
|
||||
add r7, r7, #1
|
||||
rev r9, r7
|
||||
vmov.32 d15[1], r9
|
||||
bne Lctr32_loop
|
||||
bne .Lctr32_loop
|
||||
|
||||
Lctr32_done:
|
||||
.Lctr32_done:
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
368
third-party/boringssl/linux-arm/crypto/test/trampoline-armv4-linux.S
vendored
Normal file
368
third-party/boringssl/linux-arm/crypto/test/trampoline-armv4-linux.S
vendored
Normal file
@ -0,0 +1,368 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
.syntax unified
|
||||
|
||||
.arch armv7-a
|
||||
.fpu vfp
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls .Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
.Lstack_args_loop:
|
||||
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne .Lstack_args_loop
|
||||
|
||||
.Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ .Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi .Larg_r3
|
||||
beq .Larg_r2
|
||||
cmp r3, #1
|
||||
bhi .Larg_r1
|
||||
beq .Larg_r0
|
||||
b .Largs_done
|
||||
|
||||
.Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
.Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
.Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
.Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
.Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_r0, %function
|
||||
.globl abi_test_clobber_r0
|
||||
.hidden abi_test_clobber_r0
|
||||
.align 4
|
||||
abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r0,.-abi_test_clobber_r0
|
||||
.type abi_test_clobber_r1, %function
|
||||
.globl abi_test_clobber_r1
|
||||
.hidden abi_test_clobber_r1
|
||||
.align 4
|
||||
abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r1,.-abi_test_clobber_r1
|
||||
.type abi_test_clobber_r2, %function
|
||||
.globl abi_test_clobber_r2
|
||||
.hidden abi_test_clobber_r2
|
||||
.align 4
|
||||
abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r2,.-abi_test_clobber_r2
|
||||
.type abi_test_clobber_r3, %function
|
||||
.globl abi_test_clobber_r3
|
||||
.hidden abi_test_clobber_r3
|
||||
.align 4
|
||||
abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r3,.-abi_test_clobber_r3
|
||||
.type abi_test_clobber_r4, %function
|
||||
.globl abi_test_clobber_r4
|
||||
.hidden abi_test_clobber_r4
|
||||
.align 4
|
||||
abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r4,.-abi_test_clobber_r4
|
||||
.type abi_test_clobber_r5, %function
|
||||
.globl abi_test_clobber_r5
|
||||
.hidden abi_test_clobber_r5
|
||||
.align 4
|
||||
abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r5,.-abi_test_clobber_r5
|
||||
.type abi_test_clobber_r6, %function
|
||||
.globl abi_test_clobber_r6
|
||||
.hidden abi_test_clobber_r6
|
||||
.align 4
|
||||
abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r6,.-abi_test_clobber_r6
|
||||
.type abi_test_clobber_r7, %function
|
||||
.globl abi_test_clobber_r7
|
||||
.hidden abi_test_clobber_r7
|
||||
.align 4
|
||||
abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r7,.-abi_test_clobber_r7
|
||||
.type abi_test_clobber_r8, %function
|
||||
.globl abi_test_clobber_r8
|
||||
.hidden abi_test_clobber_r8
|
||||
.align 4
|
||||
abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r8,.-abi_test_clobber_r8
|
||||
.type abi_test_clobber_r9, %function
|
||||
.globl abi_test_clobber_r9
|
||||
.hidden abi_test_clobber_r9
|
||||
.align 4
|
||||
abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r9,.-abi_test_clobber_r9
|
||||
.type abi_test_clobber_r10, %function
|
||||
.globl abi_test_clobber_r10
|
||||
.hidden abi_test_clobber_r10
|
||||
.align 4
|
||||
abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r10,.-abi_test_clobber_r10
|
||||
.type abi_test_clobber_r11, %function
|
||||
.globl abi_test_clobber_r11
|
||||
.hidden abi_test_clobber_r11
|
||||
.align 4
|
||||
abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r11,.-abi_test_clobber_r11
|
||||
.type abi_test_clobber_r12, %function
|
||||
.globl abi_test_clobber_r12
|
||||
.hidden abi_test_clobber_r12
|
||||
.align 4
|
||||
abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r12,.-abi_test_clobber_r12
|
||||
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
|
||||
961
third-party/boringssl/linux-x86/crypto/chacha/chacha-x86-linux.S
vendored
Normal file
961
third-party/boringssl/linux-x86/crypto/chacha/chacha-x86-linux.S
vendored
Normal file
@ -0,0 +1,961 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl ChaCha20_ctr32_nohw
|
||||
.hidden ChaCha20_ctr32_nohw
|
||||
.type ChaCha20_ctr32_nohw,@function
|
||||
.align 16
|
||||
ChaCha20_ctr32_nohw:
|
||||
.L_ChaCha20_ctr32_nohw_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 32(%esp),%esi
|
||||
movl 36(%esp),%edi
|
||||
subl $132,%esp
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%edx
|
||||
movl %eax,80(%esp)
|
||||
movl %ebx,84(%esp)
|
||||
movl %ecx,88(%esp)
|
||||
movl %edx,92(%esp)
|
||||
movl 16(%esi),%eax
|
||||
movl 20(%esi),%ebx
|
||||
movl 24(%esi),%ecx
|
||||
movl 28(%esi),%edx
|
||||
movl %eax,96(%esp)
|
||||
movl %ebx,100(%esp)
|
||||
movl %ecx,104(%esp)
|
||||
movl %edx,108(%esp)
|
||||
movl (%edi),%eax
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl $1,%eax
|
||||
movl %eax,112(%esp)
|
||||
movl %ebx,116(%esp)
|
||||
movl %ecx,120(%esp)
|
||||
movl %edx,124(%esp)
|
||||
jmp .L000entry
|
||||
.align 16
|
||||
.L001outer_loop:
|
||||
movl %ebx,156(%esp)
|
||||
movl %eax,152(%esp)
|
||||
movl %ecx,160(%esp)
|
||||
.L000entry:
|
||||
movl $1634760805,%eax
|
||||
movl $857760878,4(%esp)
|
||||
movl $2036477234,8(%esp)
|
||||
movl $1797285236,12(%esp)
|
||||
movl 84(%esp),%ebx
|
||||
movl 88(%esp),%ebp
|
||||
movl 104(%esp),%ecx
|
||||
movl 108(%esp),%esi
|
||||
movl 116(%esp),%edx
|
||||
movl 120(%esp),%edi
|
||||
movl %ebx,20(%esp)
|
||||
movl %ebp,24(%esp)
|
||||
movl %ecx,40(%esp)
|
||||
movl %esi,44(%esp)
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 92(%esp),%ebx
|
||||
movl 124(%esp),%edi
|
||||
movl 112(%esp),%edx
|
||||
movl 80(%esp),%ebp
|
||||
movl 96(%esp),%ecx
|
||||
movl 100(%esp),%esi
|
||||
addl $1,%edx
|
||||
movl %ebx,28(%esp)
|
||||
movl %edi,60(%esp)
|
||||
movl %edx,112(%esp)
|
||||
movl $10,%ebx
|
||||
jmp .L002loop
|
||||
.align 16
|
||||
.L002loop:
|
||||
addl %ebp,%eax
|
||||
movl %ebx,128(%esp)
|
||||
movl %ebp,%ebx
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 52(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 20(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,48(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,32(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,16(%esp)
|
||||
addl %edi,%esi
|
||||
movl 40(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 56(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 24(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,52(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,36(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,20(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 44(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 60(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 28(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,56(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,24(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
roll $12,%ebp
|
||||
movl 20(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,%edx
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
movl %ebp,28(%esp)
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 48(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 24(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,60(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,40(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,20(%esp)
|
||||
addl %edi,%esi
|
||||
movl 32(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 52(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 28(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,48(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,44(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,24(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 36(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 56(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 16(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,52(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,28(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
movl 48(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 128(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,56(%esp)
|
||||
xorl %esi,%ebp
|
||||
roll $7,%ebp
|
||||
decl %ebx
|
||||
jnz .L002loop
|
||||
movl 160(%esp),%ebx
|
||||
addl $1634760805,%eax
|
||||
addl 80(%esp),%ebp
|
||||
addl 96(%esp),%ecx
|
||||
addl 100(%esp),%esi
|
||||
cmpl $64,%ebx
|
||||
jb .L003tail
|
||||
movl 156(%esp),%ebx
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
xorl (%ebx),%eax
|
||||
xorl 16(%ebx),%ebp
|
||||
movl %eax,(%esp)
|
||||
movl 152(%esp),%eax
|
||||
xorl 32(%ebx),%ecx
|
||||
xorl 36(%ebx),%esi
|
||||
xorl 48(%ebx),%edx
|
||||
xorl 56(%ebx),%edi
|
||||
movl %ebp,16(%eax)
|
||||
movl %ecx,32(%eax)
|
||||
movl %esi,36(%eax)
|
||||
movl %edx,48(%eax)
|
||||
movl %edi,56(%eax)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
xorl 4(%ebx),%ebp
|
||||
xorl 8(%ebx),%ecx
|
||||
xorl 12(%ebx),%esi
|
||||
xorl 20(%ebx),%edx
|
||||
xorl 24(%ebx),%edi
|
||||
movl %ebp,4(%eax)
|
||||
movl %ecx,8(%eax)
|
||||
movl %esi,12(%eax)
|
||||
movl %edx,20(%eax)
|
||||
movl %edi,24(%eax)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
xorl 28(%ebx),%ebp
|
||||
xorl 40(%ebx),%ecx
|
||||
xorl 44(%ebx),%esi
|
||||
xorl 52(%ebx),%edx
|
||||
xorl 60(%ebx),%edi
|
||||
leal 64(%ebx),%ebx
|
||||
movl %ebp,28(%eax)
|
||||
movl (%esp),%ebp
|
||||
movl %ecx,40(%eax)
|
||||
movl 160(%esp),%ecx
|
||||
movl %esi,44(%eax)
|
||||
movl %edx,52(%eax)
|
||||
movl %edi,60(%eax)
|
||||
movl %ebp,(%eax)
|
||||
leal 64(%eax),%eax
|
||||
subl $64,%ecx
|
||||
jnz .L001outer_loop
|
||||
jmp .L004done
|
||||
.L003tail:
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
movl %eax,(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %ecx,32(%esp)
|
||||
movl %esi,36(%esp)
|
||||
movl %edx,48(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
movl %ebp,4(%esp)
|
||||
movl %ecx,8(%esp)
|
||||
movl %esi,12(%esp)
|
||||
movl %edx,20(%esp)
|
||||
movl %edi,24(%esp)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
movl %ebp,28(%esp)
|
||||
movl 156(%esp),%ebp
|
||||
movl %ecx,40(%esp)
|
||||
movl 152(%esp),%ecx
|
||||
movl %esi,44(%esp)
|
||||
xorl %esi,%esi
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,60(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
.L005tail_loop:
|
||||
movb (%esi,%ebp,1),%al
|
||||
movb (%esp,%esi,1),%dl
|
||||
leal 1(%esi),%esi
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%ecx,%esi,1)
|
||||
decl %ebx
|
||||
jnz .L005tail_loop
|
||||
.L004done:
|
||||
addl $132,%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size ChaCha20_ctr32_nohw,.-.L_ChaCha20_ctr32_nohw_begin
|
||||
.globl ChaCha20_ctr32_ssse3
|
||||
.hidden ChaCha20_ctr32_ssse3
|
||||
.type ChaCha20_ctr32_ssse3,@function
|
||||
.align 16
|
||||
ChaCha20_ctr32_ssse3:
|
||||
.L_ChaCha20_ctr32_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
call .Lpic_point
|
||||
.Lpic_point:
|
||||
popl %eax
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl 36(%esp),%ebx
|
||||
movl %esp,%ebp
|
||||
subl $524,%esp
|
||||
andl $-64,%esp
|
||||
movl %ebp,512(%esp)
|
||||
leal .Lssse3_data-.Lpic_point(%eax),%eax
|
||||
movdqu (%ebx),%xmm3
|
||||
cmpl $256,%ecx
|
||||
jb .L0061x
|
||||
movl %edx,516(%esp)
|
||||
movl %ebx,520(%esp)
|
||||
subl $256,%ecx
|
||||
leal 384(%esp),%ebp
|
||||
movdqu (%edx),%xmm7
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
paddd 48(%eax),%xmm0
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
psubd 64(%eax),%xmm0
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,64(%ebp)
|
||||
movdqa %xmm1,80(%ebp)
|
||||
movdqa %xmm2,96(%ebp)
|
||||
movdqa %xmm3,112(%ebp)
|
||||
movdqu 16(%edx),%xmm3
|
||||
movdqa %xmm4,-64(%ebp)
|
||||
movdqa %xmm5,-48(%ebp)
|
||||
movdqa %xmm6,-32(%ebp)
|
||||
movdqa %xmm7,-16(%ebp)
|
||||
movdqa 32(%eax),%xmm7
|
||||
leal 128(%esp),%ebx
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,(%ebp)
|
||||
movdqa %xmm1,16(%ebp)
|
||||
movdqa %xmm2,32(%ebp)
|
||||
movdqa %xmm3,48(%ebp)
|
||||
movdqa %xmm4,-128(%ebp)
|
||||
movdqa %xmm5,-112(%ebp)
|
||||
movdqa %xmm6,-96(%ebp)
|
||||
movdqa %xmm7,-80(%ebp)
|
||||
leal 128(%esi),%esi
|
||||
leal 128(%edi),%edi
|
||||
jmp .L007outer_loop
|
||||
.align 16
|
||||
.L007outer_loop:
|
||||
movdqa -112(%ebp),%xmm1
|
||||
movdqa -96(%ebp),%xmm2
|
||||
movdqa -80(%ebp),%xmm3
|
||||
movdqa -48(%ebp),%xmm5
|
||||
movdqa -32(%ebp),%xmm6
|
||||
movdqa -16(%ebp),%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
movdqa %xmm2,-96(%ebx)
|
||||
movdqa %xmm3,-80(%ebx)
|
||||
movdqa %xmm5,-48(%ebx)
|
||||
movdqa %xmm6,-32(%ebx)
|
||||
movdqa %xmm7,-16(%ebx)
|
||||
movdqa 32(%ebp),%xmm2
|
||||
movdqa 48(%ebp),%xmm3
|
||||
movdqa 64(%ebp),%xmm4
|
||||
movdqa 80(%ebp),%xmm5
|
||||
movdqa 96(%ebp),%xmm6
|
||||
movdqa 112(%ebp),%xmm7
|
||||
paddd 64(%eax),%xmm4
|
||||
movdqa %xmm2,32(%ebx)
|
||||
movdqa %xmm3,48(%ebx)
|
||||
movdqa %xmm4,64(%ebx)
|
||||
movdqa %xmm5,80(%ebx)
|
||||
movdqa %xmm6,96(%ebx)
|
||||
movdqa %xmm7,112(%ebx)
|
||||
movdqa %xmm4,64(%ebp)
|
||||
movdqa -128(%ebp),%xmm0
|
||||
movdqa %xmm4,%xmm6
|
||||
movdqa -64(%ebp),%xmm3
|
||||
movdqa (%ebp),%xmm4
|
||||
movdqa 16(%ebp),%xmm5
|
||||
movl $10,%edx
|
||||
nop
|
||||
.align 16
|
||||
.L008loop:
|
||||
paddd %xmm3,%xmm0
|
||||
movdqa %xmm3,%xmm2
|
||||
pxor %xmm0,%xmm6
|
||||
pshufb (%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -48(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 80(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,64(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-64(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa 32(%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -32(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 96(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,80(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,16(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-48(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 48(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 112(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,96(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-32(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -48(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,%xmm6
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-16(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -32(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 64(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,112(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,32(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-48(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa (%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -16(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 80(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,64(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,48(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-32(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 16(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -64(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 96(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,80(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-16(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 64(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,96(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
por %xmm1,%xmm3
|
||||
decl %edx
|
||||
jnz .L008loop
|
||||
movdqa %xmm3,-64(%ebx)
|
||||
movdqa %xmm4,(%ebx)
|
||||
movdqa %xmm5,16(%ebx)
|
||||
movdqa %xmm6,64(%ebx)
|
||||
movdqa %xmm7,96(%ebx)
|
||||
movdqa -112(%ebx),%xmm1
|
||||
movdqa -96(%ebx),%xmm2
|
||||
movdqa -80(%ebx),%xmm3
|
||||
paddd -128(%ebp),%xmm0
|
||||
paddd -112(%ebp),%xmm1
|
||||
paddd -96(%ebp),%xmm2
|
||||
paddd -80(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa -64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa -48(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa -32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd -64(%ebp),%xmm0
|
||||
paddd -48(%ebp),%xmm1
|
||||
paddd -32(%ebp),%xmm2
|
||||
paddd -16(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa (%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 48(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd (%ebp),%xmm0
|
||||
paddd 16(%ebp),%xmm1
|
||||
paddd 32(%ebp),%xmm2
|
||||
paddd 48(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 112(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd 64(%ebp),%xmm0
|
||||
paddd 80(%ebp),%xmm1
|
||||
paddd 96(%ebp),%xmm2
|
||||
paddd 112(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 208(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
pxor %xmm1,%xmm5
|
||||
pxor %xmm2,%xmm6
|
||||
pxor %xmm3,%xmm7
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 208(%edi),%edi
|
||||
subl $256,%ecx
|
||||
jnc .L007outer_loop
|
||||
addl $256,%ecx
|
||||
jz .L009done
|
||||
movl 520(%esp),%ebx
|
||||
leal -128(%esi),%esi
|
||||
movl 516(%esp),%edx
|
||||
leal -128(%edi),%edi
|
||||
movd 64(%ebp),%xmm2
|
||||
movdqu (%ebx),%xmm3
|
||||
paddd 96(%eax),%xmm2
|
||||
pand 112(%eax),%xmm3
|
||||
por %xmm2,%xmm3
|
||||
.L0061x:
|
||||
movdqa 32(%eax),%xmm0
|
||||
movdqu (%edx),%xmm1
|
||||
movdqu 16(%edx),%xmm2
|
||||
movdqa (%eax),%xmm6
|
||||
movdqa 16(%eax),%xmm7
|
||||
movl %ebp,48(%esp)
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
movl $10,%edx
|
||||
jmp .L010loop1x
|
||||
.align 16
|
||||
.L011outer1x:
|
||||
movdqa 80(%eax),%xmm3
|
||||
movdqa (%esp),%xmm0
|
||||
movdqa 16(%esp),%xmm1
|
||||
movdqa 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
movl $10,%edx
|
||||
movdqa %xmm3,48(%esp)
|
||||
jmp .L010loop1x
|
||||
.align 16
|
||||
.L010loop1x:
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $57,%xmm1,%xmm1
|
||||
pshufd $147,%xmm3,%xmm3
|
||||
nop
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $147,%xmm1,%xmm1
|
||||
pshufd $57,%xmm3,%xmm3
|
||||
decl %edx
|
||||
jnz .L010loop1x
|
||||
paddd (%esp),%xmm0
|
||||
paddd 16(%esp),%xmm1
|
||||
paddd 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
cmpl $64,%ecx
|
||||
jb .L012tail
|
||||
movdqu (%esi),%xmm4
|
||||
movdqu 16(%esi),%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
movdqu 32(%esi),%xmm4
|
||||
pxor %xmm5,%xmm1
|
||||
movdqu 48(%esi),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm3
|
||||
leal 64(%esi),%esi
|
||||
movdqu %xmm0,(%edi)
|
||||
movdqu %xmm1,16(%edi)
|
||||
movdqu %xmm2,32(%edi)
|
||||
movdqu %xmm3,48(%edi)
|
||||
leal 64(%edi),%edi
|
||||
subl $64,%ecx
|
||||
jnz .L011outer1x
|
||||
jmp .L009done
|
||||
.L012tail:
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
xorl %ebp,%ebp
|
||||
.L013tail_loop:
|
||||
movb (%esp,%ebp,1),%al
|
||||
movb (%esi,%ebp,1),%dl
|
||||
leal 1(%ebp),%ebp
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%edi,%ebp,1)
|
||||
decl %ecx
|
||||
jnz .L013tail_loop
|
||||
.L009done:
|
||||
movl 512(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin
|
||||
.align 64
|
||||
.Lssse3_data:
|
||||
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
|
||||
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
|
||||
.long 1634760805,857760878,2036477234,1797285236
|
||||
.long 0,1,2,3
|
||||
.long 4,4,4,4
|
||||
.long 1,0,0,0
|
||||
.long 4,0,0,0
|
||||
.long 0,-1,-1,-1
|
||||
.align 64
|
||||
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
|
||||
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
|
||||
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
||||
.byte 114,103,62,0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
2511
third-party/boringssl/linux-x86/crypto/fipsmodule/aesni-x86-linux.S
vendored
Normal file
2511
third-party/boringssl/linux-x86/crypto/fipsmodule/aesni-x86-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
995
third-party/boringssl/linux-x86/crypto/fipsmodule/bn-586-linux.S
vendored
Normal file
995
third-party/boringssl/linux-x86/crypto/fipsmodule/bn-586-linux.S
vendored
Normal file
@ -0,0 +1,995 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl bn_mul_add_words
|
||||
.hidden bn_mul_add_words
|
||||
.type bn_mul_add_words,@function
|
||||
.align 16
|
||||
bn_mul_add_words:
|
||||
.L_bn_mul_add_words_begin:
|
||||
call .L000PIC_me_up
|
||||
.L000PIC_me_up:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L001maw_non_sse2
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
movl 12(%esp),%ecx
|
||||
movd 16(%esp),%mm0
|
||||
pxor %mm1,%mm1
|
||||
jmp .L002maw_sse2_entry
|
||||
.align 16
|
||||
.L003maw_sse2_unrolled:
|
||||
movd (%eax),%mm3
|
||||
paddq %mm3,%mm1
|
||||
movd (%edx),%mm2
|
||||
pmuludq %mm0,%mm2
|
||||
movd 4(%edx),%mm4
|
||||
pmuludq %mm0,%mm4
|
||||
movd 8(%edx),%mm6
|
||||
pmuludq %mm0,%mm6
|
||||
movd 12(%edx),%mm7
|
||||
pmuludq %mm0,%mm7
|
||||
paddq %mm2,%mm1
|
||||
movd 4(%eax),%mm3
|
||||
paddq %mm4,%mm3
|
||||
movd 8(%eax),%mm5
|
||||
paddq %mm6,%mm5
|
||||
movd 12(%eax),%mm4
|
||||
paddq %mm4,%mm7
|
||||
movd %mm1,(%eax)
|
||||
movd 16(%edx),%mm2
|
||||
pmuludq %mm0,%mm2
|
||||
psrlq $32,%mm1
|
||||
movd 20(%edx),%mm4
|
||||
pmuludq %mm0,%mm4
|
||||
paddq %mm3,%mm1
|
||||
movd 24(%edx),%mm6
|
||||
pmuludq %mm0,%mm6
|
||||
movd %mm1,4(%eax)
|
||||
psrlq $32,%mm1
|
||||
movd 28(%edx),%mm3
|
||||
addl $32,%edx
|
||||
pmuludq %mm0,%mm3
|
||||
paddq %mm5,%mm1
|
||||
movd 16(%eax),%mm5
|
||||
paddq %mm5,%mm2
|
||||
movd %mm1,8(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm7,%mm1
|
||||
movd 20(%eax),%mm5
|
||||
paddq %mm5,%mm4
|
||||
movd %mm1,12(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm2,%mm1
|
||||
movd 24(%eax),%mm5
|
||||
paddq %mm5,%mm6
|
||||
movd %mm1,16(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm4,%mm1
|
||||
movd 28(%eax),%mm5
|
||||
paddq %mm5,%mm3
|
||||
movd %mm1,20(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm6,%mm1
|
||||
movd %mm1,24(%eax)
|
||||
psrlq $32,%mm1
|
||||
paddq %mm3,%mm1
|
||||
movd %mm1,28(%eax)
|
||||
leal 32(%eax),%eax
|
||||
psrlq $32,%mm1
|
||||
subl $8,%ecx
|
||||
jz .L004maw_sse2_exit
|
||||
.L002maw_sse2_entry:
|
||||
testl $4294967288,%ecx
|
||||
jnz .L003maw_sse2_unrolled
|
||||
.align 4
|
||||
.L005maw_sse2_loop:
|
||||
movd (%edx),%mm2
|
||||
movd (%eax),%mm3
|
||||
pmuludq %mm0,%mm2
|
||||
leal 4(%edx),%edx
|
||||
paddq %mm3,%mm1
|
||||
paddq %mm2,%mm1
|
||||
movd %mm1,(%eax)
|
||||
subl $1,%ecx
|
||||
psrlq $32,%mm1
|
||||
leal 4(%eax),%eax
|
||||
jnz .L005maw_sse2_loop
|
||||
.L004maw_sse2_exit:
|
||||
movd %mm1,%eax
|
||||
emms
|
||||
ret
|
||||
.align 16
|
||||
.L001maw_non_sse2:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
xorl %esi,%esi
|
||||
movl 20(%esp),%edi
|
||||
movl 28(%esp),%ecx
|
||||
movl 24(%esp),%ebx
|
||||
andl $4294967288,%ecx
|
||||
movl 32(%esp),%ebp
|
||||
pushl %ecx
|
||||
jz .L006maw_finish
|
||||
.align 16
|
||||
.L007maw_loop:
|
||||
|
||||
movl (%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl (%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 4(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 4(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 8(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 8(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 12(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 12(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 16(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 16(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 20(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 20(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 24(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 24(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 28(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 28(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,28(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
subl $8,%ecx
|
||||
leal 32(%ebx),%ebx
|
||||
leal 32(%edi),%edi
|
||||
jnz .L007maw_loop
|
||||
.L006maw_finish:
|
||||
movl 32(%esp),%ecx
|
||||
andl $7,%ecx
|
||||
jnz .L008maw_finish2
|
||||
jmp .L009maw_end
|
||||
.L008maw_finish2:
|
||||
|
||||
movl (%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl (%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
jz .L009maw_end
|
||||
|
||||
movl 4(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 4(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
jz .L009maw_end
|
||||
|
||||
movl 8(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 8(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
jz .L009maw_end
|
||||
|
||||
movl 12(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 12(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
jz .L009maw_end
|
||||
|
||||
movl 16(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 16(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
jz .L009maw_end
|
||||
|
||||
movl 20(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 20(%edi),%eax
|
||||
adcl $0,%edx
|
||||
decl %ecx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
jz .L009maw_end
|
||||
|
||||
movl 24(%ebx),%eax
|
||||
mull %ebp
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
addl 24(%edi),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
.L009maw_end:
|
||||
movl %esi,%eax
|
||||
popl %ecx
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_mul_add_words,.-.L_bn_mul_add_words_begin
|
||||
.globl bn_mul_words
|
||||
.hidden bn_mul_words
|
||||
.type bn_mul_words,@function
|
||||
.align 16
|
||||
bn_mul_words:
|
||||
.L_bn_mul_words_begin:
|
||||
call .L010PIC_me_up
|
||||
.L010PIC_me_up:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L011mw_non_sse2
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
movl 12(%esp),%ecx
|
||||
movd 16(%esp),%mm0
|
||||
pxor %mm1,%mm1
|
||||
.align 16
|
||||
.L012mw_sse2_loop:
|
||||
movd (%edx),%mm2
|
||||
pmuludq %mm0,%mm2
|
||||
leal 4(%edx),%edx
|
||||
paddq %mm2,%mm1
|
||||
movd %mm1,(%eax)
|
||||
subl $1,%ecx
|
||||
psrlq $32,%mm1
|
||||
leal 4(%eax),%eax
|
||||
jnz .L012mw_sse2_loop
|
||||
movd %mm1,%eax
|
||||
emms
|
||||
ret
|
||||
.align 16
|
||||
.L011mw_non_sse2:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
xorl %esi,%esi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%ebx
|
||||
movl 28(%esp),%ebp
|
||||
movl 32(%esp),%ecx
|
||||
andl $4294967288,%ebp
|
||||
jz .L013mw_finish
|
||||
.L014mw_loop:
|
||||
|
||||
movl (%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 4(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 8(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 12(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 16(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 20(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 24(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
movl 28(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,28(%edi)
|
||||
movl %edx,%esi
|
||||
|
||||
addl $32,%ebx
|
||||
addl $32,%edi
|
||||
subl $8,%ebp
|
||||
jz .L013mw_finish
|
||||
jmp .L014mw_loop
|
||||
.L013mw_finish:
|
||||
movl 28(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jnz .L015mw_finish2
|
||||
jmp .L016mw_end
|
||||
.L015mw_finish2:
|
||||
|
||||
movl (%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz .L016mw_end
|
||||
|
||||
movl 4(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,4(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz .L016mw_end
|
||||
|
||||
movl 8(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,8(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz .L016mw_end
|
||||
|
||||
movl 12(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,12(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz .L016mw_end
|
||||
|
||||
movl 16(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz .L016mw_end
|
||||
|
||||
movl 20(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,20(%edi)
|
||||
movl %edx,%esi
|
||||
decl %ebp
|
||||
jz .L016mw_end
|
||||
|
||||
movl 24(%ebx),%eax
|
||||
mull %ecx
|
||||
addl %esi,%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,24(%edi)
|
||||
movl %edx,%esi
|
||||
.L016mw_end:
|
||||
movl %esi,%eax
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_mul_words,.-.L_bn_mul_words_begin
|
||||
.globl bn_sqr_words
|
||||
.hidden bn_sqr_words
|
||||
.type bn_sqr_words,@function
|
||||
.align 16
|
||||
bn_sqr_words:
|
||||
.L_bn_sqr_words_begin:
|
||||
call .L017PIC_me_up
|
||||
.L017PIC_me_up:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L018sqr_non_sse2
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
movl 12(%esp),%ecx
|
||||
.align 16
|
||||
.L019sqr_sse2_loop:
|
||||
movd (%edx),%mm0
|
||||
pmuludq %mm0,%mm0
|
||||
leal 4(%edx),%edx
|
||||
movq %mm0,(%eax)
|
||||
subl $1,%ecx
|
||||
leal 8(%eax),%eax
|
||||
jnz .L019sqr_sse2_loop
|
||||
emms
|
||||
ret
|
||||
.align 16
|
||||
.L018sqr_non_sse2:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%esi
|
||||
movl 24(%esp),%edi
|
||||
movl 28(%esp),%ebx
|
||||
andl $4294967288,%ebx
|
||||
jz .L020sw_finish
|
||||
.L021sw_loop:
|
||||
|
||||
movl (%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
|
||||
movl 4(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,8(%esi)
|
||||
movl %edx,12(%esi)
|
||||
|
||||
movl 8(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,16(%esi)
|
||||
movl %edx,20(%esi)
|
||||
|
||||
movl 12(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,24(%esi)
|
||||
movl %edx,28(%esi)
|
||||
|
||||
movl 16(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,32(%esi)
|
||||
movl %edx,36(%esi)
|
||||
|
||||
movl 20(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,40(%esi)
|
||||
movl %edx,44(%esi)
|
||||
|
||||
movl 24(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,48(%esi)
|
||||
movl %edx,52(%esi)
|
||||
|
||||
movl 28(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,56(%esi)
|
||||
movl %edx,60(%esi)
|
||||
|
||||
addl $32,%edi
|
||||
addl $64,%esi
|
||||
subl $8,%ebx
|
||||
jnz .L021sw_loop
|
||||
.L020sw_finish:
|
||||
movl 28(%esp),%ebx
|
||||
andl $7,%ebx
|
||||
jz .L022sw_end
|
||||
|
||||
movl (%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,(%esi)
|
||||
decl %ebx
|
||||
movl %edx,4(%esi)
|
||||
jz .L022sw_end
|
||||
|
||||
movl 4(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,8(%esi)
|
||||
decl %ebx
|
||||
movl %edx,12(%esi)
|
||||
jz .L022sw_end
|
||||
|
||||
movl 8(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,16(%esi)
|
||||
decl %ebx
|
||||
movl %edx,20(%esi)
|
||||
jz .L022sw_end
|
||||
|
||||
movl 12(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,24(%esi)
|
||||
decl %ebx
|
||||
movl %edx,28(%esi)
|
||||
jz .L022sw_end
|
||||
|
||||
movl 16(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,32(%esi)
|
||||
decl %ebx
|
||||
movl %edx,36(%esi)
|
||||
jz .L022sw_end
|
||||
|
||||
movl 20(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,40(%esi)
|
||||
decl %ebx
|
||||
movl %edx,44(%esi)
|
||||
jz .L022sw_end
|
||||
|
||||
movl 24(%edi),%eax
|
||||
mull %eax
|
||||
movl %eax,48(%esi)
|
||||
movl %edx,52(%esi)
|
||||
.L022sw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_sqr_words,.-.L_bn_sqr_words_begin
|
||||
.globl bn_div_words
|
||||
.hidden bn_div_words
|
||||
.type bn_div_words,@function
|
||||
.align 16
|
||||
bn_div_words:
|
||||
.L_bn_div_words_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
movl 12(%esp),%ecx
|
||||
divl %ecx
|
||||
ret
|
||||
.size bn_div_words,.-.L_bn_div_words_begin
|
||||
.globl bn_add_words
|
||||
.hidden bn_add_words
|
||||
.type bn_add_words,@function
|
||||
.align 16
|
||||
bn_add_words:
|
||||
.L_bn_add_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz .L023aw_finish
|
||||
.L024aw_loop:
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L024aw_loop
|
||||
.L023aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L025aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz .L025aw_end
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz .L025aw_end
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz .L025aw_end
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz .L025aw_end
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz .L025aw_end
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz .L025aw_end
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
addl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
addl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
.L025aw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_add_words,.-.L_bn_add_words_begin
|
||||
.globl bn_sub_words
|
||||
.hidden bn_sub_words
|
||||
.type bn_sub_words,@function
|
||||
.align 16
|
||||
bn_sub_words:
|
||||
.L_bn_sub_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz .L026aw_finish
|
||||
.L027aw_loop:
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L027aw_loop
|
||||
.L026aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L028aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz .L028aw_end
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz .L028aw_end
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz .L028aw_end
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz .L028aw_end
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz .L028aw_end
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz .L028aw_end
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
.L028aw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_sub_words,.-.L_bn_sub_words_begin
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
1264
third-party/boringssl/linux-x86/crypto/fipsmodule/co-586-linux.S
vendored
Normal file
1264
third-party/boringssl/linux-x86/crypto/fipsmodule/co-586-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
292
third-party/boringssl/linux-x86/crypto/fipsmodule/ghash-ssse3-x86-linux.S
vendored
Normal file
292
third-party/boringssl/linux-x86/crypto/fipsmodule/ghash-ssse3-x86-linux.S
vendored
Normal file
@ -0,0 +1,292 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl gcm_gmult_ssse3
|
||||
.hidden gcm_gmult_ssse3
|
||||
.type gcm_gmult_ssse3,@function
|
||||
.align 16
|
||||
gcm_gmult_ssse3:
|
||||
.L_gcm_gmult_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movdqu (%edi),%xmm0
|
||||
call .L000pic_point
|
||||
.L000pic_point:
|
||||
popl %eax
|
||||
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
|
||||
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
|
||||
.byte 102,15,56,0,199
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
.L001loop_row_1:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L001loop_row_1
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
.L002loop_row_2:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L002loop_row_2
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
.L003loop_row_3:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L003loop_row_3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
.byte 102,15,56,0,215
|
||||
movdqu %xmm2,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
|
||||
.globl gcm_ghash_ssse3
|
||||
.hidden gcm_ghash_ssse3
|
||||
.type gcm_ghash_ssse3,@function
|
||||
.align 16
|
||||
gcm_ghash_ssse3:
|
||||
.L_gcm_ghash_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edx
|
||||
movl 32(%esp),%ecx
|
||||
movdqu (%edi),%xmm0
|
||||
call .L004pic_point
|
||||
.L004pic_point:
|
||||
popl %ebx
|
||||
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
|
||||
andl $-16,%ecx
|
||||
.byte 102,15,56,0,199
|
||||
pxor %xmm3,%xmm3
|
||||
.L005loop_ghash:
|
||||
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
|
||||
movdqu (%edx),%xmm1
|
||||
.byte 102,15,56,0,207
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
movl $5,%eax
|
||||
.L006loop_row_4:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L006loop_row_4
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
.L007loop_row_5:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L007loop_row_5
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
.L008loop_row_6:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L008loop_row_6
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movdqa %xmm2,%xmm0
|
||||
leal -256(%esi),%esi
|
||||
leal 16(%edx),%edx
|
||||
subl $16,%ecx
|
||||
jnz .L005loop_ghash
|
||||
.byte 102,15,56,0,199
|
||||
movdqu %xmm0,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
|
||||
.align 16
|
||||
.Lreverse_bytes:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.align 16
|
||||
.Llow4_mask:
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
328
third-party/boringssl/linux-x86/crypto/fipsmodule/ghash-x86-linux.S
vendored
Normal file
328
third-party/boringssl/linux-x86/crypto/fipsmodule/ghash-x86-linux.S
vendored
Normal file
@ -0,0 +1,328 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl gcm_init_clmul
|
||||
.hidden gcm_init_clmul
|
||||
.type gcm_init_clmul,@function
|
||||
.align 16
|
||||
gcm_init_clmul:
|
||||
.L_gcm_init_clmul_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
call .L000pic
|
||||
.L000pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L000pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm2
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $255,%xmm2,%xmm4
|
||||
movdqa %xmm2,%xmm3
|
||||
psllq $1,%xmm2
|
||||
pxor %xmm5,%xmm5
|
||||
psrlq $63,%xmm3
|
||||
pcmpgtd %xmm4,%xmm5
|
||||
pslldq $8,%xmm3
|
||||
por %xmm3,%xmm2
|
||||
pand 16(%ecx),%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm2,%xmm0
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pshufd $78,%xmm2,%xmm4
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm2,%xmm4
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
pshufd $78,%xmm2,%xmm3
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
pxor %xmm2,%xmm3
|
||||
movdqu %xmm2,(%edx)
|
||||
pxor %xmm0,%xmm4
|
||||
movdqu %xmm0,16(%edx)
|
||||
.byte 102,15,58,15,227,8
|
||||
movdqu %xmm4,32(%edx)
|
||||
ret
|
||||
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
|
||||
.globl gcm_gmult_clmul
|
||||
.hidden gcm_gmult_clmul
|
||||
.type gcm_gmult_clmul,@function
|
||||
.align 16
|
||||
gcm_gmult_clmul:
|
||||
.L_gcm_gmult_clmul_begin:
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
call .L001pic
|
||||
.L001pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L001pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movups (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
movups 32(%edx),%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
ret
|
||||
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
|
||||
.globl gcm_ghash_clmul
|
||||
.hidden gcm_ghash_clmul
|
||||
.type gcm_ghash_clmul,@function
|
||||
.align 16
|
||||
gcm_ghash_clmul:
|
||||
.L_gcm_ghash_clmul_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%eax
|
||||
movl 24(%esp),%edx
|
||||
movl 28(%esp),%esi
|
||||
movl 32(%esp),%ebx
|
||||
call .L002pic
|
||||
.L002pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L002pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movdqu (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
subl $16,%ebx
|
||||
jz .L003odd_tail
|
||||
movdqu (%esi),%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
.byte 102,15,56,0,221
|
||||
.byte 102,15,56,0,245
|
||||
movdqu 32(%edx),%xmm5
|
||||
pxor %xmm3,%xmm0
|
||||
pshufd $78,%xmm6,%xmm3
|
||||
movdqa %xmm6,%xmm7
|
||||
pxor %xmm6,%xmm3
|
||||
leal 32(%esi),%esi
|
||||
.byte 102,15,58,68,242,0
|
||||
.byte 102,15,58,68,250,17
|
||||
.byte 102,15,58,68,221,0
|
||||
movups 16(%edx),%xmm2
|
||||
nop
|
||||
subl $32,%ebx
|
||||
jbe .L004even_tail
|
||||
jmp .L005mod_loop
|
||||
.align 32
|
||||
.L005mod_loop:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
nop
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,229,16
|
||||
movups (%edx),%xmm2
|
||||
xorps %xmm6,%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
xorps %xmm7,%xmm1
|
||||
movdqu (%esi),%xmm7
|
||||
pxor %xmm0,%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
pxor %xmm1,%xmm3
|
||||
.byte 102,15,56,0,253
|
||||
pxor %xmm3,%xmm4
|
||||
movdqa %xmm4,%xmm3
|
||||
psrldq $8,%xmm4
|
||||
pslldq $8,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm3,%xmm0
|
||||
.byte 102,15,56,0,245
|
||||
pxor %xmm7,%xmm1
|
||||
movdqa %xmm6,%xmm7
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
.byte 102,15,58,68,242,0
|
||||
movups 32(%edx),%xmm5
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
pshufd $78,%xmm7,%xmm3
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm7,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
.byte 102,15,58,68,250,17
|
||||
movups 16(%edx),%xmm2
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,58,68,221,0
|
||||
leal 32(%esi),%esi
|
||||
subl $32,%ebx
|
||||
ja .L005mod_loop
|
||||
.L004even_tail:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,229,16
|
||||
movdqa (%ecx),%xmm5
|
||||
xorps %xmm6,%xmm0
|
||||
xorps %xmm7,%xmm1
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm1,%xmm3
|
||||
pxor %xmm3,%xmm4
|
||||
movdqa %xmm4,%xmm3
|
||||
psrldq $8,%xmm4
|
||||
pslldq $8,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm3,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
testl %ebx,%ebx
|
||||
jnz .L006done
|
||||
movups (%edx),%xmm2
|
||||
.L003odd_tail:
|
||||
movdqu (%esi),%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
pxor %xmm3,%xmm0
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pshufd $78,%xmm2,%xmm4
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm2,%xmm4
|
||||
.byte 102,15,58,68,194,0
|
||||
.byte 102,15,58,68,202,17
|
||||
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.L006done:
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
|
||||
.align 64
|
||||
.Lbswap:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
.byte 0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
686
third-party/boringssl/linux-x86/crypto/fipsmodule/md5-586-linux.S
vendored
Normal file
686
third-party/boringssl/linux-x86/crypto/fipsmodule/md5-586-linux.S
vendored
Normal file
@ -0,0 +1,686 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl md5_block_asm_data_order
|
||||
.hidden md5_block_asm_data_order
|
||||
.type md5_block_asm_data_order,@function
|
||||
.align 16
|
||||
md5_block_asm_data_order:
|
||||
.L_md5_block_asm_data_order_begin:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
movl 20(%esp),%ecx
|
||||
pushl %ebp
|
||||
shll $6,%ecx
|
||||
pushl %ebx
|
||||
addl %esi,%ecx
|
||||
subl $64,%ecx
|
||||
movl (%edi),%eax
|
||||
pushl %ecx
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
.L000start:
|
||||
|
||||
|
||||
movl %ecx,%edi
|
||||
movl (%esi),%ebp
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 3614090360(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 4(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 3905402710(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 8(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 606105819(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 12(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 3250441966(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 16(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 4118548399(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 20(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 1200080426(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 24(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 2821735955(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 28(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 4249261313(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 32(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 1770035416(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 36(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 2336552879(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 40(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 4294925233(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 44(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 2304563134(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 48(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 1804603682(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 52(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 4254626195(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 56(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 2792965006(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 60(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 1236535329(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 4(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
|
||||
leal 4129170786(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 24(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
leal 3225465664(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 44(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
leal 643717713(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl (%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
leal 3921069994(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
leal 3593408605(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 40(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
leal 38016083(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 60(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
leal 3634488961(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 16(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
leal 3889429448(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 36(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
leal 568446438(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 56(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
leal 3275163606(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 12(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
leal 4107603335(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 32(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
leal 1163531501(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 52(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
leal 2850285829(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 8(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
leal 4243563512(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 28(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
leal 1735328473(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 48(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
leal 2368359562(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 4294588738(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 32(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
leal 2272392833(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 44(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 1839030562(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 56(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
leal 4259657740(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 4(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 2763975236(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 16(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
leal 1272893353(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 28(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 4139469664(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 40(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
leal 3200236656(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 52(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 681279174(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl (%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
leal 3936430074(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 12(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 3572445317(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 24(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
leal 76029189(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 36(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 3654602809(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 48(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
leal 3873151461(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 60(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 530742520(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 8(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
leal 3299628645(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl (%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
orl %ebx,%edi
|
||||
leal 4096336452(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 28(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
orl %eax,%edi
|
||||
leal 1126891415(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 56(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
orl %edx,%edi
|
||||
leal 2878612391(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 4237533241(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 48(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
|
||||
orl %ebx,%edi
|
||||
leal 1700485571(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 12(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
orl %eax,%edi
|
||||
leal 2399980690(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 40(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
orl %edx,%edi
|
||||
leal 4293915773(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 4(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 2240044497(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 32(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
|
||||
orl %ebx,%edi
|
||||
leal 1873313359(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 60(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
orl %eax,%edi
|
||||
leal 4264355552(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 24(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
orl %edx,%edi
|
||||
leal 2734768916(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 52(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 1309151649(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 16(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
|
||||
orl %ebx,%edi
|
||||
leal 4149444226(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 44(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
orl %eax,%edi
|
||||
leal 3174756917(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 8(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
orl %edx,%edi
|
||||
leal 718787259(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 36(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 3951481745(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 24(%esp),%ebp
|
||||
addl %edi,%ebx
|
||||
addl $64,%esi
|
||||
roll $21,%ebx
|
||||
movl (%ebp),%edi
|
||||
addl %ecx,%ebx
|
||||
addl %edi,%eax
|
||||
movl 4(%ebp),%edi
|
||||
addl %edi,%ebx
|
||||
movl 8(%ebp),%edi
|
||||
addl %edi,%ecx
|
||||
movl 12(%ebp),%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,(%ebp)
|
||||
movl %ebx,4(%ebp)
|
||||
movl (%esp),%edi
|
||||
movl %ecx,8(%ebp)
|
||||
movl %edx,12(%ebp)
|
||||
cmpl %esi,%edi
|
||||
jae .L000start
|
||||
popl %eax
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
3788
third-party/boringssl/linux-x86/crypto/fipsmodule/sha1-586-linux.S
vendored
Normal file
3788
third-party/boringssl/linux-x86/crypto/fipsmodule/sha1-586-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5599
third-party/boringssl/linux-x86/crypto/fipsmodule/sha256-586-linux.S
vendored
Normal file
5599
third-party/boringssl/linux-x86/crypto/fipsmodule/sha256-586-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2835
third-party/boringssl/linux-x86/crypto/fipsmodule/sha512-586-linux.S
vendored
Normal file
2835
third-party/boringssl/linux-x86/crypto/fipsmodule/sha512-586-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
706
third-party/boringssl/linux-x86/crypto/fipsmodule/vpaes-x86-linux.S
vendored
Normal file
706
third-party/boringssl/linux-x86/crypto/fipsmodule/vpaes-x86-linux.S
vendored
Normal file
@ -0,0 +1,706 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.align 64
|
||||
.L_vpaes_consts:
|
||||
.long 218628480,235210255,168496130,67568393
|
||||
.long 252381056,17041926,33884169,51187212
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
.long 1512730624,3266504856,1377990664,3401244816
|
||||
.long 830229760,1275146365,2969422977,3447763452
|
||||
.long 3411033600,2979783055,338359620,2782886510
|
||||
.long 4209124096,907596821,221174255,1006095553
|
||||
.long 191964160,3799684038,3164090317,1589111125
|
||||
.long 182528256,1777043520,2877432650,3265356744
|
||||
.long 1874708224,3503451415,3305285752,363511674
|
||||
.long 1606117888,3487855781,1093350906,2384367825
|
||||
.long 197121,67569157,134941193,202313229
|
||||
.long 67569157,134941193,202313229,197121
|
||||
.long 134941193,202313229,197121,67569157
|
||||
.long 202313229,197121,67569157,134941193
|
||||
.long 33619971,100992007,168364043,235736079
|
||||
.long 235736079,33619971,100992007,168364043
|
||||
.long 168364043,235736079,33619971,100992007
|
||||
.long 100992007,168364043,235736079,33619971
|
||||
.long 50462976,117835012,185207048,252579084
|
||||
.long 252314880,51251460,117574920,184942860
|
||||
.long 184682752,252054788,50987272,118359308
|
||||
.long 118099200,185467140,251790600,50727180
|
||||
.long 2946363062,528716217,1300004225,1881839624
|
||||
.long 1532713819,1532713819,1532713819,1532713819
|
||||
.long 3602276352,4288629033,3737020424,4153884961
|
||||
.long 1354558464,32357713,2958822624,3775749553
|
||||
.long 1201988352,132424512,1572796698,503232858
|
||||
.long 2213177600,1597421020,4103937655,675398315
|
||||
.long 2749646592,4273543773,1511898873,121693092
|
||||
.long 3040248576,1103263732,2871565598,1608280554
|
||||
.long 2236667136,2588920351,482954393,64377734
|
||||
.long 3069987328,291237287,2117370568,3650299247
|
||||
.long 533321216,3573750986,2572112006,1401264716
|
||||
.long 1339849704,2721158661,548607111,3445553514
|
||||
.long 2128193280,3054596040,2183486460,1257083700
|
||||
.long 655635200,1165381986,3923443150,2344132524
|
||||
.long 190078720,256924420,290342170,357187870
|
||||
.long 1610966272,2263057382,4103205268,309794674
|
||||
.long 2592527872,2233205587,1335446729,3402964816
|
||||
.long 3973531904,3225098121,3002836325,1918774430
|
||||
.long 3870401024,2102906079,2284471353,4117666579
|
||||
.long 617007872,1021508343,366931923,691083277
|
||||
.long 2528395776,3491914898,2968704004,1613121270
|
||||
.long 3445188352,3247741094,844474987,4093578302
|
||||
.long 651481088,1190302358,1689581232,574775300
|
||||
.long 4289380608,206939853,2555985458,2489840491
|
||||
.long 2130264064,327674451,3566485037,3349835193
|
||||
.long 2470714624,316102159,3636825756,3393945945
|
||||
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
|
||||
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
|
||||
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
|
||||
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
|
||||
.byte 118,101,114,115,105,116,121,41,0
|
||||
.align 64
|
||||
.hidden _vpaes_preheat
|
||||
.type _vpaes_preheat,@function
|
||||
.align 16
|
||||
_vpaes_preheat:
|
||||
addl (%esp),%ebp
|
||||
movdqa -48(%ebp),%xmm7
|
||||
movdqa -16(%ebp),%xmm6
|
||||
ret
|
||||
.size _vpaes_preheat,.-_vpaes_preheat
|
||||
.hidden _vpaes_encrypt_core
|
||||
.type _vpaes_encrypt_core,@function
|
||||
.align 16
|
||||
_vpaes_encrypt_core:
|
||||
movl $16,%ecx
|
||||
movl 240(%edx),%eax
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa (%ebp),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
pand %xmm6,%xmm0
|
||||
movdqu (%edx),%xmm5
|
||||
.byte 102,15,56,0,208
|
||||
movdqa 16(%ebp),%xmm0
|
||||
pxor %xmm5,%xmm2
|
||||
psrld $4,%xmm1
|
||||
addl $16,%edx
|
||||
.byte 102,15,56,0,193
|
||||
leal 192(%ebp),%ebx
|
||||
pxor %xmm2,%xmm0
|
||||
jmp .L000enc_entry
|
||||
.align 16
|
||||
.L001enc_loop:
|
||||
movdqa 32(%ebp),%xmm4
|
||||
movdqa 48(%ebp),%xmm0
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm5,%xmm4
|
||||
movdqa 64(%ebp),%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa -64(%ebx,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,234
|
||||
movdqa 80(%ebp),%xmm2
|
||||
movdqa (%ebx,%ecx,1),%xmm4
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm0,%xmm3
|
||||
pxor %xmm5,%xmm2
|
||||
.byte 102,15,56,0,193
|
||||
addl $16,%edx
|
||||
pxor %xmm2,%xmm0
|
||||
.byte 102,15,56,0,220
|
||||
addl $16,%ecx
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,193
|
||||
andl $48,%ecx
|
||||
subl $1,%eax
|
||||
pxor %xmm3,%xmm0
|
||||
.L000enc_entry:
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa -32(%ebp),%xmm5
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm6,%xmm0
|
||||
.byte 102,15,56,0,232
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,217
|
||||
movdqa %xmm7,%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
.byte 102,15,56,0,224
|
||||
movdqa %xmm7,%xmm2
|
||||
pxor %xmm5,%xmm4
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm0,%xmm2
|
||||
.byte 102,15,56,0,220
|
||||
movdqu (%edx),%xmm5
|
||||
pxor %xmm1,%xmm3
|
||||
jnz .L001enc_loop
|
||||
movdqa 96(%ebp),%xmm4
|
||||
movdqa 112(%ebp),%xmm0
|
||||
.byte 102,15,56,0,226
|
||||
pxor %xmm5,%xmm4
|
||||
.byte 102,15,56,0,195
|
||||
movdqa 64(%ebx,%ecx,1),%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
ret
|
||||
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
|
||||
.hidden _vpaes_decrypt_core
|
||||
.type _vpaes_decrypt_core,@function
|
||||
.align 16
|
||||
_vpaes_decrypt_core:
|
||||
leal 608(%ebp),%ebx
|
||||
movl 240(%edx),%eax
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa -64(%ebx),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
movl %eax,%ecx
|
||||
psrld $4,%xmm1
|
||||
movdqu (%edx),%xmm5
|
||||
shll $4,%ecx
|
||||
pand %xmm6,%xmm0
|
||||
.byte 102,15,56,0,208
|
||||
movdqa -48(%ebx),%xmm0
|
||||
xorl $48,%ecx
|
||||
.byte 102,15,56,0,193
|
||||
andl $48,%ecx
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa 176(%ebp),%xmm5
|
||||
pxor %xmm2,%xmm0
|
||||
addl $16,%edx
|
||||
leal -352(%ebx,%ecx,1),%ecx
|
||||
jmp .L002dec_entry
|
||||
.align 16
|
||||
.L003dec_loop:
|
||||
movdqa -32(%ebx),%xmm4
|
||||
movdqa -16(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa (%ebx),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 16(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 32(%ebx),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 48(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 64(%ebx),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 80(%ebx),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
addl $16,%edx
|
||||
.byte 102,15,58,15,237,12
|
||||
pxor %xmm1,%xmm0
|
||||
subl $1,%eax
|
||||
.L002dec_entry:
|
||||
movdqa %xmm6,%xmm1
|
||||
movdqa -32(%ebp),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
pand %xmm6,%xmm0
|
||||
psrld $4,%xmm1
|
||||
.byte 102,15,56,0,208
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,217
|
||||
movdqa %xmm7,%xmm4
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,224
|
||||
pxor %xmm2,%xmm4
|
||||
movdqa %xmm7,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm7,%xmm3
|
||||
pxor %xmm0,%xmm2
|
||||
.byte 102,15,56,0,220
|
||||
movdqu (%edx),%xmm0
|
||||
pxor %xmm1,%xmm3
|
||||
jnz .L003dec_loop
|
||||
movdqa 96(%ebx),%xmm4
|
||||
.byte 102,15,56,0,226
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 112(%ebx),%xmm0
|
||||
movdqa (%ecx),%xmm2
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,194
|
||||
ret
|
||||
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
|
||||
.hidden _vpaes_schedule_core
|
||||
.type _vpaes_schedule_core,@function
|
||||
.align 16
|
||||
_vpaes_schedule_core:
|
||||
addl (%esp),%ebp
|
||||
movdqu (%esi),%xmm0
|
||||
movdqa 320(%ebp),%xmm2
|
||||
movdqa %xmm0,%xmm3
|
||||
leal (%ebp),%ebx
|
||||
movdqa %xmm2,4(%esp)
|
||||
call _vpaes_schedule_transform
|
||||
movdqa %xmm0,%xmm7
|
||||
testl %edi,%edi
|
||||
jnz .L004schedule_am_decrypting
|
||||
movdqu %xmm0,(%edx)
|
||||
jmp .L005schedule_go
|
||||
.L004schedule_am_decrypting:
|
||||
movdqa 256(%ebp,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,217
|
||||
movdqu %xmm3,(%edx)
|
||||
xorl $48,%ecx
|
||||
.L005schedule_go:
|
||||
cmpl $192,%eax
|
||||
ja .L006schedule_256
|
||||
je .L007schedule_192
|
||||
.L008schedule_128:
|
||||
movl $10,%eax
|
||||
.L009loop_schedule_128:
|
||||
call _vpaes_schedule_round
|
||||
decl %eax
|
||||
jz .L010schedule_mangle_last
|
||||
call _vpaes_schedule_mangle
|
||||
jmp .L009loop_schedule_128
|
||||
.align 16
|
||||
.L007schedule_192:
|
||||
movdqu 8(%esi),%xmm0
|
||||
call _vpaes_schedule_transform
|
||||
movdqa %xmm0,%xmm6
|
||||
pxor %xmm4,%xmm4
|
||||
movhlps %xmm4,%xmm6
|
||||
movl $4,%eax
|
||||
.L011loop_schedule_192:
|
||||
call _vpaes_schedule_round
|
||||
.byte 102,15,58,15,198,8
|
||||
call _vpaes_schedule_mangle
|
||||
call _vpaes_schedule_192_smear
|
||||
call _vpaes_schedule_mangle
|
||||
call _vpaes_schedule_round
|
||||
decl %eax
|
||||
jz .L010schedule_mangle_last
|
||||
call _vpaes_schedule_mangle
|
||||
call _vpaes_schedule_192_smear
|
||||
jmp .L011loop_schedule_192
|
||||
.align 16
|
||||
.L006schedule_256:
|
||||
movdqu 16(%esi),%xmm0
|
||||
call _vpaes_schedule_transform
|
||||
movl $7,%eax
|
||||
.L012loop_schedule_256:
|
||||
call _vpaes_schedule_mangle
|
||||
movdqa %xmm0,%xmm6
|
||||
call _vpaes_schedule_round
|
||||
decl %eax
|
||||
jz .L010schedule_mangle_last
|
||||
call _vpaes_schedule_mangle
|
||||
pshufd $255,%xmm0,%xmm0
|
||||
movdqa %xmm7,20(%esp)
|
||||
movdqa %xmm6,%xmm7
|
||||
call .L_vpaes_schedule_low_round
|
||||
movdqa 20(%esp),%xmm7
|
||||
jmp .L012loop_schedule_256
|
||||
.align 16
|
||||
.L010schedule_mangle_last:
|
||||
leal 384(%ebp),%ebx
|
||||
testl %edi,%edi
|
||||
jnz .L013schedule_mangle_last_dec
|
||||
movdqa 256(%ebp,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,193
|
||||
leal 352(%ebp),%ebx
|
||||
addl $32,%edx
|
||||
.L013schedule_mangle_last_dec:
|
||||
addl $-16,%edx
|
||||
pxor 336(%ebp),%xmm0
|
||||
call _vpaes_schedule_transform
|
||||
movdqu %xmm0,(%edx)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
ret
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
.hidden _vpaes_schedule_192_smear
|
||||
.type _vpaes_schedule_192_smear,@function
|
||||
.align 16
|
||||
_vpaes_schedule_192_smear:
|
||||
pshufd $128,%xmm6,%xmm1
|
||||
pshufd $254,%xmm7,%xmm0
|
||||
pxor %xmm1,%xmm6
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm6,%xmm0
|
||||
movhlps %xmm1,%xmm6
|
||||
ret
|
||||
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
|
||||
.hidden _vpaes_schedule_round
|
||||
.type _vpaes_schedule_round,@function
|
||||
.align 16
|
||||
_vpaes_schedule_round:
|
||||
movdqa 8(%esp),%xmm2
|
||||
pxor %xmm1,%xmm1
|
||||
.byte 102,15,58,15,202,15
|
||||
.byte 102,15,58,15,210,15
|
||||
pxor %xmm1,%xmm7
|
||||
pshufd $255,%xmm0,%xmm0
|
||||
.byte 102,15,58,15,192,1
|
||||
movdqa %xmm2,8(%esp)
|
||||
.L_vpaes_schedule_low_round:
|
||||
movdqa %xmm7,%xmm1
|
||||
pslldq $4,%xmm7
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm7,%xmm1
|
||||
pslldq $8,%xmm7
|
||||
pxor %xmm1,%xmm7
|
||||
pxor 336(%ebp),%xmm7
|
||||
movdqa -16(%ebp),%xmm4
|
||||
movdqa -48(%ebp),%xmm5
|
||||
movdqa %xmm4,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movdqa -32(%ebp),%xmm2
|
||||
.byte 102,15,56,0,208
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm5,%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
movdqa %xmm5,%xmm4
|
||||
.byte 102,15,56,0,224
|
||||
pxor %xmm2,%xmm4
|
||||
movdqa %xmm5,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
pxor %xmm0,%xmm2
|
||||
movdqa %xmm5,%xmm3
|
||||
.byte 102,15,56,0,220
|
||||
pxor %xmm1,%xmm3
|
||||
movdqa 32(%ebp),%xmm4
|
||||
.byte 102,15,56,0,226
|
||||
movdqa 48(%ebp),%xmm0
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm7,%xmm0
|
||||
movdqa %xmm0,%xmm7
|
||||
ret
|
||||
.size _vpaes_schedule_round,.-_vpaes_schedule_round
|
||||
.hidden _vpaes_schedule_transform
|
||||
.type _vpaes_schedule_transform,@function
|
||||
.align 16
|
||||
_vpaes_schedule_transform:
|
||||
movdqa -16(%ebp),%xmm2
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
movdqa (%ebx),%xmm2
|
||||
.byte 102,15,56,0,208
|
||||
movdqa 16(%ebx),%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
pxor %xmm2,%xmm0
|
||||
ret
|
||||
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
|
||||
.hidden _vpaes_schedule_mangle
|
||||
.type _vpaes_schedule_mangle,@function
|
||||
.align 16
|
||||
_vpaes_schedule_mangle:
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa 128(%ebp),%xmm5
|
||||
testl %edi,%edi
|
||||
jnz .L014schedule_mangle_dec
|
||||
addl $16,%edx
|
||||
pxor 336(%ebp),%xmm4
|
||||
.byte 102,15,56,0,229
|
||||
movdqa %xmm4,%xmm3
|
||||
.byte 102,15,56,0,229
|
||||
pxor %xmm4,%xmm3
|
||||
.byte 102,15,56,0,229
|
||||
pxor %xmm4,%xmm3
|
||||
jmp .L015schedule_mangle_both
|
||||
.align 16
|
||||
.L014schedule_mangle_dec:
|
||||
movdqa -16(%ebp),%xmm2
|
||||
leal 416(%ebp),%esi
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm4,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm4
|
||||
movdqa (%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
movdqa 16(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
movdqa 32(%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 48(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
movdqa 64(%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 80(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
movdqa 96(%esi),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 112(%esi),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
addl $-16,%edx
|
||||
.L015schedule_mangle_both:
|
||||
movdqa 256(%ebp,%ecx,1),%xmm1
|
||||
.byte 102,15,56,0,217
|
||||
addl $-16,%ecx
|
||||
andl $48,%ecx
|
||||
movdqu %xmm3,(%edx)
|
||||
ret
|
||||
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
|
||||
.globl vpaes_set_encrypt_key
|
||||
.hidden vpaes_set_encrypt_key
|
||||
.type vpaes_set_encrypt_key,@function
|
||||
.align 16
|
||||
vpaes_set_encrypt_key:
|
||||
.L_vpaes_set_encrypt_key_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L016pic
|
||||
.L016pic:
|
||||
popl %ebx
|
||||
leal BORINGSSL_function_hit+5-.L016pic(%ebx),%ebx
|
||||
movl $1,%edx
|
||||
movb %dl,(%ebx)
|
||||
popl %edx
|
||||
popl %ebx
|
||||
#endif
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%eax
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movl %eax,%ebx
|
||||
shrl $5,%ebx
|
||||
addl $5,%ebx
|
||||
movl %ebx,240(%edx)
|
||||
movl $48,%ecx
|
||||
movl $0,%edi
|
||||
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
|
||||
call _vpaes_schedule_core
|
||||
.L017pic_point:
|
||||
movl 48(%esp),%esp
|
||||
xorl %eax,%eax
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
|
||||
.globl vpaes_set_decrypt_key
|
||||
.hidden vpaes_set_decrypt_key
|
||||
.type vpaes_set_decrypt_key,@function
|
||||
.align 16
|
||||
vpaes_set_decrypt_key:
|
||||
.L_vpaes_set_decrypt_key_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%eax
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movl %eax,%ebx
|
||||
shrl $5,%ebx
|
||||
addl $5,%ebx
|
||||
movl %ebx,240(%edx)
|
||||
shll $4,%ebx
|
||||
leal 16(%edx,%ebx,1),%edx
|
||||
movl $1,%edi
|
||||
movl %eax,%ecx
|
||||
shrl $1,%ecx
|
||||
andl $32,%ecx
|
||||
xorl $32,%ecx
|
||||
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
|
||||
call _vpaes_schedule_core
|
||||
.L018pic_point:
|
||||
movl 48(%esp),%esp
|
||||
xorl %eax,%eax
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
|
||||
.globl vpaes_encrypt
|
||||
.hidden vpaes_encrypt
|
||||
.type vpaes_encrypt,@function
|
||||
.align 16
|
||||
vpaes_encrypt:
|
||||
.L_vpaes_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L019pic
|
||||
.L019pic:
|
||||
popl %ebx
|
||||
leal BORINGSSL_function_hit+4-.L019pic(%ebx),%ebx
|
||||
movl $1,%edx
|
||||
movb %dl,(%ebx)
|
||||
popl %edx
|
||||
popl %ebx
|
||||
#endif
|
||||
leal .L_vpaes_consts+0x30-.L020pic_point,%ebp
|
||||
call _vpaes_preheat
|
||||
.L020pic_point:
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%edi
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movdqu (%esi),%xmm0
|
||||
call _vpaes_encrypt_core
|
||||
movdqu %xmm0,(%edi)
|
||||
movl 48(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size vpaes_encrypt,.-.L_vpaes_encrypt_begin
|
||||
.globl vpaes_decrypt
|
||||
.hidden vpaes_decrypt
|
||||
.type vpaes_decrypt,@function
|
||||
.align 16
|
||||
vpaes_decrypt:
|
||||
.L_vpaes_decrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
|
||||
call _vpaes_preheat
|
||||
.L021pic_point:
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%edi
|
||||
andl $-16,%ebx
|
||||
movl 28(%esp),%edx
|
||||
xchgl %esp,%ebx
|
||||
movl %ebx,48(%esp)
|
||||
movdqu (%esi),%xmm0
|
||||
call _vpaes_decrypt_core
|
||||
movdqu %xmm0,(%edi)
|
||||
movl 48(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size vpaes_decrypt,.-.L_vpaes_decrypt_begin
|
||||
.globl vpaes_cbc_encrypt
|
||||
.hidden vpaes_cbc_encrypt
|
||||
.type vpaes_cbc_encrypt,@function
|
||||
.align 16
|
||||
vpaes_cbc_encrypt:
|
||||
.L_vpaes_cbc_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%esi
|
||||
movl 24(%esp),%edi
|
||||
movl 28(%esp),%eax
|
||||
movl 32(%esp),%edx
|
||||
subl $16,%eax
|
||||
jc .L022cbc_abort
|
||||
leal -56(%esp),%ebx
|
||||
movl 36(%esp),%ebp
|
||||
andl $-16,%ebx
|
||||
movl 40(%esp),%ecx
|
||||
xchgl %esp,%ebx
|
||||
movdqu (%ebp),%xmm1
|
||||
subl %esi,%edi
|
||||
movl %ebx,48(%esp)
|
||||
movl %edi,(%esp)
|
||||
movl %edx,4(%esp)
|
||||
movl %ebp,8(%esp)
|
||||
movl %eax,%edi
|
||||
leal .L_vpaes_consts+0x30-.L023pic_point,%ebp
|
||||
call _vpaes_preheat
|
||||
.L023pic_point:
|
||||
cmpl $0,%ecx
|
||||
je .L024cbc_dec_loop
|
||||
jmp .L025cbc_enc_loop
|
||||
.align 16
|
||||
.L025cbc_enc_loop:
|
||||
movdqu (%esi),%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
call _vpaes_encrypt_core
|
||||
movl (%esp),%ebx
|
||||
movl 4(%esp),%edx
|
||||
movdqa %xmm0,%xmm1
|
||||
movdqu %xmm0,(%ebx,%esi,1)
|
||||
leal 16(%esi),%esi
|
||||
subl $16,%edi
|
||||
jnc .L025cbc_enc_loop
|
||||
jmp .L026cbc_done
|
||||
.align 16
|
||||
.L024cbc_dec_loop:
|
||||
movdqu (%esi),%xmm0
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm0,32(%esp)
|
||||
call _vpaes_decrypt_core
|
||||
movl (%esp),%ebx
|
||||
movl 4(%esp),%edx
|
||||
pxor 16(%esp),%xmm0
|
||||
movdqa 32(%esp),%xmm1
|
||||
movdqu %xmm0,(%ebx,%esi,1)
|
||||
leal 16(%esi),%esi
|
||||
subl $16,%edi
|
||||
jnc .L024cbc_dec_loop
|
||||
.L026cbc_done:
|
||||
movl 8(%esp),%ebx
|
||||
movl 48(%esp),%esp
|
||||
movdqu %xmm1,(%ebx)
|
||||
.L022cbc_abort:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
482
third-party/boringssl/linux-x86/crypto/fipsmodule/x86-mont-linux.S
vendored
Normal file
482
third-party/boringssl/linux-x86/crypto/fipsmodule/x86-mont-linux.S
vendored
Normal file
@ -0,0 +1,482 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl bn_mul_mont
|
||||
.hidden bn_mul_mont
|
||||
.type bn_mul_mont,@function
|
||||
.align 16
|
||||
bn_mul_mont:
|
||||
.L_bn_mul_mont_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
xorl %eax,%eax
|
||||
movl 40(%esp),%edi
|
||||
cmpl $4,%edi
|
||||
jl .L000just_leave
|
||||
leal 20(%esp),%esi
|
||||
leal 24(%esp),%edx
|
||||
addl $2,%edi
|
||||
negl %edi
|
||||
leal -32(%esp,%edi,4),%ebp
|
||||
negl %edi
|
||||
movl %ebp,%eax
|
||||
subl %edx,%eax
|
||||
andl $2047,%eax
|
||||
subl %eax,%ebp
|
||||
xorl %ebp,%edx
|
||||
andl $2048,%edx
|
||||
xorl $2048,%edx
|
||||
subl %edx,%ebp
|
||||
andl $-64,%ebp
|
||||
movl %esp,%eax
|
||||
subl %ebp,%eax
|
||||
andl $-4096,%eax
|
||||
movl %esp,%edx
|
||||
leal (%ebp,%eax,1),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja .L001page_walk
|
||||
jmp .L002page_walk_done
|
||||
.align 16
|
||||
.L001page_walk:
|
||||
leal -4096(%esp),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja .L001page_walk
|
||||
.L002page_walk_done:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%ebp
|
||||
movl 16(%esi),%esi
|
||||
movl (%esi),%esi
|
||||
movl %eax,4(%esp)
|
||||
movl %ebx,8(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %esi,20(%esp)
|
||||
leal -3(%edi),%ebx
|
||||
movl %edx,24(%esp)
|
||||
call .L003PIC_me_up
|
||||
.L003PIC_me_up:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L004non_sse2
|
||||
movl $-1,%eax
|
||||
movd %eax,%mm7
|
||||
movl 8(%esp),%esi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebp
|
||||
xorl %edx,%edx
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
movq %mm5,%mm2
|
||||
movq %mm5,%mm0
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L0051st:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
leal 1(%ecx),%ecx
|
||||
cmpl %ebx,%ecx
|
||||
jl .L0051st
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm2,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
incl %edx
|
||||
.L006outer:
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi,%edx,4),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd 32(%esp),%mm6
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
paddq %mm6,%mm5
|
||||
movq %mm5,%mm0
|
||||
movq %mm5,%mm2
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 36(%esp),%mm6
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
incl %ecx
|
||||
decl %ebx
|
||||
.L007inner:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
movd 36(%esp,%ecx,4),%mm6
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
decl %ebx
|
||||
leal 1(%ecx),%ecx
|
||||
jnz .L007inner
|
||||
movl %ecx,%ebx
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
movd 36(%esp,%ebx,4),%mm6
|
||||
paddq %mm2,%mm3
|
||||
paddq %mm6,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
leal 1(%edx),%edx
|
||||
cmpl %ebx,%edx
|
||||
jle .L006outer
|
||||
emms
|
||||
jmp .L008common_tail
|
||||
.align 16
|
||||
.L004non_sse2:
|
||||
movl 8(%esp),%esi
|
||||
leal 1(%ebx),%ebp
|
||||
movl 12(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
movl %esi,%edx
|
||||
andl $1,%ebp
|
||||
subl %edi,%edx
|
||||
leal 4(%edi,%ebx,4),%eax
|
||||
orl %edx,%ebp
|
||||
movl (%edi),%edi
|
||||
jz .L009bn_sqr_mont
|
||||
movl %eax,28(%esp)
|
||||
movl (%esi),%eax
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L010mull:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %eax,%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
movl (%esi,%ecx,4),%eax
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L010mull
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
movl 20(%esp),%edi
|
||||
addl %ebp,%eax
|
||||
movl 16(%esp),%esi
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
movl %eax,32(%esp,%ebx,4)
|
||||
xorl %ecx,%ecx
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
movl (%esi),%eax
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
incl %ecx
|
||||
jmp .L0112ndmadd
|
||||
.align 16
|
||||
.L0121stmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L0121stmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
movl %ebp,32(%esp,%ebx,4)
|
||||
adcl $0,%ecx
|
||||
movl (%esi),%eax
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0112ndmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0112ndmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
xorl %eax,%eax
|
||||
movl 12(%esp),%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
leal 4(%ecx),%ecx
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl 28(%esp),%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L008common_tail
|
||||
movl (%ecx),%edi
|
||||
movl 8(%esp),%esi
|
||||
movl %ecx,12(%esp)
|
||||
xorl %ecx,%ecx
|
||||
xorl %edx,%edx
|
||||
movl (%esi),%eax
|
||||
jmp .L0121stmadd
|
||||
.align 16
|
||||
.L009bn_sqr_mont:
|
||||
movl %ebx,(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %edi,%eax
|
||||
mull %edi
|
||||
movl %eax,32(%esp)
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L013sqr:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
shrl $31,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %eax,%ebx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L013sqr
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
imull 32(%esp),%edi
|
||||
shrl $31,%eax
|
||||
movl %ebp,32(%esp,%ecx,4)
|
||||
leal (%eax,%edx,2),%ebp
|
||||
movl (%esi),%eax
|
||||
shrl $31,%edx
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
movl %edx,40(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl %ecx,%ebx
|
||||
adcl $0,%edx
|
||||
movl 4(%esi),%eax
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0143rdmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl 4(%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 36(%esp,%ecx,4),%ebp
|
||||
leal 2(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0143rdmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
movl 12(%esp),%ecx
|
||||
xorl %eax,%eax
|
||||
movl 8(%esp),%esi
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl %ebx,%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L008common_tail
|
||||
movl 4(%esi,%ecx,4),%edi
|
||||
leal 1(%ecx),%ecx
|
||||
movl %edi,%eax
|
||||
movl %ecx,12(%esp)
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,32(%esp,%ecx,4)
|
||||
xorl %ebp,%ebp
|
||||
cmpl %ebx,%ecx
|
||||
leal 1(%ecx),%ecx
|
||||
je .L015sqrlast
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
.align 16
|
||||
.L016sqradd:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal (%eax,%eax,1),%ebp
|
||||
adcl $0,%edx
|
||||
shrl $31,%eax
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%eax
|
||||
addl %ebx,%ebp
|
||||
adcl $0,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %eax,%ebx
|
||||
jle .L016sqradd
|
||||
movl %edx,%ebp
|
||||
addl %edx,%edx
|
||||
shrl $31,%ebp
|
||||
addl %ebx,%edx
|
||||
adcl $0,%ebp
|
||||
.L015sqrlast:
|
||||
movl 20(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
imull 32(%esp),%edi
|
||||
addl 32(%esp,%ecx,4),%edx
|
||||
movl (%esi),%eax
|
||||
adcl $0,%ebp
|
||||
movl %edx,32(%esp,%ecx,4)
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
leal -1(%ecx),%ebx
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
movl 4(%esi),%eax
|
||||
jmp .L0143rdmadd
|
||||
.align 16
|
||||
.L008common_tail:
|
||||
movl 16(%esp),%ebp
|
||||
movl 4(%esp),%edi
|
||||
leal 32(%esp),%esi
|
||||
movl (%esi),%eax
|
||||
movl %ebx,%ecx
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L017sub:
|
||||
sbbl (%ebp,%edx,4),%eax
|
||||
movl %eax,(%edi,%edx,4)
|
||||
decl %ecx
|
||||
movl 4(%esi,%edx,4),%eax
|
||||
leal 1(%edx),%edx
|
||||
jge .L017sub
|
||||
sbbl $0,%eax
|
||||
movl $-1,%edx
|
||||
xorl %eax,%edx
|
||||
jmp .L018copy
|
||||
.align 16
|
||||
.L018copy:
|
||||
movl 32(%esp,%ebx,4),%esi
|
||||
movl (%edi,%ebx,4),%ebp
|
||||
movl %ecx,32(%esp,%ebx,4)
|
||||
andl %eax,%esi
|
||||
andl %edx,%ebp
|
||||
orl %esi,%ebp
|
||||
movl %ebp,(%edi,%ebx,4)
|
||||
decl %ebx
|
||||
jge .L018copy
|
||||
movl 24(%esp),%esp
|
||||
movl $1,%eax
|
||||
.L000just_leave:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_mul_mont,.-.L_bn_mul_mont_begin
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
|
||||
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
|
||||
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
204
third-party/boringssl/linux-x86/crypto/test/trampoline-x86-linux.S
vendored
Normal file
204
third-party/boringssl/linux-x86/crypto/test/trampoline-x86-linux.S
vendored
Normal file
@ -0,0 +1,204 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
.text
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.type abi_test_trampoline,@function
|
||||
.align 16
|
||||
abi_test_trampoline:
|
||||
.L_abi_test_trampoline_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 24(%esp),%ecx
|
||||
movl (%ecx),%esi
|
||||
movl 4(%ecx),%edi
|
||||
movl 8(%ecx),%ebx
|
||||
movl 12(%ecx),%ebp
|
||||
subl $44,%esp
|
||||
movl 72(%esp),%eax
|
||||
xorl %ecx,%ecx
|
||||
.L000loop:
|
||||
cmpl 76(%esp),%ecx
|
||||
jae .L001loop_done
|
||||
movl (%eax,%ecx,4),%edx
|
||||
movl %edx,(%esp,%ecx,4)
|
||||
addl $1,%ecx
|
||||
jmp .L000loop
|
||||
.L001loop_done:
|
||||
call *64(%esp)
|
||||
addl $44,%esp
|
||||
movl 24(%esp),%ecx
|
||||
movl %esi,(%ecx)
|
||||
movl %edi,4(%ecx)
|
||||
movl %ebx,8(%ecx)
|
||||
movl %ebp,12(%ecx)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size abi_test_trampoline,.-.L_abi_test_trampoline_begin
|
||||
.globl abi_test_get_and_clear_direction_flag
|
||||
.hidden abi_test_get_and_clear_direction_flag
|
||||
.type abi_test_get_and_clear_direction_flag,@function
|
||||
.align 16
|
||||
abi_test_get_and_clear_direction_flag:
|
||||
.L_abi_test_get_and_clear_direction_flag_begin:
|
||||
pushfl
|
||||
popl %eax
|
||||
andl $1024,%eax
|
||||
shrl $10,%eax
|
||||
cld
|
||||
ret
|
||||
.size abi_test_get_and_clear_direction_flag,.-.L_abi_test_get_and_clear_direction_flag_begin
|
||||
.globl abi_test_set_direction_flag
|
||||
.hidden abi_test_set_direction_flag
|
||||
.type abi_test_set_direction_flag,@function
|
||||
.align 16
|
||||
abi_test_set_direction_flag:
|
||||
.L_abi_test_set_direction_flag_begin:
|
||||
std
|
||||
ret
|
||||
.size abi_test_set_direction_flag,.-.L_abi_test_set_direction_flag_begin
|
||||
.globl abi_test_clobber_eax
|
||||
.hidden abi_test_clobber_eax
|
||||
.type abi_test_clobber_eax,@function
|
||||
.align 16
|
||||
abi_test_clobber_eax:
|
||||
.L_abi_test_clobber_eax_begin:
|
||||
xorl %eax,%eax
|
||||
ret
|
||||
.size abi_test_clobber_eax,.-.L_abi_test_clobber_eax_begin
|
||||
.globl abi_test_clobber_ebx
|
||||
.hidden abi_test_clobber_ebx
|
||||
.type abi_test_clobber_ebx,@function
|
||||
.align 16
|
||||
abi_test_clobber_ebx:
|
||||
.L_abi_test_clobber_ebx_begin:
|
||||
xorl %ebx,%ebx
|
||||
ret
|
||||
.size abi_test_clobber_ebx,.-.L_abi_test_clobber_ebx_begin
|
||||
.globl abi_test_clobber_ecx
|
||||
.hidden abi_test_clobber_ecx
|
||||
.type abi_test_clobber_ecx,@function
|
||||
.align 16
|
||||
abi_test_clobber_ecx:
|
||||
.L_abi_test_clobber_ecx_begin:
|
||||
xorl %ecx,%ecx
|
||||
ret
|
||||
.size abi_test_clobber_ecx,.-.L_abi_test_clobber_ecx_begin
|
||||
.globl abi_test_clobber_edx
|
||||
.hidden abi_test_clobber_edx
|
||||
.type abi_test_clobber_edx,@function
|
||||
.align 16
|
||||
abi_test_clobber_edx:
|
||||
.L_abi_test_clobber_edx_begin:
|
||||
xorl %edx,%edx
|
||||
ret
|
||||
.size abi_test_clobber_edx,.-.L_abi_test_clobber_edx_begin
|
||||
.globl abi_test_clobber_edi
|
||||
.hidden abi_test_clobber_edi
|
||||
.type abi_test_clobber_edi,@function
|
||||
.align 16
|
||||
abi_test_clobber_edi:
|
||||
.L_abi_test_clobber_edi_begin:
|
||||
xorl %edi,%edi
|
||||
ret
|
||||
.size abi_test_clobber_edi,.-.L_abi_test_clobber_edi_begin
|
||||
.globl abi_test_clobber_esi
|
||||
.hidden abi_test_clobber_esi
|
||||
.type abi_test_clobber_esi,@function
|
||||
.align 16
|
||||
abi_test_clobber_esi:
|
||||
.L_abi_test_clobber_esi_begin:
|
||||
xorl %esi,%esi
|
||||
ret
|
||||
.size abi_test_clobber_esi,.-.L_abi_test_clobber_esi_begin
|
||||
.globl abi_test_clobber_ebp
|
||||
.hidden abi_test_clobber_ebp
|
||||
.type abi_test_clobber_ebp,@function
|
||||
.align 16
|
||||
abi_test_clobber_ebp:
|
||||
.L_abi_test_clobber_ebp_begin:
|
||||
xorl %ebp,%ebp
|
||||
ret
|
||||
.size abi_test_clobber_ebp,.-.L_abi_test_clobber_ebp_begin
|
||||
.globl abi_test_clobber_xmm0
|
||||
.hidden abi_test_clobber_xmm0
|
||||
.type abi_test_clobber_xmm0,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm0:
|
||||
.L_abi_test_clobber_xmm0_begin:
|
||||
pxor %xmm0,%xmm0
|
||||
ret
|
||||
.size abi_test_clobber_xmm0,.-.L_abi_test_clobber_xmm0_begin
|
||||
.globl abi_test_clobber_xmm1
|
||||
.hidden abi_test_clobber_xmm1
|
||||
.type abi_test_clobber_xmm1,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm1:
|
||||
.L_abi_test_clobber_xmm1_begin:
|
||||
pxor %xmm1,%xmm1
|
||||
ret
|
||||
.size abi_test_clobber_xmm1,.-.L_abi_test_clobber_xmm1_begin
|
||||
.globl abi_test_clobber_xmm2
|
||||
.hidden abi_test_clobber_xmm2
|
||||
.type abi_test_clobber_xmm2,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm2:
|
||||
.L_abi_test_clobber_xmm2_begin:
|
||||
pxor %xmm2,%xmm2
|
||||
ret
|
||||
.size abi_test_clobber_xmm2,.-.L_abi_test_clobber_xmm2_begin
|
||||
.globl abi_test_clobber_xmm3
|
||||
.hidden abi_test_clobber_xmm3
|
||||
.type abi_test_clobber_xmm3,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm3:
|
||||
.L_abi_test_clobber_xmm3_begin:
|
||||
pxor %xmm3,%xmm3
|
||||
ret
|
||||
.size abi_test_clobber_xmm3,.-.L_abi_test_clobber_xmm3_begin
|
||||
.globl abi_test_clobber_xmm4
|
||||
.hidden abi_test_clobber_xmm4
|
||||
.type abi_test_clobber_xmm4,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm4:
|
||||
.L_abi_test_clobber_xmm4_begin:
|
||||
pxor %xmm4,%xmm4
|
||||
ret
|
||||
.size abi_test_clobber_xmm4,.-.L_abi_test_clobber_xmm4_begin
|
||||
.globl abi_test_clobber_xmm5
|
||||
.hidden abi_test_clobber_xmm5
|
||||
.type abi_test_clobber_xmm5,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm5:
|
||||
.L_abi_test_clobber_xmm5_begin:
|
||||
pxor %xmm5,%xmm5
|
||||
ret
|
||||
.size abi_test_clobber_xmm5,.-.L_abi_test_clobber_xmm5_begin
|
||||
.globl abi_test_clobber_xmm6
|
||||
.hidden abi_test_clobber_xmm6
|
||||
.type abi_test_clobber_xmm6,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm6:
|
||||
.L_abi_test_clobber_xmm6_begin:
|
||||
pxor %xmm6,%xmm6
|
||||
ret
|
||||
.size abi_test_clobber_xmm6,.-.L_abi_test_clobber_xmm6_begin
|
||||
.globl abi_test_clobber_xmm7
|
||||
.hidden abi_test_clobber_xmm7
|
||||
.type abi_test_clobber_xmm7,@function
|
||||
.align 16
|
||||
abi_test_clobber_xmm7:
|
||||
.L_abi_test_clobber_xmm7_begin:
|
||||
pxor %xmm7,%xmm7
|
||||
ret
|
||||
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
|
||||
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
|
||||
1610
third-party/boringssl/linux-x86_64/crypto/chacha/chacha-x86_64-linux.S
vendored
Normal file
1610
third-party/boringssl/linux-x86_64/crypto/chacha/chacha-x86_64-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3091
third-party/boringssl/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-linux.S
vendored
Normal file
3091
third-party/boringssl/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
8918
third-party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-linux.S
vendored
Normal file
8918
third-party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
883
third-party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-linux.S
vendored
Normal file
883
third-party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-linux.S
vendored
Normal file
@ -0,0 +1,883 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#include <openssl/asm_base.h>
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
|
||||
.text
|
||||
|
||||
.type _aesni_ctr32_ghash_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_ghash_6x:
|
||||
.cfi_startproc
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
subq $6,%rdx
|
||||
vpxor %xmm4,%xmm4,%xmm4
|
||||
vmovdqu 0-128(%rcx),%xmm15
|
||||
vpaddb %xmm2,%xmm1,%xmm10
|
||||
vpaddb %xmm2,%xmm10,%xmm11
|
||||
vpaddb %xmm2,%xmm11,%xmm12
|
||||
vpaddb %xmm2,%xmm12,%xmm13
|
||||
vpaddb %xmm2,%xmm13,%xmm14
|
||||
vpxor %xmm15,%xmm1,%xmm9
|
||||
vmovdqu %xmm4,16+8(%rsp)
|
||||
jmp .Loop6x
|
||||
|
||||
.align 32
|
||||
.Loop6x:
|
||||
addl $100663296,%ebx
|
||||
jc .Lhandle_ctr32
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpaddb %xmm2,%xmm14,%xmm1
|
||||
vpxor %xmm15,%xmm10,%xmm10
|
||||
vpxor %xmm15,%xmm11,%xmm11
|
||||
|
||||
.Lresume_ctr32:
|
||||
vmovdqu %xmm1,(%r8)
|
||||
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
|
||||
vpxor %xmm15,%xmm12,%xmm12
|
||||
vmovups 16-128(%rcx),%xmm2
|
||||
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
xorq %r12,%r12
|
||||
cmpq %r14,%r15
|
||||
|
||||
vaesenc %xmm2,%xmm9,%xmm9
|
||||
vmovdqu 48+8(%rsp),%xmm0
|
||||
vpxor %xmm15,%xmm13,%xmm13
|
||||
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
|
||||
vaesenc %xmm2,%xmm10,%xmm10
|
||||
vpxor %xmm15,%xmm14,%xmm14
|
||||
setnc %r12b
|
||||
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
|
||||
vaesenc %xmm2,%xmm11,%xmm11
|
||||
vmovdqu 16-32(%r9),%xmm3
|
||||
negq %r12
|
||||
vaesenc %xmm2,%xmm12,%xmm12
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
|
||||
vpxor %xmm4,%xmm8,%xmm8
|
||||
vaesenc %xmm2,%xmm13,%xmm13
|
||||
vpxor %xmm5,%xmm1,%xmm4
|
||||
andq $0x60,%r12
|
||||
vmovups 32-128(%rcx),%xmm15
|
||||
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
|
||||
vaesenc %xmm2,%xmm14,%xmm14
|
||||
|
||||
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
|
||||
leaq (%r14,%r12,1),%r14
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
|
||||
vmovdqu 64+8(%rsp),%xmm0
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 88(%r14),%r13
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 80(%r14),%r12
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,32+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,40+8(%rsp)
|
||||
vmovdqu 48-32(%r9),%xmm5
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 48-128(%rcx),%xmm15
|
||||
vpxor %xmm1,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm2,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpxor %xmm3,%xmm7,%xmm7
|
||||
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
|
||||
vmovdqu 80+8(%rsp),%xmm0
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vpxor %xmm1,%xmm4,%xmm4
|
||||
vmovdqu 64-32(%r9),%xmm1
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 64-128(%rcx),%xmm15
|
||||
vpxor %xmm2,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm3,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 72(%r14),%r13
|
||||
vpxor %xmm5,%xmm7,%xmm7
|
||||
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 64(%r14),%r12
|
||||
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
|
||||
vmovdqu 96+8(%rsp),%xmm0
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,48+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,56+8(%rsp)
|
||||
vpxor %xmm2,%xmm4,%xmm4
|
||||
vmovdqu 96-32(%r9),%xmm2
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 80-128(%rcx),%xmm15
|
||||
vpxor %xmm3,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 56(%r14),%r13
|
||||
vpxor %xmm1,%xmm7,%xmm7
|
||||
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
|
||||
vpxor 112+8(%rsp),%xmm8,%xmm8
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 48(%r14),%r12
|
||||
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,64+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,72+8(%rsp)
|
||||
vpxor %xmm3,%xmm4,%xmm4
|
||||
vmovdqu 112-32(%r9),%xmm3
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 96-128(%rcx),%xmm15
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm1,%xmm6,%xmm6
|
||||
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 40(%r14),%r13
|
||||
vpxor %xmm2,%xmm7,%xmm7
|
||||
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 32(%r14),%r12
|
||||
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,80+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,88+8(%rsp)
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vpxor %xmm1,%xmm6,%xmm6
|
||||
|
||||
vmovups 112-128(%rcx),%xmm15
|
||||
vpslldq $8,%xmm6,%xmm5
|
||||
vpxor %xmm2,%xmm4,%xmm4
|
||||
vmovdqu 16(%r11),%xmm3
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm8,%xmm7,%xmm7
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
movbeq 24(%r14),%r13
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 16(%r14),%r12
|
||||
vpalignr $8,%xmm4,%xmm4,%xmm0
|
||||
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
|
||||
movq %r13,96+8(%rsp)
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r12,104+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vmovups 128-128(%rcx),%xmm1
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vaesenc %xmm1,%xmm9,%xmm9
|
||||
vmovups 144-128(%rcx),%xmm15
|
||||
vaesenc %xmm1,%xmm10,%xmm10
|
||||
vpsrldq $8,%xmm6,%xmm6
|
||||
vaesenc %xmm1,%xmm11,%xmm11
|
||||
vpxor %xmm6,%xmm7,%xmm7
|
||||
vaesenc %xmm1,%xmm12,%xmm12
|
||||
vpxor %xmm0,%xmm4,%xmm4
|
||||
movbeq 8(%r14),%r13
|
||||
vaesenc %xmm1,%xmm13,%xmm13
|
||||
movbeq 0(%r14),%r12
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 160-128(%rcx),%xmm1
|
||||
cmpl $11,%r10d
|
||||
jb .Lenc_tail
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vaesenc %xmm1,%xmm9,%xmm9
|
||||
vaesenc %xmm1,%xmm10,%xmm10
|
||||
vaesenc %xmm1,%xmm11,%xmm11
|
||||
vaesenc %xmm1,%xmm12,%xmm12
|
||||
vaesenc %xmm1,%xmm13,%xmm13
|
||||
vmovups 176-128(%rcx),%xmm15
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 192-128(%rcx),%xmm1
|
||||
je .Lenc_tail
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vaesenc %xmm1,%xmm9,%xmm9
|
||||
vaesenc %xmm1,%xmm10,%xmm10
|
||||
vaesenc %xmm1,%xmm11,%xmm11
|
||||
vaesenc %xmm1,%xmm12,%xmm12
|
||||
vaesenc %xmm1,%xmm13,%xmm13
|
||||
vmovups 208-128(%rcx),%xmm15
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 224-128(%rcx),%xmm1
|
||||
jmp .Lenc_tail
|
||||
|
||||
.align 32
|
||||
.Lhandle_ctr32:
|
||||
vmovdqu (%r11),%xmm0
|
||||
vpshufb %xmm0,%xmm1,%xmm6
|
||||
vmovdqu 48(%r11),%xmm5
|
||||
vpaddd 64(%r11),%xmm6,%xmm10
|
||||
vpaddd %xmm5,%xmm6,%xmm11
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpaddd %xmm5,%xmm10,%xmm12
|
||||
vpshufb %xmm0,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm11,%xmm13
|
||||
vpshufb %xmm0,%xmm11,%xmm11
|
||||
vpxor %xmm15,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm12,%xmm14
|
||||
vpshufb %xmm0,%xmm12,%xmm12
|
||||
vpxor %xmm15,%xmm11,%xmm11
|
||||
vpaddd %xmm5,%xmm13,%xmm1
|
||||
vpshufb %xmm0,%xmm13,%xmm13
|
||||
vpshufb %xmm0,%xmm14,%xmm14
|
||||
vpshufb %xmm0,%xmm1,%xmm1
|
||||
jmp .Lresume_ctr32
|
||||
|
||||
.align 32
|
||||
.Lenc_tail:
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vmovdqu %xmm7,16+8(%rsp)
|
||||
vpalignr $8,%xmm4,%xmm4,%xmm8
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
|
||||
vpxor 0(%rdi),%xmm1,%xmm2
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vpxor 16(%rdi),%xmm1,%xmm0
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vpxor 32(%rdi),%xmm1,%xmm5
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vpxor 48(%rdi),%xmm1,%xmm6
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vpxor 64(%rdi),%xmm1,%xmm7
|
||||
vpxor 80(%rdi),%xmm1,%xmm3
|
||||
vmovdqu (%r8),%xmm1
|
||||
|
||||
vaesenclast %xmm2,%xmm9,%xmm9
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
vaesenclast %xmm0,%xmm10,%xmm10
|
||||
vpaddb %xmm2,%xmm1,%xmm0
|
||||
movq %r13,112+8(%rsp)
|
||||
leaq 96(%rdi),%rdi
|
||||
|
||||
prefetcht0 512(%rdi)
|
||||
prefetcht0 576(%rdi)
|
||||
vaesenclast %xmm5,%xmm11,%xmm11
|
||||
vpaddb %xmm2,%xmm0,%xmm5
|
||||
movq %r12,120+8(%rsp)
|
||||
leaq 96(%rsi),%rsi
|
||||
vmovdqu 0-128(%rcx),%xmm15
|
||||
vaesenclast %xmm6,%xmm12,%xmm12
|
||||
vpaddb %xmm2,%xmm5,%xmm6
|
||||
vaesenclast %xmm7,%xmm13,%xmm13
|
||||
vpaddb %xmm2,%xmm6,%xmm7
|
||||
vaesenclast %xmm3,%xmm14,%xmm14
|
||||
vpaddb %xmm2,%xmm7,%xmm3
|
||||
|
||||
addq $0x60,%rax
|
||||
subq $0x6,%rdx
|
||||
jc .L6x_done
|
||||
|
||||
vmovups %xmm9,-96(%rsi)
|
||||
vpxor %xmm15,%xmm1,%xmm9
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vmovdqa %xmm0,%xmm10
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
vmovdqa %xmm5,%xmm11
|
||||
vmovups %xmm12,-48(%rsi)
|
||||
vmovdqa %xmm6,%xmm12
|
||||
vmovups %xmm13,-32(%rsi)
|
||||
vmovdqa %xmm7,%xmm13
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
vmovdqa %xmm3,%xmm14
|
||||
vmovdqu 32+8(%rsp),%xmm7
|
||||
jmp .Loop6x
|
||||
|
||||
.L6x_done:
|
||||
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||
vpxor %xmm4,%xmm8,%xmm8
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
|
||||
.globl aesni_gcm_decrypt
|
||||
.hidden aesni_gcm_decrypt
|
||||
.type aesni_gcm_decrypt,@function
|
||||
.align 32
|
||||
aesni_gcm_decrypt:
|
||||
.cfi_startproc
|
||||
|
||||
_CET_ENDBR
|
||||
xorq %rax,%rax
|
||||
|
||||
|
||||
|
||||
cmpq $0x60,%rdx
|
||||
jb .Lgcm_dec_abort
|
||||
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-16
|
||||
|
||||
movq %rsp,%rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-24
|
||||
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
|
||||
vzeroupper
|
||||
|
||||
movq 16(%rbp),%r12
|
||||
vmovdqu (%r8),%xmm1
|
||||
addq $-128,%rsp
|
||||
movl 12(%r8),%ebx
|
||||
leaq .Lbswap_mask(%rip),%r11
|
||||
leaq -128(%rcx),%r14
|
||||
movq $0xf80,%r15
|
||||
vmovdqu (%r12),%xmm8
|
||||
andq $-128,%rsp
|
||||
vmovdqu (%r11),%xmm0
|
||||
leaq 128(%rcx),%rcx
|
||||
leaq 32(%r9),%r9
|
||||
movl 240-128(%rcx),%r10d
|
||||
vpshufb %xmm0,%xmm8,%xmm8
|
||||
|
||||
andq %r15,%r14
|
||||
andq %rsp,%r15
|
||||
subq %r14,%r15
|
||||
jc .Ldec_no_key_aliasing
|
||||
cmpq $768,%r15
|
||||
jnc .Ldec_no_key_aliasing
|
||||
subq %r15,%rsp
|
||||
.Ldec_no_key_aliasing:
|
||||
|
||||
vmovdqu 80(%rdi),%xmm7
|
||||
movq %rdi,%r14
|
||||
vmovdqu 64(%rdi),%xmm4
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
leaq -192(%rdi,%rdx,1),%r15
|
||||
|
||||
vmovdqu 48(%rdi),%xmm5
|
||||
shrq $4,%rdx
|
||||
xorq %rax,%rax
|
||||
vmovdqu 32(%rdi),%xmm6
|
||||
vpshufb %xmm0,%xmm7,%xmm7
|
||||
vmovdqu 16(%rdi),%xmm2
|
||||
vpshufb %xmm0,%xmm4,%xmm4
|
||||
vmovdqu (%rdi),%xmm3
|
||||
vpshufb %xmm0,%xmm5,%xmm5
|
||||
vmovdqu %xmm4,48(%rsp)
|
||||
vpshufb %xmm0,%xmm6,%xmm6
|
||||
vmovdqu %xmm5,64(%rsp)
|
||||
vpshufb %xmm0,%xmm2,%xmm2
|
||||
vmovdqu %xmm6,80(%rsp)
|
||||
vpshufb %xmm0,%xmm3,%xmm3
|
||||
vmovdqu %xmm2,96(%rsp)
|
||||
vmovdqu %xmm3,112(%rsp)
|
||||
|
||||
call _aesni_ctr32_ghash_6x
|
||||
|
||||
movq 16(%rbp),%r12
|
||||
vmovups %xmm9,-96(%rsi)
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
vmovups %xmm12,-48(%rsi)
|
||||
vmovups %xmm13,-32(%rsi)
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
|
||||
vpshufb (%r11),%xmm8,%xmm8
|
||||
vmovdqu %xmm8,(%r12)
|
||||
|
||||
vzeroupper
|
||||
leaq -40(%rbp),%rsp
|
||||
.cfi_def_cfa %rsp, 0x38
|
||||
popq %r15
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r15
|
||||
popq %r14
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r14
|
||||
popq %r13
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r13
|
||||
popq %r12
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r12
|
||||
popq %rbx
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %rbx
|
||||
popq %rbp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %rbp
|
||||
.Lgcm_dec_abort:
|
||||
ret
|
||||
|
||||
.cfi_endproc
|
||||
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
|
||||
.type _aesni_ctr32_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_6x:
|
||||
.cfi_startproc
|
||||
vmovdqu 0-128(%rcx),%xmm4
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
leaq -1(%r10),%r13
|
||||
vmovups 16-128(%rcx),%xmm15
|
||||
leaq 32-128(%rcx),%r12
|
||||
vpxor %xmm4,%xmm1,%xmm9
|
||||
addl $100663296,%ebx
|
||||
jc .Lhandle_ctr32_2
|
||||
vpaddb %xmm2,%xmm1,%xmm10
|
||||
vpaddb %xmm2,%xmm10,%xmm11
|
||||
vpxor %xmm4,%xmm10,%xmm10
|
||||
vpaddb %xmm2,%xmm11,%xmm12
|
||||
vpxor %xmm4,%xmm11,%xmm11
|
||||
vpaddb %xmm2,%xmm12,%xmm13
|
||||
vpxor %xmm4,%xmm12,%xmm12
|
||||
vpaddb %xmm2,%xmm13,%xmm14
|
||||
vpxor %xmm4,%xmm13,%xmm13
|
||||
vpaddb %xmm2,%xmm14,%xmm1
|
||||
vpxor %xmm4,%xmm14,%xmm14
|
||||
jmp .Loop_ctr32
|
||||
|
||||
.align 16
|
||||
.Loop_ctr32:
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vmovups (%r12),%xmm15
|
||||
leaq 16(%r12),%r12
|
||||
decl %r13d
|
||||
jnz .Loop_ctr32
|
||||
|
||||
vmovdqu (%r12),%xmm3
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor 0(%rdi),%xmm3,%xmm4
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpxor 16(%rdi),%xmm3,%xmm5
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vpxor 32(%rdi),%xmm3,%xmm6
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vpxor 48(%rdi),%xmm3,%xmm8
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vpxor 64(%rdi),%xmm3,%xmm2
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vpxor 80(%rdi),%xmm3,%xmm3
|
||||
leaq 96(%rdi),%rdi
|
||||
|
||||
vaesenclast %xmm4,%xmm9,%xmm9
|
||||
vaesenclast %xmm5,%xmm10,%xmm10
|
||||
vaesenclast %xmm6,%xmm11,%xmm11
|
||||
vaesenclast %xmm8,%xmm12,%xmm12
|
||||
vaesenclast %xmm2,%xmm13,%xmm13
|
||||
vaesenclast %xmm3,%xmm14,%xmm14
|
||||
vmovups %xmm9,0(%rsi)
|
||||
vmovups %xmm10,16(%rsi)
|
||||
vmovups %xmm11,32(%rsi)
|
||||
vmovups %xmm12,48(%rsi)
|
||||
vmovups %xmm13,64(%rsi)
|
||||
vmovups %xmm14,80(%rsi)
|
||||
leaq 96(%rsi),%rsi
|
||||
|
||||
ret
|
||||
.align 32
|
||||
.Lhandle_ctr32_2:
|
||||
vpshufb %xmm0,%xmm1,%xmm6
|
||||
vmovdqu 48(%r11),%xmm5
|
||||
vpaddd 64(%r11),%xmm6,%xmm10
|
||||
vpaddd %xmm5,%xmm6,%xmm11
|
||||
vpaddd %xmm5,%xmm10,%xmm12
|
||||
vpshufb %xmm0,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm11,%xmm13
|
||||
vpshufb %xmm0,%xmm11,%xmm11
|
||||
vpxor %xmm4,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm12,%xmm14
|
||||
vpshufb %xmm0,%xmm12,%xmm12
|
||||
vpxor %xmm4,%xmm11,%xmm11
|
||||
vpaddd %xmm5,%xmm13,%xmm1
|
||||
vpshufb %xmm0,%xmm13,%xmm13
|
||||
vpxor %xmm4,%xmm12,%xmm12
|
||||
vpshufb %xmm0,%xmm14,%xmm14
|
||||
vpxor %xmm4,%xmm13,%xmm13
|
||||
vpshufb %xmm0,%xmm1,%xmm1
|
||||
vpxor %xmm4,%xmm14,%xmm14
|
||||
jmp .Loop_ctr32
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
|
||||
|
||||
.globl aesni_gcm_encrypt
|
||||
.hidden aesni_gcm_encrypt
|
||||
.type aesni_gcm_encrypt,@function
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
.cfi_startproc
|
||||
|
||||
_CET_ENDBR
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+2(%rip)
|
||||
#endif
|
||||
xorq %rax,%rax
|
||||
|
||||
|
||||
|
||||
|
||||
cmpq $288,%rdx
|
||||
jb .Lgcm_enc_abort
|
||||
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-16
|
||||
|
||||
movq %rsp,%rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-24
|
||||
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
|
||||
vzeroupper
|
||||
|
||||
vmovdqu (%r8),%xmm1
|
||||
addq $-128,%rsp
|
||||
movl 12(%r8),%ebx
|
||||
leaq .Lbswap_mask(%rip),%r11
|
||||
leaq -128(%rcx),%r14
|
||||
movq $0xf80,%r15
|
||||
leaq 128(%rcx),%rcx
|
||||
vmovdqu (%r11),%xmm0
|
||||
andq $-128,%rsp
|
||||
movl 240-128(%rcx),%r10d
|
||||
|
||||
andq %r15,%r14
|
||||
andq %rsp,%r15
|
||||
subq %r14,%r15
|
||||
jc .Lenc_no_key_aliasing
|
||||
cmpq $768,%r15
|
||||
jnc .Lenc_no_key_aliasing
|
||||
subq %r15,%rsp
|
||||
.Lenc_no_key_aliasing:
|
||||
|
||||
movq %rsi,%r14
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
leaq -192(%rsi,%rdx,1),%r15
|
||||
|
||||
shrq $4,%rdx
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
vpshufb %xmm0,%xmm9,%xmm8
|
||||
vpshufb %xmm0,%xmm10,%xmm2
|
||||
vmovdqu %xmm8,112(%rsp)
|
||||
vpshufb %xmm0,%xmm11,%xmm4
|
||||
vmovdqu %xmm2,96(%rsp)
|
||||
vpshufb %xmm0,%xmm12,%xmm5
|
||||
vmovdqu %xmm4,80(%rsp)
|
||||
vpshufb %xmm0,%xmm13,%xmm6
|
||||
vmovdqu %xmm5,64(%rsp)
|
||||
vpshufb %xmm0,%xmm14,%xmm7
|
||||
vmovdqu %xmm6,48(%rsp)
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
|
||||
movq 16(%rbp),%r12
|
||||
leaq 32(%r9),%r9
|
||||
vmovdqu (%r12),%xmm8
|
||||
subq $12,%rdx
|
||||
movq $192,%rax
|
||||
vpshufb %xmm0,%xmm8,%xmm8
|
||||
|
||||
call _aesni_ctr32_ghash_6x
|
||||
vmovdqu 32(%rsp),%xmm7
|
||||
vmovdqu (%r11),%xmm0
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpunpckhqdq %xmm7,%xmm7,%xmm1
|
||||
vmovdqu 32-32(%r9),%xmm15
|
||||
vmovups %xmm9,-96(%rsi)
|
||||
vpshufb %xmm0,%xmm9,%xmm9
|
||||
vpxor %xmm7,%xmm1,%xmm1
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vpshufb %xmm0,%xmm10,%xmm10
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
vpshufb %xmm0,%xmm11,%xmm11
|
||||
vmovups %xmm12,-48(%rsi)
|
||||
vpshufb %xmm0,%xmm12,%xmm12
|
||||
vmovups %xmm13,-32(%rsi)
|
||||
vpshufb %xmm0,%xmm13,%xmm13
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
vpshufb %xmm0,%xmm14,%xmm14
|
||||
vmovdqu %xmm9,16(%rsp)
|
||||
vmovdqu 48(%rsp),%xmm6
|
||||
vmovdqu 16-32(%r9),%xmm0
|
||||
vpunpckhqdq %xmm6,%xmm6,%xmm2
|
||||
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
|
||||
vpxor %xmm6,%xmm2,%xmm2
|
||||
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
|
||||
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
|
||||
|
||||
vmovdqu 64(%rsp),%xmm9
|
||||
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
|
||||
vmovdqu 48-32(%r9),%xmm3
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
vpunpckhqdq %xmm9,%xmm9,%xmm5
|
||||
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
|
||||
vpxor %xmm9,%xmm5,%xmm5
|
||||
vpxor %xmm7,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
|
||||
vmovdqu 80-32(%r9),%xmm15
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
|
||||
vmovdqu 80(%rsp),%xmm1
|
||||
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
|
||||
vmovdqu 64-32(%r9),%xmm0
|
||||
vpxor %xmm4,%xmm7,%xmm7
|
||||
vpunpckhqdq %xmm1,%xmm1,%xmm4
|
||||
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
|
||||
vpxor %xmm1,%xmm4,%xmm4
|
||||
vpxor %xmm6,%xmm9,%xmm9
|
||||
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
|
||||
vpxor %xmm2,%xmm5,%xmm5
|
||||
|
||||
vmovdqu 96(%rsp),%xmm2
|
||||
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
|
||||
vmovdqu 96-32(%r9),%xmm3
|
||||
vpxor %xmm7,%xmm6,%xmm6
|
||||
vpunpckhqdq %xmm2,%xmm2,%xmm7
|
||||
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
|
||||
vpxor %xmm2,%xmm7,%xmm7
|
||||
vpxor %xmm9,%xmm1,%xmm1
|
||||
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
|
||||
vmovdqu 128-32(%r9),%xmm15
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
|
||||
vpxor 112(%rsp),%xmm8,%xmm8
|
||||
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
|
||||
vmovdqu 112-32(%r9),%xmm0
|
||||
vpunpckhqdq %xmm8,%xmm8,%xmm9
|
||||
vpxor %xmm6,%xmm5,%xmm5
|
||||
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
|
||||
vpxor %xmm8,%xmm9,%xmm9
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
|
||||
vpxor %xmm4,%xmm7,%xmm4
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpunpckhqdq %xmm14,%xmm14,%xmm1
|
||||
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
|
||||
vpxor %xmm14,%xmm1,%xmm1
|
||||
vpxor %xmm5,%xmm6,%xmm5
|
||||
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
|
||||
vmovdqu 32-32(%r9),%xmm15
|
||||
vpxor %xmm2,%xmm8,%xmm7
|
||||
vpxor %xmm4,%xmm9,%xmm6
|
||||
|
||||
vmovdqu 16-32(%r9),%xmm0
|
||||
vpxor %xmm5,%xmm7,%xmm9
|
||||
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
|
||||
vpxor %xmm9,%xmm6,%xmm6
|
||||
vpunpckhqdq %xmm13,%xmm13,%xmm2
|
||||
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
|
||||
vpxor %xmm13,%xmm2,%xmm2
|
||||
vpslldq $8,%xmm6,%xmm9
|
||||
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
|
||||
vpxor %xmm9,%xmm5,%xmm8
|
||||
vpsrldq $8,%xmm6,%xmm6
|
||||
vpxor %xmm6,%xmm7,%xmm7
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
|
||||
vmovdqu 48-32(%r9),%xmm3
|
||||
vpxor %xmm4,%xmm5,%xmm5
|
||||
vpunpckhqdq %xmm12,%xmm12,%xmm9
|
||||
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
|
||||
vpxor %xmm12,%xmm9,%xmm9
|
||||
vpxor %xmm14,%xmm13,%xmm13
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm14
|
||||
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
|
||||
vmovdqu 80-32(%r9),%xmm15
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
|
||||
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
|
||||
vmovdqu 64-32(%r9),%xmm0
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
vpunpckhqdq %xmm11,%xmm11,%xmm1
|
||||
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
|
||||
vpxor %xmm11,%xmm1,%xmm1
|
||||
vpxor %xmm13,%xmm12,%xmm12
|
||||
vxorps 16(%rsp),%xmm7,%xmm7
|
||||
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm2,%xmm9,%xmm9
|
||||
|
||||
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
|
||||
vxorps %xmm14,%xmm8,%xmm8
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
|
||||
vmovdqu 96-32(%r9),%xmm3
|
||||
vpxor %xmm4,%xmm5,%xmm5
|
||||
vpunpckhqdq %xmm10,%xmm10,%xmm2
|
||||
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
|
||||
vpxor %xmm10,%xmm2,%xmm2
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm14
|
||||
vpxor %xmm12,%xmm11,%xmm11
|
||||
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
|
||||
vmovdqu 128-32(%r9),%xmm15
|
||||
vpxor %xmm9,%xmm1,%xmm1
|
||||
|
||||
vxorps %xmm7,%xmm14,%xmm14
|
||||
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
|
||||
vxorps %xmm14,%xmm8,%xmm8
|
||||
|
||||
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
|
||||
vmovdqu 112-32(%r9),%xmm0
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
vpunpckhqdq %xmm8,%xmm8,%xmm9
|
||||
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
|
||||
vpxor %xmm8,%xmm9,%xmm9
|
||||
vpxor %xmm11,%xmm10,%xmm10
|
||||
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
|
||||
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
|
||||
vpxor %xmm4,%xmm5,%xmm5
|
||||
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
|
||||
vpxor %xmm10,%xmm7,%xmm7
|
||||
vpxor %xmm2,%xmm6,%xmm6
|
||||
|
||||
vpxor %xmm5,%xmm7,%xmm4
|
||||
vpxor %xmm4,%xmm6,%xmm6
|
||||
vpslldq $8,%xmm6,%xmm1
|
||||
vmovdqu 16(%r11),%xmm3
|
||||
vpsrldq $8,%xmm6,%xmm6
|
||||
vpxor %xmm1,%xmm5,%xmm8
|
||||
vpxor %xmm6,%xmm7,%xmm7
|
||||
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm2
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||
vpxor %xmm2,%xmm8,%xmm8
|
||||
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm2
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||
vpxor %xmm7,%xmm2,%xmm2
|
||||
vpxor %xmm2,%xmm8,%xmm8
|
||||
movq 16(%rbp),%r12
|
||||
vpshufb (%r11),%xmm8,%xmm8
|
||||
vmovdqu %xmm8,(%r12)
|
||||
|
||||
vzeroupper
|
||||
leaq -40(%rbp),%rsp
|
||||
.cfi_def_cfa %rsp, 0x38
|
||||
popq %r15
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r15
|
||||
popq %r14
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r14
|
||||
popq %r13
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r13
|
||||
popq %r12
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r12
|
||||
popq %rbx
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %rbx
|
||||
popq %rbp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %rbp
|
||||
.Lgcm_enc_abort:
|
||||
ret
|
||||
|
||||
.cfi_endproc
|
||||
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
|
||||
.section .rodata
|
||||
.align 64
|
||||
.Lbswap_mask:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.Lpoly:
|
||||
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
||||
.Lone_msb:
|
||||
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
.Ltwo_lsb:
|
||||
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
.Lone_lsb:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
.text
|
||||
#endif
|
||||
2361
third-party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-x86_64-linux.S
vendored
Normal file
2361
third-party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-x86_64-linux.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user