Upgrade webrtc dependencies and build script

Isaac 2024-03-07 00:27:52 +04:00
parent 31f1820b30
commit 17fd673361
2542 changed files with 440334 additions and 1294732 deletions


@ -12,36 +12,70 @@
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
licenses(["notice"])
exports_files(["LICENSE"])
#load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
load(
":BUILD.generated.bzl",
"crypto_headers",
"crypto_internal_headers",
"crypto_sources",
"crypto_sources_mac_x86_64",
"crypto_sources_ios_aarch64",
"crypto_sources_ios_arm",
"crypto_sources_asm",
"fips_fragments",
"ssl_headers",
"ssl_internal_headers",
"ssl_sources",
"tool_sources",
"tool_headers",
"tool_sources",
)
posix_copts = [
licenses(["notice"])
exports_files(["LICENSE"])
# By default, the C files will expect assembly files, if any, to be linked in
# with the build. This default can be flipped with -DOPENSSL_NO_ASM. If building
# in a configuration where we have no assembly optimizations, -DOPENSSL_NO_ASM
# has no effect, and either value is fine.
#
# Like C files, assembly files are wrapped in #ifdef (or NASM equivalent), so it
# is safe to include a file for the wrong platform in the build. It will just
# output an empty object file. However, we need some platform selectors to
# distinguish between gas or NASM syntax.
#
# For all non-Windows platforms, we use gas assembly syntax and can assume any
# GCC-compatible toolchain includes a gas-compatible assembler.
#
# For Windows, we use NASM on x86 and x86_64 and gas, specifically
# clang-assembler, on aarch64. We have not yet added NASM support to this build,
# and would need to detect MSVC vs clang-cl for aarch64 so, for now, we just
# disable assembly on Windows across the board.
#
# These two selects for asm_sources and asm_copts must be kept in sync. If we
# specify assembly, we don't want OPENSSL_NO_ASM. If we don't specify assembly,
# we want OPENSSL_NO_ASM, in case the C files expect them in some format (e.g.
# NASM) this build file doesn't yet support.
#
# TODO(https://crbug.com/boringssl/531): Enable assembly for Windows.
asm_sources = select({
"@platforms//os:windows": [],
"//conditions:default": crypto_sources_asm,
})
asm_copts = select({
"@platforms//os:windows": ["-DOPENSSL_NO_ASM"],
"//conditions:default": [],
})
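# Illustrative sketch only, not part of the generated file: the two selects
# above resolve together, so a consuming target sees either the real assembly
# sources with no extra define, or no assembly plus -DOPENSSL_NO_ASM.
#
#   cc_library(
#       name = "asm_probe",       # hypothetical target name, for illustration
#       srcs = asm_sources,       # [] on Windows, crypto_sources_asm elsewhere
#       copts = asm_copts,        # ["-DOPENSSL_NO_ASM"] on Windows, [] elsewhere
#   )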
# Configure C, C++, and common flags for GCC-compatible toolchains.
#
# TODO(davidben): Can we remove some of these? In Bazel, are warnings the
# toolchain or project's responsibility? -Wa,--noexecstack should be unnecessary
# now, though https://crbug.com/boringssl/292 tracks testing this in CI.
# -fno-common did not become default until
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85678.
gcc_copts = [
# Assembler option --noexecstack adds .note.GNU-stack to each object to
# ensure that binaries can be built with non-executable stack.
"-Wa,--noexecstack",
# This is needed on Linux systems (at least) to get rwlock in pthread.
"-D_XOPEN_SOURCE=700",
# This list of warnings should match those in the top-level CMakeLists.txt.
"-Wall",
"-Werror",
@ -51,72 +85,63 @@ posix_copts = [
"-Wwrite-strings",
"-Wshadow",
"-fno-common",
"-Wno-unused-but-set-variable",
# Modern build environments should be able to set this to use atomic
# operations for reference counting rather than locks. However, it's
# known not to work on some Android builds.
# "-DOPENSSL_C11_ATOMIC",
]
boringssl_copts = select({
"@build_bazel_rules_apple//apple:ios_arm64": posix_copts,
"//build-system:ios_sim_arm64": posix_copts,
"@build_bazel_rules_apple//apple:ios_x86_64": posix_copts,
})
crypto_sources_asm = select({
"@build_bazel_rules_apple//apple:ios_arm64": crypto_sources_ios_aarch64,
"//build-system:ios_sim_arm64": crypto_sources_ios_aarch64,
"@build_bazel_rules_apple//apple:ios_x86_64": crypto_sources_mac_x86_64,
})
# For C targets only (not C++), compile with C11 support.
posix_copts_c11 = [
gcc_copts_c11 = [
"-std=c11",
"-Wmissing-prototypes",
"-Wold-style-definition",
"-Wstrict-prototypes",
]
boringssl_copts_c11 = boringssl_copts + select({
"@build_bazel_rules_apple//apple:ios_arm64": posix_copts_c11,
"//build-system:ios_sim_arm64": posix_copts_c11,
"@build_bazel_rules_apple//apple:ios_x86_64": posix_copts_c11,
})
# For C++ targets only (not C), compile with C++11 support.
posix_copts_cxx = [
"-std=c++11",
gcc_copts_cxx = [
"-std=c++14",
"-Wmissing-declarations",
]
boringssl_copts = [
"-DBORINGSSL_IMPLEMENTATION",
] + select({
# We assume that non-Windows builds use a GCC-compatible toolchain and that
# Windows builds do not.
#
# TODO(davidben): Should these be querying something in @bazel_tools?
# Unfortunately, @bazel_tools is undocumented. See
# https://github.com/bazelbuild/bazel/issues/14914
"@platforms//os:windows": [],
"//conditions:default": gcc_copts,
}) + select({
# This is needed on glibc systems to get rwlock in pthreads, but it should
# not be set on Apple platforms or FreeBSD, where it instead disables APIs
# we use.
# See compat(5), sys/cdefs.h, and https://crbug.com/boringssl/471
"@platforms//os:linux": ["-D_XOPEN_SOURCE=700"],
# Without WIN32_LEAN_AND_MEAN, <windows.h> pulls in wincrypt.h, which
# conflicts with our <openssl/x509.h>.
"@platforms//os:windows": ["-DWIN32_LEAN_AND_MEAN"],
"//conditions:default": [],
}) + asm_copts
boringssl_copts_c11 = boringssl_copts + select({
"@platforms//os:windows": ["/std:c11"],
"//conditions:default": gcc_copts_c11,
})
boringssl_copts_cxx = boringssl_copts + select({
"@build_bazel_rules_apple//apple:ios_arm64": posix_copts_cxx,
"//build-system:ios_sim_arm64": posix_copts_cxx,
"@build_bazel_rules_apple//apple:ios_x86_64": posix_copts_cxx,
"@platforms//os:windows": [],
"//conditions:default": gcc_copts_cxx,
})
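# Worked example (comment only): with the selects above, a C target compiled
# with boringssl_copts_c11 on Linux receives
#   ["-DBORINGSSL_IMPLEMENTATION"] + gcc_copts + ["-D_XOPEN_SOURCE=700"] + gcc_copts_c11,
# while the same target on Windows receives
#   ["-DBORINGSSL_IMPLEMENTATION", "-DWIN32_LEAN_AND_MEAN", "-DOPENSSL_NO_ASM", "/std:c11"].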
cc_library(
name = "crypto",
srcs = crypto_sources + crypto_internal_headers + crypto_sources_asm + ["aes_ige.c"],
srcs = crypto_sources + crypto_internal_headers + asm_sources,
hdrs = crypto_headers + fips_fragments,
copts = boringssl_copts_c11,
includes = ["src/include"],
linkopts = select({
"@build_bazel_rules_apple//apple:ios_arm64": [],
"//build-system:ios_sim_arm64": [],
"@build_bazel_rules_apple//apple:ios_x86_64": [],
"@platforms//os:windows": ["-defaultlib:advapi32.lib"],
"//conditions:default": ["-pthread"],
}),
linkstatic = 1,
visibility = ["//visibility:public"],
)
filegroup(
name='libcrypto',
srcs=[':crypto'],
#output_group = 'library',
visibility = ["//visibility:public"],
)
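# Illustrative sketch of a hypothetical consumer (not part of this file): a
# target in another package can depend on :crypto directly, or on the
# :libcrypto filegroup when it only needs the built archive as a file input.
#
#   cc_library(
#       name = "uses_boringssl",                 # hypothetical name
#       srcs = ["tls_client.c"],                 # hypothetical source
#       deps = ["//path/to/boringssl:crypto"],   # substitute this package's real path
#   )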
cc_library(
@ -130,3 +155,11 @@ cc_library(
":crypto",
],
)
cc_binary(
name = "bssl",
srcs = tool_sources + tool_headers,
copts = boringssl_copts_cxx,
visibility = ["//visibility:public"],
deps = [":ssl"],
)
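# Example invocation (the package path is a placeholder, not taken from this
# repository):
#   bazel build //path/to/boringssl:bssl
# builds the command-line tool from tool_sources, linking it against the :ssl
# target declared above.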


@ -37,17 +37,21 @@ fips_fragments = [
"src/crypto/fipsmodule/cipher/aead.c",
"src/crypto/fipsmodule/cipher/cipher.c",
"src/crypto/fipsmodule/cipher/e_aes.c",
"src/crypto/fipsmodule/cipher/e_des.c",
"src/crypto/fipsmodule/des/des.c",
"src/crypto/fipsmodule/cipher/e_aesccm.c",
"src/crypto/fipsmodule/cmac/cmac.c",
"src/crypto/fipsmodule/dh/check.c",
"src/crypto/fipsmodule/dh/dh.c",
"src/crypto/fipsmodule/digest/digest.c",
"src/crypto/fipsmodule/digest/digests.c",
"src/crypto/fipsmodule/digestsign/digestsign.c",
"src/crypto/fipsmodule/ec/ec.c",
"src/crypto/fipsmodule/ec/ec_key.c",
"src/crypto/fipsmodule/ec/ec_montgomery.c",
"src/crypto/fipsmodule/ec/felem.c",
"src/crypto/fipsmodule/ec/oct.c",
"src/crypto/fipsmodule/ec/p224-64.c",
"src/crypto/fipsmodule/ec/p256-x86_64.c",
"src/crypto/fipsmodule/ec/p256-nistz.c",
"src/crypto/fipsmodule/ec/p256.c",
"src/crypto/fipsmodule/ec/scalar.c",
"src/crypto/fipsmodule/ec/simple.c",
"src/crypto/fipsmodule/ec/simple_mul.c",
@ -55,6 +59,7 @@ fips_fragments = [
"src/crypto/fipsmodule/ec/wnaf.c",
"src/crypto/fipsmodule/ecdh/ecdh.c",
"src/crypto/fipsmodule/ecdsa/ecdsa.c",
"src/crypto/fipsmodule/hkdf/hkdf.c",
"src/crypto/fipsmodule/hmac/hmac.c",
"src/crypto/fipsmodule/md4/md4.c",
"src/crypto/fipsmodule/md5/md5.c",
@ -66,19 +71,20 @@ fips_fragments = [
"src/crypto/fipsmodule/modes/ofb.c",
"src/crypto/fipsmodule/modes/polyval.c",
"src/crypto/fipsmodule/rand/ctrdrbg.c",
"src/crypto/fipsmodule/rand/fork_detect.c",
"src/crypto/fipsmodule/rand/rand.c",
"src/crypto/fipsmodule/rand/urandom.c",
"src/crypto/fipsmodule/rsa/blinding.c",
"src/crypto/fipsmodule/rsa/padding.c",
"src/crypto/fipsmodule/rsa/rsa.c",
"src/crypto/fipsmodule/rsa/rsa_impl.c",
"src/crypto/fipsmodule/self_check/fips.c",
"src/crypto/fipsmodule/self_check/self_check.c",
"src/crypto/fipsmodule/sha/sha1-altivec.c",
"src/crypto/fipsmodule/service_indicator/service_indicator.c",
"src/crypto/fipsmodule/sha/sha1.c",
"src/crypto/fipsmodule/sha/sha256.c",
"src/crypto/fipsmodule/sha/sha512.c",
"src/crypto/fipsmodule/tls/kdf.c",
"src/third_party/fiat/p256.c",
]
ssl_internal_headers = [
@ -93,6 +99,8 @@ ssl_sources = [
"src/ssl/d1_srtp.cc",
"src/ssl/dtls_method.cc",
"src/ssl/dtls_record.cc",
"src/ssl/encrypted_client_hello.cc",
"src/ssl/extensions.cc",
"src/ssl/handoff.cc",
"src/ssl/handshake.cc",
"src/ssl/handshake_client.cc",
@ -115,7 +123,6 @@ ssl_sources = [
"src/ssl/ssl_versions.cc",
"src/ssl/ssl_x509.cc",
"src/ssl/t1_enc.cc",
"src/ssl/t1_lib.cc",
"src/ssl/tls13_both.cc",
"src/ssl/tls13_client.cc",
"src/ssl/tls13_enc.cc",
@ -128,12 +135,14 @@ crypto_headers = [
"src/include/openssl/aead.h",
"src/include/openssl/aes.h",
"src/include/openssl/arm_arch.h",
"src/include/openssl/asm_base.h",
"src/include/openssl/asn1.h",
"src/include/openssl/asn1_mac.h",
"src/include/openssl/asn1t.h",
"src/include/openssl/base.h",
"src/include/openssl/base64.h",
"src/include/openssl/bio.h",
"src/include/openssl/blake2.h",
"src/include/openssl/blowfish.h",
"src/include/openssl/bn.h",
"src/include/openssl/buf.h",
@ -146,6 +155,7 @@ crypto_headers = [
"src/include/openssl/conf.h",
"src/include/openssl/cpu.h",
"src/include/openssl/crypto.h",
"src/include/openssl/ctrdrbg.h",
"src/include/openssl/curve25519.h",
"src/include/openssl/des.h",
"src/include/openssl/dh.h",
@ -159,11 +169,17 @@ crypto_headers = [
"src/include/openssl/engine.h",
"src/include/openssl/err.h",
"src/include/openssl/evp.h",
"src/include/openssl/evp_errors.h",
"src/include/openssl/ex_data.h",
"src/include/openssl/experimental/dilithium.h",
"src/include/openssl/experimental/kyber.h",
"src/include/openssl/experimental/spx.h",
"src/include/openssl/hkdf.h",
"src/include/openssl/hmac.h",
"src/include/openssl/hpke.h",
"src/include/openssl/hrss.h",
"src/include/openssl/is_boringssl.h",
"src/include/openssl/kdf.h",
"src/include/openssl/lhash.h",
"src/include/openssl/md4.h",
"src/include/openssl/md5.h",
@ -181,31 +197,43 @@ crypto_headers = [
"src/include/openssl/pkcs8.h",
"src/include/openssl/poly1305.h",
"src/include/openssl/pool.h",
"src/include/openssl/posix_time.h",
"src/include/openssl/rand.h",
"src/include/openssl/rc4.h",
"src/include/openssl/ripemd.h",
"src/include/openssl/rsa.h",
"src/include/openssl/safestack.h",
"src/include/openssl/service_indicator.h",
"src/include/openssl/sha.h",
"src/include/openssl/siphash.h",
"src/include/openssl/span.h",
"src/include/openssl/stack.h",
"src/include/openssl/target.h",
"src/include/openssl/thread.h",
"src/include/openssl/time.h",
"src/include/openssl/trust_token.h",
"src/include/openssl/type_check.h",
"src/include/openssl/x509.h",
"src/include/openssl/x509_vfy.h",
"src/include/openssl/x509v3.h",
"src/include/openssl/x509v3_errors.h",
]
crypto_internal_headers = [
"src/crypto/asn1/asn1_locl.h",
"src/crypto/asn1/internal.h",
"src/crypto/bio/internal.h",
"src/crypto/bytestring/internal.h",
"src/crypto/chacha/internal.h",
"src/crypto/cipher_extra/internal.h",
"src/crypto/conf/conf_def.h",
"src/crypto/conf/internal.h",
"src/crypto/cpu-arm-linux.h",
"src/crypto/cpu_arm_linux.h",
"src/crypto/curve25519/curve25519_tables.h",
"src/crypto/curve25519/internal.h",
"src/crypto/des/internal.h",
"src/crypto/dilithium/internal.h",
"src/crypto/dsa/internal.h",
"src/crypto/ec_extra/internal.h",
"src/crypto/err/internal.h",
"src/crypto/evp/internal.h",
"src/crypto/fipsmodule/aes/internal.h",
@ -213,38 +241,52 @@ crypto_internal_headers = [
"src/crypto/fipsmodule/bn/rsaz_exp.h",
"src/crypto/fipsmodule/cipher/internal.h",
"src/crypto/fipsmodule/delocate.h",
"src/crypto/fipsmodule/des/internal.h",
"src/crypto/fipsmodule/dh/internal.h",
"src/crypto/fipsmodule/digest/internal.h",
"src/crypto/fipsmodule/digest/md32_common.h",
"src/crypto/fipsmodule/ec/builtin_curves.h",
"src/crypto/fipsmodule/ec/internal.h",
"src/crypto/fipsmodule/ec/p256-x86_64-table.h",
"src/crypto/fipsmodule/ec/p256-x86_64.h",
"src/crypto/fipsmodule/ec/p256-nistz-table.h",
"src/crypto/fipsmodule/ec/p256-nistz.h",
"src/crypto/fipsmodule/ec/p256_table.h",
"src/crypto/fipsmodule/ecdsa/internal.h",
"src/crypto/fipsmodule/md5/internal.h",
"src/crypto/fipsmodule/modes/internal.h",
"src/crypto/fipsmodule/rand/fork_detect.h",
"src/crypto/fipsmodule/rand/getrandom_fillin.h",
"src/crypto/fipsmodule/rand/internal.h",
"src/crypto/fipsmodule/rsa/internal.h",
"src/crypto/fipsmodule/service_indicator/internal.h",
"src/crypto/fipsmodule/sha/internal.h",
"src/crypto/fipsmodule/tls/internal.h",
"src/crypto/hrss/internal.h",
"src/crypto/internal.h",
"src/crypto/keccak/internal.h",
"src/crypto/kyber/internal.h",
"src/crypto/lhash/internal.h",
"src/crypto/obj/obj_dat.h",
"src/crypto/pkcs7/internal.h",
"src/crypto/pkcs8/internal.h",
"src/crypto/poly1305/internal.h",
"src/crypto/pool/internal.h",
"src/crypto/x509/charmap.h",
"src/crypto/rsa_extra/internal.h",
"src/crypto/spx/address.h",
"src/crypto/spx/fors.h",
"src/crypto/spx/merkle.h",
"src/crypto/spx/params.h",
"src/crypto/spx/spx_util.h",
"src/crypto/spx/thash.h",
"src/crypto/spx/wots.h",
"src/crypto/trust_token/internal.h",
"src/crypto/x509/ext_dat.h",
"src/crypto/x509/internal.h",
"src/crypto/x509/vpm_int.h",
"src/crypto/x509v3/ext_dat.h",
"src/crypto/x509v3/internal.h",
"src/crypto/x509v3/pcy_int.h",
"src/third_party/fiat/curve25519_32.h",
"src/third_party/fiat/curve25519_64.h",
"src/third_party/fiat/curve25519_tables.h",
"src/third_party/fiat/internal.h",
"src/third_party/fiat/curve25519_64_adx.h",
"src/third_party/fiat/curve25519_64_msvc.h",
"src/third_party/fiat/p256_32.h",
"src/third_party/fiat/p256_64.h",
"src/third_party/fiat/p256_64_msvc.h",
]
crypto_sources = [
@ -253,36 +295,34 @@ crypto_sources = [
"src/crypto/asn1/a_bool.c",
"src/crypto/asn1/a_d2i_fp.c",
"src/crypto/asn1/a_dup.c",
"src/crypto/asn1/a_enum.c",
"src/crypto/asn1/a_gentm.c",
"src/crypto/asn1/a_i2d_fp.c",
"src/crypto/asn1/a_int.c",
"src/crypto/asn1/a_mbstr.c",
"src/crypto/asn1/a_object.c",
"src/crypto/asn1/a_octet.c",
"src/crypto/asn1/a_print.c",
"src/crypto/asn1/a_strex.c",
"src/crypto/asn1/a_strnid.c",
"src/crypto/asn1/a_time.c",
"src/crypto/asn1/a_type.c",
"src/crypto/asn1/a_utctm.c",
"src/crypto/asn1/a_utf8.c",
"src/crypto/asn1/asn1_lib.c",
"src/crypto/asn1/asn1_par.c",
"src/crypto/asn1/asn_pack.c",
"src/crypto/asn1/f_enum.c",
"src/crypto/asn1/f_int.c",
"src/crypto/asn1/f_string.c",
"src/crypto/asn1/posix_time.c",
"src/crypto/asn1/tasn_dec.c",
"src/crypto/asn1/tasn_enc.c",
"src/crypto/asn1/tasn_fre.c",
"src/crypto/asn1/tasn_new.c",
"src/crypto/asn1/tasn_typ.c",
"src/crypto/asn1/tasn_utl.c",
"src/crypto/asn1/time_support.c",
"src/crypto/base64/base64.c",
"src/crypto/bio/bio.c",
"src/crypto/bio/bio_mem.c",
"src/crypto/bio/connect.c",
"src/crypto/bio/errno.c",
"src/crypto/bio/fd.c",
"src/crypto/bio/file.c",
"src/crypto/bio/hexdump.c",
@ -290,6 +330,7 @@ crypto_sources = [
"src/crypto/bio/printf.c",
"src/crypto/bio/socket.c",
"src/crypto/bio/socket_helper.c",
"src/crypto/blake2/blake2.c",
"src/crypto/bn_extra/bn_asn1.c",
"src/crypto/bn_extra/convert.c",
"src/crypto/buf/buf.c",
@ -301,39 +342,43 @@ crypto_sources = [
"src/crypto/chacha/chacha.c",
"src/crypto/cipher_extra/cipher_extra.c",
"src/crypto/cipher_extra/derive_key.c",
"src/crypto/cipher_extra/e_aesccm.c",
"src/crypto/cipher_extra/e_aesctrhmac.c",
"src/crypto/cipher_extra/e_aesgcmsiv.c",
"src/crypto/cipher_extra/e_chacha20poly1305.c",
"src/crypto/cipher_extra/e_des.c",
"src/crypto/cipher_extra/e_null.c",
"src/crypto/cipher_extra/e_rc2.c",
"src/crypto/cipher_extra/e_rc4.c",
"src/crypto/cipher_extra/e_tls.c",
"src/crypto/cipher_extra/tls_cbc.c",
"src/crypto/cmac/cmac.c",
"src/crypto/conf/conf.c",
"src/crypto/cpu-aarch64-fuchsia.c",
"src/crypto/cpu-aarch64-linux.c",
"src/crypto/cpu-arm-linux.c",
"src/crypto/cpu-arm.c",
"src/crypto/cpu-intel.c",
"src/crypto/cpu-ppc64le.c",
"src/crypto/cpu_aarch64_apple.c",
"src/crypto/cpu_aarch64_fuchsia.c",
"src/crypto/cpu_aarch64_linux.c",
"src/crypto/cpu_aarch64_openbsd.c",
"src/crypto/cpu_aarch64_sysreg.c",
"src/crypto/cpu_aarch64_win.c",
"src/crypto/cpu_arm_freebsd.c",
"src/crypto/cpu_arm_linux.c",
"src/crypto/cpu_intel.c",
"src/crypto/crypto.c",
"src/crypto/curve25519/curve25519.c",
"src/crypto/curve25519/curve25519_64_adx.c",
"src/crypto/curve25519/spake25519.c",
"src/crypto/dh/check.c",
"src/crypto/dh/dh.c",
"src/crypto/dh/dh_asn1.c",
"src/crypto/dh/params.c",
"src/crypto/des/des.c",
"src/crypto/dh_extra/dh_asn1.c",
"src/crypto/dh_extra/params.c",
"src/crypto/digest_extra/digest_extra.c",
"src/crypto/dilithium/dilithium.c",
"src/crypto/dsa/dsa.c",
"src/crypto/dsa/dsa_asn1.c",
"src/crypto/ec_extra/ec_asn1.c",
"src/crypto/ec_extra/ec_derive.c",
"src/crypto/ec_extra/hash_to_curve.c",
"src/crypto/ecdh_extra/ecdh_extra.c",
"src/crypto/ecdsa_extra/ecdsa_asn1.c",
"src/crypto/engine/engine.c",
"src/crypto/err/err.c",
"src/crypto/evp/digestsign.c",
"src/crypto/evp/evp.c",
"src/crypto/evp/evp_asn1.c",
"src/crypto/evp/evp_ctx.c",
@ -342,6 +387,7 @@ crypto_sources = [
"src/crypto/evp/p_ec_asn1.c",
"src/crypto/evp/p_ed25519.c",
"src/crypto/evp/p_ed25519_asn1.c",
"src/crypto/evp/p_hkdf.c",
"src/crypto/evp/p_rsa.c",
"src/crypto/evp/p_rsa_asn1.c",
"src/crypto/evp/p_x25519.c",
@ -353,9 +399,10 @@ crypto_sources = [
"src/crypto/ex_data.c",
"src/crypto/fipsmodule/bcm.c",
"src/crypto/fipsmodule/fips_shared_support.c",
"src/crypto/fipsmodule/is_fips.c",
"src/crypto/hkdf/hkdf.c",
"src/crypto/hpke/hpke.c",
"src/crypto/hrss/hrss.c",
"src/crypto/keccak/keccak.c",
"src/crypto/kyber/kyber.c",
"src/crypto/lhash/lhash.c",
"src/crypto/mem.c",
"src/crypto/obj/obj.c",
@ -379,34 +426,71 @@ crypto_sources = [
"src/crypto/pool/pool.c",
"src/crypto/rand_extra/deterministic.c",
"src/crypto/rand_extra/forkunsafe.c",
"src/crypto/rand_extra/fuchsia.c",
"src/crypto/rand_extra/getentropy.c",
"src/crypto/rand_extra/ios.c",
"src/crypto/rand_extra/passive.c",
"src/crypto/rand_extra/rand_extra.c",
"src/crypto/rand_extra/trusty.c",
"src/crypto/rand_extra/windows.c",
"src/crypto/rc4/rc4.c",
"src/crypto/refcount_c11.c",
"src/crypto/refcount_lock.c",
"src/crypto/refcount.c",
"src/crypto/rsa_extra/rsa_asn1.c",
"src/crypto/rsa_extra/rsa_crypt.c",
"src/crypto/rsa_extra/rsa_print.c",
"src/crypto/siphash/siphash.c",
"src/crypto/spx/address.c",
"src/crypto/spx/fors.c",
"src/crypto/spx/merkle.c",
"src/crypto/spx/spx.c",
"src/crypto/spx/spx_util.c",
"src/crypto/spx/thash.c",
"src/crypto/spx/wots.c",
"src/crypto/stack/stack.c",
"src/crypto/thread.c",
"src/crypto/thread_none.c",
"src/crypto/thread_pthread.c",
"src/crypto/thread_win.c",
"src/crypto/trust_token/pmbtoken.c",
"src/crypto/trust_token/trust_token.c",
"src/crypto/trust_token/voprf.c",
"src/crypto/x509/a_digest.c",
"src/crypto/x509/a_sign.c",
"src/crypto/x509/a_strex.c",
"src/crypto/x509/a_verify.c",
"src/crypto/x509/algorithm.c",
"src/crypto/x509/asn1_gen.c",
"src/crypto/x509/by_dir.c",
"src/crypto/x509/by_file.c",
"src/crypto/x509/i2d_pr.c",
"src/crypto/x509/name_print.c",
"src/crypto/x509/policy.c",
"src/crypto/x509/rsa_pss.c",
"src/crypto/x509/t_crl.c",
"src/crypto/x509/t_req.c",
"src/crypto/x509/t_x509.c",
"src/crypto/x509/t_x509a.c",
"src/crypto/x509/v3_akey.c",
"src/crypto/x509/v3_akeya.c",
"src/crypto/x509/v3_alt.c",
"src/crypto/x509/v3_bcons.c",
"src/crypto/x509/v3_bitst.c",
"src/crypto/x509/v3_conf.c",
"src/crypto/x509/v3_cpols.c",
"src/crypto/x509/v3_crld.c",
"src/crypto/x509/v3_enum.c",
"src/crypto/x509/v3_extku.c",
"src/crypto/x509/v3_genn.c",
"src/crypto/x509/v3_ia5.c",
"src/crypto/x509/v3_info.c",
"src/crypto/x509/v3_int.c",
"src/crypto/x509/v3_lib.c",
"src/crypto/x509/v3_ncons.c",
"src/crypto/x509/v3_ocsp.c",
"src/crypto/x509/v3_pcons.c",
"src/crypto/x509/v3_pmaps.c",
"src/crypto/x509/v3_prn.c",
"src/crypto/x509/v3_purp.c",
"src/crypto/x509/v3_skey.c",
"src/crypto/x509/v3_utl.c",
"src/crypto/x509/x509.c",
"src/crypto/x509/x509_att.c",
"src/crypto/x509/x509_cmp.c",
@ -415,7 +499,6 @@ crypto_sources = [
"src/crypto/x509/x509_ext.c",
"src/crypto/x509/x509_lu.c",
"src/crypto/x509/x509_obj.c",
"src/crypto/x509/x509_r2x.c",
"src/crypto/x509/x509_req.c",
"src/crypto/x509/x509_set.c",
"src/crypto/x509/x509_trs.c",
@ -432,9 +515,7 @@ crypto_sources = [
"src/crypto/x509/x_attrib.c",
"src/crypto/x509/x_crl.c",
"src/crypto/x509/x_exten.c",
"src/crypto/x509/x_info.c",
"src/crypto/x509/x_name.c",
"src/crypto/x509/x_pkey.c",
"src/crypto/x509/x_pubkey.c",
"src/crypto/x509/x_req.c",
"src/crypto/x509/x_sig.c",
@ -442,40 +523,283 @@ crypto_sources = [
"src/crypto/x509/x_val.c",
"src/crypto/x509/x_x509.c",
"src/crypto/x509/x_x509a.c",
"src/crypto/x509v3/pcy_cache.c",
"src/crypto/x509v3/pcy_data.c",
"src/crypto/x509v3/pcy_lib.c",
"src/crypto/x509v3/pcy_map.c",
"src/crypto/x509v3/pcy_node.c",
"src/crypto/x509v3/pcy_tree.c",
"src/crypto/x509v3/v3_akey.c",
"src/crypto/x509v3/v3_akeya.c",
"src/crypto/x509v3/v3_alt.c",
"src/crypto/x509v3/v3_bcons.c",
"src/crypto/x509v3/v3_bitst.c",
"src/crypto/x509v3/v3_conf.c",
"src/crypto/x509v3/v3_cpols.c",
"src/crypto/x509v3/v3_crld.c",
"src/crypto/x509v3/v3_enum.c",
"src/crypto/x509v3/v3_extku.c",
"src/crypto/x509v3/v3_genn.c",
"src/crypto/x509v3/v3_ia5.c",
"src/crypto/x509v3/v3_info.c",
"src/crypto/x509v3/v3_int.c",
"src/crypto/x509v3/v3_lib.c",
"src/crypto/x509v3/v3_ncons.c",
"src/crypto/x509v3/v3_ocsp.c",
"src/crypto/x509v3/v3_pci.c",
"src/crypto/x509v3/v3_pcia.c",
"src/crypto/x509v3/v3_pcons.c",
"src/crypto/x509v3/v3_pku.c",
"src/crypto/x509v3/v3_pmaps.c",
"src/crypto/x509v3/v3_prn.c",
"src/crypto/x509v3/v3_purp.c",
"src/crypto/x509v3/v3_skey.c",
"src/crypto/x509v3/v3_sxnet.c",
"src/crypto/x509v3/v3_utl.c",
"src/third_party/fiat/curve25519.c",
]
crypto_sources_asm = [
"apple-aarch64/crypto/chacha/chacha-armv8-apple.S",
"apple-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/aesv8-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/armv8-mont-apple.S",
"apple-aarch64/crypto/fipsmodule/bn-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/ghash-neon-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/ghashv8-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/p256-armv8-asm-apple.S",
"apple-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-apple.S",
"apple-aarch64/crypto/fipsmodule/sha1-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/sha256-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/sha512-armv8-apple.S",
"apple-aarch64/crypto/fipsmodule/vpaes-armv8-apple.S",
"apple-aarch64/crypto/test/trampoline-armv8-apple.S",
"apple-x86/crypto/chacha/chacha-x86-apple.S",
"apple-x86/crypto/fipsmodule/aesni-x86-apple.S",
"apple-x86/crypto/fipsmodule/bn-586-apple.S",
"apple-x86/crypto/fipsmodule/co-586-apple.S",
"apple-x86/crypto/fipsmodule/ghash-ssse3-x86-apple.S",
"apple-x86/crypto/fipsmodule/ghash-x86-apple.S",
"apple-x86/crypto/fipsmodule/md5-586-apple.S",
"apple-x86/crypto/fipsmodule/sha1-586-apple.S",
"apple-x86/crypto/fipsmodule/sha256-586-apple.S",
"apple-x86/crypto/fipsmodule/sha512-586-apple.S",
"apple-x86/crypto/fipsmodule/vpaes-x86-apple.S",
"apple-x86/crypto/fipsmodule/x86-mont-apple.S",
"apple-x86/crypto/test/trampoline-x86-apple.S",
"apple-x86_64/crypto/chacha/chacha-x86_64-apple.S",
"apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-apple.S",
"apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/aesni-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/ghash-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/md5-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/p256-x86_64-asm-apple.S",
"apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-apple.S",
"apple-x86_64/crypto/fipsmodule/rdrand-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/rsaz-avx2-apple.S",
"apple-x86_64/crypto/fipsmodule/sha1-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/sha256-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/sha512-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/vpaes-x86_64-apple.S",
"apple-x86_64/crypto/fipsmodule/x86_64-mont-apple.S",
"apple-x86_64/crypto/fipsmodule/x86_64-mont5-apple.S",
"apple-x86_64/crypto/test/trampoline-x86_64-apple.S",
"linux-aarch64/crypto/chacha/chacha-armv8-linux.S",
"linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/aesv8-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/armv8-mont-linux.S",
"linux-aarch64/crypto/fipsmodule/bn-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/ghash-neon-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/ghashv8-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/p256-armv8-asm-linux.S",
"linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-linux.S",
"linux-aarch64/crypto/fipsmodule/sha1-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/sha256-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/sha512-armv8-linux.S",
"linux-aarch64/crypto/fipsmodule/vpaes-armv8-linux.S",
"linux-aarch64/crypto/test/trampoline-armv8-linux.S",
"linux-arm/crypto/chacha/chacha-armv4-linux.S",
"linux-arm/crypto/fipsmodule/aesv8-armv7-linux.S",
"linux-arm/crypto/fipsmodule/armv4-mont-linux.S",
"linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S",
"linux-arm/crypto/fipsmodule/ghash-armv4-linux.S",
"linux-arm/crypto/fipsmodule/ghashv8-armv7-linux.S",
"linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S",
"linux-arm/crypto/fipsmodule/sha256-armv4-linux.S",
"linux-arm/crypto/fipsmodule/sha512-armv4-linux.S",
"linux-arm/crypto/fipsmodule/vpaes-armv7-linux.S",
"linux-arm/crypto/test/trampoline-armv4-linux.S",
"linux-x86/crypto/chacha/chacha-x86-linux.S",
"linux-x86/crypto/fipsmodule/aesni-x86-linux.S",
"linux-x86/crypto/fipsmodule/bn-586-linux.S",
"linux-x86/crypto/fipsmodule/co-586-linux.S",
"linux-x86/crypto/fipsmodule/ghash-ssse3-x86-linux.S",
"linux-x86/crypto/fipsmodule/ghash-x86-linux.S",
"linux-x86/crypto/fipsmodule/md5-586-linux.S",
"linux-x86/crypto/fipsmodule/sha1-586-linux.S",
"linux-x86/crypto/fipsmodule/sha256-586-linux.S",
"linux-x86/crypto/fipsmodule/sha512-586-linux.S",
"linux-x86/crypto/fipsmodule/vpaes-x86-linux.S",
"linux-x86/crypto/fipsmodule/x86-mont-linux.S",
"linux-x86/crypto/test/trampoline-x86-linux.S",
"linux-x86_64/crypto/chacha/chacha-x86_64-linux.S",
"linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-linux.S",
"linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/aesni-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/ghash-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/md5-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/p256-x86_64-asm-linux.S",
"linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-linux.S",
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/rsaz-avx2-linux.S",
"linux-x86_64/crypto/fipsmodule/sha1-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/sha256-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/sha512-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64-linux.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont-linux.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont5-linux.S",
"linux-x86_64/crypto/test/trampoline-x86_64-linux.S",
"src/crypto/curve25519/asm/x25519-asm-arm.S",
"src/crypto/hrss/asm/poly_rq_mul.S",
"src/crypto/poly1305/poly1305_arm_asm.S",
"src/third_party/fiat/asm/fiat_curve25519_adx_mul.S",
"src/third_party/fiat/asm/fiat_curve25519_adx_square.S",
"src/third_party/fiat/asm/fiat_p256_adx_mul.S",
"src/third_party/fiat/asm/fiat_p256_adx_sqr.S",
"win-aarch64/crypto/chacha/chacha-armv8-win.S",
"win-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-win.S",
"win-aarch64/crypto/fipsmodule/aesv8-armv8-win.S",
"win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-win.S",
"win-aarch64/crypto/fipsmodule/armv8-mont-win.S",
"win-aarch64/crypto/fipsmodule/bn-armv8-win.S",
"win-aarch64/crypto/fipsmodule/ghash-neon-armv8-win.S",
"win-aarch64/crypto/fipsmodule/ghashv8-armv8-win.S",
"win-aarch64/crypto/fipsmodule/p256-armv8-asm-win.S",
"win-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-win.S",
"win-aarch64/crypto/fipsmodule/sha1-armv8-win.S",
"win-aarch64/crypto/fipsmodule/sha256-armv8-win.S",
"win-aarch64/crypto/fipsmodule/sha512-armv8-win.S",
"win-aarch64/crypto/fipsmodule/vpaes-armv8-win.S",
"win-aarch64/crypto/test/trampoline-armv8-win.S",
]
crypto_sources_nasm = [
"win-x86/crypto/chacha/chacha-x86-win.asm",
"win-x86/crypto/fipsmodule/aesni-x86-win.asm",
"win-x86/crypto/fipsmodule/bn-586-win.asm",
"win-x86/crypto/fipsmodule/co-586-win.asm",
"win-x86/crypto/fipsmodule/ghash-ssse3-x86-win.asm",
"win-x86/crypto/fipsmodule/ghash-x86-win.asm",
"win-x86/crypto/fipsmodule/md5-586-win.asm",
"win-x86/crypto/fipsmodule/sha1-586-win.asm",
"win-x86/crypto/fipsmodule/sha256-586-win.asm",
"win-x86/crypto/fipsmodule/sha512-586-win.asm",
"win-x86/crypto/fipsmodule/vpaes-x86-win.asm",
"win-x86/crypto/fipsmodule/x86-mont-win.asm",
"win-x86/crypto/test/trampoline-x86-win.asm",
"win-x86_64/crypto/chacha/chacha-x86_64-win.asm",
"win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-win.asm",
"win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/aesni-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/ghash-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/md5-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/p256-x86_64-asm-win.asm",
"win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-win.asm",
"win-x86_64/crypto/fipsmodule/rdrand-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/rsaz-avx2-win.asm",
"win-x86_64/crypto/fipsmodule/sha1-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/sha256-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/sha512-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/vpaes-x86_64-win.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont-win.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont5-win.asm",
"win-x86_64/crypto/test/trampoline-x86_64-win.asm",
]
pki_headers = [
"src/include/openssl/pki/certificate.h",
"src/include/openssl/pki/signature_verify_cache.h",
]
pki_internal_headers = [
"src/pki/cert_error_id.h",
"src/pki/cert_error_params.h",
"src/pki/cert_errors.h",
"src/pki/cert_issuer_source.h",
"src/pki/cert_issuer_source_static.h",
"src/pki/cert_issuer_source_sync_unittest.h",
"src/pki/certificate_policies.h",
"src/pki/common_cert_errors.h",
"src/pki/crl.h",
"src/pki/encode_values.h",
"src/pki/extended_key_usage.h",
"src/pki/general_names.h",
"src/pki/input.h",
"src/pki/ip_util.h",
"src/pki/mock_signature_verify_cache.h",
"src/pki/name_constraints.h",
"src/pki/nist_pkits_unittest.h",
"src/pki/ocsp.h",
"src/pki/ocsp_revocation_status.h",
"src/pki/ocsp_verify_result.h",
"src/pki/parse_certificate.h",
"src/pki/parse_name.h",
"src/pki/parse_values.h",
"src/pki/parsed_certificate.h",
"src/pki/parser.h",
"src/pki/path_builder.h",
"src/pki/pem.h",
"src/pki/revocation_util.h",
"src/pki/signature_algorithm.h",
"src/pki/simple_path_builder_delegate.h",
"src/pki/string_util.h",
"src/pki/test_helpers.h",
"src/pki/testdata/nist-pkits/pkits_testcases-inl.h",
"src/pki/trust_store.h",
"src/pki/trust_store_collection.h",
"src/pki/trust_store_in_memory.h",
"src/pki/verify_certificate_chain.h",
"src/pki/verify_certificate_chain_typed_unittest.h",
"src/pki/verify_name_match.h",
"src/pki/verify_signed_data.h",
]
pki_sources = [
"src/pki/cert_error_id.cc",
"src/pki/cert_error_params.cc",
"src/pki/cert_errors.cc",
"src/pki/cert_issuer_source_static.cc",
"src/pki/certificate.cc",
"src/pki/certificate_policies.cc",
"src/pki/common_cert_errors.cc",
"src/pki/crl.cc",
"src/pki/encode_values.cc",
"src/pki/extended_key_usage.cc",
"src/pki/general_names.cc",
"src/pki/input.cc",
"src/pki/ip_util.cc",
"src/pki/name_constraints.cc",
"src/pki/ocsp.cc",
"src/pki/ocsp_verify_result.cc",
"src/pki/parse_certificate.cc",
"src/pki/parse_name.cc",
"src/pki/parse_values.cc",
"src/pki/parsed_certificate.cc",
"src/pki/parser.cc",
"src/pki/path_builder.cc",
"src/pki/pem.cc",
"src/pki/revocation_util.cc",
"src/pki/signature_algorithm.cc",
"src/pki/simple_path_builder_delegate.cc",
"src/pki/string_util.cc",
"src/pki/trust_store.cc",
"src/pki/trust_store_collection.cc",
"src/pki/trust_store_in_memory.cc",
"src/pki/verify_certificate_chain.cc",
"src/pki/verify_name_match.cc",
"src/pki/verify_signed_data.cc",
]
rust_bssl_sys = [
"src/rust/bssl-sys/src/lib.rs",
]
rust_bssl_crypto = [
"src/rust/bssl-crypto/src/aead.rs",
"src/rust/bssl-crypto/src/aes.rs",
"src/rust/bssl-crypto/src/cipher/aes_cbc.rs",
"src/rust/bssl-crypto/src/cipher/aes_ctr.rs",
"src/rust/bssl-crypto/src/cipher/mod.rs",
"src/rust/bssl-crypto/src/digest.rs",
"src/rust/bssl-crypto/src/ec.rs",
"src/rust/bssl-crypto/src/ecdh.rs",
"src/rust/bssl-crypto/src/ecdsa.rs",
"src/rust/bssl-crypto/src/ed25519.rs",
"src/rust/bssl-crypto/src/hkdf.rs",
"src/rust/bssl-crypto/src/hmac.rs",
"src/rust/bssl-crypto/src/hpke.rs",
"src/rust/bssl-crypto/src/lib.rs",
"src/rust/bssl-crypto/src/macros.rs",
"src/rust/bssl-crypto/src/mem.rs",
"src/rust/bssl-crypto/src/rand.rs",
"src/rust/bssl-crypto/src/rsa.rs",
"src/rust/bssl-crypto/src/scoped.rs",
"src/rust/bssl-crypto/src/test_helpers.rs",
"src/rust/bssl-crypto/src/x25519.rs",
]
tool_sources = [
@ -484,7 +808,9 @@ tool_sources = [
"src/tool/client.cc",
"src/tool/const.cc",
"src/tool/digest.cc",
"src/tool/fd.cc",
"src/tool/file.cc",
"src/tool/generate_ech.cc",
"src/tool/generate_ed25519.cc",
"src/tool/genrsa.cc",
"src/tool/pkcs12.cc",
@ -500,180 +826,3 @@ tool_headers = [
"src/tool/internal.h",
"src/tool/transport_common.h",
]
crypto_sources_ios_aarch64 = [
"ios-aarch64/crypto/chacha/chacha-armv8.S",
"ios-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/armv8-mont.S",
"ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
"ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/sha1-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha256-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha512-armv8.S",
"ios-aarch64/crypto/fipsmodule/vpaes-armv8.S",
"ios-aarch64/crypto/test/trampoline-armv8.S",
]
crypto_sources_ios_arm = [
"ios-arm/crypto/chacha/chacha-armv4.S",
"ios-arm/crypto/fipsmodule/aesv8-armx32.S",
"ios-arm/crypto/fipsmodule/armv4-mont.S",
"ios-arm/crypto/fipsmodule/bsaes-armv7.S",
"ios-arm/crypto/fipsmodule/ghash-armv4.S",
"ios-arm/crypto/fipsmodule/ghashv8-armx32.S",
"ios-arm/crypto/fipsmodule/sha1-armv4-large.S",
"ios-arm/crypto/fipsmodule/sha256-armv4.S",
"ios-arm/crypto/fipsmodule/sha512-armv4.S",
"ios-arm/crypto/fipsmodule/vpaes-armv7.S",
"ios-arm/crypto/test/trampoline-armv4.S",
]
crypto_sources_linux_aarch64 = [
"linux-aarch64/crypto/chacha/chacha-armv8.S",
"linux-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"linux-aarch64/crypto/fipsmodule/armv8-mont.S",
"linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
"linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"linux-aarch64/crypto/fipsmodule/sha1-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha256-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha512-armv8.S",
"linux-aarch64/crypto/fipsmodule/vpaes-armv8.S",
"linux-aarch64/crypto/test/trampoline-armv8.S",
]
crypto_sources_linux_arm = [
"linux-arm/crypto/chacha/chacha-armv4.S",
"linux-arm/crypto/fipsmodule/aesv8-armx32.S",
"linux-arm/crypto/fipsmodule/armv4-mont.S",
"linux-arm/crypto/fipsmodule/bsaes-armv7.S",
"linux-arm/crypto/fipsmodule/ghash-armv4.S",
"linux-arm/crypto/fipsmodule/ghashv8-armx32.S",
"linux-arm/crypto/fipsmodule/sha1-armv4-large.S",
"linux-arm/crypto/fipsmodule/sha256-armv4.S",
"linux-arm/crypto/fipsmodule/sha512-armv4.S",
"linux-arm/crypto/fipsmodule/vpaes-armv7.S",
"linux-arm/crypto/test/trampoline-armv4.S",
"src/crypto/curve25519/asm/x25519-asm-arm.S",
"src/crypto/poly1305/poly1305_arm_asm.S",
]
crypto_sources_linux_ppc64le = [
"linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S",
"linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S",
"linux-ppc64le/crypto/test/trampoline-ppc.S",
]
crypto_sources_linux_x86 = [
"linux-x86/crypto/chacha/chacha-x86.S",
"linux-x86/crypto/fipsmodule/aesni-x86.S",
"linux-x86/crypto/fipsmodule/bn-586.S",
"linux-x86/crypto/fipsmodule/co-586.S",
"linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
"linux-x86/crypto/fipsmodule/ghash-x86.S",
"linux-x86/crypto/fipsmodule/md5-586.S",
"linux-x86/crypto/fipsmodule/sha1-586.S",
"linux-x86/crypto/fipsmodule/sha256-586.S",
"linux-x86/crypto/fipsmodule/sha512-586.S",
"linux-x86/crypto/fipsmodule/vpaes-x86.S",
"linux-x86/crypto/fipsmodule/x86-mont.S",
"linux-x86/crypto/test/trampoline-x86.S",
]
crypto_sources_linux_x86_64 = [
"linux-x86_64/crypto/chacha/chacha-x86_64.S",
"linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
"linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
"linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"linux-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"linux-x86_64/crypto/fipsmodule/sha1-x86_64.S",
"linux-x86_64/crypto/fipsmodule/sha256-x86_64.S",
"linux-x86_64/crypto/fipsmodule/sha512-x86_64.S",
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"linux-x86_64/crypto/test/trampoline-x86_64.S",
"src/crypto/hrss/asm/poly_rq_mul.S",
]
crypto_sources_mac_x86 = [
"mac-x86/crypto/chacha/chacha-x86.S",
"mac-x86/crypto/fipsmodule/aesni-x86.S",
"mac-x86/crypto/fipsmodule/bn-586.S",
"mac-x86/crypto/fipsmodule/co-586.S",
"mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
"mac-x86/crypto/fipsmodule/ghash-x86.S",
"mac-x86/crypto/fipsmodule/md5-586.S",
"mac-x86/crypto/fipsmodule/sha1-586.S",
"mac-x86/crypto/fipsmodule/sha256-586.S",
"mac-x86/crypto/fipsmodule/sha512-586.S",
"mac-x86/crypto/fipsmodule/vpaes-x86.S",
"mac-x86/crypto/fipsmodule/x86-mont.S",
"mac-x86/crypto/test/trampoline-x86.S",
]
crypto_sources_mac_x86_64 = [
"mac-x86_64/crypto/chacha/chacha-x86_64.S",
"mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
"mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
"mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"mac-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"mac-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"mac-x86_64/crypto/fipsmodule/md5-x86_64.S",
"mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"mac-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"mac-x86_64/crypto/fipsmodule/sha1-x86_64.S",
"mac-x86_64/crypto/fipsmodule/sha256-x86_64.S",
"mac-x86_64/crypto/fipsmodule/sha512-x86_64.S",
"mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"mac-x86_64/crypto/test/trampoline-x86_64.S",
]
crypto_sources_win_x86 = [
"win-x86/crypto/chacha/chacha-x86.asm",
"win-x86/crypto/fipsmodule/aesni-x86.asm",
"win-x86/crypto/fipsmodule/bn-586.asm",
"win-x86/crypto/fipsmodule/co-586.asm",
"win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm",
"win-x86/crypto/fipsmodule/ghash-x86.asm",
"win-x86/crypto/fipsmodule/md5-586.asm",
"win-x86/crypto/fipsmodule/sha1-586.asm",
"win-x86/crypto/fipsmodule/sha256-586.asm",
"win-x86/crypto/fipsmodule/sha512-586.asm",
"win-x86/crypto/fipsmodule/vpaes-x86.asm",
"win-x86/crypto/fipsmodule/x86-mont.asm",
"win-x86/crypto/test/trampoline-x86.asm",
]
crypto_sources_win_x86_64 = [
"win-x86_64/crypto/chacha/chacha-x86_64.asm",
"win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm",
"win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm",
"win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm",
"win-x86_64/crypto/fipsmodule/aesni-x86_64.asm",
"win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm",
"win-x86_64/crypto/fipsmodule/ghash-x86_64.asm",
"win-x86_64/crypto/fipsmodule/md5-x86_64.asm",
"win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm",
"win-x86_64/crypto/fipsmodule/rsaz-avx2.asm",
"win-x86_64/crypto/fipsmodule/sha1-x86_64.asm",
"win-x86_64/crypto/fipsmodule/sha256-x86_64.asm",
"win-x86_64/crypto/fipsmodule/sha512-x86_64.asm",
"win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont5.asm",
"win-x86_64/crypto/test/trampoline-x86_64.asm",
]

File diff suppressed because it is too large.

third-party/boringssl/CMakeLists.txt (new vendored file, 576 lines)

@ -0,0 +1,576 @@
# Copyright (c) 2015, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# This file is created by generate_build_files.py. Do not edit manually.
cmake_minimum_required(VERSION 3.12)
project(BoringSSL LANGUAGES C CXX)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common")
endif()
# pthread_rwlock_t requires a feature flag on glibc.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
endif()
if(WIN32)
add_definitions(-D_HAS_EXCEPTIONS=0)
add_definitions(-DWIN32_LEAN_AND_MEAN)
add_definitions(-DNOMINMAX)
# Allow use of fopen.
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()
add_definitions(-DBORINGSSL_IMPLEMENTATION)
if(OPENSSL_NO_ASM)
add_definitions(-DOPENSSL_NO_ASM)
else()
# On x86 and x86_64 Windows, we use the NASM output.
if(WIN32 AND CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|x86_64|amd64|x86|i[3-6]86")
enable_language(ASM_NASM)
set(OPENSSL_NASM TRUE)
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
else()
enable_language(ASM)
set(OPENSSL_ASM TRUE)
# Work around https://gitlab.kitware.com/cmake/cmake/-/issues/20771 in older
# CMake versions.
if(APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
if(CMAKE_OSX_SYSROOT)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
endif()
foreach(arch ${CMAKE_OSX_ARCHITECTURES})
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
endforeach()
endif()
if(NOT WIN32)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,--noexecstack")
endif()
# Clang's integrated assembler does not support debug symbols.
if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
endif()
endif()
endif()
if(BUILD_SHARED_LIBS)
add_definitions(-DBORINGSSL_SHARED_LIBRARY)
# Enable position-independent code globally. This is needed because
# some library targets are OBJECT libraries.
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
endif()
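# Illustrative configure commands (not part of the generated file; the source
# path is a placeholder). OPENSSL_NO_ASM and BUILD_SHARED_LIBS are the ordinary
# cache variables consulted above:
#   cmake path/to/boringssl                        # default: assembly enabled
#   cmake -DOPENSSL_NO_ASM=1 path/to/boringssl     # force the C fallbacks
#   cmake -DBUILD_SHARED_LIBS=ON path/to/boringssl # shared libs, PIC everywhere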
set(
CRYPTO_SOURCES_ASM
apple-aarch64/crypto/chacha/chacha-armv8-apple.S
apple-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-apple.S
apple-aarch64/crypto/fipsmodule/aesv8-armv8-apple.S
apple-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-apple.S
apple-aarch64/crypto/fipsmodule/armv8-mont-apple.S
apple-aarch64/crypto/fipsmodule/bn-armv8-apple.S
apple-aarch64/crypto/fipsmodule/ghash-neon-armv8-apple.S
apple-aarch64/crypto/fipsmodule/ghashv8-armv8-apple.S
apple-aarch64/crypto/fipsmodule/p256-armv8-asm-apple.S
apple-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-apple.S
apple-aarch64/crypto/fipsmodule/sha1-armv8-apple.S
apple-aarch64/crypto/fipsmodule/sha256-armv8-apple.S
apple-aarch64/crypto/fipsmodule/sha512-armv8-apple.S
apple-aarch64/crypto/fipsmodule/vpaes-armv8-apple.S
apple-aarch64/crypto/test/trampoline-armv8-apple.S
apple-x86/crypto/chacha/chacha-x86-apple.S
apple-x86/crypto/fipsmodule/aesni-x86-apple.S
apple-x86/crypto/fipsmodule/bn-586-apple.S
apple-x86/crypto/fipsmodule/co-586-apple.S
apple-x86/crypto/fipsmodule/ghash-ssse3-x86-apple.S
apple-x86/crypto/fipsmodule/ghash-x86-apple.S
apple-x86/crypto/fipsmodule/md5-586-apple.S
apple-x86/crypto/fipsmodule/sha1-586-apple.S
apple-x86/crypto/fipsmodule/sha256-586-apple.S
apple-x86/crypto/fipsmodule/sha512-586-apple.S
apple-x86/crypto/fipsmodule/vpaes-x86-apple.S
apple-x86/crypto/fipsmodule/x86-mont-apple.S
apple-x86/crypto/test/trampoline-x86-apple.S
apple-x86_64/crypto/chacha/chacha-x86_64-apple.S
apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-apple.S
apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-apple.S
apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/aesni-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/ghash-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/md5-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/p256-x86_64-asm-apple.S
apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-apple.S
apple-x86_64/crypto/fipsmodule/rdrand-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/rsaz-avx2-apple.S
apple-x86_64/crypto/fipsmodule/sha1-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/sha256-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/sha512-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/vpaes-x86_64-apple.S
apple-x86_64/crypto/fipsmodule/x86_64-mont-apple.S
apple-x86_64/crypto/fipsmodule/x86_64-mont5-apple.S
apple-x86_64/crypto/test/trampoline-x86_64-apple.S
linux-aarch64/crypto/chacha/chacha-armv8-linux.S
linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-linux.S
linux-aarch64/crypto/fipsmodule/aesv8-armv8-linux.S
linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-linux.S
linux-aarch64/crypto/fipsmodule/armv8-mont-linux.S
linux-aarch64/crypto/fipsmodule/bn-armv8-linux.S
linux-aarch64/crypto/fipsmodule/ghash-neon-armv8-linux.S
linux-aarch64/crypto/fipsmodule/ghashv8-armv8-linux.S
linux-aarch64/crypto/fipsmodule/p256-armv8-asm-linux.S
linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-linux.S
linux-aarch64/crypto/fipsmodule/sha1-armv8-linux.S
linux-aarch64/crypto/fipsmodule/sha256-armv8-linux.S
linux-aarch64/crypto/fipsmodule/sha512-armv8-linux.S
linux-aarch64/crypto/fipsmodule/vpaes-armv8-linux.S
linux-aarch64/crypto/test/trampoline-armv8-linux.S
linux-arm/crypto/chacha/chacha-armv4-linux.S
linux-arm/crypto/fipsmodule/aesv8-armv7-linux.S
linux-arm/crypto/fipsmodule/armv4-mont-linux.S
linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S
linux-arm/crypto/fipsmodule/ghash-armv4-linux.S
linux-arm/crypto/fipsmodule/ghashv8-armv7-linux.S
linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S
linux-arm/crypto/fipsmodule/sha256-armv4-linux.S
linux-arm/crypto/fipsmodule/sha512-armv4-linux.S
linux-arm/crypto/fipsmodule/vpaes-armv7-linux.S
linux-arm/crypto/test/trampoline-armv4-linux.S
linux-x86/crypto/chacha/chacha-x86-linux.S
linux-x86/crypto/fipsmodule/aesni-x86-linux.S
linux-x86/crypto/fipsmodule/bn-586-linux.S
linux-x86/crypto/fipsmodule/co-586-linux.S
linux-x86/crypto/fipsmodule/ghash-ssse3-x86-linux.S
linux-x86/crypto/fipsmodule/ghash-x86-linux.S
linux-x86/crypto/fipsmodule/md5-586-linux.S
linux-x86/crypto/fipsmodule/sha1-586-linux.S
linux-x86/crypto/fipsmodule/sha256-586-linux.S
linux-x86/crypto/fipsmodule/sha512-586-linux.S
linux-x86/crypto/fipsmodule/vpaes-x86-linux.S
linux-x86/crypto/fipsmodule/x86-mont-linux.S
linux-x86/crypto/test/trampoline-x86-linux.S
linux-x86_64/crypto/chacha/chacha-x86_64-linux.S
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-linux.S
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-linux.S
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/aesni-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/ghash-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/md5-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/p256-x86_64-asm-linux.S
linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-linux.S
linux-x86_64/crypto/fipsmodule/rdrand-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/rsaz-avx2-linux.S
linux-x86_64/crypto/fipsmodule/sha1-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/sha256-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/sha512-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/vpaes-x86_64-linux.S
linux-x86_64/crypto/fipsmodule/x86_64-mont-linux.S
linux-x86_64/crypto/fipsmodule/x86_64-mont5-linux.S
linux-x86_64/crypto/test/trampoline-x86_64-linux.S
src/crypto/curve25519/asm/x25519-asm-arm.S
src/crypto/hrss/asm/poly_rq_mul.S
src/crypto/poly1305/poly1305_arm_asm.S
src/third_party/fiat/asm/fiat_curve25519_adx_mul.S
src/third_party/fiat/asm/fiat_curve25519_adx_square.S
src/third_party/fiat/asm/fiat_p256_adx_mul.S
src/third_party/fiat/asm/fiat_p256_adx_sqr.S
win-aarch64/crypto/chacha/chacha-armv8-win.S
win-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8-win.S
win-aarch64/crypto/fipsmodule/aesv8-armv8-win.S
win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-win.S
win-aarch64/crypto/fipsmodule/armv8-mont-win.S
win-aarch64/crypto/fipsmodule/bn-armv8-win.S
win-aarch64/crypto/fipsmodule/ghash-neon-armv8-win.S
win-aarch64/crypto/fipsmodule/ghashv8-armv8-win.S
win-aarch64/crypto/fipsmodule/p256-armv8-asm-win.S
win-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm-win.S
win-aarch64/crypto/fipsmodule/sha1-armv8-win.S
win-aarch64/crypto/fipsmodule/sha256-armv8-win.S
win-aarch64/crypto/fipsmodule/sha512-armv8-win.S
win-aarch64/crypto/fipsmodule/vpaes-armv8-win.S
win-aarch64/crypto/test/trampoline-armv8-win.S
)
set(
CRYPTO_SOURCES_NASM
win-x86/crypto/chacha/chacha-x86-win.asm
win-x86/crypto/fipsmodule/aesni-x86-win.asm
win-x86/crypto/fipsmodule/bn-586-win.asm
win-x86/crypto/fipsmodule/co-586-win.asm
win-x86/crypto/fipsmodule/ghash-ssse3-x86-win.asm
win-x86/crypto/fipsmodule/ghash-x86-win.asm
win-x86/crypto/fipsmodule/md5-586-win.asm
win-x86/crypto/fipsmodule/sha1-586-win.asm
win-x86/crypto/fipsmodule/sha256-586-win.asm
win-x86/crypto/fipsmodule/sha512-586-win.asm
win-x86/crypto/fipsmodule/vpaes-x86-win.asm
win-x86/crypto/fipsmodule/x86-mont-win.asm
win-x86/crypto/test/trampoline-x86-win.asm
win-x86_64/crypto/chacha/chacha-x86_64-win.asm
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64-win.asm
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64-win.asm
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64-win.asm
win-x86_64/crypto/fipsmodule/aesni-x86_64-win.asm
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64-win.asm
win-x86_64/crypto/fipsmodule/ghash-x86_64-win.asm
win-x86_64/crypto/fipsmodule/md5-x86_64-win.asm
win-x86_64/crypto/fipsmodule/p256-x86_64-asm-win.asm
win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm-win.asm
win-x86_64/crypto/fipsmodule/rdrand-x86_64-win.asm
win-x86_64/crypto/fipsmodule/rsaz-avx2-win.asm
win-x86_64/crypto/fipsmodule/sha1-x86_64-win.asm
win-x86_64/crypto/fipsmodule/sha256-x86_64-win.asm
win-x86_64/crypto/fipsmodule/sha512-x86_64-win.asm
win-x86_64/crypto/fipsmodule/vpaes-x86_64-win.asm
win-x86_64/crypto/fipsmodule/x86_64-mont-win.asm
win-x86_64/crypto/fipsmodule/x86_64-mont5-win.asm
win-x86_64/crypto/test/trampoline-x86_64-win.asm
)
if(OPENSSL_ASM)
list(APPEND CRYPTO_SOURCES_ASM_USED ${CRYPTO_SOURCES_ASM})
endif()
if(OPENSSL_NASM)
list(APPEND CRYPTO_SOURCES_ASM_USED ${CRYPTO_SOURCES_NASM})
endif()
add_library(
crypto
${CRYPTO_SOURCES_ASM_USED}
err_data.c
src/crypto/asn1/a_bitstr.c
src/crypto/asn1/a_bool.c
src/crypto/asn1/a_d2i_fp.c
src/crypto/asn1/a_dup.c
src/crypto/asn1/a_gentm.c
src/crypto/asn1/a_i2d_fp.c
src/crypto/asn1/a_int.c
src/crypto/asn1/a_mbstr.c
src/crypto/asn1/a_object.c
src/crypto/asn1/a_octet.c
src/crypto/asn1/a_strex.c
src/crypto/asn1/a_strnid.c
src/crypto/asn1/a_time.c
src/crypto/asn1/a_type.c
src/crypto/asn1/a_utctm.c
src/crypto/asn1/asn1_lib.c
src/crypto/asn1/asn1_par.c
src/crypto/asn1/asn_pack.c
src/crypto/asn1/f_int.c
src/crypto/asn1/f_string.c
src/crypto/asn1/posix_time.c
src/crypto/asn1/tasn_dec.c
src/crypto/asn1/tasn_enc.c
src/crypto/asn1/tasn_fre.c
src/crypto/asn1/tasn_new.c
src/crypto/asn1/tasn_typ.c
src/crypto/asn1/tasn_utl.c
src/crypto/base64/base64.c
src/crypto/bio/bio.c
src/crypto/bio/bio_mem.c
src/crypto/bio/connect.c
src/crypto/bio/errno.c
src/crypto/bio/fd.c
src/crypto/bio/file.c
src/crypto/bio/hexdump.c
src/crypto/bio/pair.c
src/crypto/bio/printf.c
src/crypto/bio/socket.c
src/crypto/bio/socket_helper.c
src/crypto/blake2/blake2.c
src/crypto/bn_extra/bn_asn1.c
src/crypto/bn_extra/convert.c
src/crypto/buf/buf.c
src/crypto/bytestring/asn1_compat.c
src/crypto/bytestring/ber.c
src/crypto/bytestring/cbb.c
src/crypto/bytestring/cbs.c
src/crypto/bytestring/unicode.c
src/crypto/chacha/chacha.c
src/crypto/cipher_extra/cipher_extra.c
src/crypto/cipher_extra/derive_key.c
src/crypto/cipher_extra/e_aesctrhmac.c
src/crypto/cipher_extra/e_aesgcmsiv.c
src/crypto/cipher_extra/e_chacha20poly1305.c
src/crypto/cipher_extra/e_des.c
src/crypto/cipher_extra/e_null.c
src/crypto/cipher_extra/e_rc2.c
src/crypto/cipher_extra/e_rc4.c
src/crypto/cipher_extra/e_tls.c
src/crypto/cipher_extra/tls_cbc.c
src/crypto/conf/conf.c
src/crypto/cpu_aarch64_apple.c
src/crypto/cpu_aarch64_fuchsia.c
src/crypto/cpu_aarch64_linux.c
src/crypto/cpu_aarch64_openbsd.c
src/crypto/cpu_aarch64_sysreg.c
src/crypto/cpu_aarch64_win.c
src/crypto/cpu_arm_freebsd.c
src/crypto/cpu_arm_linux.c
src/crypto/cpu_intel.c
src/crypto/crypto.c
src/crypto/curve25519/curve25519.c
src/crypto/curve25519/curve25519_64_adx.c
src/crypto/curve25519/spake25519.c
src/crypto/des/des.c
src/crypto/dh_extra/dh_asn1.c
src/crypto/dh_extra/params.c
src/crypto/digest_extra/digest_extra.c
src/crypto/dilithium/dilithium.c
src/crypto/dsa/dsa.c
src/crypto/dsa/dsa_asn1.c
src/crypto/ec_extra/ec_asn1.c
src/crypto/ec_extra/ec_derive.c
src/crypto/ec_extra/hash_to_curve.c
src/crypto/ecdh_extra/ecdh_extra.c
src/crypto/ecdsa_extra/ecdsa_asn1.c
src/crypto/engine/engine.c
src/crypto/err/err.c
src/crypto/evp/evp.c
src/crypto/evp/evp_asn1.c
src/crypto/evp/evp_ctx.c
src/crypto/evp/p_dsa_asn1.c
src/crypto/evp/p_ec.c
src/crypto/evp/p_ec_asn1.c
src/crypto/evp/p_ed25519.c
src/crypto/evp/p_ed25519_asn1.c
src/crypto/evp/p_hkdf.c
src/crypto/evp/p_rsa.c
src/crypto/evp/p_rsa_asn1.c
src/crypto/evp/p_x25519.c
src/crypto/evp/p_x25519_asn1.c
src/crypto/evp/pbkdf.c
src/crypto/evp/print.c
src/crypto/evp/scrypt.c
src/crypto/evp/sign.c
src/crypto/ex_data.c
src/crypto/fipsmodule/bcm.c
src/crypto/fipsmodule/fips_shared_support.c
src/crypto/hpke/hpke.c
src/crypto/hrss/hrss.c
src/crypto/keccak/keccak.c
src/crypto/kyber/kyber.c
src/crypto/lhash/lhash.c
src/crypto/mem.c
src/crypto/obj/obj.c
src/crypto/obj/obj_xref.c
src/crypto/pem/pem_all.c
src/crypto/pem/pem_info.c
src/crypto/pem/pem_lib.c
src/crypto/pem/pem_oth.c
src/crypto/pem/pem_pk8.c
src/crypto/pem/pem_pkey.c
src/crypto/pem/pem_x509.c
src/crypto/pem/pem_xaux.c
src/crypto/pkcs7/pkcs7.c
src/crypto/pkcs7/pkcs7_x509.c
src/crypto/pkcs8/p5_pbev2.c
src/crypto/pkcs8/pkcs8.c
src/crypto/pkcs8/pkcs8_x509.c
src/crypto/poly1305/poly1305.c
src/crypto/poly1305/poly1305_arm.c
src/crypto/poly1305/poly1305_vec.c
src/crypto/pool/pool.c
src/crypto/rand_extra/deterministic.c
src/crypto/rand_extra/forkunsafe.c
src/crypto/rand_extra/getentropy.c
src/crypto/rand_extra/ios.c
src/crypto/rand_extra/passive.c
src/crypto/rand_extra/rand_extra.c
src/crypto/rand_extra/trusty.c
src/crypto/rand_extra/windows.c
src/crypto/rc4/rc4.c
src/crypto/refcount.c
src/crypto/rsa_extra/rsa_asn1.c
src/crypto/rsa_extra/rsa_crypt.c
src/crypto/rsa_extra/rsa_print.c
src/crypto/siphash/siphash.c
src/crypto/spx/address.c
src/crypto/spx/fors.c
src/crypto/spx/merkle.c
src/crypto/spx/spx.c
src/crypto/spx/spx_util.c
src/crypto/spx/thash.c
src/crypto/spx/wots.c
src/crypto/stack/stack.c
src/crypto/thread.c
src/crypto/thread_none.c
src/crypto/thread_pthread.c
src/crypto/thread_win.c
src/crypto/trust_token/pmbtoken.c
src/crypto/trust_token/trust_token.c
src/crypto/trust_token/voprf.c
src/crypto/x509/a_digest.c
src/crypto/x509/a_sign.c
src/crypto/x509/a_verify.c
src/crypto/x509/algorithm.c
src/crypto/x509/asn1_gen.c
src/crypto/x509/by_dir.c
src/crypto/x509/by_file.c
src/crypto/x509/i2d_pr.c
src/crypto/x509/name_print.c
src/crypto/x509/policy.c
src/crypto/x509/rsa_pss.c
src/crypto/x509/t_crl.c
src/crypto/x509/t_req.c
src/crypto/x509/t_x509.c
src/crypto/x509/t_x509a.c
src/crypto/x509/v3_akey.c
src/crypto/x509/v3_akeya.c
src/crypto/x509/v3_alt.c
src/crypto/x509/v3_bcons.c
src/crypto/x509/v3_bitst.c
src/crypto/x509/v3_conf.c
src/crypto/x509/v3_cpols.c
src/crypto/x509/v3_crld.c
src/crypto/x509/v3_enum.c
src/crypto/x509/v3_extku.c
src/crypto/x509/v3_genn.c
src/crypto/x509/v3_ia5.c
src/crypto/x509/v3_info.c
src/crypto/x509/v3_int.c
src/crypto/x509/v3_lib.c
src/crypto/x509/v3_ncons.c
src/crypto/x509/v3_ocsp.c
src/crypto/x509/v3_pcons.c
src/crypto/x509/v3_pmaps.c
src/crypto/x509/v3_prn.c
src/crypto/x509/v3_purp.c
src/crypto/x509/v3_skey.c
src/crypto/x509/v3_utl.c
src/crypto/x509/x509.c
src/crypto/x509/x509_att.c
src/crypto/x509/x509_cmp.c
src/crypto/x509/x509_d2.c
src/crypto/x509/x509_def.c
src/crypto/x509/x509_ext.c
src/crypto/x509/x509_lu.c
src/crypto/x509/x509_obj.c
src/crypto/x509/x509_req.c
src/crypto/x509/x509_set.c
src/crypto/x509/x509_trs.c
src/crypto/x509/x509_txt.c
src/crypto/x509/x509_v3.c
src/crypto/x509/x509_vfy.c
src/crypto/x509/x509_vpm.c
src/crypto/x509/x509cset.c
src/crypto/x509/x509name.c
src/crypto/x509/x509rset.c
src/crypto/x509/x509spki.c
src/crypto/x509/x_algor.c
src/crypto/x509/x_all.c
src/crypto/x509/x_attrib.c
src/crypto/x509/x_crl.c
src/crypto/x509/x_exten.c
src/crypto/x509/x_name.c
src/crypto/x509/x_pubkey.c
src/crypto/x509/x_req.c
src/crypto/x509/x_sig.c
src/crypto/x509/x_spki.c
src/crypto/x509/x_val.c
src/crypto/x509/x_x509.c
src/crypto/x509/x_x509a.c
)
target_include_directories(crypto PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/include>)
add_library(
ssl
src/ssl/bio_ssl.cc
src/ssl/d1_both.cc
src/ssl/d1_lib.cc
src/ssl/d1_pkt.cc
src/ssl/d1_srtp.cc
src/ssl/dtls_method.cc
src/ssl/dtls_record.cc
src/ssl/encrypted_client_hello.cc
src/ssl/extensions.cc
src/ssl/handoff.cc
src/ssl/handshake.cc
src/ssl/handshake_client.cc
src/ssl/handshake_server.cc
src/ssl/s3_both.cc
src/ssl/s3_lib.cc
src/ssl/s3_pkt.cc
src/ssl/ssl_aead_ctx.cc
src/ssl/ssl_asn1.cc
src/ssl/ssl_buffer.cc
src/ssl/ssl_cert.cc
src/ssl/ssl_cipher.cc
src/ssl/ssl_file.cc
src/ssl/ssl_key_share.cc
src/ssl/ssl_lib.cc
src/ssl/ssl_privkey.cc
src/ssl/ssl_session.cc
src/ssl/ssl_stat.cc
src/ssl/ssl_transcript.cc
src/ssl/ssl_versions.cc
src/ssl/ssl_x509.cc
src/ssl/t1_enc.cc
src/ssl/tls13_both.cc
src/ssl/tls13_client.cc
src/ssl/tls13_enc.cc
src/ssl/tls13_server.cc
src/ssl/tls_method.cc
src/ssl/tls_record.cc
)
target_link_libraries(ssl crypto)
add_executable(
bssl
src/tool/args.cc
src/tool/ciphers.cc
src/tool/client.cc
src/tool/const.cc
src/tool/digest.cc
src/tool/fd.cc
src/tool/file.cc
src/tool/generate_ech.cc
src/tool/generate_ed25519.cc
src/tool/genrsa.cc
src/tool/pkcs12.cc
src/tool/rand.cc
src/tool/server.cc
src/tool/sign.cc
src/tool/speed.cc
src/tool/tool.cc
src/tool/transport_common.cc
)
target_link_libraries(bssl ssl crypto)
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
find_package(Threads REQUIRED)
target_link_libraries(crypto Threads::Threads)
endif()
if(WIN32)
target_link_libraries(crypto ws2_32)
endif()

View File

@ -21,6 +21,7 @@ record keeping.)
27287199
27287880
27287883
263291445
OpenSSL License
---------------

1
third-party/boringssl/WORKSPACE vendored Normal file
View File

@ -0,0 +1 @@
workspace(name = "boringssl")

View File

@ -1,21 +1,11 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
.section __TEXT,__const
.align 5
@ -28,24 +18,12 @@ Lone:
.text
.globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32
.globl _ChaCha20_ctr32_nohw
.private_extern _ChaCha20_ctr32_nohw
.align 5
_ChaCha20_ctr32:
cbz x2,Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
#else
adrp x5,_OPENSSL_armcap_P@PAGE
#endif
cmp x2,#192
b.lo Lshort
ldr w17,[x5,_OPENSSL_armcap_P@PAGEOFF]
tst w17,#ARMV7_NEON
b.ne ChaCha20_neon
Lshort:
_ChaCha20_ctr32_nohw:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -62,7 +40,7 @@ Lshort:
ldp x24,x25,[x3] // load key
ldp x26,x27,[x3,#16]
ldp x28,x30,[x4] // load counter
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x24,x24,#32
ror x25,x25,#32
ror x26,x26,#32
@ -223,7 +201,7 @@ Loop:
add x20,x20,x21,lsl#32
ldp x19,x21,[x1,#48]
add x1,x1,#64
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev x5,x5
rev x7,x7
rev x9,x9
@ -258,7 +236,7 @@ Loop:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
Labort:
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 4
@ -279,7 +257,7 @@ Less_than_64:
add x15,x15,x16,lsl#32
add x17,x17,x19,lsl#32
add x20,x20,x21,lsl#32
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev x5,x5
rev x7,x7
rev x9,x9
@ -314,12 +292,16 @@ Loop_tail:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _ChaCha20_ctr32_neon
.private_extern _ChaCha20_ctr32_neon
.align 5
ChaCha20_neon:
_ChaCha20_ctr32_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -343,7 +325,7 @@ ChaCha20_neon:
ldp x28,x30,[x4] // load counter
ld1 {v27.4s},[x4]
ld1 {v31.4s},[x5]
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev64 v24.4s,v24.4s
ror x24,x24,#32
ror x25,x25,#32
@ -641,7 +623,7 @@ Loop_neon:
add x20,x20,x21,lsl#32
ldp x19,x21,[x1,#48]
add x1,x1,#64
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev x5,x5
rev x7,x7
rev x9,x9
@ -700,6 +682,7 @@ Loop_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
Ltail_neon:
@ -720,7 +703,7 @@ Ltail_neon:
add x20,x20,x21,lsl#32
ldp x19,x21,[x1,#48]
add x1,x1,#64
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev x5,x5
rev x7,x7
rev x9,x9
@ -809,11 +792,13 @@ Ldone_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -836,7 +821,7 @@ L512_or_more_neon:
ldp x28,x30,[x4] // load counter
ld1 {v27.4s},[x4]
ld1 {v31.4s},[x5]
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev64 v24.4s,v24.4s
ror x24,x24,#32
ror x25,x25,#32
@ -1349,7 +1334,7 @@ Loop_upper_neon:
add x20,x20,x21,lsl#32
ldp x19,x21,[x1,#48]
add x1,x1,#64
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev x5,x5
rev x7,x7
rev x9,x9
@ -1863,7 +1848,7 @@ Loop_lower_neon:
add x1,x1,#64
add v21.4s,v21.4s,v25.4s
#ifdef __ARMEB__
#ifdef __AARCH64EB__
rev x5,x5
rev x7,x7
rev x9,x9
@ -1977,6 +1962,7 @@ Ldone_512_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

File diff suppressed because it is too large

View File

@ -1,17 +1,9 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
@ -32,6 +24,8 @@ Lrcon:
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@ -200,6 +194,7 @@ Lenc_key_abort:
.align 5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
@ -233,6 +228,7 @@ Loop_imc:
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _aes_hw_encrypt
@ -240,6 +236,7 @@ Ldec_key_abort:
.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -270,6 +267,7 @@ Loop_enc:
.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -300,6 +298,8 @@ Loop_dec:
.align 5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@ -591,6 +591,8 @@ Lcbc_abort:
.align 5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
@ -610,20 +612,34 @@ _aes_hw_ctr32_encrypt_blocks:
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
// respectively, where the second instruction of an aese/aesmc
// instruction pair may execute twice if an interrupt is taken right
// after the first instruction consumes an input register of which a
// single 32-bit lane has been updated the last time it was modified.
//
// This function uses a counter in one 32-bit lane. The vmov lines
// could write to v1.16b and v18.16b directly, but that trips these bugs.
// We write to v6.16b and copy to the final register as a workaround.
//
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
mov v6.s[3],w10
add w8, w8, #2
orr v1.16b,v6.16b,v6.16b
b.ls Lctr32_tail
rev w12, w8
mov v6.s[3],w12
sub x2,x2,#3 // bias
mov v18.s[3],w12
orr v18.16b,v6.16b,v6.16b
b Loop3x_ctr32
.align 4
@ -650,11 +666,11 @@ Loop3x_ctr32:
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
add w9,w8,#1
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
rev w9,w9
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
@ -663,8 +679,6 @@ Loop3x_ctr32:
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
@ -679,21 +693,26 @@ Loop3x_ctr32:
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
// 32-bit mode. See the comment above.
eor v19.16b,v19.16b,v7.16b
rev w9,w9
mov v6.s[3], w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
orr v0.16b,v6.16b,v6.16b
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
mov v6.s[3], w10
rev w12,w8
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
orr v1.16b,v6.16b,v6.16b
mov v6.s[3], w12
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
orr v18.16b,v6.16b,v6.16b
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
@ -769,4 +788,4 @@ Lctr32_done:
ret
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

File diff suppressed because it is too large

View File

@ -1,17 +1,11 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _bn_mul_mont
@ -19,6 +13,7 @@
.align 5
_bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@ -216,11 +211,14 @@ Lcond_copy:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
Lsqr8x_mont:
@ -974,11 +972,16 @@ Lsqr8x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_sqr8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@ -1412,9 +1415,11 @@ Lmul4x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 4
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -0,0 +1,89 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
.text
// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
// size_t num);
.globl _bn_add_words
.private_extern _bn_add_words
.align 4
_bn_add_words:
AARCH64_VALID_CALL_TARGET
# Clear the carry flag.
cmn xzr, xzr
# aarch64 can load two registers at a time, so we do two loop iterations at
# a time. Split x3 = 2 * x8 + x3. This allows loop
# operations to use CBNZ without clobbering the carry flag.
lsr x8, x3, #1
and x3, x3, #1
cbz x8, Ladd_tail
Ladd_loop:
ldp x4, x5, [x1], #16
ldp x6, x7, [x2], #16
sub x8, x8, #1
adcs x4, x4, x6
adcs x5, x5, x7
stp x4, x5, [x0], #16
cbnz x8, Ladd_loop
Ladd_tail:
cbz x3, Ladd_exit
ldr x4, [x1], #8
ldr x6, [x2], #8
adcs x4, x4, x6
str x4, [x0], #8
Ladd_exit:
cset x0, cs
ret
// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
// size_t num);
.globl _bn_sub_words
.private_extern _bn_sub_words
.align 4
_bn_sub_words:
AARCH64_VALID_CALL_TARGET
# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
# so we want C = 1 here.
cmp xzr, xzr
# aarch64 can load two registers at a time, so we do two loop iterations at
# a time. Split x3 = 2 * x8 + x3. This allows loop
# operations to use CBNZ without clobbering the carry flag.
lsr x8, x3, #1
and x3, x3, #1
cbz x8, Lsub_tail
Lsub_loop:
ldp x4, x5, [x1], #16
ldp x6, x7, [x2], #16
sub x8, x8, #1
sbcs x4, x4, x6
sbcs x5, x5, x7
stp x4, x5, [x0], #16
cbnz x8, Lsub_loop
Lsub_tail:
cbz x3, Lsub_exit
ldr x4, [x1], #8
ldr x6, [x2], #8
sbcs x4, x4, x6
str x4, [x0], #8
Lsub_exit:
cset x0, cc
ret
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
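
As an editorial aside on the two routines above: they implement plain multi-word addition and subtraction with carry/borrow propagation, returning the final carry or borrow, exactly as the C prototypes quoted in the comments describe. A minimal portable C sketch of those semantics (illustrative only; bn_add_words_ref and bn_sub_words_ref are hypothetical names, not BoringSSL's actual C fallback, and unsigned __int128 is a GCC/Clang extension) might read:

#include <stddef.h>
#include <stdint.h>

typedef uint64_t BN_ULONG;  // 64-bit words, matching the aarch64 code above

// Word-wise a + b with carry propagation; returns the final carry (cf. "cset x0, cs").
BN_ULONG bn_add_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                          const BN_ULONG *bp, size_t num) {
  BN_ULONG carry = 0;
  for (size_t i = 0; i < num; i++) {
    unsigned __int128 t = (unsigned __int128)ap[i] + bp[i] + carry;
    rp[i] = (BN_ULONG)t;
    carry = (BN_ULONG)(t >> 64);
  }
  return carry;
}

// Word-wise a - b with borrow propagation; returns the final borrow (cf. "cset x0, cc").
BN_ULONG bn_sub_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                          const BN_ULONG *bp, size_t num) {
  BN_ULONG borrow = 0;
  for (size_t i = 0; i < num; i++) {
    BN_ULONG a = ap[i], b = bp[i];
    rp[i] = a - b - borrow;
    // Borrow out if a < b, or if a == b while a borrow is already pending.
    borrow = (a < b) | ((a == b) & borrow);
  }
  return borrow;
}

The assembly above processes two words per iteration with ldp/stp and branches with cbnz so that the carry stays live across iterations, which is what the "without clobbering the carry flag" comments refer to.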

View File

@ -1,17 +1,11 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_init_neon
@ -19,6 +13,7 @@
.align 4
_gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -44,6 +39,7 @@ _gcm_init_neon:
.align 4
_gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -63,6 +59,7 @@ _gcm_gmult_neon:
.align 4
_gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -335,4 +332,4 @@ Lmasks:
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -0,0 +1,565 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
//calculate H^3 and H^4
pmull v0.1q,v20.1d, v22.1d
pmull v5.1q,v22.1d,v22.1d
pmull2 v2.1q,v20.2d, v22.2d
pmull2 v7.1q,v22.2d,v22.2d
pmull v1.1q,v16.1d,v17.1d
pmull v6.1q,v17.1d,v17.1d
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
ext v17.16b,v5.16b,v7.16b,#8
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v16.16b
eor v4.16b,v5.16b,v7.16b
eor v6.16b,v6.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
eor v6.16b,v6.16b,v4.16b
pmull v4.1q,v5.1d,v19.1d
ins v2.d[0],v1.d[1]
ins v7.d[0],v6.d[1]
ins v1.d[1],v0.d[0]
ins v6.d[1],v5.d[0]
eor v0.16b,v1.16b,v18.16b
eor v5.16b,v6.16b,v4.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
ext v4.16b,v5.16b,v5.16b,#8
pmull v0.1q,v0.1d,v19.1d
pmull v5.1q,v5.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b
eor v20.16b, v0.16b,v18.16b //H^3
eor v22.16b,v5.16b,v4.16b //H^4
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
ext v17.16b,v22.16b,v22.16b,#8
eor v16.16b,v16.16b,v20.16b
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
ret
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
cmp x3,#64
b.hs Lgcm_ghash_v8_4x
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __AARCH64EB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __AARCH64EB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
Ldone_v8:
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.align 4
gcm_ghash_v8_4x:
Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
ext v25.16b,v7.16b,v7.16b,#8
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
pmull2 v31.1q,v20.2d,v25.2d
pmull v30.1q,v21.1d,v7.1d
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
pmull2 v6.1q,v21.2d,v6.2d
eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b
subs x3,x3,#128
b.lo Ltail4x
b Loop4x
.align 4
Loop4x:
eor v16.16b,v4.16b,v0.16b
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
ext v3.16b,v16.16b,v16.16b,#8
#ifndef __AARCH64EB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
ext v25.16b,v7.16b,v7.16b,#8
pmull2 v1.1q,v27.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
ext v24.16b,v6.16b,v6.16b,#8
eor v1.16b,v1.16b,v30.16b
ext v23.16b,v5.16b,v5.16b,#8
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
eor v1.16b,v1.16b,v17.16b
pmull2 v31.1q,v20.2d,v25.2d
eor v1.16b,v1.16b,v18.16b
pmull v30.1q,v21.1d,v7.1d
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
eor v0.16b,v1.16b,v18.16b
pmull2 v6.1q,v21.2d,v6.2d
eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v18.16b,v18.16b,v2.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d
eor v0.16b,v0.16b,v18.16b
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v30.16b,v30.16b,v5.16b
subs x3,x3,#64
b.hs Loop4x
Ltail4x:
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
pmull2 v1.1q,v27.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
adds x3,x3,#64
b.eq Ldone4x
cmp x3,#32
b.lo Lone
b.eq Ltwo
Lthree:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d,v6.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v29.1q,v20.1d,v24.1d //H·Ii+2
eor v6.16b,v6.16b,v24.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
pmull2 v31.1q,v20.2d,v24.2d
pmull v30.1q,v21.1d,v6.1d
eor v0.16b,v0.16b,v18.16b
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
eor v5.16b,v5.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
pmull2 v23.1q,v22.2d,v23.2d
eor v16.16b,v4.16b,v0.16b
pmull2 v5.1q,v21.2d,v5.2d
ext v3.16b,v16.16b,v16.16b,#8
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b
pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v26.2d,v3.2d
pmull v1.1q,v27.1d,v16.1d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b Ldone4x
.align 4
Ltwo:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v5.16b,v5.16b
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
pmull v29.1q,v20.1d,v23.1d //H·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull2 v31.1q,v20.2d,v23.2d
pmull v30.1q,v21.1d,v5.1d
pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v22.2d,v3.2d
pmull2 v1.1q,v21.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b Ldone4x
.align 4
Lone:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull v0.1q,v20.1d,v3.1d
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v20.2d,v3.2d
pmull v1.1q,v21.1d,v16.1d
Ldone4x:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
#endif
st1 {v0.2d},[x0] //write out Xi
ret
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
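
An editorial note on the "Karatsuba pre-processing" and "post-processing" comments that recur in the file above (this aside is not part of the generated file): the three pmull/pmull2 products per 128-bit multiplication rely on the carry-less Karatsuba identity over GF(2)[t], where addition is XOR. Writing H = H_1 t^64 + H_0 and X = X_1 t^64 + X_0,

$$
H \cdot X \;=\; H_1 X_1\, t^{128} \;\oplus\; \bigl[(H_1 \oplus H_0)(X_1 \oplus X_0) \oplus H_1 X_1 \oplus H_0 X_0\bigr]\, t^{64} \;\oplus\; H_0 X_0 .
$$

The pre-processing XORs the high and low 64-bit halves of each operand (these XORed halves are what gcm_init_v8 packs into Htable alongside the powers of H), and the post-processing XORs the outer products back out of the middle term before the result is reduced modulo the GHASH polynomial via the 0xc2 constant.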

File diff suppressed because it is too large

View File

@ -0,0 +1,309 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include "openssl/arm_arch.h"
.text
.globl _beeu_mod_inverse_vartime
.private_extern _beeu_mod_inverse_vartime
.align 4
_beeu_mod_inverse_vartime:
// Reserve enough space for 14 8-byte registers on the stack
// in the first stp call for x29, x30.
// Then store the remaining callee-saved registers.
//
// | x29 | x30 | x19 | x20 | ... | x27 | x28 | x0 | x2 |
// ^ ^
// sp <------------------- 112 bytes ----------------> old sp
// x29 (FP)
//
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-112]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
stp x25,x26,[sp,#64]
stp x27,x28,[sp,#80]
stp x0,x2,[sp,#96]
// B = b3..b0 := a
ldp x25,x26,[x1]
ldp x27,x28,[x1,#16]
// n3..n0 := n
// Note: the values of the input params are changed in the following.
ldp x0,x1,[x2]
ldp x2,x30,[x2,#16]
// A = a3..a0 := n
mov x21, x0
mov x22, x1
mov x23, x2
mov x24, x30
// X = x4..x0 := 1
mov x3, #1
eor x4, x4, x4
eor x5, x5, x5
eor x6, x6, x6
eor x7, x7, x7
// Y = y4..y0 := 0
eor x8, x8, x8
eor x9, x9, x9
eor x10, x10, x10
eor x11, x11, x11
eor x12, x12, x12
Lbeeu_loop:
// if B == 0, jump to .Lbeeu_loop_end
orr x14, x25, x26
orr x14, x14, x27
// reverse the bit order of x25. This is needed for clz after this macro
rbit x15, x25
orr x14, x14, x28
cbz x14,Lbeeu_loop_end
// 0 < B < |n|,
// 0 < A <= |n|,
// (1) X*a == B (mod |n|),
// (2) (-1)*Y*a == A (mod |n|)
// Now divide B by the maximum possible power of two in the
// integers, and divide X by the same value mod |n|.
// When we're done, (1) still holds.
// shift := number of trailing 0s in x25
// ( = number of leading 0s in x15; see the "rbit" instruction in TEST_B_ZERO)
clz x13, x15
// If there is no shift, goto shift_A_Y
cbz x13, Lbeeu_shift_A_Y
// Shift B right by "x13" bits
neg x14, x13
lsr x25, x25, x13
lsl x15, x26, x14
lsr x26, x26, x13
lsl x19, x27, x14
orr x25, x25, x15
lsr x27, x27, x13
lsl x20, x28, x14
orr x26, x26, x19
lsr x28, x28, x13
orr x27, x27, x20
// Shift X right by "x13" bits, adding n whenever X becomes odd.
// x13--;
// x14 := 0; needed in the addition to the most significant word in SHIFT1
eor x14, x14, x14
Lbeeu_shift_loop_X:
tbz x3, #0, Lshift1_0
adds x3, x3, x0
adcs x4, x4, x1
adcs x5, x5, x2
adcs x6, x6, x30
adc x7, x7, x14
Lshift1_0:
// var0 := [var1|var0]<64..1>;
// i.e. concatenate var1 and var0,
// extract bits <64..1> from the resulting 128-bit value
// and put them in var0
extr x3, x4, x3, #1
extr x4, x5, x4, #1
extr x5, x6, x5, #1
extr x6, x7, x6, #1
lsr x7, x7, #1
subs x13, x13, #1
bne Lbeeu_shift_loop_X
// Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl
// with the following differences:
// - "x13" is set directly to the number of trailing 0s in B
// (using rbit and clz instructions)
// - The loop is only used to call SHIFT1(X)
// and x13 is decreased while executing the X loop.
// - SHIFT256(B, x13) is performed before right-shifting X; they are independent
Lbeeu_shift_A_Y:
// Same for A and Y.
// Afterwards, (2) still holds.
// Reverse the bit order of x21
// x13 := number of trailing 0s in x21 (= number of leading 0s in x15)
rbit x15, x21
clz x13, x15
// If there is no shift, goto |B-A|, X+Y update
cbz x13, Lbeeu_update_B_X_or_A_Y
// Shift A right by "x13" bits
neg x14, x13
lsr x21, x21, x13
lsl x15, x22, x14
lsr x22, x22, x13
lsl x19, x23, x14
orr x21, x21, x15
lsr x23, x23, x13
lsl x20, x24, x14
orr x22, x22, x19
lsr x24, x24, x13
orr x23, x23, x20
// Shift Y right by "x13" bits, adding n whenever Y becomes odd.
// x13--;
// x14 := 0; needed in the addition to the most significant word in SHIFT1
eor x14, x14, x14
Lbeeu_shift_loop_Y:
tbz x8, #0, Lshift1_1
adds x8, x8, x0
adcs x9, x9, x1
adcs x10, x10, x2
adcs x11, x11, x30
adc x12, x12, x14
Lshift1_1:
// var0 := [var1|var0]<64..1>;
// i.e. concatenate var1 and var0,
// extract bits <64..1> from the resulting 128-bit value
// and put them in var0
extr x8, x9, x8, #1
extr x9, x10, x9, #1
extr x10, x11, x10, #1
extr x11, x12, x11, #1
lsr x12, x12, #1
subs x13, x13, #1
bne Lbeeu_shift_loop_Y
Lbeeu_update_B_X_or_A_Y:
// Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow)
// Note: this is a case of unsigned arithmetic, where T fits in 4 64-bit words
// without needing a sign bit. The lack of a carry would
// indicate a negative result. See, for example,
// https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes
subs x14, x25, x21
sbcs x15, x26, x22
sbcs x19, x27, x23
sbcs x20, x28, x24
bcs Lbeeu_B_greater_than_A
// Else A > B =>
// A := A - B; Y := Y + X; goto beginning of the loop
subs x21, x21, x25
sbcs x22, x22, x26
sbcs x23, x23, x27
sbcs x24, x24, x28
adds x8, x8, x3
adcs x9, x9, x4
adcs x10, x10, x5
adcs x11, x11, x6
adc x12, x12, x7
b Lbeeu_loop
Lbeeu_B_greater_than_A:
// Continue with B > A =>
// B := B - A; X := X + Y; goto beginning of the loop
mov x25, x14
mov x26, x15
mov x27, x19
mov x28, x20
adds x3, x3, x8
adcs x4, x4, x9
adcs x5, x5, x10
adcs x6, x6, x11
adc x7, x7, x12
b Lbeeu_loop
Lbeeu_loop_end:
// Euclid's algorithm loop ends when A == gcd(a,n);
// this is 1 when a and n are co-prime (i.e. have no common factor).
// Since (-1)*Y*a == A (mod |n|), Y>0
// then out = -Y mod n
// Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|)
// Is A-1 == 0?
// If not, fail.
sub x14, x21, #1
orr x14, x14, x22
orr x14, x14, x23
orr x14, x14, x24
cbnz x14, Lbeeu_err
// If Y>n ==> Y:=Y-n
Lbeeu_reduction_loop:
// x_i := y_i - n_i (X is no longer needed, use it as temp)
// (x14 = 0 from above)
subs x3, x8, x0
sbcs x4, x9, x1
sbcs x5, x10, x2
sbcs x6, x11, x30
sbcs x7, x12, x14
// If result is non-negative (i.e., cs = carry set = no borrow),
// y_i := x_i; goto reduce again
// else
// y_i := y_i; continue
csel x8, x3, x8, cs
csel x9, x4, x9, cs
csel x10, x5, x10, cs
csel x11, x6, x11, cs
csel x12, x7, x12, cs
bcs Lbeeu_reduction_loop
// Now Y < n (Y cannot be equal to n, since the inverse cannot be 0)
// out = -Y = n-Y
subs x8, x0, x8
sbcs x9, x1, x9
sbcs x10, x2, x10
sbcs x11, x30, x11
// Save Y in output (out (x0) was saved on the stack)
ldr x3, [sp,#96]
stp x8, x9, [x3]
stp x10, x11, [x3,#16]
// return 1 (success)
mov x0, #1
b Lbeeu_finish
Lbeeu_err:
// return 0 (error)
eor x0, x0, x0
Lbeeu_finish:
// Restore callee-saved registers, except x0, x2
add sp,x29,#0
ldp x19,x20,[sp,#16]
ldp x21,x22,[sp,#32]
ldp x23,x24,[sp,#48]
ldp x25,x26,[sp,#64]
ldp x27,x28,[sp,#80]
ldp x29,x30,[sp],#112
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)
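
To make the invariants (1) and (2) annotated above easier to follow, here is a hedged reference sketch of the same binary extended Euclidean algorithm on single 64-bit words (beeu_mod_inverse_u64 is a hypothetical name; the assembly works on fixed 256-bit values with X and Y held across five registers, but the control flow is the same). It assumes n is odd and 0 < a < n:

#include <stdint.h>

// Returns a^-1 mod n, or 0 if gcd(a, n) != 1.  Loop invariants, as in the assembly:
//   (1)    x*a == B (mod n)
//   (2) (-1)*y*a == A (mod n)
uint64_t beeu_mod_inverse_u64(uint64_t a, uint64_t n) {
  uint64_t B = a, A = n;
  unsigned __int128 x = 1, y = 0;  // wide enough that the additions below cannot overflow
  while (B != 0) {
    // Divide B by its maximal power of two, halving x mod n each time
    // (adding n first whenever x is odd, as in Lbeeu_shift_loop_X).
    while ((B & 1) == 0) {
      B >>= 1;
      if (x & 1) x += n;
      x >>= 1;
    }
    // Same for A and y (Lbeeu_shift_A_Y / Lbeeu_shift_loop_Y).
    while ((A & 1) == 0) {
      A >>= 1;
      if (y & 1) y += n;
      y >>= 1;
    }
    if (B >= A) {   // B := B - A; X := X + Y  (Lbeeu_B_greater_than_A)
      B -= A;
      x += y;
    } else {        // A := A - B; Y := Y + X
      A -= B;
      y += x;
    }
  }
  if (A != 1) return 0;             // a and n share a factor (Lbeeu_err)
  y %= n;                           // Lbeeu_reduction_loop
  return (uint64_t)((n - y) % n);   // out = -Y mod n = n - Y
}

As the _vartime suffix indicates, the number of iterations (and hence the running time) depends on the input values.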

View File

@ -1,35 +1,20 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
.text
.globl _sha1_block_data_order
.private_extern _sha1_block_data_order
.globl _sha1_block_data_order_nohw
.private_extern _sha1_block_data_order_nohw
.align 6
_sha1_block_data_order:
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else
adrp x16,_OPENSSL_armcap_P@PAGE
#endif
ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF]
tst w16,#ARMV8_SHA1
b.ne Lv8_entry
_sha1_block_data_order_nohw:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -48,7 +33,7 @@ Loop:
movz w28,#0x7999
sub x2,x2,#1
movk w28,#0x5a82,lsl#16
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x3,x3,#32
#else
rev32 x3,x3
@ -66,7 +51,7 @@ Loop:
ror w21,w21,#2
add w23,w23,w4 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x5,x5,#32
#else
rev32 x5,x5
@ -91,7 +76,7 @@ Loop:
ror w24,w24,#2
add w21,w21,w6 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x7,x7,#32
#else
rev32 x7,x7
@ -116,7 +101,7 @@ Loop:
ror w22,w22,#2
add w24,w24,w8 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x9,x9,#32
#else
rev32 x9,x9
@ -141,7 +126,7 @@ Loop:
ror w20,w20,#2
add w22,w22,w10 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x11,x11,#32
#else
rev32 x11,x11
@ -166,7 +151,7 @@ Loop:
ror w23,w23,#2
add w20,w20,w12 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x13,x13,#32
#else
rev32 x13,x13
@ -191,7 +176,7 @@ Loop:
ror w21,w21,#2
add w23,w23,w14 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x15,x15,#32
#else
rev32 x15,x15
@ -216,7 +201,7 @@ Loop:
ror w24,w24,#2
add w21,w21,w16 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x17,x17,#32
#else
rev32 x17,x17
@ -1086,10 +1071,13 @@ Loop:
ldr x29,[sp],#96
ret
.globl _sha1_block_data_order_hw
.private_extern _sha1_block_data_order_hw
.align 6
sha1_block_armv8:
Lv8_entry:
_sha1_block_data_order_hw:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -1227,6 +1215,4 @@ Lconst:
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.comm _OPENSSL_armcap_P,4,4
.private_extern _OPENSSL_armcap_P
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -1,18 +1,10 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@ -40,6 +32,7 @@
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.
@ -48,7 +41,7 @@
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// generated with -mgeneral-regs-only is significantly faster
// and the gap is only 40-90%.
#ifndef __KERNEL__
@ -57,22 +50,12 @@
.text
.globl _sha256_block_data_order
.private_extern _sha256_block_data_order
.globl _sha256_block_data_order_nohw
.private_extern _sha256_block_data_order_nohw
.align 6
_sha256_block_data_order:
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else
adrp x16,_OPENSSL_armcap_P@PAGE
#endif
ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF]
tst w16,#ARMV8_SHA256
b.ne Lv8_entry
#endif
_sha256_block_data_order_nohw:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -97,7 +80,7 @@ Loop:
ldr w19,[x30],#4 // *K++
eor w28,w21,w22 // magic seed
str x1,[x29,#112]
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w3,w3 // 0
#endif
ror w16,w24,#6
@ -120,7 +103,7 @@ Loop:
add w27,w27,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w27,w27,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w4,w4 // 1
#endif
ldp w5,w6,[x1],#2*4
@ -145,7 +128,7 @@ Loop:
add w26,w26,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w26,w26,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w5,w5 // 2
#endif
add w26,w26,w17 // h+=Sigma0(a)
@ -169,7 +152,7 @@ Loop:
add w25,w25,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w25,w25,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w6,w6 // 3
#endif
ldp w7,w8,[x1],#2*4
@ -194,7 +177,7 @@ Loop:
add w24,w24,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w24,w24,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w7,w7 // 4
#endif
add w24,w24,w17 // h+=Sigma0(a)
@ -218,7 +201,7 @@ Loop:
add w23,w23,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w23,w23,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w8,w8 // 5
#endif
ldp w9,w10,[x1],#2*4
@ -243,7 +226,7 @@ Loop:
add w22,w22,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w22,w22,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w9,w9 // 6
#endif
add w22,w22,w17 // h+=Sigma0(a)
@ -267,7 +250,7 @@ Loop:
add w21,w21,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w21,w21,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w10,w10 // 7
#endif
ldp w11,w12,[x1],#2*4
@ -292,7 +275,7 @@ Loop:
add w20,w20,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w20,w20,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w11,w11 // 8
#endif
add w20,w20,w17 // h+=Sigma0(a)
@ -316,7 +299,7 @@ Loop:
add w27,w27,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w27,w27,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w12,w12 // 9
#endif
ldp w13,w14,[x1],#2*4
@ -341,7 +324,7 @@ Loop:
add w26,w26,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w26,w26,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w13,w13 // 10
#endif
add w26,w26,w17 // h+=Sigma0(a)
@ -365,7 +348,7 @@ Loop:
add w25,w25,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w25,w25,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w14,w14 // 11
#endif
ldp w15,w0,[x1],#2*4
@ -391,7 +374,7 @@ Loop:
add w24,w24,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w24,w24,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w15,w15 // 12
#endif
add w24,w24,w17 // h+=Sigma0(a)
@ -416,7 +399,7 @@ Loop:
add w23,w23,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w23,w23,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w0,w0 // 13
#endif
ldp w1,w2,[x1]
@ -442,7 +425,7 @@ Loop:
add w22,w22,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w22,w22,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w1,w1 // 14
#endif
ldr w6,[sp,#12]
@ -468,7 +451,7 @@ Loop:
add w21,w21,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w21,w21,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w2,w2 // 15
#endif
ldr w7,[sp,#0]
@ -1033,6 +1016,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1063,10 +1047,13 @@ LK256:
.align 2
.text
#ifndef __KERNEL__
.globl _sha256_block_data_order_hw
.private_extern _sha256_block_data_order_hw
.align 6
sha256_block_armv8:
Lv8_entry:
_sha256_block_data_order_hw:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -1203,8 +1190,4 @@ Loop_hw:
ret
#endif
#ifndef __KERNEL__
.comm _OPENSSL_armcap_P,4,4
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -1,18 +1,10 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@ -40,6 +32,7 @@
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.
@ -48,7 +41,7 @@
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// generated with -mgeneral-regs-only is significantly faster
// and the gap is only 40-90%.
#ifndef __KERNEL__
@ -57,12 +50,12 @@
.text
.globl _sha512_block_data_order
.private_extern _sha512_block_data_order
.globl _sha512_block_data_order_nohw
.private_extern _sha512_block_data_order_nohw
.align 6
_sha512_block_data_order:
_sha512_block_data_order_nohw:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -87,7 +80,7 @@ Loop:
ldr x19,[x30],#8 // *K++
eor x28,x21,x22 // magic seed
str x1,[x29,#112]
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x3,x3 // 0
#endif
ror x16,x24,#14
@ -110,7 +103,7 @@ Loop:
add x27,x27,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x4,x4 // 1
#endif
ldp x5,x6,[x1],#2*8
@ -135,7 +128,7 @@ Loop:
add x26,x26,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x5,x5 // 2
#endif
add x26,x26,x17 // h+=Sigma0(a)
@ -159,7 +152,7 @@ Loop:
add x25,x25,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x6,x6 // 3
#endif
ldp x7,x8,[x1],#2*8
@ -184,7 +177,7 @@ Loop:
add x24,x24,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x7,x7 // 4
#endif
add x24,x24,x17 // h+=Sigma0(a)
@ -208,7 +201,7 @@ Loop:
add x23,x23,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x8,x8 // 5
#endif
ldp x9,x10,[x1],#2*8
@ -233,7 +226,7 @@ Loop:
add x22,x22,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x9,x9 // 6
#endif
add x22,x22,x17 // h+=Sigma0(a)
@ -257,7 +250,7 @@ Loop:
add x21,x21,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x10,x10 // 7
#endif
ldp x11,x12,[x1],#2*8
@ -282,7 +275,7 @@ Loop:
add x20,x20,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x20,x20,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x11,x11 // 8
#endif
add x20,x20,x17 // h+=Sigma0(a)
@ -306,7 +299,7 @@ Loop:
add x27,x27,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x12,x12 // 9
#endif
ldp x13,x14,[x1],#2*8
@ -331,7 +324,7 @@ Loop:
add x26,x26,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x13,x13 // 10
#endif
add x26,x26,x17 // h+=Sigma0(a)
@ -355,7 +348,7 @@ Loop:
add x25,x25,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x14,x14 // 11
#endif
ldp x15,x0,[x1],#2*8
@ -381,7 +374,7 @@ Loop:
add x24,x24,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x15,x15 // 12
#endif
add x24,x24,x17 // h+=Sigma0(a)
@ -406,7 +399,7 @@ Loop:
add x23,x23,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x0,x0 // 13
#endif
ldp x1,x2,[x1]
@ -432,7 +425,7 @@ Loop:
add x22,x22,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x1,x1 // 14
#endif
ldr x6,[sp,#24]
@ -458,7 +451,7 @@ Loop:
add x21,x21,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x2,x2 // 15
#endif
ldr x7,[sp,#0]
@ -1023,6 +1016,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1075,8 +1069,528 @@ LK512:
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.text
#ifndef __KERNEL__
.comm _OPENSSL_armcap_P,4,4
.private_extern _OPENSSL_armcap_P
.globl _sha512_block_data_order_hw
.private_extern _sha512_block_data_order_hw
.align 6
_sha512_block_data_order_hw:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
adrp x3,LK512@PAGE
add x3,x3,LK512@PAGEOFF
rev64 v16.16b,v16.16b
rev64 v17.16b,v17.16b
rev64 v18.16b,v18.16b
rev64 v19.16b,v19.16b
rev64 v20.16b,v20.16b
rev64 v21.16b,v21.16b
rev64 v22.16b,v22.16b
rev64 v23.16b,v23.16b
b Loop_hw
.align 4
Loop_hw:
ld1 {v24.2d},[x3],#16
subs x2,x2,#1
sub x4,x1,#128
orr v26.16b,v0.16b,v0.16b // offload
orr v27.16b,v1.16b,v1.16b
orr v28.16b,v2.16b,v2.16b
orr v29.16b,v3.16b,v3.16b
csel x1,x1,x4,ne // conditional rewind
add v24.2d,v24.2d,v16.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
ext v7.16b,v20.16b,v21.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v25.2d,v25.2d,v17.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
ext v7.16b,v21.16b,v22.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v24.2d,v24.2d,v18.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
ext v7.16b,v22.16b,v23.16b,#8
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v25.2d,v25.2d,v19.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
ext v7.16b,v23.16b,v16.16b,#8
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v24.2d,v24.2d,v20.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
ext v7.16b,v16.16b,v17.16b,#8
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v24.2d,v24.2d,v16.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
ext v7.16b,v20.16b,v21.16b,#8
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v25.2d,v25.2d,v17.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
ext v7.16b,v21.16b,v22.16b,#8
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v24.2d,v24.2d,v18.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
ext v7.16b,v22.16b,v23.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v25.2d,v25.2d,v19.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
ext v7.16b,v23.16b,v16.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v24.2d,v24.2d,v20.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
ext v7.16b,v16.16b,v17.16b,#8
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v24.2d,v24.2d,v16.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
ext v7.16b,v20.16b,v21.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v25.2d,v25.2d,v17.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
ext v7.16b,v21.16b,v22.16b,#8
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v24.2d,v24.2d,v18.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
ext v7.16b,v22.16b,v23.16b,#8
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v25.2d,v25.2d,v19.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
ext v7.16b,v23.16b,v16.16b,#8
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v24.2d,v24.2d,v20.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
ext v7.16b,v16.16b,v17.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v24.2d,v24.2d,v16.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
ext v7.16b,v20.16b,v21.16b,#8
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v25.2d,v25.2d,v17.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
ext v7.16b,v21.16b,v22.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v24.2d,v24.2d,v18.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
ext v7.16b,v22.16b,v23.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v25.2d,v25.2d,v19.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
ext v7.16b,v23.16b,v16.16b,#8
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v24.2d,v24.2d,v20.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
ext v7.16b,v16.16b,v17.16b,#8
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v16.2d
ld1 {v16.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
rev64 v16.16b,v16.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
ld1 {v24.2d},[x3],#16
add v25.2d,v25.2d,v17.2d
ld1 {v17.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
rev64 v17.16b,v17.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v18.2d
ld1 {v18.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
rev64 v18.16b,v18.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
ld1 {v24.2d},[x3],#16
add v25.2d,v25.2d,v19.2d
ld1 {v19.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
rev64 v19.16b,v19.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v20.2d
ld1 {v20.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
rev64 v20.16b,v20.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
ld1 {v24.2d},[x3],#16
add v25.2d,v25.2d,v21.2d
ld1 {v21.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
rev64 v21.16b,v21.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v22.2d
ld1 {v22.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
rev64 v22.16b,v22.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
sub x3,x3,#80*8 // rewind
add v25.2d,v25.2d,v23.2d
ld1 {v23.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
rev64 v23.16b,v23.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v0.2d,v0.2d,v26.2d // accumulate
add v1.2d,v1.2d,v27.2d
add v2.2d,v2.2d,v28.2d
add v3.2d,v3.2d,v29.2d
cbnz x2,Loop_hw
st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context
ldr x29,[sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -1,17 +1,11 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const
@ -214,6 +208,7 @@ Lenc_entry:
.align 4
_vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -223,6 +218,7 @@ _vpaes_encrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -451,6 +447,7 @@ Ldec_entry:
.align 4
_vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -460,6 +457,7 @@ _vpaes_decrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -629,6 +627,7 @@ _vpaes_key_preheat:
.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0
@ -798,6 +797,7 @@ Lschedule_mangle_last_dec:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1000,7 +1000,7 @@ Lschedule_mangle_dec:
Lschedule_mangle_both:
tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3
add x8, x8, #64-16 // add $-16, %r8
add x8, x8, #48 // add $-16, %r8
and x8, x8, #~(1<<6) // and $0x30, %r8
st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx)
ret
@ -1011,6 +1011,7 @@ Lschedule_mangle_both:
.align 4
_vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1026,6 +1027,7 @@ _vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1034,6 +1036,7 @@ _vpaes_set_encrypt_key:
.align 4
_vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1053,6 +1056,7 @@ _vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _vpaes_cbc_encrypt
@ -1060,6 +1064,7 @@ _vpaes_set_decrypt_key:
.align 4
_vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt
@ -1087,12 +1092,15 @@ Lcbc_enc_loop:
ldp x29,x30,[sp],#16
Lcbc_abort:
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1134,6 +1142,7 @@ Lcbc_dec_done:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _vpaes_ctr32_encrypt_blocks
@ -1141,6 +1150,7 @@ Lcbc_dec_done:
.align 4
_vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1208,6 +1218,7 @@ Lctr32_done:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -1,17 +1,11 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -25,6 +19,8 @@
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
@ -127,6 +123,7 @@ Lx29_ok:
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -134,6 +131,7 @@ Lx29_ok:
.private_extern _abi_test_clobber_x0
.align 4
_abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
@ -142,6 +140,7 @@ _abi_test_clobber_x0:
.private_extern _abi_test_clobber_x1
.align 4
_abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
@ -150,6 +149,7 @@ _abi_test_clobber_x1:
.private_extern _abi_test_clobber_x2
.align 4
_abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
@ -158,6 +158,7 @@ _abi_test_clobber_x2:
.private_extern _abi_test_clobber_x3
.align 4
_abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
@ -166,6 +167,7 @@ _abi_test_clobber_x3:
.private_extern _abi_test_clobber_x4
.align 4
_abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
@ -174,6 +176,7 @@ _abi_test_clobber_x4:
.private_extern _abi_test_clobber_x5
.align 4
_abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
@ -182,6 +185,7 @@ _abi_test_clobber_x5:
.private_extern _abi_test_clobber_x6
.align 4
_abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
@ -190,6 +194,7 @@ _abi_test_clobber_x6:
.private_extern _abi_test_clobber_x7
.align 4
_abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
@ -198,6 +203,7 @@ _abi_test_clobber_x7:
.private_extern _abi_test_clobber_x8
.align 4
_abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
@ -206,6 +212,7 @@ _abi_test_clobber_x8:
.private_extern _abi_test_clobber_x9
.align 4
_abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
@ -214,6 +221,7 @@ _abi_test_clobber_x9:
.private_extern _abi_test_clobber_x10
.align 4
_abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
@ -222,6 +230,7 @@ _abi_test_clobber_x10:
.private_extern _abi_test_clobber_x11
.align 4
_abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
@ -230,6 +239,7 @@ _abi_test_clobber_x11:
.private_extern _abi_test_clobber_x12
.align 4
_abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
@ -238,6 +248,7 @@ _abi_test_clobber_x12:
.private_extern _abi_test_clobber_x13
.align 4
_abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
@ -246,6 +257,7 @@ _abi_test_clobber_x13:
.private_extern _abi_test_clobber_x14
.align 4
_abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
@ -254,6 +266,7 @@ _abi_test_clobber_x14:
.private_extern _abi_test_clobber_x15
.align 4
_abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
@ -262,6 +275,7 @@ _abi_test_clobber_x15:
.private_extern _abi_test_clobber_x16
.align 4
_abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
@ -270,6 +284,7 @@ _abi_test_clobber_x16:
.private_extern _abi_test_clobber_x17
.align 4
_abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
@ -278,6 +293,7 @@ _abi_test_clobber_x17:
.private_extern _abi_test_clobber_x19
.align 4
_abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
@ -286,6 +302,7 @@ _abi_test_clobber_x19:
.private_extern _abi_test_clobber_x20
.align 4
_abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
@ -294,6 +311,7 @@ _abi_test_clobber_x20:
.private_extern _abi_test_clobber_x21
.align 4
_abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
@ -302,6 +320,7 @@ _abi_test_clobber_x21:
.private_extern _abi_test_clobber_x22
.align 4
_abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
@ -310,6 +329,7 @@ _abi_test_clobber_x22:
.private_extern _abi_test_clobber_x23
.align 4
_abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
@ -318,6 +338,7 @@ _abi_test_clobber_x23:
.private_extern _abi_test_clobber_x24
.align 4
_abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
@ -326,6 +347,7 @@ _abi_test_clobber_x24:
.private_extern _abi_test_clobber_x25
.align 4
_abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
@ -334,6 +356,7 @@ _abi_test_clobber_x25:
.private_extern _abi_test_clobber_x26
.align 4
_abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
@ -342,6 +365,7 @@ _abi_test_clobber_x26:
.private_extern _abi_test_clobber_x27
.align 4
_abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
@ -350,6 +374,7 @@ _abi_test_clobber_x27:
.private_extern _abi_test_clobber_x28
.align 4
_abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
@ -358,6 +383,7 @@ _abi_test_clobber_x28:
.private_extern _abi_test_clobber_x29
.align 4
_abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
@ -366,6 +392,7 @@ _abi_test_clobber_x29:
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
@ -374,6 +401,7 @@ _abi_test_clobber_d0:
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
@ -382,6 +410,7 @@ _abi_test_clobber_d1:
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
@ -390,6 +419,7 @@ _abi_test_clobber_d2:
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
@ -398,6 +428,7 @@ _abi_test_clobber_d3:
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
@ -406,6 +437,7 @@ _abi_test_clobber_d4:
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
@ -414,6 +446,7 @@ _abi_test_clobber_d5:
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
@ -422,6 +455,7 @@ _abi_test_clobber_d6:
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
@ -430,6 +464,7 @@ _abi_test_clobber_d7:
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
@ -438,6 +473,7 @@ _abi_test_clobber_d8:
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
@ -446,6 +482,7 @@ _abi_test_clobber_d9:
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
@ -454,6 +491,7 @@ _abi_test_clobber_d10:
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
@ -462,6 +500,7 @@ _abi_test_clobber_d11:
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
@ -470,6 +509,7 @@ _abi_test_clobber_d12:
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
@ -478,6 +518,7 @@ _abi_test_clobber_d13:
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
@ -486,6 +527,7 @@ _abi_test_clobber_d14:
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
@ -494,6 +536,7 @@ _abi_test_clobber_d15:
.private_extern _abi_test_clobber_d16
.align 4
_abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
@ -502,6 +545,7 @@ _abi_test_clobber_d16:
.private_extern _abi_test_clobber_d17
.align 4
_abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
@ -510,6 +554,7 @@ _abi_test_clobber_d17:
.private_extern _abi_test_clobber_d18
.align 4
_abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
@ -518,6 +563,7 @@ _abi_test_clobber_d18:
.private_extern _abi_test_clobber_d19
.align 4
_abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
@ -526,6 +572,7 @@ _abi_test_clobber_d19:
.private_extern _abi_test_clobber_d20
.align 4
_abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
@ -534,6 +581,7 @@ _abi_test_clobber_d20:
.private_extern _abi_test_clobber_d21
.align 4
_abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
@ -542,6 +590,7 @@ _abi_test_clobber_d21:
.private_extern _abi_test_clobber_d22
.align 4
_abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
@ -550,6 +599,7 @@ _abi_test_clobber_d22:
.private_extern _abi_test_clobber_d23
.align 4
_abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
@ -558,6 +608,7 @@ _abi_test_clobber_d23:
.private_extern _abi_test_clobber_d24
.align 4
_abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
@ -566,6 +617,7 @@ _abi_test_clobber_d24:
.private_extern _abi_test_clobber_d25
.align 4
_abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
@ -574,6 +626,7 @@ _abi_test_clobber_d25:
.private_extern _abi_test_clobber_d26
.align 4
_abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
@ -582,6 +635,7 @@ _abi_test_clobber_d26:
.private_extern _abi_test_clobber_d27
.align 4
_abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
@ -590,6 +644,7 @@ _abi_test_clobber_d27:
.private_extern _abi_test_clobber_d28
.align 4
_abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
@ -598,6 +653,7 @@ _abi_test_clobber_d28:
.private_extern _abi_test_clobber_d29
.align 4
_abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
@ -606,6 +662,7 @@ _abi_test_clobber_d29:
.private_extern _abi_test_clobber_d30
.align 4
_abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
@ -614,6 +671,7 @@ _abi_test_clobber_d30:
.private_extern _abi_test_clobber_d31
.align 4
_abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
@ -622,6 +680,7 @@ _abi_test_clobber_d31:
.private_extern _abi_test_clobber_v8_upper
.align 4
_abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
@ -630,6 +689,7 @@ _abi_test_clobber_v8_upper:
.private_extern _abi_test_clobber_v9_upper
.align 4
_abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
@ -638,6 +698,7 @@ _abi_test_clobber_v9_upper:
.private_extern _abi_test_clobber_v10_upper
.align 4
_abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
@ -646,6 +707,7 @@ _abi_test_clobber_v10_upper:
.private_extern _abi_test_clobber_v11_upper
.align 4
_abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
@ -654,6 +716,7 @@ _abi_test_clobber_v11_upper:
.private_extern _abi_test_clobber_v12_upper
.align 4
_abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
@ -662,6 +725,7 @@ _abi_test_clobber_v12_upper:
.private_extern _abi_test_clobber_v13_upper
.align 4
_abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
@ -670,6 +734,7 @@ _abi_test_clobber_v13_upper:
.private_extern _abi_test_clobber_v14_upper
.align 4
_abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
@ -678,7 +743,8 @@ _abi_test_clobber_v14_upper:
.private_extern _abi_test_clobber_v15_upper
.align 4
_abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)

View File

@ -0,0 +1,957 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _ChaCha20_ctr32_nohw
.private_extern _ChaCha20_ctr32_nohw
.align 4
_ChaCha20_ctr32_nohw:
L_ChaCha20_ctr32_nohw_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp L000entry
.align 4,0x90
L001outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
L000entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp L002loop
.align 4,0x90
L002loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz L002loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb L003tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz L001outer_loop
jmp L004done
L003tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
L005tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz L005tail_loop
L004done:
addl $132,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _ChaCha20_ctr32_ssse3
.private_extern _ChaCha20_ctr32_ssse3
.align 4
_ChaCha20_ctr32_ssse3:
L_ChaCha20_ctr32_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
call Lpic_point
Lpic_point:
popl %eax
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal Lssse3_data-Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
cmpl $256,%ecx
jb L0061x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
movdqu (%edx),%xmm7
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
paddd 48(%eax),%xmm0
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
psubd 64(%eax),%xmm0
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,64(%ebp)
movdqa %xmm1,80(%ebp)
movdqa %xmm2,96(%ebp)
movdqa %xmm3,112(%ebp)
movdqu 16(%edx),%xmm3
movdqa %xmm4,-64(%ebp)
movdqa %xmm5,-48(%ebp)
movdqa %xmm6,-32(%ebp)
movdqa %xmm7,-16(%ebp)
movdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,(%ebp)
movdqa %xmm1,16(%ebp)
movdqa %xmm2,32(%ebp)
movdqa %xmm3,48(%ebp)
movdqa %xmm4,-128(%ebp)
movdqa %xmm5,-112(%ebp)
movdqa %xmm6,-96(%ebp)
movdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp L007outer_loop
.align 4,0x90
L007outer_loop:
movdqa -112(%ebp),%xmm1
movdqa -96(%ebp),%xmm2
movdqa -80(%ebp),%xmm3
movdqa -48(%ebp),%xmm5
movdqa -32(%ebp),%xmm6
movdqa -16(%ebp),%xmm7
movdqa %xmm1,-112(%ebx)
movdqa %xmm2,-96(%ebx)
movdqa %xmm3,-80(%ebx)
movdqa %xmm5,-48(%ebx)
movdqa %xmm6,-32(%ebx)
movdqa %xmm7,-16(%ebx)
movdqa 32(%ebp),%xmm2
movdqa 48(%ebp),%xmm3
movdqa 64(%ebp),%xmm4
movdqa 80(%ebp),%xmm5
movdqa 96(%ebp),%xmm6
movdqa 112(%ebp),%xmm7
paddd 64(%eax),%xmm4
movdqa %xmm2,32(%ebx)
movdqa %xmm3,48(%ebx)
movdqa %xmm4,64(%ebx)
movdqa %xmm5,80(%ebx)
movdqa %xmm6,96(%ebx)
movdqa %xmm7,112(%ebx)
movdqa %xmm4,64(%ebp)
movdqa -128(%ebp),%xmm0
movdqa %xmm4,%xmm6
movdqa -64(%ebp),%xmm3
movdqa (%ebp),%xmm4
movdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 4,0x90
L008loop:
paddd %xmm3,%xmm0
movdqa %xmm3,%xmm2
pxor %xmm0,%xmm6
pshufb (%eax),%xmm6
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -48(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 80(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,64(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-64(%ebx)
paddd %xmm7,%xmm5
movdqa 32(%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -32(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 96(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,80(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,16(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-48(%ebx)
paddd %xmm6,%xmm4
movdqa 48(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -16(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 112(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,96(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-32(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa -48(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,%xmm6
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
pshufb (%eax),%xmm6
movdqa %xmm3,-16(%ebx)
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -32(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 64(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,112(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,32(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-48(%ebx)
paddd %xmm7,%xmm5
movdqa (%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -16(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 80(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,64(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,48(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-32(%ebx)
paddd %xmm6,%xmm4
movdqa 16(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -64(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 96(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,80(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-16(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 64(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,96(%ebx)
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
por %xmm1,%xmm3
decl %edx
jnz L008loop
movdqa %xmm3,-64(%ebx)
movdqa %xmm4,(%ebx)
movdqa %xmm5,16(%ebx)
movdqa %xmm6,64(%ebx)
movdqa %xmm7,96(%ebx)
movdqa -112(%ebx),%xmm1
movdqa -96(%ebx),%xmm2
movdqa -80(%ebx),%xmm3
paddd -128(%ebp),%xmm0
paddd -112(%ebp),%xmm1
paddd -96(%ebp),%xmm2
paddd -80(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa -64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa -48(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa -32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa -16(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd -64(%ebp),%xmm0
paddd -48(%ebp),%xmm1
paddd -32(%ebp),%xmm2
paddd -16(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa (%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 16(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 48(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd (%ebp),%xmm0
paddd 16(%ebp),%xmm1
paddd 32(%ebp),%xmm2
paddd 48(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa 64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 80(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 96(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 112(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd 64(%ebp),%xmm0
paddd 80(%ebp),%xmm1
paddd 96(%ebp),%xmm2
paddd 112(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 208(%esi),%esi
pxor %xmm0,%xmm4
pxor %xmm1,%xmm5
pxor %xmm2,%xmm6
pxor %xmm3,%xmm7
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc L007outer_loop
addl $256,%ecx
jz L009done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
movd 64(%ebp),%xmm2
movdqu (%ebx),%xmm3
paddd 96(%eax),%xmm2
pand 112(%eax),%xmm3
por %xmm2,%xmm3
L0061x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp L010loop1x
.align 4,0x90
L011outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp L010loop1x
.align 4,0x90
L010loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz L010loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb L012tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz L011outer1x
jmp L009done
L012tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz L013tail_loop
L009done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.align 6,0x90
Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 6,0x90
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)

File diff suppressed because it is too large

View File

@ -0,0 +1,987 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _bn_mul_add_words
.private_extern _bn_mul_add_words
.align 4
_bn_mul_add_words:
L_bn_mul_add_words_begin:
call L000PIC_me_up
L000PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L001maw_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
jmp L002maw_sse2_entry
.align 4,0x90
L003maw_sse2_unrolled:
movd (%eax),%mm3
paddq %mm3,%mm1
movd (%edx),%mm2
pmuludq %mm0,%mm2
movd 4(%edx),%mm4
pmuludq %mm0,%mm4
movd 8(%edx),%mm6
pmuludq %mm0,%mm6
movd 12(%edx),%mm7
pmuludq %mm0,%mm7
paddq %mm2,%mm1
movd 4(%eax),%mm3
paddq %mm4,%mm3
movd 8(%eax),%mm5
paddq %mm6,%mm5
movd 12(%eax),%mm4
paddq %mm4,%mm7
movd %mm1,(%eax)
movd 16(%edx),%mm2
pmuludq %mm0,%mm2
psrlq $32,%mm1
movd 20(%edx),%mm4
pmuludq %mm0,%mm4
paddq %mm3,%mm1
movd 24(%edx),%mm6
pmuludq %mm0,%mm6
movd %mm1,4(%eax)
psrlq $32,%mm1
movd 28(%edx),%mm3
addl $32,%edx
pmuludq %mm0,%mm3
paddq %mm5,%mm1
movd 16(%eax),%mm5
paddq %mm5,%mm2
movd %mm1,8(%eax)
psrlq $32,%mm1
paddq %mm7,%mm1
movd 20(%eax),%mm5
paddq %mm5,%mm4
movd %mm1,12(%eax)
psrlq $32,%mm1
paddq %mm2,%mm1
movd 24(%eax),%mm5
paddq %mm5,%mm6
movd %mm1,16(%eax)
psrlq $32,%mm1
paddq %mm4,%mm1
movd 28(%eax),%mm5
paddq %mm5,%mm3
movd %mm1,20(%eax)
psrlq $32,%mm1
paddq %mm6,%mm1
movd %mm1,24(%eax)
psrlq $32,%mm1
paddq %mm3,%mm1
movd %mm1,28(%eax)
leal 32(%eax),%eax
psrlq $32,%mm1
subl $8,%ecx
jz L004maw_sse2_exit
L002maw_sse2_entry:
testl $4294967288,%ecx
jnz L003maw_sse2_unrolled
.align 2,0x90
L005maw_sse2_loop:
movd (%edx),%mm2
movd (%eax),%mm3
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm3,%mm1
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
psrlq $32,%mm1
leal 4(%eax),%eax
jnz L005maw_sse2_loop
L004maw_sse2_exit:
movd %mm1,%eax
emms
ret
.align 4,0x90
L001maw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 28(%esp),%ecx
movl 24(%esp),%ebx
andl $4294967288,%ecx
movl 32(%esp),%ebp
pushl %ecx
jz L006maw_finish
.align 4,0x90
L007maw_loop:
# Round 0
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
# Round 4
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
# Round 8
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
# Round 12
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
# Round 16
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
# Round 20
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
# Round 24
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
# Round 28
movl 28(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 28(%edi),%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
subl $8,%ecx
leal 32(%ebx),%ebx
leal 32(%edi),%edi
jnz L007maw_loop
L006maw_finish:
movl 32(%esp),%ecx
andl $7,%ecx
jnz L008maw_finish2
jmp L009maw_end
L008maw_finish2:
# Tail Round 0
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
jz L009maw_end
# Tail Round 1
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
jz L009maw_end
# Tail Round 2
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
jz L009maw_end
# Tail Round 3
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
jz L009maw_end
# Tail Round 4
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
jz L009maw_end
# Tail Round 5
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
jz L009maw_end
# Tail Round 6
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
L009maw_end:
movl %esi,%eax
popl %ecx
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _bn_mul_words
.private_extern _bn_mul_words
.align 4
_bn_mul_words:
L_bn_mul_words_begin:
call L010PIC_me_up
L010PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L011mw_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
.align 4,0x90
L012mw_sse2_loop:
movd (%edx),%mm2
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
psrlq $32,%mm1
leal 4(%eax),%eax
jnz L012mw_sse2_loop
movd %mm1,%eax
emms
ret
.align 4,0x90
L011mw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ebp
movl 32(%esp),%ecx
andl $4294967288,%ebp
jz L013mw_finish
L014mw_loop:
# Round 0
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
# Round 4
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
# Round 8
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
# Round 12
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
# Round 16
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
# Round 20
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
# Round 24
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
# Round 28
movl 28(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
addl $32,%ebx
addl $32,%edi
subl $8,%ebp
jz L013mw_finish
jmp L014mw_loop
L013mw_finish:
movl 28(%esp),%ebp
andl $7,%ebp
jnz L015mw_finish2
jmp L016mw_end
L015mw_finish2:
# Tail Round 0
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
decl %ebp
jz L016mw_end
# Tail Round 1
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
decl %ebp
jz L016mw_end
# Tail Round 2
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
decl %ebp
jz L016mw_end
# Tail Round 3
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
decl %ebp
jz L016mw_end
# Tail Round 4
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
decl %ebp
jz L016mw_end
# Tail Round 5
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
decl %ebp
jz L016mw_end
# Tail Round 6
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
L016mw_end:
movl %esi,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _bn_sqr_words
.private_extern _bn_sqr_words
.align 4
_bn_sqr_words:
L_bn_sqr_words_begin:
call L017PIC_me_up
L017PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L018sqr_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
.align 4,0x90
L019sqr_sse2_loop:
movd (%edx),%mm0
pmuludq %mm0,%mm0
leal 4(%edx),%edx
movq %mm0,(%eax)
subl $1,%ecx
leal 8(%eax),%eax
jnz L019sqr_sse2_loop
emms
ret
.align 4,0x90
L018sqr_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%ebx
andl $4294967288,%ebx
jz L020sw_finish
L021sw_loop:
# Round 0
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
movl %edx,4(%esi)
# Round 4
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
movl %edx,12(%esi)
# Round 8
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
movl %edx,20(%esi)
# Round 12
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
movl %edx,28(%esi)
# Round 16
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
movl %edx,36(%esi)
# Round 20
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
movl %edx,44(%esi)
# Round 24
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
# Round 28
movl 28(%edi),%eax
mull %eax
movl %eax,56(%esi)
movl %edx,60(%esi)
addl $32,%edi
addl $64,%esi
subl $8,%ebx
jnz L021sw_loop
L020sw_finish:
movl 28(%esp),%ebx
andl $7,%ebx
jz L022sw_end
# Tail Round 0
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
decl %ebx
movl %edx,4(%esi)
jz L022sw_end
# Tail Round 1
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
decl %ebx
movl %edx,12(%esi)
jz L022sw_end
# Tail Round 2
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
decl %ebx
movl %edx,20(%esi)
jz L022sw_end
# Tail Round 3
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
decl %ebx
movl %edx,28(%esi)
jz L022sw_end
# Tail Round 4
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
decl %ebx
movl %edx,36(%esi)
jz L022sw_end
# Tail Round 5
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
decl %ebx
movl %edx,44(%esi)
jz L022sw_end
# Tail Round 6
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
L022sw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _bn_div_words
.private_extern _bn_div_words
.align 4
_bn_div_words:
L_bn_div_words_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
movl 12(%esp),%ecx
divl %ecx
ret
.globl _bn_add_words
.private_extern _bn_add_words
.align 4
_bn_add_words:
L_bn_add_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz L023aw_finish
L024aw_loop:
# Round 0
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
# Round 1
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
# Round 2
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
# Round 3
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
# Round 4
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
# Round 5
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
# Round 6
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
# Round 7
movl 28(%esi),%ecx
movl 28(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz L024aw_loop
L023aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz L025aw_end
# Tail Round 0
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz L025aw_end
# Tail Round 1
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz L025aw_end
# Tail Round 2
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz L025aw_end
# Tail Round 3
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz L025aw_end
# Tail Round 4
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz L025aw_end
# Tail Round 5
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz L025aw_end
# Tail Round 6
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
L025aw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _bn_sub_words
.private_extern _bn_sub_words
.align 4
_bn_sub_words:
L_bn_sub_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz L026aw_finish
L027aw_loop:
# Round 0
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
# Round 1
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
# Round 2
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
# Round 3
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
# Round 4
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
# Round 5
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
# Round 6
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
# Round 7
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz L027aw_loop
L026aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz L028aw_end
# Tail Round 0
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz L028aw_end
# Tail Round 1
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz L028aw_end
# Tail Round 2
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz L028aw_end
# Tail Round 3
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz L028aw_end
# Tail Round 4
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz L028aw_end
# Tail Round 5
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz L028aw_end
# Tail Round 6
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
L028aw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)

File diff suppressed because it is too large


@@ -0,0 +1,288 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _gcm_gmult_ssse3
.private_extern _gcm_gmult_ssse3
.align 4
_gcm_gmult_ssse3:
L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call L000pic_point
L000pic_point:
popl %eax
movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7
movdqa Llow4_mask-L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_ghash_ssse3
.private_extern _gcm_ghash_ssse3
.align 4
_gcm_ghash_ssse3:
L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call L004pic_point
L004pic_point:
popl %ebx
movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
L005loop_ghash:
movdqa Llow4_mask-L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.align 4,0x90
Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 4,0x90
Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)


@@ -0,0 +1,322 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call L000pic
L000pic:
popl %ecx
leal Lbswap-L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call L001pic
L001pic:
popl %ecx
leal Lbswap-L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call L002pic
L002pic:
popl %ecx
leal Lbswap-L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe L004even_tail
jmp L005mod_loop
.align 5,0x90
L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja L005mod_loop
L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz L006done
movups (%edx),%xmm2
L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.align 6,0x90
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)


@@ -0,0 +1,684 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order
.align 4
_md5_block_asm_data_order:
L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
L000start:
# R0 section
movl %ecx,%edi
movl (%esi),%ebp
# R0 0
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
# R0 1
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
# R0 2
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
# R0 3
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
# R0 4
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
# R0 5
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
# R0 6
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
# R0 7
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
# R0 8
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
# R0 9
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
# R0 10
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
# R0 11
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
# R0 12
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
# R0 13
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
# R0 14
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
# R0 15
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
# R1 section
# R1 16
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 17
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 18
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 19
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R1 20
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 21
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 22
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 23
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R1 24
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 25
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 26
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 27
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R1 28
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 29
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 30
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 31
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R2 section
# R2 32
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
# R2 33
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 34
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
# R2 35
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
# R2 36
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
# R2 37
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 38
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
# R2 39
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
# R2 40
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
# R2 41
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 42
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
# R2 43
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
# R2 44
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
# R2 45
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 46
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
# R2 47
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
# R3 section
# R3 48
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 49
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 50
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 51
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
# R3 52
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 53
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 54
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 55
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
# R3 56
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 57
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 58
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 59
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
# R3 60
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 61
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 62
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 63
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae L000start
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,680 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.align 6,0x90
L_vpaes_consts:
.long 218628480,235210255,168496130,67568393
.long 252381056,17041926,33884169,51187212
.long 252645135,252645135,252645135,252645135
.long 1512730624,3266504856,1377990664,3401244816
.long 830229760,1275146365,2969422977,3447763452
.long 3411033600,2979783055,338359620,2782886510
.long 4209124096,907596821,221174255,1006095553
.long 191964160,3799684038,3164090317,1589111125
.long 182528256,1777043520,2877432650,3265356744
.long 1874708224,3503451415,3305285752,363511674
.long 1606117888,3487855781,1093350906,2384367825
.long 197121,67569157,134941193,202313229
.long 67569157,134941193,202313229,197121
.long 134941193,202313229,197121,67569157
.long 202313229,197121,67569157,134941193
.long 33619971,100992007,168364043,235736079
.long 235736079,33619971,100992007,168364043
.long 168364043,235736079,33619971,100992007
.long 100992007,168364043,235736079,33619971
.long 50462976,117835012,185207048,252579084
.long 252314880,51251460,117574920,184942860
.long 184682752,252054788,50987272,118359308
.long 118099200,185467140,251790600,50727180
.long 2946363062,528716217,1300004225,1881839624
.long 1532713819,1532713819,1532713819,1532713819
.long 3602276352,4288629033,3737020424,4153884961
.long 1354558464,32357713,2958822624,3775749553
.long 1201988352,132424512,1572796698,503232858
.long 2213177600,1597421020,4103937655,675398315
.long 2749646592,4273543773,1511898873,121693092
.long 3040248576,1103263732,2871565598,1608280554
.long 2236667136,2588920351,482954393,64377734
.long 3069987328,291237287,2117370568,3650299247
.long 533321216,3573750986,2572112006,1401264716
.long 1339849704,2721158661,548607111,3445553514
.long 2128193280,3054596040,2183486460,1257083700
.long 655635200,1165381986,3923443150,2344132524
.long 190078720,256924420,290342170,357187870
.long 1610966272,2263057382,4103205268,309794674
.long 2592527872,2233205587,1335446729,3402964816
.long 3973531904,3225098121,3002836325,1918774430
.long 3870401024,2102906079,2284471353,4117666579
.long 617007872,1021508343,366931923,691083277
.long 2528395776,3491914898,2968704004,1613121270
.long 3445188352,3247741094,844474987,4093578302
.long 651481088,1190302358,1689581232,574775300
.long 4289380608,206939853,2555985458,2489840491
.long 2130264064,327674451,3566485037,3349835193
.long 2470714624,316102159,3636825756,3393945945
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
.byte 118,101,114,115,105,116,121,41,0
.align 6,0x90
.private_extern __vpaes_preheat
.align 4
__vpaes_preheat:
addl (%esp),%ebp
movdqa -48(%ebp),%xmm7
movdqa -16(%ebp),%xmm6
ret
.private_extern __vpaes_encrypt_core
.align 4
__vpaes_encrypt_core:
movl $16,%ecx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
movdqu (%edx),%xmm5
.byte 102,15,56,0,208
movdqa 16(%ebp),%xmm0
pxor %xmm5,%xmm2
psrld $4,%xmm1
addl $16,%edx
.byte 102,15,56,0,193
leal 192(%ebp),%ebx
pxor %xmm2,%xmm0
jmp L000enc_entry
.align 4,0x90
L001enc_loop:
movdqa 32(%ebp),%xmm4
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,226
.byte 102,15,56,0,195
pxor %xmm5,%xmm4
movdqa 64(%ebp),%xmm5
pxor %xmm4,%xmm0
movdqa -64(%ebx,%ecx,1),%xmm1
.byte 102,15,56,0,234
movdqa 80(%ebp),%xmm2
movdqa (%ebx,%ecx,1),%xmm4
.byte 102,15,56,0,211
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addl $16,%edx
pxor %xmm2,%xmm0
.byte 102,15,56,0,220
addl $16,%ecx
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andl $48,%ecx
subl $1,%eax
pxor %xmm3,%xmm0
L000enc_entry:
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
.byte 102,15,56,0,232
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm7,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224
movdqa %xmm7,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
movdqu (%edx),%xmm5
pxor %xmm1,%xmm3
jnz L001enc_loop
movdqa 96(%ebp),%xmm4
movdqa 112(%ebp),%xmm0
.byte 102,15,56,0,226
pxor %xmm5,%xmm4
.byte 102,15,56,0,195
movdqa 64(%ebx,%ecx,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
ret
.private_extern __vpaes_decrypt_core
.align 4
__vpaes_decrypt_core:
leal 608(%ebp),%ebx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
movl %eax,%ecx
psrld $4,%xmm1
movdqu (%edx),%xmm5
shll $4,%ecx
pand %xmm6,%xmm0
.byte 102,15,56,0,208
movdqa -48(%ebx),%xmm0
xorl $48,%ecx
.byte 102,15,56,0,193
andl $48,%ecx
pxor %xmm5,%xmm2
movdqa 176(%ebp),%xmm5
pxor %xmm2,%xmm0
addl $16,%edx
leal -352(%ebx,%ecx,1),%ecx
jmp L002dec_entry
.align 4,0x90
L003dec_loop:
movdqa -32(%ebx),%xmm4
movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa (%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 32(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 64(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
addl $16,%edx
.byte 102,15,58,15,237,12
pxor %xmm1,%xmm0
subl $1,%eax
L002dec_entry:
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
psrld $4,%xmm1
.byte 102,15,56,0,208
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm7,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
movdqu (%edx),%xmm0
pxor %xmm1,%xmm3
jnz L003dec_loop
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226
pxor %xmm0,%xmm4
movdqa 112(%ebx),%xmm0
movdqa (%ecx),%xmm2
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
ret
.private_extern __vpaes_schedule_core
.align 4
__vpaes_schedule_core:
addl (%esp),%ebp
movdqu (%esi),%xmm0
movdqa 320(%ebp),%xmm2
movdqa %xmm0,%xmm3
leal (%ebp),%ebx
movdqa %xmm2,4(%esp)
call __vpaes_schedule_transform
movdqa %xmm0,%xmm7
testl %edi,%edi
jnz L004schedule_am_decrypting
movdqu %xmm0,(%edx)
jmp L005schedule_go
L004schedule_am_decrypting:
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%edx)
xorl $48,%ecx
L005schedule_go:
cmpl $192,%eax
ja L006schedule_256
je L007schedule_192
L008schedule_128:
movl $10,%eax
L009loop_schedule_128:
call __vpaes_schedule_round
decl %eax
jz L010schedule_mangle_last
call __vpaes_schedule_mangle
jmp L009loop_schedule_128
.align 4,0x90
L007schedule_192:
movdqu 8(%esi),%xmm0
call __vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%eax
L011loop_schedule_192:
call __vpaes_schedule_round
.byte 102,15,58,15,198,8
call __vpaes_schedule_mangle
call __vpaes_schedule_192_smear
call __vpaes_schedule_mangle
call __vpaes_schedule_round
decl %eax
jz L010schedule_mangle_last
call __vpaes_schedule_mangle
call __vpaes_schedule_192_smear
jmp L011loop_schedule_192
.align 4,0x90
L006schedule_256:
movdqu 16(%esi),%xmm0
call __vpaes_schedule_transform
movl $7,%eax
L012loop_schedule_256:
call __vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call __vpaes_schedule_round
decl %eax
jz L010schedule_mangle_last
call __vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
movdqa %xmm7,20(%esp)
movdqa %xmm6,%xmm7
call L_vpaes_schedule_low_round
movdqa 20(%esp),%xmm7
jmp L012loop_schedule_256
.align 4,0x90
L010schedule_mangle_last:
leal 384(%ebp),%ebx
testl %edi,%edi
jnz L013schedule_mangle_last_dec
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,193
leal 352(%ebp),%ebx
addl $32,%edx
L013schedule_mangle_last_dec:
addl $-16,%edx
pxor 336(%ebp),%xmm0
call __vpaes_schedule_transform
movdqu %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
ret
.private_extern __vpaes_schedule_192_smear
.align 4
__vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
movhlps %xmm1,%xmm6
ret
.private_extern __vpaes_schedule_round
.align 4
__vpaes_schedule_round:
movdqa 8(%esp),%xmm2
pxor %xmm1,%xmm1
.byte 102,15,58,15,202,15
.byte 102,15,58,15,210,15
pxor %xmm1,%xmm7
pshufd $255,%xmm0,%xmm0
.byte 102,15,58,15,192,1
movdqa %xmm2,8(%esp)
L_vpaes_schedule_low_round:
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor 336(%ebp),%xmm7
movdqa -16(%ebp),%xmm4
movdqa -48(%ebp),%xmm5
movdqa %xmm4,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm4,%xmm0
movdqa -32(%ebp),%xmm2
.byte 102,15,56,0,208
pxor %xmm1,%xmm0
movdqa %xmm5,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
movdqa %xmm5,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm5,%xmm2
.byte 102,15,56,0,211
pxor %xmm0,%xmm2
movdqa %xmm5,%xmm3
.byte 102,15,56,0,220
pxor %xmm1,%xmm3
movdqa 32(%ebp),%xmm4
.byte 102,15,56,0,226
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
ret
.private_extern __vpaes_schedule_transform
.align 4
__vpaes_schedule_transform:
movdqa -16(%ebp),%xmm2
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
movdqa (%ebx),%xmm2
.byte 102,15,56,0,208
movdqa 16(%ebx),%xmm0
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
ret
.private_extern __vpaes_schedule_mangle
.align 4
__vpaes_schedule_mangle:
movdqa %xmm0,%xmm4
movdqa 128(%ebp),%xmm5
testl %edi,%edi
jnz L014schedule_mangle_dec
addl $16,%edx
pxor 336(%ebp),%xmm4
.byte 102,15,56,0,229
movdqa %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
jmp L015schedule_mangle_both
.align 4,0x90
L014schedule_mangle_dec:
movdqa -16(%ebp),%xmm2
leal 416(%ebp),%esi
movdqa %xmm2,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm4
movdqa (%esi),%xmm2
.byte 102,15,56,0,212
movdqa 16(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 32(%esi),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 48(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 64(%esi),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 80(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 96(%esi),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 112(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
addl $-16,%edx
L015schedule_mangle_both:
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217
addl $-16,%ecx
andl $48,%ecx
movdqu %xmm3,(%edx)
ret
.globl _vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key
.align 4
_vpaes_set_encrypt_key:
L_vpaes_set_encrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L016pic
L016pic:
popl %ebx
leal _BORINGSSL_function_hit+5-L016pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
movl $48,%ecx
movl $0,%edi
leal L_vpaes_consts+0x30-L017pic_point,%ebp
call __vpaes_schedule_core
L017pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_set_decrypt_key
.private_extern _vpaes_set_decrypt_key
.align 4
_vpaes_set_decrypt_key:
L_vpaes_set_decrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
shll $4,%ebx
leal 16(%edx,%ebx,1),%edx
movl $1,%edi
movl %eax,%ecx
shrl $1,%ecx
andl $32,%ecx
xorl $32,%ecx
leal L_vpaes_consts+0x30-L018pic_point,%ebp
call __vpaes_schedule_core
L018pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_encrypt
.private_extern _vpaes_encrypt
.align 4
_vpaes_encrypt:
L_vpaes_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L019pic
L019pic:
popl %ebx
leal _BORINGSSL_function_hit+4-L019pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
leal L_vpaes_consts+0x30-L020pic_point,%ebp
call __vpaes_preheat
L020pic_point:
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call __vpaes_encrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_decrypt
.private_extern _vpaes_decrypt
.align 4
_vpaes_decrypt:
L_vpaes_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
leal L_vpaes_consts+0x30-L021pic_point,%ebp
call __vpaes_preheat
L021pic_point:
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call __vpaes_decrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_cbc_encrypt
.private_extern _vpaes_cbc_encrypt
.align 4
_vpaes_cbc_encrypt:
L_vpaes_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
subl $16,%eax
jc L022cbc_abort
leal -56(%esp),%ebx
movl 36(%esp),%ebp
andl $-16,%ebx
movl 40(%esp),%ecx
xchgl %esp,%ebx
movdqu (%ebp),%xmm1
subl %esi,%edi
movl %ebx,48(%esp)
movl %edi,(%esp)
movl %edx,4(%esp)
movl %ebp,8(%esp)
movl %eax,%edi
leal L_vpaes_consts+0x30-L023pic_point,%ebp
call __vpaes_preheat
L023pic_point:
cmpl $0,%ecx
je L024cbc_dec_loop
jmp L025cbc_enc_loop
.align 4,0x90
L025cbc_enc_loop:
movdqu (%esi),%xmm0
pxor %xmm1,%xmm0
call __vpaes_encrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
movdqa %xmm0,%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc L025cbc_enc_loop
jmp L026cbc_done
.align 4,0x90
L024cbc_dec_loop:
movdqu (%esi),%xmm0
movdqa %xmm1,16(%esp)
movdqa %xmm0,32(%esp)
call __vpaes_decrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
pxor 16(%esp),%xmm0
movdqa 32(%esp),%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc L024cbc_dec_loop
L026cbc_done:
movl 8(%esp),%ebx
movl 48(%esp),%esp
movdqu %xmm1,(%ebx)
L022cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)


@@ -0,0 +1,484 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _bn_mul_mont
.private_extern _bn_mul_mont
.align 4
_bn_mul_mont:
L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
call L003PIC_me_up
L003PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L006outer
emms
jmp L008common_tail
.align 4,0x90
L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0112ndmadd
.align 4,0x90
L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0121stmadd
.align 4,0x90
L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 4,0x90
L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0143rdmadd
.align 4,0x90
L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L017sub
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp L018copy
.align 4,0x90
L018copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)


@@ -0,0 +1,168 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
L_abi_test_trampoline_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp),%ecx
movl (%ecx),%esi
movl 4(%ecx),%edi
movl 8(%ecx),%ebx
movl 12(%ecx),%ebp
subl $44,%esp
movl 72(%esp),%eax
xorl %ecx,%ecx
L000loop:
cmpl 76(%esp),%ecx
jae L001loop_done
movl (%eax,%ecx,4),%edx
movl %edx,(%esp,%ecx,4)
addl $1,%ecx
jmp L000loop
L001loop_done:
call *64(%esp)
addl $44,%esp
movl 24(%esp),%ecx
movl %esi,(%ecx)
movl %edi,4(%ecx)
movl %ebx,8(%ecx)
movl %ebp,12(%ecx)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _abi_test_get_and_clear_direction_flag
.private_extern _abi_test_get_and_clear_direction_flag
.align 4
_abi_test_get_and_clear_direction_flag:
L_abi_test_get_and_clear_direction_flag_begin:
pushfl
popl %eax
andl $1024,%eax
shrl $10,%eax
cld
ret
.globl _abi_test_set_direction_flag
.private_extern _abi_test_set_direction_flag
.align 4
_abi_test_set_direction_flag:
L_abi_test_set_direction_flag_begin:
std
ret
.globl _abi_test_clobber_eax
.private_extern _abi_test_clobber_eax
.align 4
_abi_test_clobber_eax:
L_abi_test_clobber_eax_begin:
xorl %eax,%eax
ret
.globl _abi_test_clobber_ebx
.private_extern _abi_test_clobber_ebx
.align 4
_abi_test_clobber_ebx:
L_abi_test_clobber_ebx_begin:
xorl %ebx,%ebx
ret
.globl _abi_test_clobber_ecx
.private_extern _abi_test_clobber_ecx
.align 4
_abi_test_clobber_ecx:
L_abi_test_clobber_ecx_begin:
xorl %ecx,%ecx
ret
.globl _abi_test_clobber_edx
.private_extern _abi_test_clobber_edx
.align 4
_abi_test_clobber_edx:
L_abi_test_clobber_edx_begin:
xorl %edx,%edx
ret
.globl _abi_test_clobber_edi
.private_extern _abi_test_clobber_edi
.align 4
_abi_test_clobber_edi:
L_abi_test_clobber_edi_begin:
xorl %edi,%edi
ret
.globl _abi_test_clobber_esi
.private_extern _abi_test_clobber_esi
.align 4
_abi_test_clobber_esi:
L_abi_test_clobber_esi_begin:
xorl %esi,%esi
ret
.globl _abi_test_clobber_ebp
.private_extern _abi_test_clobber_ebp
.align 4
_abi_test_clobber_ebp:
L_abi_test_clobber_ebp_begin:
xorl %ebp,%ebp
ret
.globl _abi_test_clobber_xmm0
.private_extern _abi_test_clobber_xmm0
.align 4
_abi_test_clobber_xmm0:
L_abi_test_clobber_xmm0_begin:
pxor %xmm0,%xmm0
ret
.globl _abi_test_clobber_xmm1
.private_extern _abi_test_clobber_xmm1
.align 4
_abi_test_clobber_xmm1:
L_abi_test_clobber_xmm1_begin:
pxor %xmm1,%xmm1
ret
.globl _abi_test_clobber_xmm2
.private_extern _abi_test_clobber_xmm2
.align 4
_abi_test_clobber_xmm2:
L_abi_test_clobber_xmm2_begin:
pxor %xmm2,%xmm2
ret
.globl _abi_test_clobber_xmm3
.private_extern _abi_test_clobber_xmm3
.align 4
_abi_test_clobber_xmm3:
L_abi_test_clobber_xmm3_begin:
pxor %xmm3,%xmm3
ret
.globl _abi_test_clobber_xmm4
.private_extern _abi_test_clobber_xmm4
.align 4
_abi_test_clobber_xmm4:
L_abi_test_clobber_xmm4_begin:
pxor %xmm4,%xmm4
ret
.globl _abi_test_clobber_xmm5
.private_extern _abi_test_clobber_xmm5
.align 4
_abi_test_clobber_xmm5:
L_abi_test_clobber_xmm5_begin:
pxor %xmm5,%xmm5
ret
.globl _abi_test_clobber_xmm6
.private_extern _abi_test_clobber_xmm6
.align 4
_abi_test_clobber_xmm6:
L_abi_test_clobber_xmm6_begin:
pxor %xmm6,%xmm6
ret
.globl _abi_test_clobber_xmm7
.private_extern _abi_test_clobber_xmm7
.align 4
_abi_test_clobber_xmm7:
L_abi_test_clobber_xmm7_begin:
pxor %xmm7,%xmm7
ret
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)


@@ -1,20 +1,12 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.section __DATA,__const
.p2align 6
L$zero:
.long 0,0,0,0
@ -44,18 +36,14 @@ L$incz:
L$sixteen:
.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32
.text
.globl _ChaCha20_ctr32_nohw
.private_extern _ChaCha20_ctr32_nohw
.p2align 6
_ChaCha20_ctr32:
cmpq $0,%rdx
je L$no_data
movq _OPENSSL_ia32cap_P+4(%rip),%r10
testl $512,%r10d
jnz L$ChaCha20_ssse3
_ChaCha20_ctr32_nohw:
_CET_ENDBR
pushq %rbx
pushq %rbp
@ -324,20 +312,18 @@ L$done:
leaq (%rsi),%rsp
L$no_data:
.byte 0xf3,0xc3
ret
.globl _ChaCha20_ctr32_ssse3
.private_extern _ChaCha20_ctr32_ssse3
.p2align 5
ChaCha20_ssse3:
L$ChaCha20_ssse3:
_ChaCha20_ctr32_ssse3:
_CET_ENDBR
movq %rsp,%r9
cmpq $128,%rdx
ja L$ChaCha20_4x
L$do_sse3_after_all:
subq $64+8,%rsp
movdqa L$sigma(%rip),%xmm0
movdqu (%rcx),%xmm1
@ -461,28 +447,19 @@ L$done_ssse3:
leaq (%r9),%rsp
L$ssse3_epilogue:
.byte 0xf3,0xc3
ret
.globl _ChaCha20_ctr32_ssse3_4x
.private_extern _ChaCha20_ctr32_ssse3_4x
.p2align 5
ChaCha20_4x:
L$ChaCha20_4x:
_ChaCha20_ctr32_ssse3_4x:
_CET_ENDBR
movq %rsp,%r9
movq %r10,%r11
shrq $32,%r10
testq $32,%r10
jnz L$ChaCha20_8x
cmpq $192,%rdx
ja L$proceed4x
andq $71303168,%r11
cmpq $4194304,%r11
je L$do_sse3_after_all
L$proceed4x:
subq $0x140+8,%rsp
movdqa L$sigma(%rip),%xmm11
movdqu (%rcx),%xmm15
@ -1013,14 +990,16 @@ L$done4x:
leaq (%r9),%rsp
L$4x_epilogue:
.byte 0xf3,0xc3
ret
.globl _ChaCha20_ctr32_avx2
.private_extern _ChaCha20_ctr32_avx2
.p2align 5
ChaCha20_8x:
L$ChaCha20_8x:
_ChaCha20_ctr32_avx2:
_CET_ENDBR
movq %rsp,%r9
subq $0x280+8,%rsp
@ -1619,7 +1598,7 @@ L$done8x:
leaq (%r9),%rsp
L$8x_epilogue:
.byte 0xf3,0xc3
ret
#endif


@ -1,17 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.data
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.section __DATA,__const
.p2align 4
one:
@ -69,7 +62,7 @@ GFMUL:
vpxor %xmm4,%xmm3,%xmm2
vpxor %xmm5,%xmm2,%xmm0
.byte 0xf3,0xc3
ret
.globl _aesgcmsiv_htable_init
@ -78,6 +71,7 @@ GFMUL:
.p2align 4
_aesgcmsiv_htable_init:
_CET_ENDBR
vmovdqa (%rsi),%xmm0
vmovdqa %xmm0,%xmm1
vmovdqa %xmm0,(%rdi)
@ -95,7 +89,7 @@ _aesgcmsiv_htable_init:
vmovdqa %xmm0,96(%rdi)
call GFMUL
vmovdqa %xmm0,112(%rdi)
.byte 0xf3,0xc3
ret
.globl _aesgcmsiv_htable6_init
@ -104,6 +98,7 @@ _aesgcmsiv_htable_init:
.p2align 4
_aesgcmsiv_htable6_init:
_CET_ENDBR
vmovdqa (%rsi),%xmm0
vmovdqa %xmm0,%xmm1
vmovdqa %xmm0,(%rdi)
@ -117,7 +112,7 @@ _aesgcmsiv_htable6_init:
vmovdqa %xmm0,64(%rdi)
call GFMUL
vmovdqa %xmm0,80(%rdi)
.byte 0xf3,0xc3
ret
.globl _aesgcmsiv_htable_polyval
@ -126,9 +121,10 @@ _aesgcmsiv_htable6_init:
.p2align 4
_aesgcmsiv_htable_polyval:
_CET_ENDBR
testq %rdx,%rdx
jnz L$htable_polyval_start
.byte 0xf3,0xc3
ret
L$htable_polyval_start:
vzeroall
@ -334,7 +330,7 @@ L$htable_polyval_out:
vmovdqu %xmm1,(%rcx)
vzeroupper
.byte 0xf3,0xc3
ret
.globl _aesgcmsiv_polyval_horner
@ -343,9 +339,10 @@ L$htable_polyval_out:
.p2align 4
_aesgcmsiv_polyval_horner:
_CET_ENDBR
testq %rcx,%rcx
jnz L$polyval_horner_start
.byte 0xf3,0xc3
ret
L$polyval_horner_start:
@ -367,7 +364,7 @@ L$polyval_horner_loop:
vmovdqa %xmm0,(%rdi)
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_aes_ks
@ -376,6 +373,7 @@ L$polyval_horner_loop:
.p2align 4
_aes128gcmsiv_aes_ks:
_CET_ENDBR
vmovdqu (%rdi),%xmm1
vmovdqa %xmm1,(%rsi)
@ -423,7 +421,7 @@ L$ks128_loop:
vpxor %xmm3,%xmm1,%xmm1
vpxor %xmm2,%xmm1,%xmm1
vmovdqa %xmm1,32(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes256gcmsiv_aes_ks
@ -432,6 +430,7 @@ L$ks128_loop:
.p2align 4
_aes256gcmsiv_aes_ks:
_CET_ENDBR
vmovdqu (%rdi),%xmm1
vmovdqu 16(%rdi),%xmm3
vmovdqa %xmm1,(%rsi)
@ -471,7 +470,7 @@ L$ks256_loop:
vpxor %xmm4,%xmm1,%xmm1
vpxor %xmm2,%xmm1,%xmm1
vmovdqa %xmm1,32(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_aes_ks_enc_x1
.private_extern _aes128gcmsiv_aes_ks_enc_x1
@ -479,6 +478,7 @@ L$ks256_loop:
.p2align 4
_aes128gcmsiv_aes_ks_enc_x1:
_CET_ENDBR
vmovdqa (%rcx),%xmm1
vmovdqa 0(%rdi),%xmm4
@ -612,7 +612,7 @@ _aes128gcmsiv_aes_ks_enc_x1:
vmovdqa %xmm4,0(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_kdf
@ -621,6 +621,7 @@ _aes128gcmsiv_aes_ks_enc_x1:
.p2align 4
_aes128gcmsiv_kdf:
_CET_ENDBR
@ -705,7 +706,7 @@ _aes128gcmsiv_kdf:
vmovdqa %xmm10,16(%rsi)
vmovdqa %xmm11,32(%rsi)
vmovdqa %xmm12,48(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_enc_msg_x4
@ -714,9 +715,10 @@ _aes128gcmsiv_kdf:
.p2align 4
_aes128gcmsiv_enc_msg_x4:
_CET_ENDBR
testq %r8,%r8
jnz L$128_enc_msg_x4_start
.byte 0xf3,0xc3
ret
L$128_enc_msg_x4_start:
pushq %r12
@ -880,7 +882,7 @@ L$128_enc_msg_x4_out:
popq %r12
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_enc_msg_x8
@ -889,9 +891,10 @@ L$128_enc_msg_x4_out:
.p2align 4
_aes128gcmsiv_enc_msg_x8:
_CET_ENDBR
testq %r8,%r8
jnz L$128_enc_msg_x8_start
.byte 0xf3,0xc3
ret
L$128_enc_msg_x8_start:
pushq %r12
@ -1135,7 +1138,7 @@ L$128_enc_msg_x8_out:
popq %r12
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_dec
@ -1144,21 +1147,23 @@ L$128_enc_msg_x8_out:
.p2align 4
_aes128gcmsiv_dec:
_CET_ENDBR
testq $~15,%r9
jnz L$128_dec_start
.byte 0xf3,0xc3
ret
L$128_dec_start:
vzeroupper
vmovdqa (%rdx),%xmm0
vmovdqu 16(%rdx),%xmm15
vpor OR_MASK(%rip),%xmm15,%xmm15
movq %rdx,%rax
leaq 32(%rax),%rax
leaq 32(%rcx),%rcx
vmovdqu (%rdi,%r9,1),%xmm15
vpor OR_MASK(%rip),%xmm15,%xmm15
andq $~15,%r9
@ -1627,7 +1632,7 @@ L$128_dec_loop2:
L$128_dec_out:
vmovdqu %xmm0,(%rdx)
.byte 0xf3,0xc3
ret
.globl _aes128gcmsiv_ecb_enc_block
@ -1636,6 +1641,7 @@ L$128_dec_out:
.p2align 4
_aes128gcmsiv_ecb_enc_block:
_CET_ENDBR
vmovdqa (%rdi),%xmm1
vpxor (%rdx),%xmm1,%xmm1
@ -1652,7 +1658,7 @@ _aes128gcmsiv_ecb_enc_block:
vmovdqa %xmm1,(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes256gcmsiv_aes_ks_enc_x1
@ -1661,6 +1667,7 @@ _aes128gcmsiv_ecb_enc_block:
.p2align 4
_aes256gcmsiv_aes_ks_enc_x1:
_CET_ENDBR
vmovdqa con1(%rip),%xmm0
vmovdqa mask(%rip),%xmm15
vmovdqa (%rdi),%xmm8
@ -1835,7 +1842,7 @@ _aes256gcmsiv_aes_ks_enc_x1:
vmovdqu %xmm1,224(%rdx)
vmovdqa %xmm8,(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes256gcmsiv_ecb_enc_block
@ -1844,6 +1851,7 @@ _aes256gcmsiv_aes_ks_enc_x1:
.p2align 4
_aes256gcmsiv_ecb_enc_block:
_CET_ENDBR
vmovdqa (%rdi),%xmm1
vpxor (%rdx),%xmm1,%xmm1
vaesenc 16(%rdx),%xmm1,%xmm1
@ -1861,7 +1869,7 @@ _aes256gcmsiv_ecb_enc_block:
vaesenc 208(%rdx),%xmm1,%xmm1
vaesenclast 224(%rdx),%xmm1,%xmm1
vmovdqa %xmm1,(%rsi)
.byte 0xf3,0xc3
ret
.globl _aes256gcmsiv_enc_msg_x4
@ -1870,9 +1878,10 @@ _aes256gcmsiv_ecb_enc_block:
.p2align 4
_aes256gcmsiv_enc_msg_x4:
_CET_ENDBR
testq %r8,%r8
jnz L$256_enc_msg_x4_start
.byte 0xf3,0xc3
ret
L$256_enc_msg_x4_start:
movq %r8,%r10
@ -2062,7 +2071,7 @@ L$256_enc_msg_x4_loop2:
jne L$256_enc_msg_x4_loop2
L$256_enc_msg_x4_out:
.byte 0xf3,0xc3
ret
.globl _aes256gcmsiv_enc_msg_x8
@ -2071,9 +2080,10 @@ L$256_enc_msg_x4_out:
.p2align 4
_aes256gcmsiv_enc_msg_x8:
_CET_ENDBR
testq %r8,%r8
jnz L$256_enc_msg_x8_start
.byte 0xf3,0xc3
ret
L$256_enc_msg_x8_start:
@ -2350,7 +2360,7 @@ L$256_enc_msg_x8_loop2:
jnz L$256_enc_msg_x8_loop2
L$256_enc_msg_x8_out:
.byte 0xf3,0xc3
ret
@ -2360,21 +2370,23 @@ L$256_enc_msg_x8_out:
.p2align 4
_aes256gcmsiv_dec:
_CET_ENDBR
testq $~15,%r9
jnz L$256_dec_start
.byte 0xf3,0xc3
ret
L$256_dec_start:
vzeroupper
vmovdqa (%rdx),%xmm0
vmovdqu 16(%rdx),%xmm15
vpor OR_MASK(%rip),%xmm15,%xmm15
movq %rdx,%rax
leaq 32(%rax),%rax
leaq 32(%rcx),%rcx
vmovdqu (%rdi,%r9,1),%xmm15
vpor OR_MASK(%rip),%xmm15,%xmm15
andq $~15,%r9
@ -2911,7 +2923,7 @@ L$256_dec_loop2:
L$256_dec_out:
vmovdqu %xmm0,(%rdx)
.byte 0xf3,0xc3
ret
.globl _aes256gcmsiv_kdf
@ -2920,6 +2932,7 @@ L$256_dec_out:
.p2align 4
_aes256gcmsiv_kdf:
_CET_ENDBR
@ -3062,7 +3075,7 @@ _aes256gcmsiv_kdf:
vmovdqa %xmm11,48(%rsi)
vmovdqa %xmm12,64(%rsi)
vmovdqa %xmm13,80(%rsi)
.byte 0xf3,0xc3
ret
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -221,7 +214,7 @@ L$resume_ctr32:
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
cmpl $11,%r10d
jb L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
@ -305,6 +298,9 @@ L$enc_tail:
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
prefetcht0 512(%rdi)
prefetcht0 576(%rdi)
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
@ -317,7 +313,7 @@ L$enc_tail:
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
addq $0x60,%rax
subq $0x6,%rdx
jc L$6x_done
@ -340,7 +336,7 @@ L$6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
ret
.globl _aesni_gcm_decrypt
@ -349,41 +345,50 @@ L$6x_done:
.p2align 5
_aesni_gcm_decrypt:
xorq %r10,%r10
_CET_ENDBR
xorq %rax,%rax
cmpq $0x60,%rdx
jb L$gcm_dec_abort
leaq (%rsp),%rax
pushq %rbp
movq %rsp,%rbp
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
movq 16(%rbp),%r12
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
vmovdqu (%r12),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
leaq 32(%r9),%r9
movl 240-128(%rcx),%r10d
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
@ -396,7 +401,7 @@ _aesni_gcm_decrypt:
L$dec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
movq %rdi,%r14
vmovdqu 64(%rdi),%xmm4
@ -409,7 +414,7 @@ L$dec_no_key_aliasing:
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
xorq %rax,%rax
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
@ -427,6 +432,7 @@ L$dec_no_key_aliasing:
call _aesni_ctr32_ghash_6x
movq 16(%rbp),%r12
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
@ -435,26 +441,26 @@ L$dec_no_key_aliasing:
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vmovdqu %xmm8,(%r12)
vzeroupper
movq -48(%rax),%r15
leaq -40(%rbp),%rsp
movq -40(%rax),%r14
popq %r15
movq -32(%rax),%r13
popq %r14
movq -24(%rax),%r12
popq %r13
movq -16(%rax),%rbp
popq %r12
movq -8(%rax),%rbx
popq %rbx
leaq (%rax),%rsp
popq %rbp
L$gcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
ret
@ -463,7 +469,7 @@ _aesni_ctr32_6x:
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
leaq -1(%r10),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
@ -524,7 +530,7 @@ L$oop_ctr32:
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
ret
.p2align 5
L$handle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
@ -556,11 +562,13 @@ L$handle_ctr32_2:
.p2align 5
_aesni_gcm_encrypt:
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
xorq %rax,%rax
@ -568,20 +576,26 @@ _aesni_gcm_encrypt:
cmpq $288,%rdx
jb L$gcm_enc_abort
leaq (%rsp),%rax
pushq %rbp
movq %rsp,%rbp
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
@ -593,7 +607,7 @@ _aesni_gcm_encrypt:
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
movl 240-128(%rcx),%r10d
andq %r15,%r14
andq %rsp,%r15
@ -604,7 +618,7 @@ _aesni_gcm_encrypt:
subq %r15,%rsp
L$enc_no_key_aliasing:
leaq (%rsi),%r14
movq %rsi,%r14
@ -632,10 +646,11 @@ L$enc_no_key_aliasing:
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
movq 16(%rbp),%r12
leaq 32(%r9),%r9
vmovdqu (%r12),%xmm8
subq $12,%rdx
movq $192,%r10
movq $192,%rax
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
@ -811,29 +826,31 @@ L$enc_no_key_aliasing:
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
movq 16(%rbp),%r12
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vmovdqu %xmm8,(%r12)
vzeroupper
movq -48(%rax),%r15
leaq -40(%rbp),%rsp
movq -40(%rax),%r14
popq %r15
movq -32(%rax),%r13
popq %r14
movq -24(%rax),%r12
popq %r13
movq -16(%rax),%rbp
popq %r12
movq -8(%rax),%rbx
popq %rbx
leaq (%rax),%rsp
popq %rbp
L$gcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
ret
.section __DATA,__const
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@ -847,4 +864,5 @@ L$one_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
.text
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.globl _aes_hw_encrypt
@ -19,6 +12,7 @@
.p2align 4
_aes_hw_encrypt:
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+1(%rip)
@ -40,7 +34,7 @@ L$oop_enc1_1:
pxor %xmm1,%xmm1
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
.byte 0xf3,0xc3
ret
@ -50,6 +44,7 @@ L$oop_enc1_1:
.p2align 4
_aes_hw_decrypt:
_CET_ENDBR
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
@ -67,7 +62,7 @@ L$oop_dec1_2:
pxor %xmm1,%xmm1
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
.byte 0xf3,0xc3
ret
@ -98,7 +93,7 @@ L$enc_loop2:
.byte 102,15,56,220,217
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 0xf3,0xc3
ret
@ -129,7 +124,7 @@ L$dec_loop2:
.byte 102,15,56,222,217
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 0xf3,0xc3
ret
@ -165,7 +160,7 @@ L$enc_loop3:
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 0xf3,0xc3
ret
@ -201,7 +196,7 @@ L$dec_loop3:
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 0xf3,0xc3
ret
@ -243,7 +238,7 @@ L$enc_loop4:
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 0xf3,0xc3
ret
@ -285,7 +280,7 @@ L$dec_loop4:
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 0xf3,0xc3
ret
@ -341,7 +336,7 @@ L$enc_loop6_enter:
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 0xf3,0xc3
ret
@ -397,7 +392,7 @@ L$dec_loop6_enter:
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248
.byte 0xf3,0xc3
ret
@ -463,7 +458,7 @@ L$enc_loop8_enter:
.byte 102,15,56,221,248
.byte 102,68,15,56,221,192
.byte 102,68,15,56,221,200
.byte 0xf3,0xc3
ret
@ -529,7 +524,7 @@ L$dec_loop8_enter:
.byte 102,15,56,223,248
.byte 102,68,15,56,223,192
.byte 102,68,15,56,223,200
.byte 0xf3,0xc3
ret
.globl _aes_hw_ecb_encrypt
@ -538,6 +533,7 @@ L$dec_loop8_enter:
.p2align 4
_aes_hw_ecb_encrypt:
_CET_ENDBR
andq $-16,%rdx
jz L$ecb_ret
@ -874,7 +870,7 @@ L$ecb_dec_six:
L$ecb_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
.byte 0xf3,0xc3
ret
.globl _aes_hw_ctr32_encrypt_blocks
@ -883,6 +879,7 @@ L$ecb_ret:
.p2align 4
_aes_hw_ctr32_encrypt_blocks:
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit(%rip)
#endif
@ -972,10 +969,7 @@ L$ctr32_bulk:
leaq 7(%r8),%r9
movl %r10d,96+12(%rsp)
bswapl %r9d
leaq _OPENSSL_ia32cap_P(%rip),%r10
movl 4(%r10),%r10d
xorl %ebp,%r9d
andl $71303168,%r10d
movl %r9d,112+12(%rsp)
movups 16(%rcx),%xmm1
@ -986,104 +980,10 @@ L$ctr32_bulk:
cmpq $8,%rdx
jb L$ctr32_tail
subq $6,%rdx
cmpl $4194304,%r10d
je L$ctr32_6x
leaq 128(%rcx),%rcx
subq $2,%rdx
subq $8,%rdx
jmp L$ctr32_loop8
.p2align 4
L$ctr32_6x:
shll $4,%eax
movl $48,%r10d
bswapl %ebp
leaq 32(%rcx,%rax,1),%rcx
subq %rax,%r10
jmp L$ctr32_loop6
.p2align 4
L$ctr32_loop6:
addl $6,%r8d
movups -48(%rcx,%r10,1),%xmm0
.byte 102,15,56,220,209
movl %r8d,%eax
xorl %ebp,%eax
.byte 102,15,56,220,217
.byte 0x0f,0x38,0xf1,0x44,0x24,12
leal 1(%r8),%eax
.byte 102,15,56,220,225
xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28
.byte 102,15,56,220,233
leal 2(%r8),%eax
xorl %ebp,%eax
.byte 102,15,56,220,241
.byte 0x0f,0x38,0xf1,0x44,0x24,44
leal 3(%r8),%eax
.byte 102,15,56,220,249
movups -32(%rcx,%r10,1),%xmm1
xorl %ebp,%eax
.byte 102,15,56,220,208
.byte 0x0f,0x38,0xf1,0x44,0x24,60
leal 4(%r8),%eax
.byte 102,15,56,220,216
xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76
.byte 102,15,56,220,224
leal 5(%r8),%eax
xorl %ebp,%eax
.byte 102,15,56,220,232
.byte 0x0f,0x38,0xf1,0x44,0x24,92
movq %r10,%rax
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -16(%rcx,%r10,1),%xmm0
call L$enc_loop6
movdqu (%rdi),%xmm8
movdqu 16(%rdi),%xmm9
movdqu 32(%rdi),%xmm10
movdqu 48(%rdi),%xmm11
movdqu 64(%rdi),%xmm12
movdqu 80(%rdi),%xmm13
leaq 96(%rdi),%rdi
movups -64(%rcx,%r10,1),%xmm1
pxor %xmm2,%xmm8
movaps 0(%rsp),%xmm2
pxor %xmm3,%xmm9
movaps 16(%rsp),%xmm3
pxor %xmm4,%xmm10
movaps 32(%rsp),%xmm4
pxor %xmm5,%xmm11
movaps 48(%rsp),%xmm5
pxor %xmm6,%xmm12
movaps 64(%rsp),%xmm6
pxor %xmm7,%xmm13
movaps 80(%rsp),%xmm7
movdqu %xmm8,(%rsi)
movdqu %xmm9,16(%rsi)
movdqu %xmm10,32(%rsi)
movdqu %xmm11,48(%rsi)
movdqu %xmm12,64(%rsi)
movdqu %xmm13,80(%rsi)
leaq 96(%rsi),%rsi
subq $6,%rdx
jnc L$ctr32_loop6
addq $6,%rdx
jz L$ctr32_done
leal -48(%r10),%eax
leaq -80(%rcx,%r10,1),%rcx
negl %eax
shrl $4,%eax
jmp L$ctr32_tail
.p2align 5
L$ctr32_loop8:
addl $8,%r8d
@ -1260,6 +1160,8 @@ L$ctr32_enc_done:
pxor %xmm0,%xmm13
movdqu 80(%rdi),%xmm15
pxor %xmm0,%xmm14
prefetcht0 448(%rdi)
prefetcht0 512(%rdi)
pxor %xmm0,%xmm15
.byte 102,15,56,220,209
.byte 102,15,56,220,217
@ -1457,7 +1359,7 @@ L$ctr32_done:
leaq (%r11),%rsp
L$ctr32_epilogue:
.byte 0xf3,0xc3
ret
.globl _aes_hw_cbc_encrypt
@ -1466,6 +1368,7 @@ L$ctr32_epilogue:
.p2align 4
_aes_hw_cbc_encrypt:
_CET_ENDBR
testq %rdx,%rdx
jz L$cbc_ret
@ -1582,16 +1485,10 @@ L$cbc_decrypt_bulk:
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
leaq _OPENSSL_ia32cap_P(%rip),%r9
movl 4(%r9),%r9d
cmpq $0x70,%rdx
jbe L$cbc_dec_six_or_seven
andl $71303168,%r9d
subq $0x50,%rdx
cmpl $4194304,%r9d
je L$cbc_dec_loop6_enter
subq $0x20,%rdx
subq $0x70,%rdx
leaq 112(%rcx),%rcx
jmp L$cbc_dec_loop8_enter
.p2align 4
@ -1862,51 +1759,6 @@ L$cbc_dec_seven:
pxor %xmm9,%xmm9
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_loop6:
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
L$cbc_dec_loop6_enter:
leaq 96(%rdi),%rdi
movdqa %xmm7,%xmm8
call _aesni_decrypt6
pxor %xmm10,%xmm2
movdqa %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movq %rbp,%rcx
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movl %r10d,%eax
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $0x60,%rdx
ja L$cbc_dec_loop6
movdqa %xmm7,%xmm2
addq $0x50,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
L$cbc_dec_tail:
movups (%rdi),%xmm2
subq $0x10,%rdx
@ -2050,7 +1902,7 @@ L$cbc_dec_ret:
leaq (%r11),%rsp
L$cbc_ret:
.byte 0xf3,0xc3
ret
.globl _aes_hw_set_decrypt_key
@ -2059,6 +1911,7 @@ L$cbc_ret:
.p2align 4
_aes_hw_set_decrypt_key:
_CET_ENDBR
.byte 0x48,0x83,0xEC,0x08
call __aesni_set_encrypt_key
@ -2094,7 +1947,7 @@ L$dec_key_inverse:
L$dec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
ret
L$SEH_end_set_decrypt_key:
@ -2105,6 +1958,7 @@ L$SEH_end_set_decrypt_key:
_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+3(%rip)
#endif
@ -2404,7 +2258,7 @@ L$enc_key_ret:
pxor %xmm5,%xmm5
addq $8,%rsp
.byte 0xf3,0xc3
ret
L$SEH_end_set_encrypt_key:
@ -2419,7 +2273,7 @@ L$key_expansion_128_cold:
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
ret
.p2align 4
L$key_expansion_192a:
@ -2439,7 +2293,7 @@ L$key_expansion_192b_warm:
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
.byte 0xf3,0xc3
ret
.p2align 4
L$key_expansion_192b:
@ -2462,7 +2316,7 @@ L$key_expansion_256a_cold:
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
ret
.p2align 4
L$key_expansion_256b:
@ -2475,9 +2329,10 @@ L$key_expansion_256b:
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
.byte 0xf3,0xc3
ret
.section __DATA,__const
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@ -2500,4 +2355,5 @@ L$key_rcon1b:
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
.text
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -23,7 +16,8 @@
.p2align 4
_gcm_gmult_ssse3:
L$gmult_seh_begin:
_CET_ENDBR
movdqu (%rdi),%xmm0
movdqa L$reverse_bytes(%rip),%xmm10
movdqa L$low4_mask(%rip),%xmm2
@ -198,8 +192,8 @@ L$oop_row_3:
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
L$gmult_seh_end:
ret
@ -212,8 +206,9 @@ L$gmult_seh_end:
.private_extern _gcm_ghash_ssse3
.p2align 4
_gcm_ghash_ssse3:
L$ghash_seh_begin:
_CET_ENDBR
movdqu (%rdi),%xmm0
movdqa L$reverse_bytes(%rip),%xmm10
movdqa L$low4_mask(%rip),%xmm11
@ -410,11 +405,12 @@ L$oop_row_6:
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
L$ghash_seh_end:
ret
.section __DATA,__const
.p2align 4
@ -423,4 +419,5 @@ L$reverse_bytes:
L$low4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
.text
#endif


@ -1,24 +1,18 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.p2align 4
_gcm_init_clmul:
_CET_ENDBR
L$_init_clmul:
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2
@ -169,7 +163,8 @@ L$_init_clmul:
movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
ret
.globl _gcm_gmult_clmul
@ -178,6 +173,7 @@ L$_init_clmul:
.p2align 4
_gcm_gmult_clmul:
_CET_ENDBR
L$_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa L$bswap_mask(%rip),%xmm5
@ -223,7 +219,7 @@ L$_gmult_clmul:
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
ret
.globl _gcm_ghash_clmul
@ -232,6 +228,8 @@ L$_gmult_clmul:
.p2align 5
_gcm_ghash_clmul:
_CET_ENDBR
L$_ghash_clmul:
movdqa L$bswap_mask(%rip),%xmm10
@ -244,15 +242,9 @@ L$_ghash_clmul:
jz L$odd_tail
movdqu 16(%rsi),%xmm6
leaq _OPENSSL_ia32cap_P(%rip),%rax
movl 4(%rax),%eax
cmpq $0x30,%rcx
jb L$skip4x
andl $71303168,%eax
cmpl $4194304,%eax
je L$skip4x
subq $0x30,%rcx
movq $0xA040608020C0E000,%rax
movdqu 48(%rsi),%xmm14
@ -610,7 +602,8 @@ L$odd_tail:
L$done:
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
ret
.globl _gcm_init_avx
@ -619,6 +612,7 @@ L$done:
.p2align 5
_gcm_init_avx:
_CET_ENDBR
vzeroupper
vmovdqu (%rsi),%xmm2
@ -720,7 +714,8 @@ L$init_start_avx:
vmovdqu %xmm5,-16(%rdi)
vzeroupper
.byte 0xf3,0xc3
ret
.globl _gcm_gmult_avx
@ -729,6 +724,7 @@ L$init_start_avx:
.p2align 5
_gcm_gmult_avx:
_CET_ENDBR
jmp L$_gmult_clmul
@ -738,6 +734,7 @@ _gcm_gmult_avx:
.p2align 5
_gcm_ghash_avx:
_CET_ENDBR
vzeroupper
vmovdqu (%rdi),%xmm10
@ -1108,9 +1105,11 @@ L$tail_no_xor_avx:
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
ret
.section __DATA,__const
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@ -1118,10 +1117,9 @@ L$0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$7_mask:
.long 7,0,7,0
L$7_mask_poly:
.long 7,0,450,0
.p2align 6
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
.text
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.p2align 4
@ -19,6 +12,7 @@
_md5_block_asm_data_order:
_CET_ENDBR
pushq %rbp
pushq %rbx
@ -690,7 +684,7 @@ L$end:
addq $40,%rsp
L$epilogue:
.byte 0xf3,0xc3
ret
#endif


@ -1,20 +1,14 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.section __DATA,__const
.p2align 6
L$poly:
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
@ -33,6 +27,7 @@ L$ord:
.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
L$ordK:
.quad 0xccd1c8aaee00bc4f
.text
@ -42,6 +37,7 @@ L$ordK:
.p2align 5
_ecp_nistz256_neg:
_CET_ENDBR
pushq %r12
pushq %r13
@ -87,7 +83,7 @@ L$neg_body:
leaq 16(%rsp),%rsp
L$neg_epilogue:
.byte 0xf3,0xc3
ret
@ -102,6 +98,7 @@ L$neg_epilogue:
.p2align 5
_ecp_nistz256_ord_mul_mont:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -414,7 +411,7 @@ L$ord_mul_body:
leaq 48(%rsp),%rsp
L$ord_mul_epilogue:
.byte 0xf3,0xc3
ret
@ -430,6 +427,7 @@ L$ord_mul_epilogue:
.p2align 5
_ecp_nistz256_ord_sqr_mont:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -712,7 +710,7 @@ L$oop_ord_sqr:
leaq 48(%rsp),%rsp
L$ord_sqr_epilogue:
.byte 0xf3,0xc3
ret
@ -948,7 +946,7 @@ L$ord_mulx_body:
leaq 48(%rsp),%rsp
L$ord_mulx_epilogue:
.byte 0xf3,0xc3
ret
@ -1156,7 +1154,7 @@ L$oop_ord_sqrx:
leaq 48(%rsp),%rsp
L$ord_sqrx_epilogue:
.byte 0xf3,0xc3
ret
@ -1171,6 +1169,7 @@ L$ord_sqrx_epilogue:
.p2align 5
_ecp_nistz256_mul_mont:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -1227,7 +1226,7 @@ L$mul_mont_done:
leaq 48(%rsp),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
ret
@ -1445,7 +1444,7 @@ __ecp_nistz256_mul_montq:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -1462,6 +1461,7 @@ __ecp_nistz256_mul_montq:
.p2align 5
_ecp_nistz256_sqr_mont:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -1513,7 +1513,7 @@ L$sqr_mont_done:
leaq 48(%rsp),%rsp
L$sqr_epilogue:
.byte 0xf3,0xc3
ret
@ -1677,7 +1677,7 @@ __ecp_nistz256_sqr_montq:
movq %r14,16(%rdi)
movq %r15,24(%rdi)
.byte 0xf3,0xc3
ret
@ -1845,7 +1845,7 @@ __ecp_nistz256_mul_montx:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -1975,7 +1975,7 @@ __ecp_nistz256_sqr_montx:
movq %r14,16(%rdi)
movq %r15,24(%rdi)
.byte 0xf3,0xc3
ret
@ -1986,6 +1986,7 @@ __ecp_nistz256_sqr_montx:
.p2align 5
_ecp_nistz256_select_w5:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
@ -2040,7 +2041,7 @@ L$select_loop_sse_w5:
movdqu %xmm5,48(%rdi)
movdqu %xmm6,64(%rdi)
movdqu %xmm7,80(%rdi)
.byte 0xf3,0xc3
ret
L$SEH_end_ecp_nistz256_select_w5:
@ -2053,6 +2054,7 @@ L$SEH_end_ecp_nistz256_select_w5:
.p2align 5
_ecp_nistz256_select_w7:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
@ -2096,7 +2098,7 @@ L$select_loop_sse_w7:
movdqu %xmm3,16(%rdi)
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3
ret
L$SEH_end_ecp_nistz256_select_w7:
@ -2159,7 +2161,7 @@ L$select_loop_avx2_w5:
vmovdqu %ymm3,32(%rdi)
vmovdqu %ymm4,64(%rdi)
vzeroupper
.byte 0xf3,0xc3
ret
L$SEH_end_ecp_nistz256_avx2_select_w5:
@ -2173,6 +2175,7 @@ L$SEH_end_ecp_nistz256_avx2_select_w5:
_ecp_nistz256_avx2_select_w7:
L$avx2_select_w7:
_CET_ENDBR
vzeroupper
vmovdqa L$Three(%rip),%ymm0
@ -2240,7 +2243,7 @@ L$select_loop_avx2_w7:
vmovdqu %ymm2,0(%rdi)
vmovdqu %ymm3,32(%rdi)
vzeroupper
.byte 0xf3,0xc3
ret
L$SEH_end_ecp_nistz256_avx2_select_w7:
@ -2274,7 +2277,7 @@ __ecp_nistz256_add_toq:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -2307,7 +2310,7 @@ __ecp_nistz256_sub_fromq:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -2336,7 +2339,7 @@ __ecp_nistz256_subq:
cmovnzq %rcx,%r8
cmovnzq %r10,%r9
.byte 0xf3,0xc3
ret
@ -2370,7 +2373,7 @@ __ecp_nistz256_mul_by_2q:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
.globl _ecp_nistz256_point_double
@ -2379,6 +2382,7 @@ __ecp_nistz256_mul_by_2q:
.p2align 5
_ecp_nistz256_point_double:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -2598,7 +2602,7 @@ L$point_double_shortcutq:
leaq (%rsi),%rsp
L$point_doubleq_epilogue:
.byte 0xf3,0xc3
ret
.globl _ecp_nistz256_point_add
@ -2607,6 +2611,7 @@ L$point_doubleq_epilogue:
.p2align 5
_ecp_nistz256_point_add:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -3029,7 +3034,7 @@ L$add_doneq:
leaq (%rsi),%rsp
L$point_addq_epilogue:
.byte 0xf3,0xc3
ret
.globl _ecp_nistz256_point_add_affine
@ -3038,6 +3043,7 @@ L$point_addq_epilogue:
.p2align 5
_ecp_nistz256_point_add_affine:
_CET_ENDBR
leaq _OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
@ -3357,7 +3363,7 @@ L$add_affineq_body:
leaq (%rsi),%rsp
L$add_affineq_epilogue:
.byte 0xf3,0xc3
ret
@ -3391,7 +3397,7 @@ __ecp_nistz256_add_tox:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -3426,7 +3432,7 @@ __ecp_nistz256_sub_fromx:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -3457,7 +3463,7 @@ __ecp_nistz256_subx:
cmovcq %rcx,%r8
cmovcq %r10,%r9
.byte 0xf3,0xc3
ret
@ -3492,7 +3498,7 @@ __ecp_nistz256_mul_by_2x:
movq %r8,16(%rdi)
movq %r9,24(%rdi)
.byte 0xf3,0xc3
ret
@ -3714,7 +3720,7 @@ L$point_double_shortcutx:
leaq (%rsi),%rsp
L$point_doublex_epilogue:
.byte 0xf3,0xc3
ret
@ -4139,7 +4145,7 @@ L$add_donex:
leaq (%rsi),%rsp
L$point_addx_epilogue:
.byte 0xf3,0xc3
ret
@ -4461,7 +4467,7 @@ L$add_affinex_body:
leaq (%rsi),%rsp
L$add_affinex_epilogue:
.byte 0xf3,0xc3
ret
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -20,6 +13,7 @@
.p2align 5
_beeu_mod_inverse_vartime:
_CET_ENDBR
pushq %rbp
pushq %r12
@ -321,7 +315,7 @@ L$beeu_finish:
popq %rbp
.byte 0xf3,0xc3
ret


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -22,12 +15,13 @@
.p2align 4
_CRYPTO_rdrand:
_CET_ENDBR
xorq %rax,%rax
.byte 72,15,199,242
adcq %rax,%rax
movq %rdx,0(%rdi)
.byte 0xf3,0xc3
ret
@ -41,6 +35,7 @@ _CRYPTO_rdrand:
.p2align 4
_CRYPTO_rdrand_multiple8_buf:
_CET_ENDBR
testq %rsi,%rsi
jz L$out
movq $8,%rdx
@ -53,10 +48,10 @@ L$loop:
jnz L$loop
L$out:
movq $1,%rax
.byte 0xf3,0xc3
ret
L$err:
xorq %rax,%rax
.byte 0xf3,0xc3
ret
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.globl _rsaz_1024_sqr_avx2
@ -19,6 +12,7 @@
.p2align 6
_rsaz_1024_sqr_avx2:
_CET_ENDBR
leaq (%rsp),%rax
pushq %rbx
@ -664,7 +658,7 @@ L$OOP_REDUCE_1024:
leaq (%rax),%rsp
L$sqr_1024_epilogue:
.byte 0xf3,0xc3
ret
.globl _rsaz_1024_mul_avx2
@ -673,6 +667,7 @@ L$sqr_1024_epilogue:
.p2align 6
_rsaz_1024_mul_avx2:
_CET_ENDBR
leaq (%rsp),%rax
pushq %rbx
@ -1220,7 +1215,7 @@ L$oop_mul_1024:
leaq (%rax),%rsp
L$mul_1024_epilogue:
.byte 0xf3,0xc3
ret
.globl _rsaz_1024_red2norm_avx2
@ -1229,6 +1224,7 @@ L$mul_1024_epilogue:
.p2align 5
_rsaz_1024_red2norm_avx2:
_CET_ENDBR
subq $-128,%rsi
xorq %rax,%rax
movq -128(%rsi),%r8
@ -1419,7 +1415,7 @@ _rsaz_1024_red2norm_avx2:
adcq $0,%r11
movq %rax,120(%rdi)
movq %r11,%rax
.byte 0xf3,0xc3
ret
@ -1429,6 +1425,7 @@ _rsaz_1024_red2norm_avx2:
.p2align 5
_rsaz_1024_norm2red_avx2:
_CET_ENDBR
subq $-128,%rdi
movq (%rsi),%r8
movl $0x1fffffff,%eax
@ -1580,7 +1577,7 @@ _rsaz_1024_norm2red_avx2:
movq %r8,168(%rdi)
movq %r8,176(%rdi)
movq %r8,184(%rdi)
.byte 0xf3,0xc3
ret
.globl _rsaz_1024_scatter5_avx2
@ -1589,6 +1586,7 @@ _rsaz_1024_norm2red_avx2:
.p2align 5
_rsaz_1024_scatter5_avx2:
_CET_ENDBR
vzeroupper
vmovdqu L$scatter_permd(%rip),%ymm5
shll $4,%edx
@ -1607,7 +1605,7 @@ L$oop_scatter_1024:
jnz L$oop_scatter_1024
vzeroupper
.byte 0xf3,0xc3
ret
@ -1617,6 +1615,7 @@ L$oop_scatter_1024:
.p2align 5
_rsaz_1024_gather5_avx2:
_CET_ENDBR
vzeroupper
movq %rsp,%r11
@ -1729,10 +1728,11 @@ L$oop_gather_1024:
vzeroupper
leaq (%r11),%rsp
.byte 0xf3,0xc3
ret
L$SEH_end_rsaz_1024_gather5:
.section __DATA,__const
.p2align 6
L$and_mask:
.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
@ -1745,4 +1745,5 @@ L$inc:
.long 2,2,2,2, 3,3,3,3
.long 4,4,4,4, 4,4,4,4
.p2align 6
.text
#endif


@ -1,36 +1,18 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.globl _sha256_block_data_order
.private_extern _sha256_block_data_order
.globl _sha256_block_data_order_nohw
.private_extern _sha256_block_data_order_nohw
.p2align 4
_sha256_block_data_order:
_sha256_block_data_order_nohw:
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
movl 8(%r11),%r11d
andl $1073741824,%r9d
andl $268435968,%r10d
orl %r9d,%r10d
cmpl $1342177792,%r10d
je L$avx_shortcut
testl $512,%r10d
jnz L$ssse3_shortcut
_CET_ENDBR
movq %rsp,%rax
pushq %rbx
@ -1734,9 +1716,10 @@ L$rounds_16_xx:
leaq (%rsi),%rsp
L$epilogue:
.byte 0xf3,0xc3
ret
.section __DATA,__const
.p2align 6
K256:
@ -1780,11 +1763,225 @@ K256:
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.text
.globl _sha256_block_data_order_hw
.private_extern _sha256_block_data_order_hw
.p2align 6
sha256_block_data_order_ssse3:
_sha256_block_data_order_hw:
L$ssse3_shortcut:
_CET_ENDBR
leaq K256+128(%rip),%rcx
movdqu (%rdi),%xmm1
movdqu 16(%rdi),%xmm2
movdqa 512-128(%rcx),%xmm7
pshufd $0x1b,%xmm1,%xmm0
pshufd $0xb1,%xmm1,%xmm1
pshufd $0x1b,%xmm2,%xmm2
movdqa %xmm7,%xmm8
.byte 102,15,58,15,202,8
punpcklqdq %xmm0,%xmm2
jmp L$oop_shaext
.p2align 4
L$oop_shaext:
movdqu (%rsi),%xmm3
movdqu 16(%rsi),%xmm4
movdqu 32(%rsi),%xmm5
.byte 102,15,56,0,223
movdqu 48(%rsi),%xmm6
movdqa 0-128(%rcx),%xmm0
paddd %xmm3,%xmm0
.byte 102,15,56,0,231
movdqa %xmm2,%xmm10
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
nop
movdqa %xmm1,%xmm9
.byte 15,56,203,202
movdqa 32-128(%rcx),%xmm0
paddd %xmm4,%xmm0
.byte 102,15,56,0,239
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
leaq 64(%rsi),%rsi
.byte 15,56,204,220
.byte 15,56,203,202
movdqa 64-128(%rcx),%xmm0
paddd %xmm5,%xmm0
.byte 102,15,56,0,247
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm6,%xmm7
.byte 102,15,58,15,253,4
nop
paddd %xmm7,%xmm3
.byte 15,56,204,229
.byte 15,56,203,202
movdqa 96-128(%rcx),%xmm0
paddd %xmm6,%xmm0
.byte 15,56,205,222
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm3,%xmm7
.byte 102,15,58,15,254,4
nop
paddd %xmm7,%xmm4
.byte 15,56,204,238
.byte 15,56,203,202
movdqa 128-128(%rcx),%xmm0
paddd %xmm3,%xmm0
.byte 15,56,205,227
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm4,%xmm7
.byte 102,15,58,15,251,4
nop
paddd %xmm7,%xmm5
.byte 15,56,204,243
.byte 15,56,203,202
movdqa 160-128(%rcx),%xmm0
paddd %xmm4,%xmm0
.byte 15,56,205,236
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm5,%xmm7
.byte 102,15,58,15,252,4
nop
paddd %xmm7,%xmm6
.byte 15,56,204,220
.byte 15,56,203,202
movdqa 192-128(%rcx),%xmm0
paddd %xmm5,%xmm0
.byte 15,56,205,245
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm6,%xmm7
.byte 102,15,58,15,253,4
nop
paddd %xmm7,%xmm3
.byte 15,56,204,229
.byte 15,56,203,202
movdqa 224-128(%rcx),%xmm0
paddd %xmm6,%xmm0
.byte 15,56,205,222
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm3,%xmm7
.byte 102,15,58,15,254,4
nop
paddd %xmm7,%xmm4
.byte 15,56,204,238
.byte 15,56,203,202
movdqa 256-128(%rcx),%xmm0
paddd %xmm3,%xmm0
.byte 15,56,205,227
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm4,%xmm7
.byte 102,15,58,15,251,4
nop
paddd %xmm7,%xmm5
.byte 15,56,204,243
.byte 15,56,203,202
movdqa 288-128(%rcx),%xmm0
paddd %xmm4,%xmm0
.byte 15,56,205,236
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm5,%xmm7
.byte 102,15,58,15,252,4
nop
paddd %xmm7,%xmm6
.byte 15,56,204,220
.byte 15,56,203,202
movdqa 320-128(%rcx),%xmm0
paddd %xmm5,%xmm0
.byte 15,56,205,245
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm6,%xmm7
.byte 102,15,58,15,253,4
nop
paddd %xmm7,%xmm3
.byte 15,56,204,229
.byte 15,56,203,202
movdqa 352-128(%rcx),%xmm0
paddd %xmm6,%xmm0
.byte 15,56,205,222
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm3,%xmm7
.byte 102,15,58,15,254,4
nop
paddd %xmm7,%xmm4
.byte 15,56,204,238
.byte 15,56,203,202
movdqa 384-128(%rcx),%xmm0
paddd %xmm3,%xmm0
.byte 15,56,205,227
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm4,%xmm7
.byte 102,15,58,15,251,4
nop
paddd %xmm7,%xmm5
.byte 15,56,204,243
.byte 15,56,203,202
movdqa 416-128(%rcx),%xmm0
paddd %xmm4,%xmm0
.byte 15,56,205,236
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm5,%xmm7
.byte 102,15,58,15,252,4
.byte 15,56,203,202
paddd %xmm7,%xmm6
movdqa 448-128(%rcx),%xmm0
paddd %xmm5,%xmm0
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
.byte 15,56,205,245
movdqa %xmm8,%xmm7
.byte 15,56,203,202
movdqa 480-128(%rcx),%xmm0
paddd %xmm6,%xmm0
nop
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
decq %rdx
nop
.byte 15,56,203,202
paddd %xmm10,%xmm2
paddd %xmm9,%xmm1
jnz L$oop_shaext
pshufd $0xb1,%xmm2,%xmm2
pshufd $0x1b,%xmm1,%xmm7
pshufd $0xb1,%xmm1,%xmm1
punpckhqdq %xmm2,%xmm1
.byte 102,15,58,15,215,8
movdqu %xmm1,(%rdi)
movdqu %xmm2,16(%rdi)
ret
.globl _sha256_block_data_order_ssse3
.private_extern _sha256_block_data_order_ssse3
.p2align 6
_sha256_block_data_order_ssse3:
_CET_ENDBR
movq %rsp,%rax
pushq %rbx
@ -2890,14 +3087,16 @@ L$ssse3_00_47:
leaq (%rsi),%rsp
L$epilogue_ssse3:
.byte 0xf3,0xc3
ret
.globl _sha256_block_data_order_avx
.private_extern _sha256_block_data_order_avx
.p2align 6
sha256_block_data_order_avx:
_sha256_block_data_order_avx:
L$avx_shortcut:
_CET_ENDBR
movq %rsp,%rax
pushq %rbx
@ -3965,7 +4164,7 @@ L$avx_00_47:
leaq (%rsi),%rsp
L$epilogue_avx:
.byte 0xf3,0xc3
ret
#endif


@ -1,34 +1,18 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.globl _sha512_block_data_order
.private_extern _sha512_block_data_order
.globl _sha512_block_data_order_nohw
.private_extern _sha512_block_data_order_nohw
.p2align 4
_sha512_block_data_order:
_sha512_block_data_order_nohw:
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
movl 8(%r11),%r11d
andl $1073741824,%r9d
andl $268435968,%r10d
orl %r9d,%r10d
cmpl $1342177792,%r10d
je L$avx_shortcut
_CET_ENDBR
movq %rsp,%rax
pushq %rbx
@ -1732,9 +1716,10 @@ L$rounds_16_xx:
leaq (%rsi),%rsp
L$epilogue:
.byte 0xf3,0xc3
ret
.section __DATA,__const
.p2align 6
K512:
@ -1822,11 +1807,14 @@ K512:
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.text
.globl _sha512_block_data_order_avx
.private_extern _sha512_block_data_order_avx
.p2align 6
sha512_block_data_order_avx:
_sha512_block_data_order_avx:
L$avx_shortcut:
_CET_ENDBR
movq %rsp,%rax
pushq %rbx
@ -2984,7 +2972,7 @@ L$avx_00_47:
leaq (%rsi),%rsp
L$epilogue_avx:
.byte 0xf3,0xc3
ret
#endif


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -111,7 +104,7 @@ L$enc_entry:
movdqa 64(%r11,%r10,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
.byte 0xf3,0xc3
ret
@ -286,7 +279,7 @@ L$enc2x_entry:
pxor %xmm12,%xmm6
.byte 102,15,56,0,193
.byte 102,15,56,0,241
.byte 0xf3,0xc3
ret
@ -394,7 +387,7 @@ L$dec_entry:
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
.byte 0xf3,0xc3
ret
@ -572,7 +565,7 @@ L$schedule_mangle_last_dec:
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.byte 0xf3,0xc3
ret
@ -601,7 +594,7 @@ _vpaes_schedule_192_smear:
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
movhlps %xmm1,%xmm6
.byte 0xf3,0xc3
ret
@ -679,7 +672,7 @@ _vpaes_schedule_low_round:
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
.byte 0xf3,0xc3
ret
@ -705,7 +698,7 @@ _vpaes_schedule_transform:
movdqa 16(%r11),%xmm0
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
.byte 0xf3,0xc3
ret
@ -799,7 +792,7 @@ L$schedule_mangle_both:
addq $-16,%r8
andq $0x30,%r8
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3
ret
@ -812,6 +805,7 @@ L$schedule_mangle_both:
.p2align 4
_vpaes_set_encrypt_key:
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+5(%rip)
@ -826,7 +820,7 @@ _vpaes_set_encrypt_key:
movl $0x30,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
ret
@ -836,6 +830,7 @@ _vpaes_set_encrypt_key:
.p2align 4
_vpaes_set_decrypt_key:
_CET_ENDBR
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@ -850,7 +845,7 @@ _vpaes_set_decrypt_key:
xorl $32,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
ret
@ -860,6 +855,7 @@ _vpaes_set_decrypt_key:
.p2align 4
_vpaes_encrypt:
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+4(%rip)
@ -868,7 +864,7 @@ _vpaes_encrypt:
call _vpaes_preheat
call _vpaes_encrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
ret
@ -878,11 +874,12 @@ _vpaes_encrypt:
.p2align 4
_vpaes_decrypt:
_CET_ENDBR
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_decrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
ret
.globl _vpaes_cbc_encrypt
@ -891,6 +888,7 @@ _vpaes_decrypt:
.p2align 4
_vpaes_cbc_encrypt:
_CET_ENDBR
xchgq %rcx,%rdx
subq $16,%rcx
jc L$cbc_abort
@ -925,7 +923,7 @@ L$cbc_dec_loop:
L$cbc_done:
movdqu %xmm6,(%r8)
L$cbc_abort:
.byte 0xf3,0xc3
ret
.globl _vpaes_ctr32_encrypt_blocks
@ -934,6 +932,7 @@ L$cbc_abort:
.p2align 4
_vpaes_ctr32_encrypt_blocks:
_CET_ENDBR
xchgq %rcx,%rdx
testq %rcx,%rcx
@ -988,7 +987,7 @@ L$ctr32_loop:
L$ctr32_done:
L$ctr32_abort:
.byte 0xf3,0xc3
ret
@ -1009,7 +1008,7 @@ _vpaes_preheat:
movdqa 64(%r10),%xmm12
movdqa 80(%r10),%xmm15
movdqa 96(%r10),%xmm14
.byte 0xf3,0xc3
ret
@ -1018,6 +1017,7 @@ _vpaes_preheat:
.section __DATA,__const
.p2align 6
_vpaes_consts:
L$k_inv:
@ -1127,4 +1127,5 @@ L$ctr_add_two:
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align 6
.text
#endif


@ -1,43 +1,21 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
.globl _bn_mul_mont
.private_extern _bn_mul_mont
.globl _bn_mul_mont_nohw
.private_extern _bn_mul_mont_nohw
.p2align 4
_bn_mul_mont:
_bn_mul_mont_nohw:
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
jb L$mul_enter
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
cmpq %rsi,%rdx
jne L$mul4x_enter
testl $7,%r9d
jz L$sqr8x_enter
jmp L$mul4x_enter
.p2align 4
L$mul_enter:
pushq %rbx
pushq %rbp
@ -267,20 +245,19 @@ L$copy:
leaq (%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
ret
.globl _bn_mul4x_mont
.private_extern _bn_mul4x_mont
.p2align 4
bn_mul4x_mont:
_bn_mul4x_mont:
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
L$mul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je L$mulx4x_enter
pushq %rbx
pushq %rbp
@ -701,19 +678,22 @@ L$copy4x:
leaq (%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
ret
.globl _bn_sqr8x_mont
.private_extern _bn_sqr8x_mont
.p2align 5
bn_sqr8x_mont:
_bn_sqr8x_mont:
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
L$sqr8x_enter:
pushq %rbx
pushq %rbp
@ -788,11 +768,8 @@ L$sqr8x_body:
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
leaq _OPENSSL_ia32cap_P(%rip),%rax
movl 8(%rax),%eax
andl $0x80100,%eax
cmpl $0x80100,%eax
jne L$sqr8x_nox
testq %rdx,%rdx
jz L$sqr8x_nox
call _bn_sqrx8x_internal
@ -892,16 +869,18 @@ L$sqr8x_cond_copy:
leaq (%rsi),%rsp
L$sqr8x_epilogue:
.byte 0xf3,0xc3
ret
.globl _bn_mulx4x_mont
.private_extern _bn_mulx4x_mont
.p2align 5
bn_mulx4x_mont:
_bn_mulx4x_mont:
_CET_ENDBR
movq %rsp,%rax
L$mulx4x_enter:
pushq %rbx
pushq %rbp
@ -1248,7 +1227,7 @@ L$mulx4x_cond_copy:
leaq (%rsi),%rsp
L$mulx4x_epilogue:
.byte 0xf3,0xc3
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0


@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -21,6 +14,7 @@
.p2align 6
_bn_mul_mont_gather5:
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
@ -206,6 +200,7 @@ L$mul_body:
por %xmm2,%xmm0
por %xmm3,%xmm1
por %xmm1,%xmm0
pshufd $0x4e,%xmm0,%xmm1
por %xmm1,%xmm0
leaq 256(%r12),%r12
@ -329,6 +324,7 @@ L$outer:
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
leaq 256(%r12),%r12
@ -453,7 +449,7 @@ L$copy:
leaq (%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
ret
@ -558,7 +554,7 @@ L$mul4x_body:
leaq (%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
ret
@ -697,6 +693,7 @@ mul4x_internal:
por %xmm2,%xmm0
por %xmm3,%xmm1
por %xmm1,%xmm0
pshufd $0x4e,%xmm0,%xmm1
por %xmm1,%xmm0
leaq 256(%r12),%r12
@ -904,6 +901,7 @@ L$outer4x:
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
leaq 256(%r12),%r12
@ -1095,6 +1093,7 @@ L$inner4x:
.p2align 5
_bn_power5:
_CET_ENDBR
movq %rsp,%rax
leaq _OPENSSL_ia32cap_P(%rip),%r11
@ -1222,7 +1221,7 @@ L$power5_body:
leaq (%rsi),%rsp
L$power5_epilogue:
.byte 0xf3,0xc3
ret
@ -1234,6 +1233,7 @@ L$power5_epilogue:
_bn_sqr8x_internal:
__bn_sqr8x_internal:
_CET_ENDBR
@ -2007,7 +2007,7 @@ L$8x_no_tail:
cmpq %rdx,%rdi
jb L$8x_reduction_loop
.byte 0xf3,0xc3
ret
@ -2063,188 +2063,7 @@ L$sqr4x_sub_entry:
movq %r9,%r10
negq %r9
.byte 0xf3,0xc3
.globl _bn_from_montgomery
.private_extern _bn_from_montgomery
.p2align 5
_bn_from_montgomery:
testl $7,%r9d
jz bn_from_mont8x
xorl %eax,%eax
.byte 0xf3,0xc3
.p2align 5
bn_from_mont8x:
.byte 0x67
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$from_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
negq %r9
movq (%r8),%r8
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$from_sp_alt
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done
.p2align 5
L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rbp
L$from_sp_done:
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
jmp L$from_page_walk_done
L$from_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
L$from_page_walk_done:
movq %r9,%r10
negq %r9
movq %r8,32(%rsp)
movq %rax,40(%rsp)
L$from_body:
movq %r9,%r11
leaq 48(%rsp),%rax
pxor %xmm0,%xmm0
jmp L$mul_by_1
.p2align 5
L$mul_by_1:
movdqu (%rsi),%xmm1
movdqu 16(%rsi),%xmm2
movdqu 32(%rsi),%xmm3
movdqa %xmm0,(%rax,%r9,1)
movdqu 48(%rsi),%xmm4
movdqa %xmm0,16(%rax,%r9,1)
.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
movdqa %xmm1,(%rax)
movdqa %xmm0,32(%rax,%r9,1)
movdqa %xmm2,16(%rax)
movdqa %xmm0,48(%rax,%r9,1)
movdqa %xmm3,32(%rax)
movdqa %xmm4,48(%rax)
leaq 64(%rax),%rax
subq $64,%r11
jnz L$mul_by_1
.byte 102,72,15,110,207
.byte 102,72,15,110,209
.byte 0x67
movq %rcx,%rbp
.byte 102,73,15,110,218
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
jne L$from_mont_nox
leaq (%rax,%r9,1),%rdi
call __bn_sqrx8x_reduction
call __bn_postx4x_internal
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
jmp L$from_mont_zero
.p2align 5
L$from_mont_nox:
call __bn_sqr8x_reduction
call __bn_post4x_internal
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
jmp L$from_mont_zero
.p2align 5
L$from_mont_zero:
movq 40(%rsp),%rsi
movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax)
movdqa %xmm0,48(%rax)
leaq 64(%rax),%rax
subq $32,%r9
jnz L$from_mont_zero
movq $1,%rax
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$from_epilogue:
.byte 0xf3,0xc3
ret
@ -2354,7 +2173,7 @@ L$mulx4x_body:
leaq (%rsi),%rsp
L$mulx4x_epilogue:
.byte 0xf3,0xc3
ret
@ -2501,6 +2320,7 @@ mulx4x_internal:
por %xmm2,%xmm0
por %xmm3,%xmm1
pxor %xmm1,%xmm0
pshufd $0x4e,%xmm0,%xmm1
por %xmm1,%xmm0
leaq 256(%rdi),%rdi
@ -2651,6 +2471,7 @@ L$mulx4x_outer:
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
leaq 256(%rdi),%rdi
@ -2913,7 +2734,7 @@ L$powerx5_body:
leaq (%rsi),%rsp
L$powerx5_epilogue:
.byte 0xf3,0xc3
ret
@ -2925,6 +2746,7 @@ L$powerx5_epilogue:
_bn_sqrx8x_internal:
__bn_sqrx8x_internal:
_CET_ENDBR
@ -3535,7 +3357,7 @@ L$sqrx8x_no_tail:
leaq 64(%rdi,%rcx,1),%rdi
cmpq 8+8(%rsp),%r8
jb L$sqrx8x_reduction_loop
.byte 0xf3,0xc3
ret
.p2align 5
@ -3588,7 +3410,7 @@ L$sqrx4x_sub_entry:
negq %r9
.byte 0xf3,0xc3
ret
.globl _bn_scatter5
@ -3597,8 +3419,18 @@ L$sqrx4x_sub_entry:
.p2align 4
_bn_scatter5:
_CET_ENDBR
cmpl $0,%esi
jz L$scatter_epilogue
leaq (%rdx,%rcx,8),%rdx
L$scatter:
movq (%rdi),%rax
@ -3608,7 +3440,7 @@ L$scatter:
subl $1,%esi
jnz L$scatter
L$scatter_epilogue:
.byte 0xf3,0xc3
ret
@ -3619,6 +3451,7 @@ L$scatter_epilogue:
_bn_gather5:
L$SEH_begin_bn_gather5:
_CET_ENDBR
.byte 0x4c,0x8d,0x14,0x24
@ -3767,6 +3600,7 @@ L$gather:
por %xmm3,%xmm5
por %xmm5,%xmm4
leaq 256(%r11),%r11
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
movq %xmm0,(%rdi)
@ -3776,13 +3610,15 @@ L$gather:
leaq (%r10),%rsp
.byte 0xf3,0xc3
ret
L$SEH_end_bn_gather5:
.section __DATA,__const
.p2align 6
L$inc:
.long 0,0, 1,1
.long 2,2, 2,2
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.text
#endif
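
Editor's note: the L$from_mont_nox branch above gates the MULX/ADX reduction path (__bn_sqrx8x_reduction) on a CPU-capability word read from _OPENSSL_ia32cap_P. A minimal C sketch of that dispatch follows; it assumes OPENSSL_ia32cap_P is the usual four-word capability array, that byte offset 8 is word index 2, and that the 0x80108 mask corresponds to CPUID leaf-7 feature bits (BMI1/BMI2/ADX). Treat those bit names as an interpretation, not something stated in this diff.

#include <stdint.h>

/* Capability vector filled in by BoringSSL's CPUID probing; declared here
 * only for illustration. */
extern uint32_t OPENSSL_ia32cap_P[4];

/* Mirrors: movl 8(%r11),%r11d; andl $0x80108,%r11d; cmpl $0x80108,%r11d.
 * The x8x reduction path runs only when every bit in the 0x80108 mask is
 * set; otherwise the plain __bn_sqr8x_reduction path is taken. */
static int bn_use_sqrx8x_path(void) {
  return (OPENSSL_ia32cap_P[2] & 0x80108) == 0x80108;
}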

View File

@ -1,16 +1,9 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#include <openssl/asm_base.h>
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
@ -25,9 +18,10 @@
.private_extern _abi_test_trampoline
.p2align 4
_abi_test_trampoline:
L$abi_test_trampoline_seh_begin:
_CET_ENDBR
@ -38,27 +32,26 @@ L$abi_test_trampoline_seh_begin:
subq $120,%rsp
L$abi_test_trampoline_seh_prolog_alloc:
movq %r8,48(%rsp)
movq %rbx,64(%rsp)
L$abi_test_trampoline_seh_prolog_rbx:
movq %rbp,72(%rsp)
L$abi_test_trampoline_seh_prolog_rbp:
movq %r12,80(%rsp)
L$abi_test_trampoline_seh_prolog_r12:
movq %r13,88(%rsp)
L$abi_test_trampoline_seh_prolog_r13:
movq %r14,96(%rsp)
L$abi_test_trampoline_seh_prolog_r14:
movq %r15,104(%rsp)
L$abi_test_trampoline_seh_prolog_r15:
L$abi_test_trampoline_seh_prolog_end:
movq 0(%rsi),%rbx
movq 8(%rsi),%rbp
movq 16(%rsi),%r12
@ -180,257 +173,288 @@ L$call_done:
.byte 0xf3,0xc3
ret
L$abi_test_trampoline_seh_end:
.globl _abi_test_clobber_rax
.private_extern _abi_test_clobber_rax
.p2align 4
_abi_test_clobber_rax:
_CET_ENDBR
xorq %rax,%rax
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_rbx
.private_extern _abi_test_clobber_rbx
.p2align 4
_abi_test_clobber_rbx:
_CET_ENDBR
xorq %rbx,%rbx
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_rcx
.private_extern _abi_test_clobber_rcx
.p2align 4
_abi_test_clobber_rcx:
_CET_ENDBR
xorq %rcx,%rcx
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_rdx
.private_extern _abi_test_clobber_rdx
.p2align 4
_abi_test_clobber_rdx:
_CET_ENDBR
xorq %rdx,%rdx
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_rdi
.private_extern _abi_test_clobber_rdi
.p2align 4
_abi_test_clobber_rdi:
_CET_ENDBR
xorq %rdi,%rdi
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_rsi
.private_extern _abi_test_clobber_rsi
.p2align 4
_abi_test_clobber_rsi:
_CET_ENDBR
xorq %rsi,%rsi
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_rbp
.private_extern _abi_test_clobber_rbp
.p2align 4
_abi_test_clobber_rbp:
_CET_ENDBR
xorq %rbp,%rbp
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r8
.private_extern _abi_test_clobber_r8
.p2align 4
_abi_test_clobber_r8:
_CET_ENDBR
xorq %r8,%r8
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r9
.private_extern _abi_test_clobber_r9
.p2align 4
_abi_test_clobber_r9:
_CET_ENDBR
xorq %r9,%r9
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r10
.private_extern _abi_test_clobber_r10
.p2align 4
_abi_test_clobber_r10:
_CET_ENDBR
xorq %r10,%r10
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r11
.private_extern _abi_test_clobber_r11
.p2align 4
_abi_test_clobber_r11:
_CET_ENDBR
xorq %r11,%r11
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r12
.private_extern _abi_test_clobber_r12
.p2align 4
_abi_test_clobber_r12:
_CET_ENDBR
xorq %r12,%r12
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r13
.private_extern _abi_test_clobber_r13
.p2align 4
_abi_test_clobber_r13:
_CET_ENDBR
xorq %r13,%r13
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r14
.private_extern _abi_test_clobber_r14
.p2align 4
_abi_test_clobber_r14:
_CET_ENDBR
xorq %r14,%r14
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_r15
.private_extern _abi_test_clobber_r15
.p2align 4
_abi_test_clobber_r15:
_CET_ENDBR
xorq %r15,%r15
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm0
.private_extern _abi_test_clobber_xmm0
.p2align 4
_abi_test_clobber_xmm0:
_CET_ENDBR
pxor %xmm0,%xmm0
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm1
.private_extern _abi_test_clobber_xmm1
.p2align 4
_abi_test_clobber_xmm1:
_CET_ENDBR
pxor %xmm1,%xmm1
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm2
.private_extern _abi_test_clobber_xmm2
.p2align 4
_abi_test_clobber_xmm2:
_CET_ENDBR
pxor %xmm2,%xmm2
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm3
.private_extern _abi_test_clobber_xmm3
.p2align 4
_abi_test_clobber_xmm3:
_CET_ENDBR
pxor %xmm3,%xmm3
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm4
.private_extern _abi_test_clobber_xmm4
.p2align 4
_abi_test_clobber_xmm4:
_CET_ENDBR
pxor %xmm4,%xmm4
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm5
.private_extern _abi_test_clobber_xmm5
.p2align 4
_abi_test_clobber_xmm5:
_CET_ENDBR
pxor %xmm5,%xmm5
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm6
.private_extern _abi_test_clobber_xmm6
.p2align 4
_abi_test_clobber_xmm6:
_CET_ENDBR
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm7
.private_extern _abi_test_clobber_xmm7
.p2align 4
_abi_test_clobber_xmm7:
_CET_ENDBR
pxor %xmm7,%xmm7
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm8
.private_extern _abi_test_clobber_xmm8
.p2align 4
_abi_test_clobber_xmm8:
_CET_ENDBR
pxor %xmm8,%xmm8
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm9
.private_extern _abi_test_clobber_xmm9
.p2align 4
_abi_test_clobber_xmm9:
_CET_ENDBR
pxor %xmm9,%xmm9
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm10
.private_extern _abi_test_clobber_xmm10
.p2align 4
_abi_test_clobber_xmm10:
_CET_ENDBR
pxor %xmm10,%xmm10
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm11
.private_extern _abi_test_clobber_xmm11
.p2align 4
_abi_test_clobber_xmm11:
_CET_ENDBR
pxor %xmm11,%xmm11
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm12
.private_extern _abi_test_clobber_xmm12
.p2align 4
_abi_test_clobber_xmm12:
_CET_ENDBR
pxor %xmm12,%xmm12
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm13
.private_extern _abi_test_clobber_xmm13
.p2align 4
_abi_test_clobber_xmm13:
_CET_ENDBR
pxor %xmm13,%xmm13
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm14
.private_extern _abi_test_clobber_xmm14
.p2align 4
_abi_test_clobber_xmm14:
_CET_ENDBR
pxor %xmm14,%xmm14
.byte 0xf3,0xc3
ret
.globl _abi_test_clobber_xmm15
.private_extern _abi_test_clobber_xmm15
.p2align 4
_abi_test_clobber_xmm15:
_CET_ENDBR
pxor %xmm15,%xmm15
.byte 0xf3,0xc3
ret
@ -441,18 +465,19 @@ _abi_test_clobber_xmm15:
.p2align 4
_abi_test_bad_unwind_wrong_register:
L$abi_test_bad_unwind_wrong_register_seh_begin:
_CET_ENDBR
pushq %r12
L$abi_test_bad_unwind_wrong_register_seh_push_r13:
nop
popq %r12
.byte 0xf3,0xc3
L$abi_test_bad_unwind_wrong_register_seh_end:
ret
@ -465,10 +490,11 @@ L$abi_test_bad_unwind_wrong_register_seh_end:
.p2align 4
_abi_test_bad_unwind_temporary:
L$abi_test_bad_unwind_temporary_seh_begin:
_CET_ENDBR
pushq %r12
L$abi_test_bad_unwind_temporary_seh_push_r12:
movq %r12,%rax
incq %rax
@ -481,8 +507,8 @@ L$abi_test_bad_unwind_temporary_seh_push_r12:
popq %r12
.byte 0xf3,0xc3
L$abi_test_bad_unwind_temporary_seh_end:
ret
@ -493,12 +519,13 @@ L$abi_test_bad_unwind_temporary_seh_end:
.globl _abi_test_get_and_clear_direction_flag
.private_extern _abi_test_get_and_clear_direction_flag
_abi_test_get_and_clear_direction_flag:
_CET_ENDBR
pushfq
popq %rax
andq $0x400,%rax
shrq $10,%rax
cld
.byte 0xf3,0xc3
ret
@ -507,7 +534,8 @@ _abi_test_get_and_clear_direction_flag:
.globl _abi_test_set_direction_flag
.private_extern _abi_test_set_direction_flag
_abi_test_set_direction_flag:
_CET_ENDBR
std
.byte 0xf3,0xc3
ret
#endif

File diff suppressed because it is too large.

View File

@ -1,246 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

View File

@ -1,376 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne Lstack_args_loop
Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ Load argv into lr.
cmp r3, #3
bhi Larg_r3
beq Larg_r2
cmp r3, #1
bhi Larg_r1
beq Larg_r0
b Largs_done
Larg_r3:
ldr r3, [lr, #12] @ argv[3]
Larg_r2:
ldr r2, [lr, #8] @ argv[2]
Larg_r1:
ldr r1, [lr, #4] @ argv[1]
Larg_r0:
ldr r0, [lr] @ argv[0]
Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.globl _abi_test_clobber_r0
.private_extern _abi_test_clobber_r0
.align 4
_abi_test_clobber_r0:
mov r0, #0
bx lr
.globl _abi_test_clobber_r1
.private_extern _abi_test_clobber_r1
.align 4
_abi_test_clobber_r1:
mov r1, #0
bx lr
.globl _abi_test_clobber_r2
.private_extern _abi_test_clobber_r2
.align 4
_abi_test_clobber_r2:
mov r2, #0
bx lr
.globl _abi_test_clobber_r3
.private_extern _abi_test_clobber_r3
.align 4
_abi_test_clobber_r3:
mov r3, #0
bx lr
.globl _abi_test_clobber_r4
.private_extern _abi_test_clobber_r4
.align 4
_abi_test_clobber_r4:
mov r4, #0
bx lr
.globl _abi_test_clobber_r5
.private_extern _abi_test_clobber_r5
.align 4
_abi_test_clobber_r5:
mov r5, #0
bx lr
.globl _abi_test_clobber_r6
.private_extern _abi_test_clobber_r6
.align 4
_abi_test_clobber_r6:
mov r6, #0
bx lr
.globl _abi_test_clobber_r7
.private_extern _abi_test_clobber_r7
.align 4
_abi_test_clobber_r7:
mov r7, #0
bx lr
.globl _abi_test_clobber_r8
.private_extern _abi_test_clobber_r8
.align 4
_abi_test_clobber_r8:
mov r8, #0
bx lr
.globl _abi_test_clobber_r9
.private_extern _abi_test_clobber_r9
.align 4
_abi_test_clobber_r9:
mov r9, #0
bx lr
.globl _abi_test_clobber_r10
.private_extern _abi_test_clobber_r10
.align 4
_abi_test_clobber_r10:
mov r10, #0
bx lr
.globl _abi_test_clobber_r11
.private_extern _abi_test_clobber_r11
.align 4
_abi_test_clobber_r11:
mov r11, #0
bx lr
.globl _abi_test_clobber_r12
.private_extern _abi_test_clobber_r12
.align 4
_abi_test_clobber_r12:
mov r12, #0
bx lr
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
#endif // !OPENSSL_NO_ASM
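
Editor's note: the block comment above _abi_test_trampoline gives its C prototype. As a rough usage sketch (not taken from this diff), a test harness might drive it as below. The CallerState layout shown is an assumption that merely mirrors the vldmia/ldmia save order in the assembly (d8-d15 followed by r4-r11), and add_two / demo_call_through_trampoline are made-up names for illustration.

#include <stddef.h>
#include <stdint.h>

/* Assumed layout, matching the register load/store order above. */
typedef struct {
  uint64_t d[8];  /* d8-d15 */
  uint32_t r[8];  /* r4-r11 (r9 is saved even though it is volatile on iOS) */
} CallerState;

/* Prototype as given in the assembly comment; the variadic func pointer is
 * relaxed to a plain function pointer for this sketch. */
uint32_t abi_test_trampoline(void (*func)(void), CallerState *state,
                             const uint32_t *argv, size_t argc, int unwind);

static uint32_t add_two(uint32_t a, uint32_t b) { return a + b; }

uint32_t demo_call_through_trampoline(void) {
  CallerState state = {{0}, {0}};   /* values the callee must preserve */
  const uint32_t argv[] = {40, 2};  /* forwarded to the callee as r0 and r1 */
  /* The trampoline calls through the pointer with the AAPCS layout it set
   * up, so the cast is only to satisfy the prototype above. The comment in
   * the assembly says |unwind| is unused here. */
  return abi_test_trampoline((void (*)(void))add_two, &state, argv, 2, 0);
}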

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@ -0,0 +1,791 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
tst w1,#0x3f
b.ne .Lenc_key_abort
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
.Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b .Ldone
.align 4
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12
add x2,x2,#0x20
b .Ldone
.align 4
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
str w12,[x2]
mov x3,#0
.Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo .Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs .Loop3x_cbc_dec
cmn x2,#0x30
b.eq .Lcbc_done
nop
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
st1 {v6.16b},[x4]
.Lcbc_abort:
ldr x29,[sp],#16
ret
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
// respectively, where the second instruction of an aese/aesmc
// instruction pair may execute twice if an interrupt is taken right
// after the first instruction consumes an input register of which a
// single 32-bit lane has been updated the last time it was modified.
//
// This function uses a counter in one 32-bit lane. The vmov lines
// could write to v1.16b and v18.16b directly, but that trips these bugs.
// We write to v6.16b and copy to the final register as a workaround.
//
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
rev w8, w8
#endif
add w10, w8, #1
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v6.s[3],w10
add w8, w8, #2
orr v1.16b,v6.16b,v6.16b
b.ls .Lctr32_tail
rev w12, w8
mov v6.s[3],w12
sub x2,x2,#3 // bias
orr v18.16b,v6.16b,v6.16b
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
add w9,w8,#1
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
rev w9,w9
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
// 32-bit mode. See the comment above.
eor v19.16b,v19.16b,v7.16b
mov v6.s[3], w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
orr v0.16b,v6.16b,v6.16b
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
mov v6.s[3], w10
rev w12,w8
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
orr v1.16b,v6.16b,v6.16b
mov v6.s[3], w12
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
orr v18.16b,v6.16b,v6.16b
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs .Loop3x_ctr32
adds x2,x2,#3
b.eq .Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
.Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt .Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq .Lctr32_done
st1 {v3.16b},[x1]
.Lctr32_done:
ldr x29,[sp],#16
ret
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
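
Editor's note: as a readability aid, here is a C rendering of just the argument validation at the top of aes_hw_set_encrypt_key above (the mov x3,#-1 / mov x3,#-2 abort paths and the 128-to-256-bit, multiple-of-64 length check). It is a sketch of those checks only; the actual key expansion is the AESE/TBL code in the assembly, and the key-schedule type is left opaque here.

#include <stddef.h>
#include <stdint.h>

/* Mirrors the checks before .Loop128/.L192/.L256 and returns the same codes
 * the assembly leaves in x0: -1 for null pointers, -2 for an unsupported
 * length, 0 on success (10/12/14 rounds for 128/192/256-bit keys). */
static int aes_hw_set_encrypt_key_checks(const uint8_t *user_key, int bits,
                                         const void *key_schedule) {
  if (user_key == NULL || key_schedule == NULL) {
    return -1;
  }
  if (bits < 128 || bits > 256 || (bits & 0x3f) != 0) {
    return -2;
  }
  return 0;
}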

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@ -0,0 +1,89 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>
.text
// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
// size_t num);
.type bn_add_words, %function
.globl bn_add_words
.hidden bn_add_words
.align 4
bn_add_words:
AARCH64_VALID_CALL_TARGET
# Clear the carry flag.
cmn xzr, xzr
# aarch64 can load two registers at a time, so we do two loop iterations at
# a time. Split x3 = 2 * x8 + x3. This allows loop
# operations to use CBNZ without clobbering the carry flag.
lsr x8, x3, #1
and x3, x3, #1
cbz x8, .Ladd_tail
.Ladd_loop:
ldp x4, x5, [x1], #16
ldp x6, x7, [x2], #16
sub x8, x8, #1
adcs x4, x4, x6
adcs x5, x5, x7
stp x4, x5, [x0], #16
cbnz x8, .Ladd_loop
.Ladd_tail:
cbz x3, .Ladd_exit
ldr x4, [x1], #8
ldr x6, [x2], #8
adcs x4, x4, x6
str x4, [x0], #8
.Ladd_exit:
cset x0, cs
ret
.size bn_add_words,.-bn_add_words
// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
// size_t num);
.type bn_sub_words, %function
.globl bn_sub_words
.hidden bn_sub_words
.align 4
bn_sub_words:
AARCH64_VALID_CALL_TARGET
# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
# so we want C = 1 here.
cmp xzr, xzr
# aarch64 can load two registers at a time, so we do two loop iterations at
# a time. Split x3 = 2 * x8 + x3. This allows loop
# operations to use CBNZ without clobbering the carry flag.
lsr x8, x3, #1
and x3, x3, #1
cbz x8, .Lsub_tail
.Lsub_loop:
ldp x4, x5, [x1], #16
ldp x6, x7, [x2], #16
sub x8, x8, #1
sbcs x4, x4, x6
sbcs x5, x5, x7
stp x4, x5, [x0], #16
cbnz x8, .Lsub_loop
.Lsub_tail:
cbz x3, .Lsub_exit
ldr x4, [x1], #8
ldr x6, [x2], #8
sbcs x4, x4, x6
str x4, [x0], #8
.Lsub_exit:
cset x0, cc
ret
.size bn_sub_words,.-bn_sub_words
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
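
Editor's note: the loop-splitting comments in bn_add_words and bn_sub_words above describe the intended semantics. As a cross-check, here is a plain C reference for both; this is a sketch that assumes BN_ULONG is a 64-bit limb, as it is on aarch64. The return value mirrors the final cset in each routine: carry out for addition, borrow out for subtraction.

#include <stddef.h>
#include <stdint.h>

typedef uint64_t BN_ULONG;

/* rp[0..num) = ap[0..num) + bp[0..num); returns the final carry bit. */
static BN_ULONG bn_add_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                                 const BN_ULONG *bp, size_t num) {
  BN_ULONG carry = 0;
  for (size_t i = 0; i < num; i++) {
    BN_ULONG sum = ap[i] + carry;
    carry = sum < carry;          /* carry out of adding the incoming carry */
    rp[i] = sum + bp[i];
    carry += rp[i] < bp[i];       /* carry out of adding bp[i] */
  }
  return carry;
}

/* rp[0..num) = ap[0..num) - bp[0..num); returns the final borrow bit. */
static BN_ULONG bn_sub_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                                 const BN_ULONG *bp, size_t num) {
  BN_ULONG borrow = 0;
  for (size_t i = 0; i < num; i++) {
    BN_ULONG t = ap[i] - borrow;
    BN_ULONG next = (BN_ULONG)(ap[i] < borrow) | (BN_ULONG)(t < bp[i]);
    rp[i] = t - bp[i];
    borrow = next;
  }
  return borrow;
}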

View File

@ -0,0 +1,335 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>
.text
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
.Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
.Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne .Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.size gcm_ghash_neon,.-gcm_ghash_neon
.section .rodata
.align 4
.Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
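
Editor's note: the long comments in .Lgmult_neon above describe how the 64x64-bit carry-less product is assembled from 8-bit pmull pieces and shifted, masked partial sums. For reference, a bit-at-a-time C model of the underlying 64x64 -> 128-bit carry-less multiply is sketched below (illustration only; it ignores the Karatsuba splitting, the .Lmasks constants, and the GHASH reduction that the assembly performs afterwards).

#include <stdint.h>

/* Computes the GF(2)[x] (carry-less) product of a and b: out[0] receives the
 * low 64 bits and out[1] the high 64 bits of the 127-bit result. This is the
 * quantity the NEON code above builds from 8-bit polynomial multiplies. */
static void clmul64_ref(uint64_t a, uint64_t b, uint64_t out[2]) {
  uint64_t lo = 0, hi = 0;
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      lo ^= a << i;
      if (i != 0) {
        hi ^= a >> (64 - i);   /* bits of a shifted past the low word */
      }
    }
  }
  out[0] = lo;
  out[1] = hi;
}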

View File

@ -0,0 +1,565 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
//calculate H^3 and H^4
pmull v0.1q,v20.1d, v22.1d
pmull v5.1q,v22.1d,v22.1d
pmull2 v2.1q,v20.2d, v22.2d
pmull2 v7.1q,v22.2d,v22.2d
pmull v1.1q,v16.1d,v17.1d
pmull v6.1q,v17.1d,v17.1d
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
ext v17.16b,v5.16b,v7.16b,#8
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v16.16b
eor v4.16b,v5.16b,v7.16b
eor v6.16b,v6.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
eor v6.16b,v6.16b,v4.16b
pmull v4.1q,v5.1d,v19.1d
ins v2.d[0],v1.d[1]
ins v7.d[0],v6.d[1]
ins v1.d[1],v0.d[0]
ins v6.d[1],v5.d[0]
eor v0.16b,v1.16b,v18.16b
eor v5.16b,v6.16b,v4.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
ext v4.16b,v5.16b,v5.16b,#8
pmull v0.1q,v0.1d,v19.1d
pmull v5.1q,v5.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b
eor v20.16b, v0.16b,v18.16b //H^3
eor v22.16b,v5.16b,v4.16b //H^4
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
ext v17.16b,v22.16b,v22.16b,#8
eor v16.16b,v16.16b,v20.16b
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
ret
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
cmp x3,#64
b.hs .Lgcm_ghash_v8_4x
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __AARCH64EB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo .Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __AARCH64EB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq .Ldone_v8 //is x3 zero?
.Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
.type gcm_ghash_v8_4x,%function
.align 4
gcm_ghash_v8_4x:
.Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
ext v25.16b,v7.16b,v7.16b,#8
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
pmull2 v31.1q,v20.2d,v25.2d
pmull v30.1q,v21.1d,v7.1d
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
pmull2 v6.1q,v21.2d,v6.2d
eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b
subs x3,x3,#128
b.lo .Ltail4x
b .Loop4x
.align 4
.Loop4x:
eor v16.16b,v4.16b,v0.16b
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
ext v3.16b,v16.16b,v16.16b,#8
#ifndef __AARCH64EB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
ext v25.16b,v7.16b,v7.16b,#8
pmull2 v1.1q,v27.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
ext v24.16b,v6.16b,v6.16b,#8
eor v1.16b,v1.16b,v30.16b
ext v23.16b,v5.16b,v5.16b,#8
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
eor v1.16b,v1.16b,v17.16b
pmull2 v31.1q,v20.2d,v25.2d
eor v1.16b,v1.16b,v18.16b
pmull v30.1q,v21.1d,v7.1d
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
eor v0.16b,v1.16b,v18.16b
pmull2 v6.1q,v21.2d,v6.2d
eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v18.16b,v18.16b,v2.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d
eor v0.16b,v0.16b,v18.16b
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v30.16b,v30.16b,v5.16b
subs x3,x3,#64
b.hs .Loop4x
.Ltail4x:
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
pmull2 v1.1q,v27.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
adds x3,x3,#64
b.eq .Ldone4x
cmp x3,#32
b.lo .Lone
b.eq .Ltwo
.Lthree:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d,v6.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v29.1q,v20.1d,v24.1d //H·Ii+2
eor v6.16b,v6.16b,v24.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
pmull2 v31.1q,v20.2d,v24.2d
pmull v30.1q,v21.1d,v6.1d
eor v0.16b,v0.16b,v18.16b
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
eor v5.16b,v5.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
pmull2 v23.1q,v22.2d,v23.2d
eor v16.16b,v4.16b,v0.16b
pmull2 v5.1q,v21.2d,v5.2d
ext v3.16b,v16.16b,v16.16b,#8
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b
pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v26.2d,v3.2d
pmull v1.1q,v27.1d,v16.1d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b .Ldone4x
.align 4
.Ltwo:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v5.16b,v5.16b
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
pmull v29.1q,v20.1d,v23.1d //H·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull2 v31.1q,v20.2d,v23.2d
pmull v30.1q,v21.1d,v5.1d
pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v22.2d,v3.2d
pmull2 v1.1q,v21.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b .Ldone4x
.align 4
.Lone:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __AARCH64EB__
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull v0.1q,v20.1d,v3.1d
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v20.2d,v3.2d
pmull v1.1q,v21.1d,v16.1d
.Ldone4x:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
#ifndef __AARCH64EB__
rev64 v0.16b,v0.16b
#endif
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
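For readers not fluent in the PMULL scheduling above: gcm_gmult_v8 and gcm_ghash_v8 implement GHASH, i.e. multiplication in GF(2^128) in the bit-reflected representation of NIST SP 800-38D; the Karatsuba pre/post-processing and the two-phase reduction against the 0xe1/0xc2 constant are optimizations of that single field multiply, and the 2x/4x loops fold several input blocks per iteration using precomputed powers of H. A minimal, unoptimized reference sketch in Python (ghash_mult and ghash are illustrative names, not part of this build):

def ghash_mult(x, y):
    # GF(2^128) multiply, bit-reflected, per NIST SP 800-38D Algorithm 1;
    # this is the operation the pmull/pmull2 + reduction code above computes.
    R = 0xE1 << 120                      # reduction constant for x^128 + x^7 + x^2 + x + 1
    z, v = 0, x
    for i in range(128):
        if (y >> (127 - i)) & 1:
            z ^= v
        v = (v >> 1) ^ R if v & 1 else v >> 1
    return z

def ghash(h, blocks):
    # Xi <- (Xi xor block) * H for each 16-byte block; gcm_ghash_v8 computes the
    # same thing 2 or 4 blocks at a time with precomputed H^2..H^4.
    xi = 0
    for block in blocks:
        xi = ghash_mult(xi ^ int.from_bytes(block, "big"), h)
    return xi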

File diff suppressed because it is too large

View File

@ -0,0 +1,309 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include "openssl/arm_arch.h"
.text
.globl beeu_mod_inverse_vartime
.hidden beeu_mod_inverse_vartime
.type beeu_mod_inverse_vartime, %function
.align 4
beeu_mod_inverse_vartime:
// Reserve enough space for 14 8-byte registers on the stack
// in the first stp call for x29, x30.
// Then store the remaining callee-saved registers.
//
// | x29 | x30 | x19 | x20 | ... | x27 | x28 | x0 | x2 |
// ^ ^
// sp <------------------- 112 bytes ----------------> old sp
// x29 (FP)
//
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-112]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
stp x25,x26,[sp,#64]
stp x27,x28,[sp,#80]
stp x0,x2,[sp,#96]
// B = b3..b0 := a
ldp x25,x26,[x1]
ldp x27,x28,[x1,#16]
// n3..n0 := n
// Note: the values of the input parameters are modified below.
ldp x0,x1,[x2]
ldp x2,x30,[x2,#16]
// A = a3..a0 := n
mov x21, x0
mov x22, x1
mov x23, x2
mov x24, x30
// X = x4..x0 := 1
mov x3, #1
eor x4, x4, x4
eor x5, x5, x5
eor x6, x6, x6
eor x7, x7, x7
// Y = y4..y0 := 0
eor x8, x8, x8
eor x9, x9, x9
eor x10, x10, x10
eor x11, x11, x11
eor x12, x12, x12
.Lbeeu_loop:
// if B == 0, jump to .Lbeeu_loop_end
orr x14, x25, x26
orr x14, x14, x27
// Reverse the bit order of x25; this is needed for the clz below.
rbit x15, x25
orr x14, x14, x28
cbz x14,.Lbeeu_loop_end
// 0 < B < |n|,
// 0 < A <= |n|,
// (1) X*a == B (mod |n|),
// (2) (-1)*Y*a == A (mod |n|)
// Now divide B by the maximum possible power of two in the
// integers, and divide X by the same value mod |n|.
// When we're done, (1) still holds.
// shift := number of trailing 0s in x25
// ( = number of leading 0s in x15; see the rbit instruction above, emitted by TEST_B_ZERO in the Perl source)
clz x13, x15
// If there is no shift, goto shift_A_Y
cbz x13, .Lbeeu_shift_A_Y
// Shift B right by "x13" bits
neg x14, x13
lsr x25, x25, x13
lsl x15, x26, x14
lsr x26, x26, x13
lsl x19, x27, x14
orr x25, x25, x15
lsr x27, x27, x13
lsl x20, x28, x14
orr x26, x26, x19
lsr x28, x28, x13
orr x27, x27, x20
// Shift X right by "x13" bits, adding n whenever X becomes odd.
// x13--;
// x14 := 0; needed in the addition to the most significant word in SHIFT1
eor x14, x14, x14
.Lbeeu_shift_loop_X:
tbz x3, #0, .Lshift1_0
adds x3, x3, x0
adcs x4, x4, x1
adcs x5, x5, x2
adcs x6, x6, x30
adc x7, x7, x14
.Lshift1_0:
// var0 := [var1|var0]<64..1>;
// i.e. concatenate var1 and var0,
// extract bits <64..1> from the resulting 128-bit value
// and put them in var0
extr x3, x4, x3, #1
extr x4, x5, x4, #1
extr x5, x6, x5, #1
extr x6, x7, x6, #1
lsr x7, x7, #1
subs x13, x13, #1
bne .Lbeeu_shift_loop_X
// Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl
// with the following differences:
// - "x13" is set directly to the number of trailing 0s in B
// (using rbit and clz instructions)
// - The loop is only used to call SHIFT1(X)
// and x13 is decreased while executing the X loop.
// - SHIFT256(B, x13) is performed before right-shifting X; they are independent
.Lbeeu_shift_A_Y:
// Same for A and Y.
// Afterwards, (2) still holds.
// Reverse the bit order of x21
// x13 := number of trailing 0s in x21 (= number of leading 0s in x15)
rbit x15, x21
clz x13, x15
// If there is no shift, goto |B-A|, X+Y update
cbz x13, .Lbeeu_update_B_X_or_A_Y
// Shift A right by "x13" bits
neg x14, x13
lsr x21, x21, x13
lsl x15, x22, x14
lsr x22, x22, x13
lsl x19, x23, x14
orr x21, x21, x15
lsr x23, x23, x13
lsl x20, x24, x14
orr x22, x22, x19
lsr x24, x24, x13
orr x23, x23, x20
// Shift Y right by "x13" bits, adding n whenever Y becomes odd.
// x13--;
// x14 := 0; needed in the addition to the most significant word in SHIFT1
eor x14, x14, x14
.Lbeeu_shift_loop_Y:
tbz x8, #0, .Lshift1_1
adds x8, x8, x0
adcs x9, x9, x1
adcs x10, x10, x2
adcs x11, x11, x30
adc x12, x12, x14
.Lshift1_1:
// var0 := [var1|var0]<64..1>;
// i.e. concatenate var1 and var0,
// extract bits <64..1> from the resulting 128-bit value
// and put them in var0
extr x8, x9, x8, #1
extr x9, x10, x9, #1
extr x10, x11, x10, #1
extr x11, x12, x11, #1
lsr x12, x12, #1
subs x13, x13, #1
bne .Lbeeu_shift_loop_Y
.Lbeeu_update_B_X_or_A_Y:
// Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow)
// Note: this is unsigned arithmetic, so T fits in 4 64-bit words with no
// separate sign bit; a cleared carry (i.e. a borrow) would
// indicate a negative result. See, for example,
// https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes
subs x14, x25, x21
sbcs x15, x26, x22
sbcs x19, x27, x23
sbcs x20, x28, x24
bcs .Lbeeu_B_greater_than_A
// Else A > B =>
// A := A - B; Y := Y + X; goto beginning of the loop
subs x21, x21, x25
sbcs x22, x22, x26
sbcs x23, x23, x27
sbcs x24, x24, x28
adds x8, x8, x3
adcs x9, x9, x4
adcs x10, x10, x5
adcs x11, x11, x6
adc x12, x12, x7
b .Lbeeu_loop
.Lbeeu_B_greater_than_A:
// Continue with B > A =>
// B := B - A; X := X + Y; goto beginning of the loop
mov x25, x14
mov x26, x15
mov x27, x19
mov x28, x20
adds x3, x3, x8
adcs x4, x4, x9
adcs x5, x5, x10
adcs x6, x6, x11
adc x7, x7, x12
b .Lbeeu_loop
.Lbeeu_loop_end:
// Euclid's algorithm loop ends when A == gcd(a,n);
// this is 1 when a and n are co-prime (i.e. have no common factor).
// Since (-1)*Y*a == A (mod |n|) and Y>0,
// the output is out = -Y mod n.
// Verify that A == 1, so that (-1)*Y*a = A = 1 (mod |n|).
// Is A-1 == 0?
// If not, fail.
sub x14, x21, #1
orr x14, x14, x22
orr x14, x14, x23
orr x14, x14, x24
cbnz x14, .Lbeeu_err
// If Y>n ==> Y:=Y-n
.Lbeeu_reduction_loop:
// x_i := y_i - n_i (X is no longer needed, use it as temp)
// (x14 = 0 from above)
subs x3, x8, x0
sbcs x4, x9, x1
sbcs x5, x10, x2
sbcs x6, x11, x30
sbcs x7, x12, x14
// If result is non-negative (i.e., cs = carry set = no borrow),
// y_i := x_i; goto reduce again
// else
// y_i := y_i; continue
csel x8, x3, x8, cs
csel x9, x4, x9, cs
csel x10, x5, x10, cs
csel x11, x6, x11, cs
csel x12, x7, x12, cs
bcs .Lbeeu_reduction_loop
// Now Y < n (Y cannot be equal to n, since the inverse cannot be 0)
// out = -Y = n-Y
subs x8, x0, x8
sbcs x9, x1, x9
sbcs x10, x2, x10
sbcs x11, x30, x11
// Save Y in output (out (x0) was saved on the stack)
ldr x3, [sp,#96]
stp x8, x9, [x3]
stp x10, x11, [x3,#16]
// return 1 (success)
mov x0, #1
b .Lbeeu_finish
.Lbeeu_err:
// return 0 (error)
eor x0, x0, x0
.Lbeeu_finish:
// Restore callee-saved registers, except x0, x2
add sp,x29,#0
ldp x19,x20,[sp,#16]
ldp x21,x22,[sp,#32]
ldp x23,x24,[sp,#48]
ldp x25,x26,[sp,#64]
ldp x27,x28,[sp,#80]
ldp x29,x30,[sp],#112
AARCH64_VALIDATE_LINK_REGISTER
ret
.size beeu_mod_inverse_vartime,.-beeu_mod_inverse_vartime
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
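The control flow above is the classic binary extended Euclidean algorithm: keep (A, B, X, Y) with X*a == B (mod |n|) and (-1)*Y*a == A (mod |n|), strip powers of two from B or A while halving X or Y mod n (the SHIFT1 add-n-then-shift trick), subtract the smaller of A and B from the larger, and, once B reaches zero with A == 1, return n - Y. A compact sketch of the same loop in Python, with plain integers standing in for the 4- and 5-limb register groups (inverse_vartime is an illustrative name and assumes n is odd, as it is for the P-256 group order):

def inverse_vartime(a, n):
    # Binary extended Euclid, mirroring the register roles in the assembly:
    #   invariant (1):  X*a ==  B (mod n)
    #   invariant (2): -Y*a ==  A (mod n)
    A, B, X, Y = n, a, 1, 0
    while B != 0:                        # the orr/cbz test at .Lbeeu_loop
        while B % 2 == 0:                # shift B right, halving X mod n
            B >>= 1
            if X % 2 == 1:
                X += n                   # SHIFT1: add n whenever X becomes odd
            X >>= 1
        while A % 2 == 0:                # same for A and Y (.Lbeeu_shift_A_Y)
            A >>= 1
            if Y % 2 == 1:
                Y += n
            Y >>= 1
        if B >= A:                       # .Lbeeu_B_greater_than_A
            B, X = B - A, X + Y
        else:                            # else A > B
            A, Y = A - B, Y + X
    if A != 1:                           # gcd(a, n) != 1: no inverse (.Lbeeu_err)
        return None
    Y %= n                               # .Lbeeu_reduction_loop
    return (n - Y) % n                   # out = -Y mod n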

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,750 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
stp x29, x30, [sp, #-176]!
mov x29, sp
// Saved callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
cbz x11, .Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x7, [x10], #8
.Largs_done:
blr x9
// Reload |state| and store registers.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
mov x9, sp
cmp x29, x9
b.eq .Lx29_ok
str xzr, [x1]
.Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_x0, %function
.globl abi_test_clobber_x0
.hidden abi_test_clobber_x0
.align 4
abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0
.type abi_test_clobber_x1, %function
.globl abi_test_clobber_x1
.hidden abi_test_clobber_x1
.align 4
abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1
.type abi_test_clobber_x2, %function
.globl abi_test_clobber_x2
.hidden abi_test_clobber_x2
.align 4
abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2
.type abi_test_clobber_x3, %function
.globl abi_test_clobber_x3
.hidden abi_test_clobber_x3
.align 4
abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3
.type abi_test_clobber_x4, %function
.globl abi_test_clobber_x4
.hidden abi_test_clobber_x4
.align 4
abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4
.type abi_test_clobber_x5, %function
.globl abi_test_clobber_x5
.hidden abi_test_clobber_x5
.align 4
abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5
.type abi_test_clobber_x6, %function
.globl abi_test_clobber_x6
.hidden abi_test_clobber_x6
.align 4
abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6
.type abi_test_clobber_x7, %function
.globl abi_test_clobber_x7
.hidden abi_test_clobber_x7
.align 4
abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7
.type abi_test_clobber_x8, %function
.globl abi_test_clobber_x8
.hidden abi_test_clobber_x8
.align 4
abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8
.type abi_test_clobber_x9, %function
.globl abi_test_clobber_x9
.hidden abi_test_clobber_x9
.align 4
abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9
.type abi_test_clobber_x10, %function
.globl abi_test_clobber_x10
.hidden abi_test_clobber_x10
.align 4
abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10
.type abi_test_clobber_x11, %function
.globl abi_test_clobber_x11
.hidden abi_test_clobber_x11
.align 4
abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11
.type abi_test_clobber_x12, %function
.globl abi_test_clobber_x12
.hidden abi_test_clobber_x12
.align 4
abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12
.type abi_test_clobber_x13, %function
.globl abi_test_clobber_x13
.hidden abi_test_clobber_x13
.align 4
abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13
.type abi_test_clobber_x14, %function
.globl abi_test_clobber_x14
.hidden abi_test_clobber_x14
.align 4
abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14
.type abi_test_clobber_x15, %function
.globl abi_test_clobber_x15
.hidden abi_test_clobber_x15
.align 4
abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15
.type abi_test_clobber_x16, %function
.globl abi_test_clobber_x16
.hidden abi_test_clobber_x16
.align 4
abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16
.type abi_test_clobber_x17, %function
.globl abi_test_clobber_x17
.hidden abi_test_clobber_x17
.align 4
abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17
.type abi_test_clobber_x19, %function
.globl abi_test_clobber_x19
.hidden abi_test_clobber_x19
.align 4
abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19
.type abi_test_clobber_x20, %function
.globl abi_test_clobber_x20
.hidden abi_test_clobber_x20
.align 4
abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20
.type abi_test_clobber_x21, %function
.globl abi_test_clobber_x21
.hidden abi_test_clobber_x21
.align 4
abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21
.type abi_test_clobber_x22, %function
.globl abi_test_clobber_x22
.hidden abi_test_clobber_x22
.align 4
abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22
.type abi_test_clobber_x23, %function
.globl abi_test_clobber_x23
.hidden abi_test_clobber_x23
.align 4
abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23
.type abi_test_clobber_x24, %function
.globl abi_test_clobber_x24
.hidden abi_test_clobber_x24
.align 4
abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24
.type abi_test_clobber_x25, %function
.globl abi_test_clobber_x25
.hidden abi_test_clobber_x25
.align 4
abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25
.type abi_test_clobber_x26, %function
.globl abi_test_clobber_x26
.hidden abi_test_clobber_x26
.align 4
abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26
.type abi_test_clobber_x27, %function
.globl abi_test_clobber_x27
.hidden abi_test_clobber_x27
.align 4
abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27
.type abi_test_clobber_x28, %function
.globl abi_test_clobber_x28
.hidden abi_test_clobber_x28
.align 4
abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28
.type abi_test_clobber_x29, %function
.globl abi_test_clobber_x29
.hidden abi_test_clobber_x29
.align 4
abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15
.type abi_test_clobber_d16, %function
.globl abi_test_clobber_d16
.hidden abi_test_clobber_d16
.align 4
abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16
.type abi_test_clobber_d17, %function
.globl abi_test_clobber_d17
.hidden abi_test_clobber_d17
.align 4
abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17
.type abi_test_clobber_d18, %function
.globl abi_test_clobber_d18
.hidden abi_test_clobber_d18
.align 4
abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18
.type abi_test_clobber_d19, %function
.globl abi_test_clobber_d19
.hidden abi_test_clobber_d19
.align 4
abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19
.type abi_test_clobber_d20, %function
.globl abi_test_clobber_d20
.hidden abi_test_clobber_d20
.align 4
abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20
.type abi_test_clobber_d21, %function
.globl abi_test_clobber_d21
.hidden abi_test_clobber_d21
.align 4
abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21
.type abi_test_clobber_d22, %function
.globl abi_test_clobber_d22
.hidden abi_test_clobber_d22
.align 4
abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22
.type abi_test_clobber_d23, %function
.globl abi_test_clobber_d23
.hidden abi_test_clobber_d23
.align 4
abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23
.type abi_test_clobber_d24, %function
.globl abi_test_clobber_d24
.hidden abi_test_clobber_d24
.align 4
abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24
.type abi_test_clobber_d25, %function
.globl abi_test_clobber_d25
.hidden abi_test_clobber_d25
.align 4
abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25
.type abi_test_clobber_d26, %function
.globl abi_test_clobber_d26
.hidden abi_test_clobber_d26
.align 4
abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26
.type abi_test_clobber_d27, %function
.globl abi_test_clobber_d27
.hidden abi_test_clobber_d27
.align 4
abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27
.type abi_test_clobber_d28, %function
.globl abi_test_clobber_d28
.hidden abi_test_clobber_d28
.align 4
abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28
.type abi_test_clobber_d29, %function
.globl abi_test_clobber_d29
.hidden abi_test_clobber_d29
.align 4
abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29
.type abi_test_clobber_d30, %function
.globl abi_test_clobber_d30
.hidden abi_test_clobber_d30
.align 4
abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30
.type abi_test_clobber_d31, %function
.globl abi_test_clobber_d31
.hidden abi_test_clobber_d31
.align 4
abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31
.type abi_test_clobber_v8_upper, %function
.globl abi_test_clobber_v8_upper
.hidden abi_test_clobber_v8_upper
.align 4
abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
.type abi_test_clobber_v9_upper, %function
.globl abi_test_clobber_v9_upper
.hidden abi_test_clobber_v9_upper
.align 4
abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
.type abi_test_clobber_v10_upper, %function
.globl abi_test_clobber_v10_upper
.hidden abi_test_clobber_v10_upper
.align 4
abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
.type abi_test_clobber_v11_upper, %function
.globl abi_test_clobber_v11_upper
.hidden abi_test_clobber_v11_upper
.align 4
abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
.type abi_test_clobber_v12_upper, %function
.globl abi_test_clobber_v12_upper
.hidden abi_test_clobber_v12_upper
.align 4
abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
.type abi_test_clobber_v13_upper, %function
.globl abi_test_clobber_v13_upper
.hidden abi_test_clobber_v13_upper
.align 4
abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
.type abi_test_clobber_v14_upper, %function
.globl abi_test_clobber_v14_upper
.hidden abi_test_clobber_v14_upper
.align 4
abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
.type abi_test_clobber_v15_upper, %function
.globl abi_test_clobber_v15_upper
.hidden abi_test_clobber_v15_upper
.align 4
abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)

View File

@ -1,22 +1,14 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
@ -33,53 +25,21 @@
#endif
.align 5
Lsigma:
.Lsigma:
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral
Lone:
.Lone:
.long 1,0,0,0
#if __ARM_MAX_ARCH__>=7
LOPENSSL_armcap:
.word OPENSSL_armcap_P-LChaCha20_ctr32
#else
.word -1
#endif
.globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32
#ifdef __thumb2__
.thumb_func _ChaCha20_ctr32
#endif
.globl ChaCha20_ctr32_nohw
.hidden ChaCha20_ctr32_nohw
.type ChaCha20_ctr32_nohw,%function
.align 5
_ChaCha20_ctr32:
LChaCha20_ctr32:
ChaCha20_ctr32_nohw:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0,r1,r2,r4-r11,lr}
#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r14,pc,#16 @ _ChaCha20_ctr32
#else
adr r14,LChaCha20_ctr32
#endif
cmp r2,#0 @ len==0?
#ifdef __thumb2__
itt eq
#endif
addeq sp,sp,#4*3
beq Lno_data
#if __ARM_MAX_ARCH__>=7
cmp r2,#192 @ test len
bls Lshort
ldr r4,[r14,#-32]
ldr r4,[r14,r4]
# ifdef __APPLE__
ldr r4,[r4]
# endif
tst r4,#ARMV7_NEON
bne LChaCha20_neon
Lshort:
#endif
adr r14,.Lsigma
ldmia r12,{r4,r5,r6,r7} @ load counter and nonce
sub sp,sp,#4*(16) @ off-load area
sub r14,r14,#64 @ Lsigma
stmdb sp!,{r4,r5,r6,r7} @ copy counter and nonce
ldmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} @ load key
ldmia r14,{r0,r1,r2,r3} @ load sigma
@ -87,25 +47,25 @@ Lshort:
stmdb sp!,{r0,r1,r2,r3} @ copy sigma
str r10,[sp,#4*(16+10)] @ off-load "rx"
str r11,[sp,#4*(16+11)] @ off-load "rx"
b Loop_outer_enter
b .Loop_outer_enter
.align 4
Loop_outer:
.Loop_outer:
ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material
str r11,[sp,#4*(32+2)] @ save len
str r12, [sp,#4*(32+1)] @ save inp
str r14, [sp,#4*(32+0)] @ save out
Loop_outer_enter:
.Loop_outer_enter:
ldr r11, [sp,#4*(15)]
ldr r12,[sp,#4*(12)] @ modulo-scheduled load
ldr r10, [sp,#4*(13)]
ldr r14,[sp,#4*(14)]
str r11, [sp,#4*(16+15)]
mov r11,#10
b Loop
b .Loop
.align 4
Loop:
.Loop:
subs r11,r11,#1
add r0,r0,r4
mov r12,r12,ror#16
@ -215,7 +175,7 @@ Loop:
mov r4,r4,ror#25
eor r7,r7,r8,ror#25
eor r4,r4,r9,ror#25
bne Loop
bne .Loop
ldr r11,[sp,#4*(32+2)] @ load len
@ -240,12 +200,12 @@ Loop:
ldr r8,[sp,#4*(0)] @ load key material
ldr r9,[sp,#4*(1)]
#if __ARM_ARCH__>=6 || !defined(__ARMEB__)
# if __ARM_ARCH__<7
#if __ARM_ARCH>=6 || !defined(__ARMEB__)
# if __ARM_ARCH<7
orr r10,r12,r14
tst r10,#3 @ are input and output aligned?
ldr r10,[sp,#4*(2)]
bne Lunaligned
bne .Lunaligned
cmp r11,#64 @ restore flags
# else
ldr r10,[sp,#4*(2)]
@ -267,7 +227,7 @@ Loop:
# endif
ldrhs r10,[r12,#-8]
ldrhs r11,[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
# if __ARM_ARCH>=6 && defined(__ARMEB__)
rev r0,r0
rev r1,r1
rev r2,r2
@ -304,7 +264,7 @@ Loop:
# endif
ldrhs r10,[r12,#-8]
ldrhs r11,[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
# if __ARM_ARCH>=6 && defined(__ARMEB__)
rev r4,r4
rev r5,r5
rev r6,r6
@ -349,7 +309,7 @@ Loop:
# endif
ldrhs r10,[r12,#-8]
ldrhs r11,[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
# if __ARM_ARCH>=6 && defined(__ARMEB__)
rev r0,r0
rev r1,r1
rev r2,r2
@ -391,7 +351,7 @@ Loop:
# endif
ldrhs r10,[r12,#-8]
ldrhs r11,[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
# if __ARM_ARCH>=6 && defined(__ARMEB__)
rev r4,r4
rev r5,r5
rev r6,r6
@ -419,18 +379,18 @@ Loop:
subhs r11,r8,#64 @ len-=64
str r6,[r14,#-8]
str r7,[r14,#-4]
bhi Loop_outer
bhi .Loop_outer
beq Ldone
# if __ARM_ARCH__<7
b Ltail
beq .Ldone
# if __ARM_ARCH<7
b .Ltail
.align 4
Lunaligned:@ unaligned endian-neutral path
.Lunaligned:@ unaligned endian-neutral path
cmp r11,#64 @ restore flags
# endif
#endif
#if __ARM_ARCH__<7
#if __ARM_ARCH<7
ldr r11,[sp,#4*(3)]
add r0,r0,r8 @ accumulate key material
add r1,r1,r9
@ -786,42 +746,40 @@ Lunaligned:@ unaligned endian-neutral path
it hs
# endif
subhs r11,r8,#64 @ len-=64
bhi Loop_outer
bhi .Loop_outer
beq Ldone
beq .Ldone
#endif
Ltail:
.Ltail:
ldr r12,[sp,#4*(32+1)] @ load inp
add r9,sp,#4*(0)
ldr r14,[sp,#4*(32+0)] @ load out
Loop_tail:
.Loop_tail:
ldrb r10,[r9],#1 @ read buffer on stack
ldrb r11,[r12],#1 @ read input
subs r8,r8,#1
eor r11,r11,r10
strb r11,[r14],#1 @ store output
bne Loop_tail
bne .Loop_tail
Ldone:
.Ldone:
add sp,sp,#4*(32+3)
Lno_data:
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
#ifdef __thumb2__
.thumb_func ChaCha20_neon
#endif
.globl ChaCha20_ctr32_neon
.hidden ChaCha20_ctr32_neon
.type ChaCha20_ctr32_neon,%function
.align 5
ChaCha20_neon:
ChaCha20_ctr32_neon:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0,r1,r2,r4-r11,lr}
LChaCha20_neon:
adr r14,Lsigma
adr r14,.Lsigma
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI spec says so
stmdb sp!,{r0,r1,r2,r3}
@ -848,13 +806,13 @@ LChaCha20_neon:
vmov q8,q0
vmov q5,q1
vmov q9,q1
b Loop_neon_enter
b .Loop_neon_enter
.align 4
Loop_neon_outer:
.Loop_neon_outer:
ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material
cmp r11,#64*2 @ if len<=64*2
bls Lbreak_neon @ switch to integer-only
bls .Lbreak_neon @ switch to integer-only
vmov q4,q0
str r11,[sp,#4*(32+2)] @ save len
vmov q8,q0
@ -862,7 +820,7 @@ Loop_neon_outer:
vmov q5,q1
str r14, [sp,#4*(32+0)] @ save out
vmov q9,q1
Loop_neon_enter:
.Loop_neon_enter:
ldr r11, [sp,#4*(15)]
vadd.i32 q7,q3,q12 @ counter+1
ldr r12,[sp,#4*(12)] @ modulo-scheduled load
@ -874,10 +832,10 @@ Loop_neon_enter:
str r11, [sp,#4*(16+15)]
mov r11,#10
add r12,r12,#3 @ counter+3
b Loop_neon
b .Loop_neon
.align 4
Loop_neon:
.Loop_neon:
subs r11,r11,#1
vadd.i32 q0,q0,q1
add r0,r0,r4
@ -1095,7 +1053,7 @@ Loop_neon:
eor r7,r7,r8,ror#25
vext.8 q11,q11,q11,#4
eor r4,r4,r9,ror#25
bne Loop_neon
bne .Loop_neon
add r11,sp,#32
vld1.32 {q12,q13},[sp] @ load key material
@ -1136,7 +1094,7 @@ Loop_neon:
vadd.i32 q11,q11,q15
cmp r11,#64*4
blo Ltail_neon
blo .Ltail_neon
vld1.8 {q12,q13},[r12]! @ load input
mov r11,sp
@ -1290,12 +1248,12 @@ Loop_neon:
sub r11,r8,#64*4 @ len-=64*4
str r6,[r14,#-8]
str r7,[r14,#-4]
bhi Loop_neon_outer
bhi .Loop_neon_outer
b Ldone_neon
b .Ldone_neon
.align 4
Lbreak_neon:
.Lbreak_neon:
@ harmonize NEON and integer-only stack frames: load data
@ from NEON frame, but save to integer-only one; distance
@ between the two is 4*(32+4+16-32)=4*(20).
@ -1321,25 +1279,25 @@ Lbreak_neon:
add sp,sp,#4*(20) @ switch frame
vst1.32 {q2,q3},[r11]
mov r11,#10
b Loop @ go integer-only
b .Loop @ go integer-only
.align 4
Ltail_neon:
.Ltail_neon:
cmp r11,#64*3
bhs L192_or_more_neon
bhs .L192_or_more_neon
cmp r11,#64*2
bhs L128_or_more_neon
bhs .L128_or_more_neon
cmp r11,#64*1
bhs L64_or_more_neon
bhs .L64_or_more_neon
add r8,sp,#4*(8)
vst1.8 {q0,q1},[sp]
add r10,sp,#4*(0)
vst1.8 {q2,q3},[r8]
b Loop_tail_neon
b .Loop_tail_neon
.align 4
L64_or_more_neon:
.L64_or_more_neon:
vld1.8 {q12,q13},[r12]!
vld1.8 {q14,q15},[r12]!
veor q0,q0,q12
@ -1349,17 +1307,17 @@ L64_or_more_neon:
vst1.8 {q0,q1},[r14]!
vst1.8 {q2,q3},[r14]!
beq Ldone_neon
beq .Ldone_neon
add r8,sp,#4*(8)
vst1.8 {q4,q5},[sp]
add r10,sp,#4*(0)
vst1.8 {q6,q7},[r8]
sub r11,r11,#64*1 @ len-=64*1
b Loop_tail_neon
b .Loop_tail_neon
.align 4
L128_or_more_neon:
.L128_or_more_neon:
vld1.8 {q12,q13},[r12]!
vld1.8 {q14,q15},[r12]!
veor q0,q0,q12
@ -1378,17 +1336,17 @@ L128_or_more_neon:
vst1.8 {q4,q5},[r14]!
vst1.8 {q6,q7},[r14]!
beq Ldone_neon
beq .Ldone_neon
add r8,sp,#4*(8)
vst1.8 {q8,q9},[sp]
add r10,sp,#4*(0)
vst1.8 {q10,q11},[r8]
sub r11,r11,#64*2 @ len-=64*2
b Loop_tail_neon
b .Loop_tail_neon
.align 4
L192_or_more_neon:
.L192_or_more_neon:
vld1.8 {q12,q13},[r12]!
vld1.8 {q14,q15},[r12]!
veor q0,q0,q12
@ -1416,7 +1374,7 @@ L192_or_more_neon:
vst1.8 {q8,q9},[r14]!
vst1.8 {q10,q11},[r14]!
beq Ldone_neon
beq .Ldone_neon
ldmia sp,{r8,r9,r10,r11} @ load key material
add r0,r0,r8 @ accumulate key material
@ -1475,24 +1433,19 @@ L192_or_more_neon:
add r10,sp,#4*(0)
sub r11,r11,#64*3 @ len-=64*3
Loop_tail_neon:
.Loop_tail_neon:
ldrb r8,[r10],#1 @ read buffer on stack
ldrb r9,[r12],#1 @ read input
subs r11,r11,#1
eor r8,r8,r9
strb r8,[r14],#1 @ store output
bne Loop_tail_neon
bne .Loop_tail_neon
Ldone_neon:
.Ldone_neon:
add sp,sp,#4*(32+4)
vldmia sp,{d8,d9,d10,d11,d12,d13,d14,d15}
add sp,sp,#4*(16+3)
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
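Both paths in this file, the integer-only .Loop and the NEON .Loop_neon, are unrolled forms of the same ChaCha20 round function; the ror-based scheduling and the vext lane rotations only rearrange when each quarter-round step happens. As a reference point, here is a plain sketch of the block function they implement, following the RFC 8439 layout (quarter_round and chacha20_block are illustrative names, not symbols from this build):

def rotl32(x, n):
    return ((x << n) | (x >> (32 - n))) & 0xFFFFFFFF

def quarter_round(s, a, b, c, d):
    # One ChaCha20 quarter-round; the assembly interleaves four of these per
    # column/diagonal pass and runs 10 double-rounds (the "mov r11,#10" above).
    s[a] = (s[a] + s[b]) & 0xFFFFFFFF; s[d] = rotl32(s[d] ^ s[a], 16)
    s[c] = (s[c] + s[d]) & 0xFFFFFFFF; s[b] = rotl32(s[b] ^ s[c], 12)
    s[a] = (s[a] + s[b]) & 0xFFFFFFFF; s[d] = rotl32(s[d] ^ s[a], 8)
    s[c] = (s[c] + s[d]) & 0xFFFFFFFF; s[b] = rotl32(s[b] ^ s[c], 7)

def chacha20_block(key_words, counter, nonce_words):
    # key_words: 8 words, nonce_words: 3 words; the assembly takes counter+nonce
    # as one 4-word block.  The first four words are the .Lsigma constant.
    state = [0x61707865, 0x3320646E, 0x79622D32, 0x6B206574] \
            + key_words + [counter] + nonce_words
    w = state[:]
    for _ in range(10):                       # 10 double-rounds = 20 rounds
        quarter_round(w, 0, 4, 8, 12); quarter_round(w, 1, 5, 9, 13)
        quarter_round(w, 2, 6, 10, 14); quarter_round(w, 3, 7, 11, 15)
        quarter_round(w, 0, 5, 10, 15); quarter_round(w, 1, 6, 11, 12)
        quarter_round(w, 2, 7, 8, 13); quarter_round(w, 3, 4, 9, 14)
    return [(w[i] + state[i]) & 0xFFFFFFFF for i in range(16)]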

View File

@ -1,55 +1,45 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with armv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
Lrcon:
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
aes_hw_set_encrypt_key:
.Lenc_key:
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
beq .Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
beq .Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
blt .Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
bgt .Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
bne .Lenc_key_abort
adr r3,Lrcon
adr r3,.Lrcon
cmp r1,#192
veor q0,q0,q0
@ -57,12 +47,12 @@ Lenc_key:
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt Loop128
beq L192
b L256
blt .Loop128
beq .L192
b .L256
.align 4
Loop128:
.Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
@ -78,7 +68,7 @@ Loop128:
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128
bne .Loop128
vld1.32 {q1},[r3]
@ -114,16 +104,16 @@ Loop128:
add r2,r2,#0x50
mov r12,#10
b Ldone
b .Ldone
.align 4
L192:
.L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
Loop192:
.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
@ -146,20 +136,20 @@ Loop192:
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192
bne .Loop192
mov r12,#12
add r2,r2,#0x20
b Ldone
b .Ldone
.align 4
L256:
.L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
Loop256:
.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
@ -176,7 +166,7 @@ Loop256:
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone
beq .Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
@ -189,30 +179,28 @@ Loop256:
veor q8,q8,q9
veor q8,q8,q10
b Loop256
b .Loop256
Ldone:
.Ldone:
str r12,[r2]
mov r3,#0
Lenc_key_abort:
.Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
_aes_hw_set_decrypt_key:
aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key
bl .Lenc_key
cmp r0,#0
bne Ldec_key_abort
bne .Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
@ -223,7 +211,7 @@ _aes_hw_set_decrypt_key:
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
Loop_imc:
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
@ -231,30 +219,29 @@ Loop_imc:
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc
bhi .Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
Ldec_key_abort:
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
_aes_hw_encrypt:
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_enc:
.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
@ -262,7 +249,7 @@ Loop_enc:
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc
bgt .Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
@ -272,21 +259,20 @@ Loop_enc:
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
_aes_hw_decrypt:
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_dec:
.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
@ -294,7 +280,7 @@ Loop_dec:
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec
bgt .Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
@ -304,21 +290,19 @@ Loop_dec:
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
_aes_hw_cbc_encrypt:
aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
blo .Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
@ -338,12 +322,12 @@ _aes_hw_cbc_encrypt:
add r7,r3,#32
mov r6,r5
beq Lcbc_dec
beq .Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128
beq .Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
@ -353,14 +337,14 @@ _aes_hw_cbc_encrypt:
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc
b .Lenter_cbc_enc
.align 4
Loop_cbc_enc:
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
@ -370,7 +354,7 @@ Lenter_cbc_enc:
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq Lcbc_enc192
beq .Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ -380,7 +364,7 @@ Lenter_cbc_enc:
vld1.32 {q9},[r3]
nop
Lcbc_enc192:
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
@ -402,22 +386,22 @@ Lcbc_enc192:
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc
bhs .Loop_cbc_enc
vst1.8 {q6},[r1]!
b Lcbc_done
b .Lcbc_done
.align 5
Lcbc_enc128:
.Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
@ -440,19 +424,19 @@ Lenter_cbc_enc128:
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128
bhs .Loop_cbc_enc128
vst1.8 {q6},[r1]!
b Lcbc_done
b .Lcbc_done
.align 5
Lcbc_dec:
.Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail
blo .Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
@ -460,7 +444,7 @@ Lcbc_dec:
vorr q3,q1,q1
vorr q11,q10,q10
Loop3x_cbc_dec:
.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
@ -476,7 +460,7 @@ Loop3x_cbc_dec:
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec
bgt .Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
@ -536,13 +520,13 @@ Loop3x_cbc_dec:
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs Loop3x_cbc_dec
bhs .Loop3x_cbc_dec
cmn r2,#0x30
beq Lcbc_done
beq .Lcbc_done
nop
Lcbc_dec_tail:
.Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
@ -554,7 +538,7 @@ Lcbc_dec_tail:
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail
bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
@ -581,32 +565,30 @@ Lcbc_dec_tail:
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done
b .Lcbc_done
Lcbc_dec_one:
.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
Lcbc_done:
.Lcbc_done:
vst1.8 {q6},[r4]
Lcbc_abort:
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
_aes_hw_ctr32_encrypt_blocks:
aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
@ -628,24 +610,38 @@ _aes_hw_ctr32_encrypt_blocks:
add r7,r3,#32
mov r6,r5
movlo r12,#0
@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
@ affected by silicon errata #1742098 [0] and #1655431 [1],
@ respectively, where the second instruction of an aese/aesmc
@ instruction pair may execute twice if an interrupt is taken right
@ after the first instruction consumes an input register of which a
@ single 32-bit lane has been updated the last time it was modified.
@
@ This function uses a counter in one 32-bit lane. The code
@ could write to q1 and q10 directly, but that trips these bugs.
@ We write to q6 and copy to the final register as a workaround.
@
@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls Lctr32_tail
vmov.32 d13[1],r10
add r8, r8, #2
vorr q1,q6,q6
bls .Lctr32_tail
rev r12, r8
vmov.32 d13[1],r12
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32
vorr q10,q6,q6
b .Loop3x_ctr32
.align 4
Loop3x_ctr32:
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
@ -661,18 +657,18 @@ Loop3x_ctr32:
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32
bgt .Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
add r9,r8,#1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
rev r9,r9
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
@ -681,8 +677,6 @@ Loop3x_ctr32:
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
@ -697,21 +691,26 @@ Loop3x_ctr32:
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
@ Note the logic to update q0, q1, and q10 is written to work
@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
@ 32-bit mode. See the comment above.
veor q11,q11,q7
rev r9,r9
vmov.32 d13[1], r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
vorr q0,q6,q6
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
vmov.32 d13[1], r10
rev r12,r8
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
vorr q1,q6,q6
vmov.32 d13[1], r12
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
vorr q10,q6,q6
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
@ -726,15 +725,15 @@ Loop3x_ctr32:
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32
bhs .Loop3x_ctr32
adds r2,r2,#3
beq Lctr32_done
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
Lctr32_tail:
.Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
@ -746,7 +745,7 @@ Lctr32_tail:
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail
bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ -779,12 +778,12 @@ Lctr32_tail:
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
beq .Lctr32_done
vst1.8 {q3},[r1]
Lctr32_done:
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
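Two details of aes_hw_ctr32_encrypt_blocks above are easy to miss: only the last 32-bit word of the 16-byte counter block is incremented, big-endian and wrapping modulo 2^32 (hence the rev/add/rev sequence on r8/r10/r12), and the write-to-q6-then-copy shuffle exists purely as the Cortex-A57/A72 errata workaround described in the comment, not for the counter math itself. A small sketch of just the counter walk (ctr32_blocks is an illustrative name, not part of this build):

def ctr32_blocks(counter_block, nblocks):
    # counter_block: 16 bytes; only its last 4 bytes are a big-endian counter,
    # which wraps modulo 2**32, matching the 32-bit rev/add handling above.
    prefix = counter_block[:12]
    ctr = int.from_bytes(counter_block[12:], "big")
    for i in range(nblocks):
        yield prefix + ((ctr + i) & 0xFFFFFFFF).to_bytes(4, "big")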

View File

@ -1,22 +1,14 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__)
@ -26,40 +18,14 @@
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif
.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif
.globl bn_mul_mont_nohw
.hidden bn_mul_mont_nohw
.type bn_mul_mont_nohw,%function
.align 5
_bn_mul_mont:
Lbn_mul_mont:
bn_mul_mont_nohw:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne Lialu
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
@ -67,7 +33,7 @@ Lialu:
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt Labrt
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
@ -92,7 +58,7 @@ Lialu:
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
L1st:
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
@ -104,7 +70,7 @@ L1st:
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne L1st
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
@ -115,7 +81,7 @@ L1st:
mov r7,sp
str r14,[r0,#4] @ tp[num]=
Louter:
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
@ -133,7 +99,7 @@ Louter:
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
Linner:
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
@ -147,7 +113,7 @@ Linner:
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne Linner
bne .Linner
adds r12,r12,r11
mov r14,#0
@ -165,7 +131,7 @@ Linner:
itt ne
#endif
movne r7,sp
bne Louter
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
@ -176,17 +142,17 @@ Linner:
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
Lsub: ldr r7,[r4],#4
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne Lsub
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
Lcopy: ldr r7,[r4] @ conditional copy
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
@ -195,29 +161,29 @@ Lcopy: ldr r7,[r4] @ conditional copy
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne Lcopy
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
Labrt:
#if __ARM_ARCH__>=5
.Labrt:
#if __ARM_ARCH>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
#ifdef __thumb2__
.thumb_func bn_mul8x_mont_neon
#endif
.globl bn_mul8x_mont_neon
.hidden bn_mul8x_mont_neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
@ -227,7 +193,7 @@ bn_mul8x_mont_neon:
mov ip,sp
cmp r5,#8
bhi LNEON_8n
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
@ -280,10 +246,10 @@ bn_mul8x_mont_neon:
veor q13,q13
vshr.u64 d10,d10,#16
b LNEON_outer8
b .LNEON_outer8
.align 4
LNEON_outer8:
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
@ -328,7 +294,7 @@ LNEON_outer8:
veor q13,q13
vshr.u64 d10,d10,#16
bne LNEON_outer8
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
@ -339,10 +305,10 @@ LNEON_outer8:
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b LNEON_tail_entry
b .LNEON_tail_entry
.align 4
LNEON_8n:
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
@ -358,23 +324,23 @@ LNEON_8n:
veor q12,q12,q12
veor q13,q13,q13
LNEON_8n_init:
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne LNEON_8n_init
bne .LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b LNEON_8n_outer
b .LNEON_8n_outer
.align 4
LNEON_8n_outer:
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
@ -629,10 +595,10 @@ LNEON_8n_outer:
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b LNEON_8n_inner
b .LNEON_8n_inner
.align 4
LNEON_8n_inner:
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
@ -816,7 +782,7 @@ LNEON_8n_inner:
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne LNEON_8n_inner
bne .LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
@ -833,7 +799,7 @@ LNEON_8n_inner:
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne LNEON_8n_outer
bne .LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
@ -846,10 +812,10 @@ LNEON_8n_inner:
vzip.16 d12,d13
mov r8,r5
b LNEON_tail_entry
b .LNEON_tail_entry
.align 4
LNEON_tail:
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
@ -859,7 +825,7 @@ LNEON_tail:
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
LNEON_tail_entry:
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
@ -905,14 +871,14 @@ LNEON_tail_entry:
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne LNEON_tail
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
LNEON_sub:
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
@ -921,7 +887,7 @@ LNEON_sub:
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_sub
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
@ -933,7 +899,7 @@ LNEON_sub:
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
LNEON_copy_n_zap:
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
@ -960,23 +926,14 @@ LNEON_copy_n_zap:
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_copy_n_zap
bne .LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,17 +1,9 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
@ Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ -75,7 +67,6 @@
# define VFP_ABI_FRAME 0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif
@ -84,8 +75,8 @@
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
@ -96,20 +87,18 @@
# undef __thumb2__
#endif
#ifdef __thumb2__
.thumb_func _bsaes_decrypt8
#endif
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:
adr r6,.
vldmia r4!, {q9} @ round 0 key
#if defined(__thumb2__) || defined(__APPLE__)
adr r6,LM0ISR
adr r6,.LM0ISR
#else
add r6,r6,#LM0ISR-_bsaes_decrypt8
add r6,r6,#.LM0ISR-_bsaes_decrypt8
#endif
vldmia r6!, {q8} @ LM0ISR
vldmia r6!, {q8} @ .LM0ISR
veor q10, q0, q9 @ xor with round0 key
veor q11, q1, q9
vtbl.8 d0, {q10}, d16
@ -134,8 +123,8 @@ _bsaes_decrypt8:
vtbl.8 d13, {q10}, d17
vtbl.8 d14, {q11}, d16
vtbl.8 d15, {q11}, d17
vmov.i8 q8,#0x55 @ compose LBS0
vmov.i8 q9,#0x33 @ compose LBS1
vmov.i8 q8,#0x55 @ compose .LBS0
vmov.i8 q9,#0x33 @ compose .LBS1
vshr.u64 q10, q6, #1
vshr.u64 q11, q4, #1
veor q10, q10, q7
@ -160,7 +149,7 @@ _bsaes_decrypt8:
vshl.u64 q11, q11, #1
veor q2, q2, q10
veor q0, q0, q11
vmov.i8 q8,#0x0f @ compose LBS2
vmov.i8 q8,#0x0f @ compose .LBS2
vshr.u64 q10, q5, #2
vshr.u64 q11, q4, #2
veor q10, q10, q7
@ -210,9 +199,9 @@ _bsaes_decrypt8:
veor q1, q1, q10
veor q0, q0, q11
sub r5,r5,#1
b Ldec_sbox
b .Ldec_sbox
.align 4
Ldec_loop:
.Ldec_loop:
vldmia r4!, {q8,q9,q10,q11}
veor q8, q8, q0
veor q9, q9, q1
@ -242,7 +231,7 @@ Ldec_loop:
vtbl.8 d13, {q10}, d25
vtbl.8 d14, {q11}, d24
vtbl.8 d15, {q11}, d25
Ldec_sbox:
.Ldec_sbox:
veor q1, q1, q4
veor q3, q3, q4
@ -391,7 +380,7 @@ Ldec_sbox:
veor q4, q4, q0
veor q7, q7, q3
subs r5,r5,#1
bcc Ldec_done
bcc .Ldec_done
@ multiplication by 0x05-0x00-0x04-0x00
vext.8 q8, q0, q0, #8
vext.8 q14, q3, q3, #8
@ -470,16 +459,16 @@ Ldec_sbox:
veor q2, q3, q10
vmov q3, q11
@ vmov q5, q9
vldmia r6, {q12} @ LISR
vldmia r6, {q12} @ .LISR
ite eq @ Thumb2 thing, sanity check in ARM
addeq r6,r6,#0x10
bne Ldec_loop
vldmia r6, {q12} @ LISRM0
b Ldec_loop
bne .Ldec_loop
vldmia r6, {q12} @ .LISRM0
b .Ldec_loop
.align 4
Ldec_done:
vmov.i8 q8,#0x55 @ compose LBS0
vmov.i8 q9,#0x33 @ compose LBS1
.Ldec_done:
vmov.i8 q8,#0x55 @ compose .LBS0
vmov.i8 q9,#0x33 @ compose .LBS1
vshr.u64 q10, q3, #1
vshr.u64 q11, q2, #1
veor q10, q10, q5
@ -504,7 +493,7 @@ Ldec_done:
vshl.u64 q11, q11, #1
veor q6, q6, q10
veor q0, q0, q11
vmov.i8 q8,#0x0f @ compose LBS2
vmov.i8 q8,#0x0f @ compose .LBS2
vshr.u64 q10, q7, #2
vshr.u64 q11, q2, #2
veor q10, q10, q5
@ -563,46 +552,44 @@ Ldec_done:
veor q0, q0, q8
veor q1, q1, q8
bx lr
.size _bsaes_decrypt8,.-_bsaes_decrypt8
.type _bsaes_const,%object
.align 6
_bsaes_const:
LM0ISR:@ InvShiftRows constants
.LM0ISR:@ InvShiftRows constants
.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
LISR:
.LISR:
.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
LISRM0:
.LISRM0:
.quad 0x01040b0e0205080f, 0x0306090c00070a0d
LM0SR:@ ShiftRows constants
.LM0SR:@ ShiftRows constants
.quad 0x0a0e02060f03070b, 0x0004080c05090d01
LSR:
.LSR:
.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
LSRM0:
.LSRM0:
.quad 0x0304090e00050a0f, 0x01060b0c0207080d
LM0:
.LM0:
.quad 0x02060a0e03070b0f, 0x0004080c0105090d
LREVM0SR:
.LREVM0SR:
.quad 0x090d01050c000408, 0x03070b0f060a0e02
.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 6
.size _bsaes_const,.-_bsaes_const
#ifdef __thumb2__
.thumb_func _bsaes_encrypt8
#endif
.type _bsaes_encrypt8,%function
.align 4
_bsaes_encrypt8:
adr r6,.
vldmia r4!, {q9} @ round 0 key
#if defined(__thumb2__) || defined(__APPLE__)
adr r6,LM0SR
adr r6,.LM0SR
#else
sub r6,r6,#_bsaes_encrypt8-LM0SR
sub r6,r6,#_bsaes_encrypt8-.LM0SR
#endif
vldmia r6!, {q8} @ LM0SR
vldmia r6!, {q8} @ .LM0SR
_bsaes_encrypt8_alt:
veor q10, q0, q9 @ xor with round0 key
veor q11, q1, q9
@ -629,8 +616,8 @@ _bsaes_encrypt8_alt:
vtbl.8 d14, {q11}, d16
vtbl.8 d15, {q11}, d17
_bsaes_encrypt8_bitslice:
vmov.i8 q8,#0x55 @ compose LBS0
vmov.i8 q9,#0x33 @ compose LBS1
vmov.i8 q8,#0x55 @ compose .LBS0
vmov.i8 q9,#0x33 @ compose .LBS1
vshr.u64 q10, q6, #1
vshr.u64 q11, q4, #1
veor q10, q10, q7
@ -655,7 +642,7 @@ _bsaes_encrypt8_bitslice:
vshl.u64 q11, q11, #1
veor q2, q2, q10
veor q0, q0, q11
vmov.i8 q8,#0x0f @ compose LBS2
vmov.i8 q8,#0x0f @ compose .LBS2
vshr.u64 q10, q5, #2
vshr.u64 q11, q4, #2
veor q10, q10, q7
@ -705,9 +692,9 @@ _bsaes_encrypt8_bitslice:
veor q1, q1, q10
veor q0, q0, q11
sub r5,r5,#1
b Lenc_sbox
b .Lenc_sbox
.align 4
Lenc_loop:
.Lenc_loop:
vldmia r4!, {q8,q9,q10,q11}
veor q8, q8, q0
veor q9, q9, q1
@ -737,7 +724,7 @@ Lenc_loop:
vtbl.8 d13, {q10}, d25
vtbl.8 d14, {q11}, d24
vtbl.8 d15, {q11}, d25
Lenc_sbox:
.Lenc_sbox:
veor q2, q2, q1
veor q5, q5, q6
veor q3, q3, q0
@ -885,7 +872,7 @@ Lenc_sbox:
veor q6, q6, q3
subs r5,r5,#1
bcc Lenc_done
bcc .Lenc_done
vext.8 q8, q0, q0, #12 @ x0 <<< 32
vext.8 q9, q1, q1, #12
veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
@ -932,16 +919,16 @@ Lenc_sbox:
@ vmov q4, q8
veor q2, q2, q10
@ vmov q5, q9
vldmia r6, {q12} @ LSR
vldmia r6, {q12} @ .LSR
	ite eq @ Thumb2 thing, sanity check in ARM
addeq r6,r6,#0x10
bne Lenc_loop
vldmia r6, {q12} @ LSRM0
b Lenc_loop
bne .Lenc_loop
vldmia r6, {q12} @ .LSRM0
b .Lenc_loop
.align 4
Lenc_done:
vmov.i8 q8,#0x55 @ compose LBS0
vmov.i8 q9,#0x33 @ compose LBS1
.Lenc_done:
vmov.i8 q8,#0x55 @ compose .LBS0
vmov.i8 q9,#0x33 @ compose .LBS1
vshr.u64 q10, q2, #1
vshr.u64 q11, q3, #1
veor q10, q10, q5
@ -966,7 +953,7 @@ Lenc_done:
vshl.u64 q11, q11, #1
veor q4, q4, q10
veor q0, q0, q11
vmov.i8 q8,#0x0f @ compose LBS2
vmov.i8 q8,#0x0f @ compose .LBS2
vshr.u64 q10, q7, #2
vshr.u64 q11, q3, #2
veor q10, q10, q5
@ -1025,18 +1012,16 @@ Lenc_done:
veor q0, q0, q8
veor q1, q1, q8
bx lr
#ifdef __thumb2__
.thumb_func _bsaes_key_convert
#endif
.size _bsaes_encrypt8,.-_bsaes_encrypt8
.type _bsaes_key_convert,%function
.align 4
_bsaes_key_convert:
adr r6,.
vld1.8 {q7}, [r4]! @ load round 0 key
#if defined(__thumb2__) || defined(__APPLE__)
adr r6,LM0
adr r6,.LM0
#else
sub r6,r6,#_bsaes_key_convert-LM0
sub r6,r6,#_bsaes_key_convert-.LM0
#endif
vld1.8 {q15}, [r4]! @ load round 1 key
@ -1046,7 +1031,7 @@ _bsaes_key_convert:
vmov.i8 q11, #0x08
vmov.i8 q12, #0x10
vmov.i8 q13, #0x20
vldmia r6, {q14} @ LM0
vldmia r6, {q14} @ .LM0
#ifdef __ARMEL__
vrev32.8 q7, q7
@ -1054,10 +1039,10 @@ _bsaes_key_convert:
#endif
sub r5,r5,#1
vstmia r12!, {q7} @ save round 0 key
b Lkey_loop
b .Lkey_loop
.align 4
Lkey_loop:
.Lkey_loop:
vtbl.8 d14,{q15},d28
vtbl.8 d15,{q15},d29
vmov.i8 q6, #0x40
@ -1081,19 +1066,17 @@ Lkey_loop:
#endif
subs r5,r5,#1
vstmia r12!,{q0,q1,q2,q3,q4,q5,q6,q7} @ write bit-sliced round key
bne Lkey_loop
bne .Lkey_loop
vmov.i8 q7,#0x63 @ compose L63
vmov.i8 q7,#0x63 @ compose .L63
@ don't save last round key
bx lr
.globl _bsaes_cbc_encrypt
.private_extern _bsaes_cbc_encrypt
#ifdef __thumb2__
.thumb_func _bsaes_cbc_encrypt
#endif
.size _bsaes_key_convert,.-_bsaes_key_convert
.globl bsaes_cbc_encrypt
.hidden bsaes_cbc_encrypt
.type bsaes_cbc_encrypt,%function
.align 5
_bsaes_cbc_encrypt:
bsaes_cbc_encrypt:
@ In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for
@ short inputs. We patch this out, using bsaes for all input sizes.
@ -1144,12 +1127,12 @@ _bsaes_cbc_encrypt:
#endif
vld1.8 {q15}, [r8] @ load IV
b Lcbc_dec_loop
b .Lcbc_dec_loop
.align 4
Lcbc_dec_loop:
.Lcbc_dec_loop:
subs r2, r2, #0x8
bmi Lcbc_dec_loop_finish
bmi .Lcbc_dec_loop_finish
vld1.8 {q0,q1}, [r0]! @ load input
vld1.8 {q2,q3}, [r0]!
@ -1187,11 +1170,11 @@ Lcbc_dec_loop:
vst1.8 {q3}, [r1]!
vst1.8 {q5}, [r1]!
b Lcbc_dec_loop
b .Lcbc_dec_loop
Lcbc_dec_loop_finish:
.Lcbc_dec_loop_finish:
adds r2, r2, #8
beq Lcbc_dec_done
beq .Lcbc_dec_done
@ Set up most parameters for the _bsaes_decrypt8 call.
#ifndef BSAES_ASM_EXTENDED_KEY
@ -1204,19 +1187,19 @@ Lcbc_dec_loop_finish:
vld1.8 {q0}, [r0]! @ load input
cmp r2, #2
blo Lcbc_dec_one
blo .Lcbc_dec_one
vld1.8 {q1}, [r0]!
beq Lcbc_dec_two
beq .Lcbc_dec_two
vld1.8 {q2}, [r0]!
cmp r2, #4
blo Lcbc_dec_three
blo .Lcbc_dec_three
vld1.8 {q3}, [r0]!
beq Lcbc_dec_four
beq .Lcbc_dec_four
vld1.8 {q4}, [r0]!
cmp r2, #6
blo Lcbc_dec_five
blo .Lcbc_dec_five
vld1.8 {q5}, [r0]!
beq Lcbc_dec_six
beq .Lcbc_dec_six
vld1.8 {q6}, [r0]!
sub r0, r0, #0x70
@ -1240,9 +1223,9 @@ Lcbc_dec_loop_finish:
vst1.8 {q2}, [r1]!
vst1.8 {q7}, [r1]!
vst1.8 {q3}, [r1]!
b Lcbc_dec_done
b .Lcbc_dec_done
.align 4
Lcbc_dec_six:
.Lcbc_dec_six:
sub r0, r0, #0x60
bl _bsaes_decrypt8
vldmia r9,{q14} @ reload IV
@ -1261,9 +1244,9 @@ Lcbc_dec_six:
vst1.8 {q4}, [r1]!
vst1.8 {q2}, [r1]!
vst1.8 {q7}, [r1]!
b Lcbc_dec_done
b .Lcbc_dec_done
.align 4
Lcbc_dec_five:
.Lcbc_dec_five:
sub r0, r0, #0x50
bl _bsaes_decrypt8
vldmia r9, {q14} @ reload IV
@ -1279,9 +1262,9 @@ Lcbc_dec_five:
vst1.8 {q6}, [r1]!
vst1.8 {q4}, [r1]!
vst1.8 {q2}, [r1]!
b Lcbc_dec_done
b .Lcbc_dec_done
.align 4
Lcbc_dec_four:
.Lcbc_dec_four:
sub r0, r0, #0x40
bl _bsaes_decrypt8
vldmia r9, {q14} @ reload IV
@ -1295,9 +1278,9 @@ Lcbc_dec_four:
vst1.8 {q0,q1}, [r1]! @ write output
vst1.8 {q6}, [r1]!
vst1.8 {q4}, [r1]!
b Lcbc_dec_done
b .Lcbc_dec_done
.align 4
Lcbc_dec_three:
.Lcbc_dec_three:
sub r0, r0, #0x30
bl _bsaes_decrypt8
vldmia r9, {q14} @ reload IV
@ -1308,9 +1291,9 @@ Lcbc_dec_three:
veor q6, q6, q9
vst1.8 {q0,q1}, [r1]! @ write output
vst1.8 {q6}, [r1]!
b Lcbc_dec_done
b .Lcbc_dec_done
.align 4
Lcbc_dec_two:
.Lcbc_dec_two:
sub r0, r0, #0x20
bl _bsaes_decrypt8
vldmia r9, {q14} @ reload IV
@ -1319,9 +1302,9 @@ Lcbc_dec_two:
vld1.8 {q15}, [r0]! @ reload input
veor q1, q1, q8
vst1.8 {q0,q1}, [r1]! @ write output
b Lcbc_dec_done
b .Lcbc_dec_done
.align 4
Lcbc_dec_one:
.Lcbc_dec_one:
sub r0, r0, #0x10
bl _bsaes_decrypt8
vldmia r9, {q14} @ reload IV
@ -1329,14 +1312,14 @@ Lcbc_dec_one:
veor q0, q0, q14 @ ^= IV
vst1.8 {q0}, [r1]! @ write output
Lcbc_dec_done:
.Lcbc_dec_done:
#ifndef BSAES_ASM_EXTENDED_KEY
vmov.i32 q0, #0
vmov.i32 q1, #0
Lcbc_dec_bzero:@ wipe key schedule [if any]
.Lcbc_dec_bzero:@ wipe key schedule [if any]
vstmia sp!, {q0,q1}
cmp sp, r9
bne Lcbc_dec_bzero
bne .Lcbc_dec_bzero
#endif
mov sp, r9
@ -1344,14 +1327,12 @@ Lcbc_dec_bzero:@ wipe key schedule [if any]
vst1.8 {q15}, [r8] @ return IV
VFP_ABI_POP
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc}
.globl _bsaes_ctr32_encrypt_blocks
.private_extern _bsaes_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _bsaes_ctr32_encrypt_blocks
#endif
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
.globl bsaes_ctr32_encrypt_blocks
.hidden bsaes_ctr32_encrypt_blocks
.type bsaes_ctr32_encrypt_blocks,%function
.align 5
_bsaes_ctr32_encrypt_blocks:
bsaes_ctr32_encrypt_blocks:
@ In OpenSSL, short inputs fall back to aes_nohw_* here. We patch this
@ out to retain a constant-time implementation.
mov ip, sp
@ -1377,10 +1358,10 @@ _bsaes_ctr32_encrypt_blocks:
vld1.8 {q0}, [r8] @ load counter
#ifdef __APPLE__
mov r8, #:lower16:(LREVM0SR-LM0)
mov r8, #:lower16:(.LREVM0SR-.LM0)
add r8, r6, r8
#else
add r8, r6, #LREVM0SR-LM0 @ borrow r8
add r8, r6, #.LREVM0SR-.LM0 @ borrow r8
#endif
vldmia sp, {q4} @ load round0 key
#else
@ -1400,7 +1381,7 @@ _bsaes_ctr32_encrypt_blocks:
.align 2
add r12, r3, #248
vld1.8 {q0}, [r8] @ load counter
adrl r8, LREVM0SR @ borrow r8
adrl r8, .LREVM0SR @ borrow r8
vldmia r12, {q4} @ load round0 key
sub sp, #0x10 @ place for adjusted round0 key
#endif
@ -1412,10 +1393,10 @@ _bsaes_ctr32_encrypt_blocks:
vrev32.8 q4,q4
vadd.u32 q9,q8,q8 @ compose 2<<96
vstmia sp, {q4} @ save adjusted round0 key
b Lctr_enc_loop
b .Lctr_enc_loop
.align 4
Lctr_enc_loop:
.Lctr_enc_loop:
vadd.u32 q10, q8, q9 @ compose 3<<96
vadd.u32 q1, q0, q8 @ +1
vadd.u32 q2, q0, q9 @ +2
@ -1435,20 +1416,20 @@ Lctr_enc_loop:
#else
add r4, r3, #264
#endif
vldmia r8, {q8} @ LREVM0SR
vldmia r8, {q8} @ .LREVM0SR
mov r5, r10 @ pass rounds
vstmia r9, {q10} @ save next counter
#ifdef __APPLE__
mov r6, #:lower16:(LREVM0SR-LSR)
mov r6, #:lower16:(.LREVM0SR-.LSR)
sub r6, r8, r6
#else
sub r6, r8, #LREVM0SR-LSR @ pass constants
sub r6, r8, #.LREVM0SR-.LSR @ pass constants
#endif
bl _bsaes_encrypt8_alt
subs r2, r2, #8
blo Lctr_enc_loop_done
blo .Lctr_enc_loop_done
vld1.8 {q8,q9}, [r0]! @ load input
vld1.8 {q10,q11}, [r0]!
@ -1475,51 +1456,51 @@ Lctr_enc_loop:
vst1.8 {q5}, [r1]!
vldmia r9, {q0} @ load counter
bne Lctr_enc_loop
b Lctr_enc_done
bne .Lctr_enc_loop
b .Lctr_enc_done
.align 4
Lctr_enc_loop_done:
.Lctr_enc_loop_done:
add r2, r2, #8
vld1.8 {q8}, [r0]! @ load input
veor q0, q8
vst1.8 {q0}, [r1]! @ write output
cmp r2, #2
blo Lctr_enc_done
blo .Lctr_enc_done
vld1.8 {q9}, [r0]!
veor q1, q9
vst1.8 {q1}, [r1]!
beq Lctr_enc_done
beq .Lctr_enc_done
vld1.8 {q10}, [r0]!
veor q4, q10
vst1.8 {q4}, [r1]!
cmp r2, #4
blo Lctr_enc_done
blo .Lctr_enc_done
vld1.8 {q11}, [r0]!
veor q6, q11
vst1.8 {q6}, [r1]!
beq Lctr_enc_done
beq .Lctr_enc_done
vld1.8 {q12}, [r0]!
veor q3, q12
vst1.8 {q3}, [r1]!
cmp r2, #6
blo Lctr_enc_done
blo .Lctr_enc_done
vld1.8 {q13}, [r0]!
veor q7, q13
vst1.8 {q7}, [r1]!
beq Lctr_enc_done
beq .Lctr_enc_done
vld1.8 {q14}, [r0]
veor q2, q14
vst1.8 {q2}, [r1]!
Lctr_enc_done:
.Lctr_enc_done:
vmov.i32 q0, #0
vmov.i32 q1, #0
#ifndef BSAES_ASM_EXTENDED_KEY
Lctr_enc_bzero:@ wipe key schedule [if any]
.Lctr_enc_bzero:@ wipe key schedule [if any]
vstmia sp!, {q0,q1}
cmp sp, r9
bne Lctr_enc_bzero
bne .Lctr_enc_bzero
#else
vstmia sp, {q0,q1}
#endif
@ -1531,6 +1512,6 @@ Lctr_enc_bzero:@ wipe key schedule [if any]
@ OpenSSL contains aes_nohw_* fallback code here. We patch this
@ out to retain a constant-time implementation.
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,23 +1,15 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
@ -31,16 +23,14 @@
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl _gcm_init_neon
.private_extern _gcm_init_neon
#ifdef __thumb2__
.thumb_func _gcm_init_neon
#endif
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
_gcm_init_neon:
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
@ -56,15 +46,13 @@ _gcm_init_neon:
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
#ifdef __thumb2__
.thumb_func _gcm_gmult_neon
#endif
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
_gcm_gmult_neon:
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
@ -76,16 +64,14 @@ _gcm_gmult_neon:
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b Lgmult_neon
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
#ifdef __thumb2__
.thumb_func _gcm_ghash_neon
#endif
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
_gcm_ghash_neon:
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
@ -97,14 +83,14 @@ _gcm_ghash_neon:
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
Loop_neon:
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
Lgmult_neon:
.Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
@ -240,7 +226,7 @@ Lgmult_neon:
veor q0,q0,q10 @
subs r3,#16
bne Loop_neon
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
@ -250,9 +236,9 @@ Lgmult_neon:
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,30 +1,22 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.fpu neon
.code 32
#undef __thumb2__
.globl _gcm_init_v8
.private_extern _gcm_init_v8
#ifdef __thumb2__
.thumb_func _gcm_init_v8
#endif
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
_gcm_init_v8:
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
@ -67,17 +59,15 @@ _gcm_init_v8:
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
bx lr
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
#ifdef __thumb2__
.thumb_func _gcm_gmult_v8
#endif
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
_gcm_gmult_v8:
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
@ -114,14 +104,13 @@ _gcm_gmult_v8:
vst1.64 {q0},[r0] @ write out Xi
bx lr
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
#ifdef __thumb2__
.thumb_func _gcm_ghash_v8
#endif
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
_gcm_ghash_v8:
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ -151,7 +140,7 @@ _gcm_ghash_v8:
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo Lodd_tail_v8 @ r3 was less than 32
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
@ -161,10 +150,10 @@ _gcm_ghash_v8:
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b Loop_mod2x_v8
b .Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
@ -208,14 +197,14 @@ Loop_mod2x_v8:
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq Ldone_v8 @ is r3 zero?
Lodd_tail_v8:
beq .Ldone_v8 @ is r3 zero?
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
@ -240,7 +229,7 @@ Lodd_tail_v8:
veor q10,q10,q2
veor q0,q0,q10
Ldone_v8:
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
@ -249,8 +238,9 @@ Ldone_v8:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,17 +1,9 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>
.text
@ -22,39 +14,24 @@
.code 32
#endif
.globl _sha1_block_data_order
.private_extern _sha1_block_data_order
#ifdef __thumb2__
.thumb_func _sha1_block_data_order
#endif
.globl sha1_block_data_order_nohw
.hidden sha1_block_data_order_nohw
.type sha1_block_data_order_nohw,%function
.align 5
_sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
Lsha1_block:
adr r3,Lsha1_block
ldr r12,LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
ldr r12,[r12]
#endif
tst r12,#ARMV8_SHA1
bne LARMv8
tst r12,#ARMV7_NEON
bne LNEON
#endif
sha1_block_data_order_nohw:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}
Lloop:
ldr r8,LK_00_19
.Lloop:
ldr r8,.LK_00_19
mov r14,sp
sub sp,sp,#15*4
mov r5,r5,ror#30
mov r6,r6,ror#30
mov r7,r7,ror#30 @ [6]
L_00_15:
#if __ARM_ARCH__<7
.L_00_15:
#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@ -79,7 +56,7 @@ L_00_15:
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r7,r7,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@ -104,7 +81,7 @@ L_00_15:
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r6,r6,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@ -129,7 +106,7 @@ L_00_15:
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r5,r5,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@ -154,7 +131,7 @@ L_00_15:
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r4,r4,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@ -185,9 +162,9 @@ L_00_15:
#else
teq r14,sp
#endif
bne L_00_15 @ [((11+4)*5+2)*3]
bne .L_00_15 @ [((11+4)*5+2)*3]
sub sp,sp,#25*4
#if __ARM_ARCH__<7
#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@ -281,9 +258,9 @@ L_00_15:
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
add r3,r3,r10 @ E+=F_00_19(B,C,D)
ldr r8,LK_20_39 @ [+15+16*4]
ldr r8,.LK_20_39 @ [+15+16*4]
cmn sp,#0 @ [+3], clear carry to denote 20_39
L_20_39_or_60_79:
.L_20_39_or_60_79:
ldr r9,[r14,#15*4]
ldr r10,[r14,#13*4]
ldr r11,[r14,#7*4]
@ -370,12 +347,12 @@ L_20_39_or_60_79:
#else
teq r14,sp @ preserve carry
#endif
bne L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs L_done @ [+((12+3)*5+2)*4], spare 300 bytes
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
ldr r8,LK_40_59
ldr r8,.LK_40_59
sub sp,sp,#20*4 @ [+2]
L_40_59:
.L_40_59:
ldr r9,[r14,#15*4]
ldr r10,[r14,#13*4]
ldr r11,[r14,#7*4]
@ -467,13 +444,13 @@ L_40_59:
#else
teq r14,sp
#endif
bne L_40_59 @ [+((12+5)*5+2)*4]
bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr r8,LK_60_79
ldr r8,.LK_60_79
sub sp,sp,#20*4
cmp sp,#0 @ set carry to denote 60_79
b L_20_39_or_60_79 @ [+4], spare 300 bytes
L_done:
b .L_20_39_or_60_79 @ [+4], spare 300 bytes
.L_done:
add sp,sp,#80*4 @ "deallocate" stack frame
ldmia r0,{r8,r9,r10,r11,r12}
add r3,r8,r3
@ -483,9 +460,9 @@ L_done:
add r7,r12,r7,ror#2
stmia r0,{r3,r4,r5,r6,r7}
teq r1,r2
bne Lloop @ [+18], total 1307
bne .Lloop @ [+18], total 1307
#if __ARM_ARCH__>=5
#if __ARM_ARCH>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
@ -493,37 +470,32 @@ L_done:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw
.align 5
LK_00_19:.word 0x5a827999
LK_20_39:.word 0x6ed9eba1
LK_40_59:.word 0x8f1bbcdc
LK_60_79:.word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lsha1_block
#endif
.LK_00_19:.word 0x5a827999
.LK_20_39:.word 0x6ed9eba1
.LK_40_59:.word 0x8f1bbcdc
.LK_60_79:.word 0xca62c1d6
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 5
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
#ifdef __thumb2__
.thumb_func sha1_block_data_order_neon
#endif
.globl sha1_block_data_order_neon
.hidden sha1_block_data_order_neon
.type sha1_block_data_order_neon,%function
.align 4
sha1_block_data_order_neon:
LNEON:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
@ dmb @ errata #451034 on early Cortex A8
@ vstmdb sp!,{d8-d15} @ ABI specification says so
mov r14,sp
sub r12,sp,#64
adr r8,LK_00_19
adr r8,.LK_00_19
bic r12,r12,#15 @ align for 128-bit stores
ldmia r0,{r3,r4,r5,r6,r7} @ load context
@ -545,7 +517,7 @@ LNEON:
vst1.32 {q10},[r12,:128]!
ldr r9,[sp] @ big RAW stall
Loop_neon:
.Loop_neon:
vext.8 q8,q0,q1,#8
bic r10,r6,r4
add r7,r7,r9
@ -1358,11 +1330,11 @@ Loop_neon:
stmia r0,{r3,r4,r5,r6,r7}
itt ne
addne r12,sp,#3*16
bne Loop_neon
bne .Loop_neon
@ vldmia sp!,{d8-d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7
@ -1372,16 +1344,15 @@ Loop_neon:
# define INST(a,b,c,d) .byte a,b,c,d|0x10
# endif
#ifdef __thumb2__
.thumb_func sha1_block_data_order_armv8
#endif
.globl sha1_block_data_order_hw
.hidden sha1_block_data_order_hw
.type sha1_block_data_order_hw,%function
.align 5
sha1_block_data_order_armv8:
LARMv8:
sha1_block_data_order_hw:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
veor q1,q1,q1
adr r3,LK_00_19
adr r3,.LK_00_19
vld1.32 {q0},[r0]!
vld1.32 {d2[0]},[r0]
sub r0,r0,#16
@ -1390,7 +1361,7 @@ LARMv8:
vld1.32 {d20[],d21[]},[r3,:32]!
vld1.32 {d22[],d23[]},[r3,:32]
Loop_v8:
.Loop_v8:
vld1.8 {q4,q5},[r1]!
vld1.8 {q6,q7},[r1]!
vrev32.8 q4,q4
@ -1498,21 +1469,13 @@ Loop_v8:
vadd.i32 q1,q1,q2
vadd.i32 q0,q0,q14
bne Loop_v8
bne .Loop_v8
vst1.32 {q0},[r0]!
vst1.32 {d2[0]},[r0]
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
bx lr @ bx lr
.size sha1_block_data_order_hw,.-sha1_block_data_order_hw
#endif
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,17 +1,9 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ -60,14 +52,14 @@
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_ARCH __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch armv7-a
.text
#if defined(__thumb2__)
@ -77,7 +69,7 @@
.code 32
#endif
.type K256,%object
.align 5
K256:
.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -96,51 +88,28 @@ K256:
.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lsha256_block_data_order
#endif
.align 5
.globl _sha256_block_data_order
.private_extern _sha256_block_data_order
#ifdef __thumb2__
.thumb_func _sha256_block_data_order
#endif
_sha256_block_data_order:
Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ _sha256_block_data_order
#else
adr r3,Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
ldr r12,[r12]
#endif
tst r12,#ARMV8_SHA256
bne LARMv8
tst r12,#ARMV7_NEON
bne LNEON
#endif
.globl sha256_block_data_order_nohw
.hidden sha256_block_data_order_nohw
.type sha256_block_data_order_nohw,%function
sha256_block_data_order_nohw:
add r2,r1,r2,lsl#6 @ len to point at the end of inp
stmdb sp!,{r0,r1,r2,r4-r11,lr}
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
sub r14,r3,#256+32 @ K256
adr r14,K256
sub sp,sp,#16*4 @ alloca(X[16])
Loop:
# if __ARM_ARCH__>=7
.Loop:
# if __ARM_ARCH>=7
ldr r2,[r1],#4
# else
ldrb r2,[r1,#3]
# endif
eor r3,r5,r6 @ magic
eor r12,r12,r12
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 0
# if 0==15
str r1,[sp,#17*4] @ make room for r1
@ -181,7 +150,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -198,7 +167,7 @@ Loop:
eor r3,r3,r5 @ Maj(a,b,c)
add r11,r11,r0,ror#2 @ h+=Sigma0(a)
@ add r11,r11,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 1
# if 1==15
str r1,[sp,#17*4] @ make room for r1
@ -239,7 +208,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -256,7 +225,7 @@ Loop:
eor r12,r12,r4 @ Maj(a,b,c)
add r10,r10,r0,ror#2 @ h+=Sigma0(a)
@ add r10,r10,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 2
# if 2==15
str r1,[sp,#17*4] @ make room for r1
@ -297,7 +266,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -314,7 +283,7 @@ Loop:
eor r3,r3,r11 @ Maj(a,b,c)
add r9,r9,r0,ror#2 @ h+=Sigma0(a)
@ add r9,r9,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 3
# if 3==15
str r1,[sp,#17*4] @ make room for r1
@ -355,7 +324,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -372,7 +341,7 @@ Loop:
eor r12,r12,r10 @ Maj(a,b,c)
add r8,r8,r0,ror#2 @ h+=Sigma0(a)
@ add r8,r8,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 4
# if 4==15
str r1,[sp,#17*4] @ make room for r1
@ -413,7 +382,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -430,7 +399,7 @@ Loop:
eor r3,r3,r9 @ Maj(a,b,c)
add r7,r7,r0,ror#2 @ h+=Sigma0(a)
@ add r7,r7,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 5
# if 5==15
str r1,[sp,#17*4] @ make room for r1
@ -471,7 +440,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -488,7 +457,7 @@ Loop:
eor r12,r12,r8 @ Maj(a,b,c)
add r6,r6,r0,ror#2 @ h+=Sigma0(a)
@ add r6,r6,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 6
# if 6==15
str r1,[sp,#17*4] @ make room for r1
@ -529,7 +498,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -546,7 +515,7 @@ Loop:
eor r3,r3,r7 @ Maj(a,b,c)
add r5,r5,r0,ror#2 @ h+=Sigma0(a)
@ add r5,r5,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 7
# if 7==15
str r1,[sp,#17*4] @ make room for r1
@ -587,7 +556,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -604,7 +573,7 @@ Loop:
eor r12,r12,r6 @ Maj(a,b,c)
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
@ add r4,r4,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 8
# if 8==15
str r1,[sp,#17*4] @ make room for r1
@ -645,7 +614,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -662,7 +631,7 @@ Loop:
eor r3,r3,r5 @ Maj(a,b,c)
add r11,r11,r0,ror#2 @ h+=Sigma0(a)
@ add r11,r11,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 9
# if 9==15
str r1,[sp,#17*4] @ make room for r1
@ -703,7 +672,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -720,7 +689,7 @@ Loop:
eor r12,r12,r4 @ Maj(a,b,c)
add r10,r10,r0,ror#2 @ h+=Sigma0(a)
@ add r10,r10,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 10
# if 10==15
str r1,[sp,#17*4] @ make room for r1
@ -761,7 +730,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -778,7 +747,7 @@ Loop:
eor r3,r3,r11 @ Maj(a,b,c)
add r9,r9,r0,ror#2 @ h+=Sigma0(a)
@ add r9,r9,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 11
# if 11==15
str r1,[sp,#17*4] @ make room for r1
@ -819,7 +788,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -836,7 +805,7 @@ Loop:
eor r12,r12,r10 @ Maj(a,b,c)
add r8,r8,r0,ror#2 @ h+=Sigma0(a)
@ add r8,r8,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 12
# if 12==15
str r1,[sp,#17*4] @ make room for r1
@ -877,7 +846,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -894,7 +863,7 @@ Loop:
eor r3,r3,r9 @ Maj(a,b,c)
add r7,r7,r0,ror#2 @ h+=Sigma0(a)
@ add r7,r7,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 13
# if 13==15
str r1,[sp,#17*4] @ make room for r1
@ -935,7 +904,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -952,7 +921,7 @@ Loop:
eor r12,r12,r8 @ Maj(a,b,c)
add r6,r6,r0,ror#2 @ h+=Sigma0(a)
@ add r6,r6,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 14
# if 14==15
str r1,[sp,#17*4] @ make room for r1
@ -993,7 +962,7 @@ Loop:
cmp r12,#0xf2 @ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1010,7 +979,7 @@ Loop:
eor r3,r3,r7 @ Maj(a,b,c)
add r5,r5,r0,ror#2 @ h+=Sigma0(a)
@ add r5,r5,r3 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 15
# if 15==15
str r1,[sp,#17*4] @ make room for r1
@ -1051,7 +1020,7 @@ Loop:
cmp r3,#0xf2 @ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1068,7 +1037,7 @@ Loop:
eor r12,r12,r6 @ Maj(a,b,c)
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
@ add r4,r4,r12 @ h+=Maj(a,b,c)
Lrounds_16_xx:
.Lrounds_16_xx:
@ ldr r2,[sp,#1*4] @ 16
@ ldr r1,[sp,#14*4]
mov r0,r2,ror#7
@ -1101,7 +1070,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1150,7 +1119,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1199,7 +1168,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1248,7 +1217,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1297,7 +1266,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1346,7 +1315,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1395,7 +1364,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1444,7 +1413,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1493,7 +1462,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1542,7 +1511,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1591,7 +1560,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1640,7 +1609,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1689,7 +1658,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1738,7 +1707,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1787,7 +1756,7 @@ Lrounds_16_xx:
cmp r12,#0xf2 @ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1836,7 +1805,7 @@ Lrounds_16_xx:
cmp r3,#0xf2 @ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@ -1853,11 +1822,11 @@ Lrounds_16_xx:
eor r12,r12,r6 @ Maj(a,b,c)
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
@ add r4,r4,r12 @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
ite eq @ Thumb2 thing, sanity check in ARM
#endif
ldreq r3,[sp,#16*4] @ pull ctx
bne Lrounds_16_xx
bne .Lrounds_16_xx
add r4,r4,r12 @ h+=Maj(a,b,c) from the past
ldr r0,[r3,#0]
@ -1881,10 +1850,10 @@ Lrounds_16_xx:
stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
cmp r1,r12
sub r14,r14,#256 @ rewind Ktbl
bne Loop
bne .Loop
add sp,sp,#19*4 @ destroy frame
#if __ARM_ARCH__>=5
#if __ARM_ARCH>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ -1892,24 +1861,43 @@ Lrounds_16_xx:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl _sha256_block_data_order_neon
.private_extern _sha256_block_data_order_neon
#ifdef __thumb2__
.thumb_func _sha256_block_data_order_neon
.LK256_shortcut_neon:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(.LK256_add_neon+4)
#else
.word K256-(.LK256_add_neon+8)
#endif
.globl sha256_block_data_order_neon
.hidden sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
.align 5
.skip 16
_sha256_block_data_order_neon:
LNEON:
sha256_block_data_order_neon:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r11,sp,#16*4+16
adr r14,K256
@ K256 is just at the boundary of being easily referenced by an ADR from
@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
@ not fit. By moving code around, we could make it fit, but this is too
@ fragile. For simplicity, just load the offset from
@ .LK256_shortcut_neon.
@
@ TODO(davidben): adrl would avoid a load, but clang-assembler does not
@ support it. We might be able to emulate it with a macro, but Android's
@ did not work when I tried it.
@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
ldr r14,.LK256_shortcut_neon
.LK256_add_neon:
add r14,pc,r14
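@ Illustrative arithmetic for the shortcut above, assuming Arm (not Thumb) mode:
@ at this add, pc reads as .LK256_add_neon+8 and .LK256_shortcut_neon holds
@ K256-(.LK256_add_neon+8), so the add leaves r14 pointing exactly at K256.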
bic r11,r11,#15 @ align for 128-bit stores
mov r12,sp
mov sp,r11 @ alloca
@ -1946,10 +1934,10 @@ LNEON:
ldr r2,[sp,#0]
eor r12,r12,r12
eor r3,r5,r6
b L_00_48
b .L_00_48
.align 4
L_00_48:
.L_00_48:
vext.8 q8,q0,q1,#4
add r11,r11,r2
eor r2,r9,r10
@ -2345,7 +2333,7 @@ L_00_48:
teq r2,#0 @ check for K256 terminator
ldr r2,[sp,#0]
sub r1,r1,#64
bne L_00_48
bne .L_00_48
ldr r1,[sp,#68]
ldr r0,[sp,#72]
@ -2678,10 +2666,10 @@ L_00_48:
ldreq sp,[sp,#76] @ restore original sp
itt ne
eorne r3,r5,r6
bne L_00_48
bne .L_00_48
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ -2691,19 +2679,32 @@ L_00_48:
# define INST(a,b,c,d) .byte a,b,c,d
# endif
#ifdef __thumb2__
.thumb_func sha256_block_data_order_armv8
.LK256_shortcut_hw:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(.LK256_add_hw+4)
#else
.word K256-(.LK256_add_hw+8)
#endif
.globl sha256_block_data_order_hw
.hidden sha256_block_data_order_hw
.type sha256_block_data_order_hw,%function
.align 5
sha256_block_data_order_armv8:
LARMv8:
sha256_block_data_order_hw:
@ K256 is too far to reference from one ADR command in Thumb mode. In
@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
@ boundary. For simplicity, just load the offset from .LK256_shortcut_hw.
ldr r3,.LK256_shortcut_hw
.LK256_add_hw:
add r3,pc,r3
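@ Illustrative arithmetic for the Thumb case: at this add, pc reads as
@ .LK256_add_hw+4 and .LK256_shortcut_hw holds K256-(.LK256_add_hw+4), so
@ r3 = K256; the Arm-mode word uses +8 because pc there is 8 bytes ahead.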
vld1.32 {q0,q1},[r0]
sub r3,r3,#256+32
add r2,r1,r2,lsl#6 @ len to point at the end of inp
b Loop_v8
b .Loop_v8
.align 4
Loop_v8:
.Loop_v8:
vld1.8 {q8,q9},[r1]!
vld1.8 {q10,q11},[r1]!
vld1.32 {q12},[r3]!
@ -2825,22 +2826,14 @@ Loop_v8:
vadd.i32 q0,q0,q14
vadd.i32 q1,q1,q15
it ne
bne Loop_v8
bne .Loop_v8
vst1.32 {q0,q1},[r0]
bx lr @ bx lr
.size sha256_block_data_order_hw,.-sha256_block_data_order_hw
#endif
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,17 +1,9 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ -71,7 +63,6 @@
# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
# define VFP_ABI_POP vldmia sp!,{d8-d15}
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
# define VFP_ABI_PUSH
# define VFP_ABI_POP
@ -79,7 +70,7 @@
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
#ifdef __ARMEL__
# define LO 0
@ -100,7 +91,7 @@
.code 32
#endif
.type K512,%object
.align 5
K512:
WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
@ -143,39 +134,15 @@ K512:
WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lsha512_block_data_order
.skip 32-4
#else
.skip 32
#endif
.globl _sha512_block_data_order
.private_extern _sha512_block_data_order
#ifdef __thumb2__
.thumb_func _sha512_block_data_order
#endif
_sha512_block_data_order:
Lsha512_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ _sha512_block_data_order
#else
adr r3,Lsha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
ldr r12,[r12]
#endif
tst r12,#ARMV7_NEON
bne LNEON
#endif
.globl sha512_block_data_order_nohw
.hidden sha512_block_data_order_nohw
.type sha512_block_data_order_nohw,%function
sha512_block_data_order_nohw:
add r2,r1,r2,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r14,r3,#672 @ K512
adr r14,K512
sub sp,sp,#9*8
ldr r7,[r0,#32+LO]
@ -184,7 +151,7 @@ Lsha512_block_data_order:
ldr r10, [r0,#48+HI]
ldr r11, [r0,#56+LO]
ldr r12, [r0,#56+HI]
Loop:
.Loop:
str r9, [sp,#48+0]
str r10, [sp,#48+4]
str r11, [sp,#56+0]
@ -208,8 +175,8 @@ Loop:
str r3,[sp,#40+0]
str r4,[sp,#40+4]
L00_15:
#if __ARM_ARCH__<7
.L00_15:
#if __ARM_ARCH<7
ldrb r3,[r1,#7]
ldrb r9, [r1,#6]
ldrb r10, [r1,#5]
@ -286,7 +253,7 @@ L00_15:
teq r9,#148
ldr r12,[sp,#16+0] @ c.lo
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
it eq @ Thumb2 thing, sanity check in ARM
#endif
orreq r14,r14,#1
@ -324,11 +291,11 @@ L00_15:
tst r14,#1
add r14,r14,#8
tst r14,#1
beq L00_15
beq .L00_15
ldr r9,[sp,#184+0]
ldr r10,[sp,#184+4]
bic r14,r14,#1
L16_79:
.L16_79:
@ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
@ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
@ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
@ -426,7 +393,7 @@ L16_79:
teq r9,#23
ldr r12,[sp,#16+0] @ c.lo
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
it eq @ Thumb2 thing, sanity check in ARM
#endif
orreq r14,r14,#1
@ -463,12 +430,12 @@ L16_79:
adc r6,r6,r4 @ h += T
tst r14,#1
add r14,r14,#8
#if __ARM_ARCH__>=7
#if __ARM_ARCH>=7
ittt eq @ Thumb2 thing, sanity check in ARM
#endif
ldreq r9,[sp,#184+0]
ldreq r10,[sp,#184+4]
beq L16_79
beq .L16_79
bic r14,r14,#1
ldr r3,[sp,#8+0]
@ -539,10 +506,10 @@ L16_79:
sub r14,r14,#640
teq r1,r2
bne Loop
bne .Loop
add sp,sp,#8*9 @ destroy frame
#if __ARM_ARCH__>=5
#if __ARM_ARCH>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
@ -550,25 +517,22 @@ L16_79:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl _sha512_block_data_order_neon
.private_extern _sha512_block_data_order_neon
#ifdef __thumb2__
.thumb_func _sha512_block_data_order_neon
#endif
.globl sha512_block_data_order_neon
.hidden sha512_block_data_order_neon
.type sha512_block_data_order_neon,%function
.align 4
_sha512_block_data_order_neon:
LNEON:
sha512_block_data_order_neon:
dmb @ errata #451034 on early Cortex A8
add r2,r1,r2,lsl#7 @ len to point at the end of inp
adr r3,K512
VFP_ABI_PUSH
vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
Loop_neon:
.Loop_neon:
vshr.u64 d24,d20,#14 @ 0
#if 0<16
vld1.64 {d0},[r1]! @ handles unaligned
@ -1162,7 +1126,7 @@ Loop_neon:
vadd.i64 d30,d27
@ vadd.i64 d16,d30
mov r12,#4
L16_79_neon:
.L16_79_neon:
subs r12,#1
vshr.u64 q12,q7,#19
vshr.u64 q13,q7,#61
@ -1868,7 +1832,7 @@ L16_79_neon:
vadd.i64 d20,d27
vadd.i64 d30,d27
@ vadd.i64 d16,d30
bne L16_79_neon
bne .L16_79_neon
vadd.i64 d16,d30 @ h+=Maj from the past
vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp
@ -1879,21 +1843,13 @@ L16_79_neon:
vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context
teq r1,r2
sub r3,#640 @ rewind K512
bne Loop_neon
bne .Loop_neon
VFP_ABI_POP
bx lr @ .word 0xe12fff1e
.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
#endif
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)


@ -1,21 +1,13 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
.syntax unified
.arch armv7-a
.fpu neon
#if defined(__thumb2__)
.thumb
@ -25,20 +17,20 @@
.text
.type _vpaes_consts,%object
.align 7 @ totally strategic alignment
_vpaes_consts:
Lk_mc_forward:@ mc_forward
.Lk_mc_forward:@ mc_forward
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605
Lk_mc_backward:@ mc_backward
.Lk_mc_backward:@ mc_backward
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F
Lk_sr:@ sr
.Lk_sr:@ sr
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
@ -47,42 +39,40 @@ Lk_sr:@ sr
@
@ "Hot" constants
@
Lk_inv:@ inv, inva
.Lk_inv:@ inv, inva
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
Lk_ipt:@ input transform (lo, hi)
.Lk_ipt:@ input transform (lo, hi)
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
Lk_sbo:@ sbou, sbot
.Lk_sbo:@ sbou, sbot
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
Lk_sb1:@ sb1u, sb1t
.Lk_sb1:@ sb1u, sb1t
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
Lk_sb2:@ sb2u, sb2t
.Lk_sb2:@ sb2u, sb2t
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,55,32,78,69,79,78,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 2
.size _vpaes_consts,.-_vpaes_consts
.align 6
@@
@@ _aes_preheat
@@
@@ Fills q9-q15 as specified below.
@@
#ifdef __thumb2__
.thumb_func _vpaes_preheat
#endif
.type _vpaes_preheat,%function
.align 4
_vpaes_preheat:
adr r10, Lk_inv
vmov.i8 q9, #0x0f @ Lk_s0F
vld1.64 {q10,q11}, [r10]! @ Lk_inv
add r10, r10, #64 @ Skip Lk_ipt, Lk_sbo
vld1.64 {q12,q13}, [r10]! @ Lk_sb1
vld1.64 {q14,q15}, [r10] @ Lk_sb2
adr r10, .Lk_inv
vmov.i8 q9, #0x0f @ .Lk_s0F
vld1.64 {q10,q11}, [r10]! @ .Lk_inv
add r10, r10, #64 @ Skip .Lk_ipt, .Lk_sbo
vld1.64 {q12,q13}, [r10]! @ .Lk_sb1
vld1.64 {q14,q15}, [r10] @ .Lk_sb2
bx lr
@@
@ -100,18 +90,16 @@ _vpaes_preheat:
@@ Preserves q6-q8 so you get some local vectors
@@
@@
#ifdef __thumb2__
.thumb_func _vpaes_encrypt_core
#endif
.type _vpaes_encrypt_core,%function
.align 4
_vpaes_encrypt_core:
mov r9, r2
ldr r8, [r2,#240] @ pull rounds
adr r11, Lk_ipt
adr r11, .Lk_ipt
@ vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
@ vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi
vld1.64 {q2, q3}, [r11]
adr r11, Lk_mc_forward+16
adr r11, .Lk_mc_forward+16
vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 # round0 key
vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0
@ -125,15 +113,15 @@ _vpaes_encrypt_core:
@ .Lenc_entry ends with a bnz instruction which is normally paired with
@ subs in .Lenc_loop.
tst r8, r8
b Lenc_entry
b .Lenc_entry
.align 4
Lenc_loop:
.Lenc_loop:
@ middle of middle round
add r10, r11, #0x40
vtbl.8 d8, {q13}, d4 @ vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
vtbl.8 d9, {q13}, d5
vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[]
vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
vtbl.8 d0, {q12}, d6 @ vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
vtbl.8 d1, {q12}, d7
veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
@ -142,7 +130,7 @@ Lenc_loop:
veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A
vtbl.8 d4, {q14}, d6 @ vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
vtbl.8 d5, {q14}, d7
vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[]
vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
vtbl.8 d6, {q0}, d2 @ vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
vtbl.8 d7, {q0}, d3
veor q2, q2, q5 @ vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
@ -159,7 +147,7 @@ Lenc_loop:
veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D
subs r8, r8, #1 @ nr--
Lenc_entry:
.Lenc_entry:
@ top of round
vand q1, q0, q9 @ vpand %xmm0, %xmm9, %xmm1 # 0 = k
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i
@ -179,19 +167,19 @@ Lenc_entry:
veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io
veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5
bne Lenc_loop
bne .Lenc_loop
@ middle of last round
add r10, r11, #0x80
adr r11, Lk_sbo
adr r11, .Lk_sbo
@ Read to q1 instead of q4, so the vtbl.8 instruction below does not
@ overlap table and destination registers.
vld1.64 {q1}, [r11]! @ vmovdqa -0x60(%r10), %xmm4 # 3 : sbou
vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot Lk_sbo+16
vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
vtbl.8 d8, {q1}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
vtbl.8 d9, {q1}, d5
vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[]
vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
@ Write to q2 instead of q0 below, to avoid overlapping table and
@ destination registers.
vtbl.8 d4, {q0}, d6 @ vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
@ -202,15 +190,13 @@ Lenc_entry:
vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0
vtbl.8 d1, {q2}, d3
bx lr
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
.globl _vpaes_encrypt
.private_extern _vpaes_encrypt
#ifdef __thumb2__
.thumb_func _vpaes_encrypt
#endif
.globl vpaes_encrypt
.hidden vpaes_encrypt
.type vpaes_encrypt,%function
.align 4
_vpaes_encrypt:
vpaes_encrypt:
@ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack
@ alignment.
stmdb sp!, {r7,r8,r9,r10,r11,lr}
@ -224,33 +210,33 @@ _vpaes_encrypt:
vldmia sp!, {d8,d9,d10,d11}
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
.size vpaes_encrypt,.-vpaes_encrypt
@
@ Decryption stuff
@
.type _vpaes_decrypt_consts,%object
.align 4
_vpaes_decrypt_consts:
Lk_dipt:@ decryption input transform
.Lk_dipt:@ decryption input transform
.quad 0x0F505B040B545F00, 0x154A411E114E451A
.quad 0x86E383E660056500, 0x12771772F491F194
Lk_dsbo:@ decryption sbox final output
.Lk_dsbo:@ decryption sbox final output
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
Lk_dsb9:@ decryption sbox output *9*u, *9*t
.Lk_dsb9:@ decryption sbox output *9*u, *9*t
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
Lk_dsbd:@ decryption sbox output *D*u, *D*t
.Lk_dsbd:@ decryption sbox output *D*u, *D*t
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
Lk_dsbb:@ decryption sbox output *B*u, *B*t
.Lk_dsbb:@ decryption sbox output *B*u, *B*t
.quad 0xD022649296B44200, 0x602646F6B0F2D404
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
Lk_dsbe:@ decryption sbox output *E*u, *E*t
.Lk_dsbe:@ decryption sbox output *E*u, *E*t
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
.size _vpaes_decrypt_consts,.-_vpaes_decrypt_consts
@@
@@ Decryption core
@ -259,9 +245,7 @@ Lk_dsbe:@ decryption sbox output *E*u, *E*t
@@ the values from _vpaes_preheat. q9-q11 must still be set from
@@ _vpaes_preheat.
@@
#ifdef __thumb2__
.thumb_func _vpaes_decrypt_core
#endif
.type _vpaes_decrypt_core,%function
.align 4
_vpaes_decrypt_core:
mov r9, r2
@ -275,22 +259,22 @@ _vpaes_decrypt_core:
@ q12-q15, registers normally use for preloaded constants. This is fine
@ because decryption doesn't use those constants. The values are
@ constant, so this does not interfere with potential 2x optimizations.
adr r7, Lk_dipt
adr r7, .Lk_dipt
vld1.64 {q12,q13}, [r7] @ vmovdqa Lk_dipt(%rip), %xmm2 # iptlo
vld1.64 {q12,q13}, [r7] @ vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
lsl r11, r8, #4 @ mov %rax, %r11; shl $4, %r11
eor r11, r11, #0x30 @ xor $0x30, %r11
adr r10, Lk_sr
adr r10, .Lk_sr
and r11, r11, #0x30 @ and $0x30, %r11
add r11, r11, r10
adr r10, Lk_mc_forward+48
adr r10, .Lk_mc_forward+48
vld1.64 {q4}, [r9]! @ vmovdqu (%r9), %xmm4 # round0 key
vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0
vtbl.8 d4, {q12}, d2 @ vpshufb %xmm1, %xmm2, %xmm2
vtbl.8 d5, {q12}, d3
vld1.64 {q5}, [r10] @ vmovdqa Lk_mc_forward+48(%rip), %xmm5
vld1.64 {q5}, [r10] @ vmovdqa .Lk_mc_forward+48(%rip), %xmm5
@ vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi
vtbl.8 d0, {q13}, d0 @ vpshufb %xmm0, %xmm1, %xmm0
vtbl.8 d1, {q13}, d1
@ -300,17 +284,17 @@ _vpaes_decrypt_core:
@ .Ldec_entry ends with a bnz instruction which is normally paired with
@ subs in .Ldec_loop.
tst r8, r8
b Ldec_entry
b .Ldec_entry
.align 4
Ldec_loop:
.Ldec_loop:
@
@ Inverse mix columns
@
@ We load .Lk_dsb* into q12-q15 on-demand. See the comment at the top of
@ the function.
adr r10, Lk_dsb9
adr r10, .Lk_dsb9
vld1.64 {q12,q13}, [r10]! @ vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
@ vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
@ Load sbd* ahead of time.
@ -378,7 +362,7 @@ Ldec_loop:
veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
subs r8, r8, #1 @ sub $1,%rax # nr--
Ldec_entry:
.Ldec_entry:
@ top of round
vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 # 0 = k
vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i
@ -398,11 +382,11 @@ Ldec_entry:
veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io
veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
vld1.64 {q0}, [r9]! @ vmovdqu (%r9), %xmm0
bne Ldec_loop
bne .Ldec_loop
@ middle of last round
adr r10, Lk_dsbo
adr r10, .Lk_dsbo
@ Write to q1 rather than q4 to avoid overlapping table and destination.
vld1.64 {q1}, [r10]! @ vmovdqa 0x60(%r10), %xmm4 # 3 : sbou
@ -412,7 +396,7 @@ Ldec_entry:
vld1.64 {q2}, [r10] @ vmovdqa 0x70(%r10), %xmm1 # 0 : sbot
vtbl.8 d2, {q2}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t
vtbl.8 d3, {q2}, d7
vld1.64 {q2}, [r11] @ vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160
vld1.64 {q2}, [r11] @ vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
veor q4, q4, q0 @ vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k
@ Write to q1 rather than q0 so the table and destination registers
@ below do not overlap.
@ -420,15 +404,13 @@ Ldec_entry:
vtbl.8 d0, {q1}, d4 @ vpshufb %xmm2, %xmm0, %xmm0
vtbl.8 d1, {q1}, d5
bx lr
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
.globl _vpaes_decrypt
.private_extern _vpaes_decrypt
#ifdef __thumb2__
.thumb_func _vpaes_decrypt
#endif
.globl vpaes_decrypt
.hidden vpaes_decrypt
.type vpaes_decrypt,%function
.align 4
_vpaes_decrypt:
vpaes_decrypt:
@ _vpaes_decrypt_core uses r7-r11.
stmdb sp!, {r7,r8,r9,r10,r11,lr}
@ _vpaes_decrypt_core uses q4-q5 (d8-d11), which are callee-saved.
@ -441,7 +423,7 @@ _vpaes_decrypt:
vldmia sp!, {d8,d9,d10,d11}
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
.size vpaes_decrypt,.-vpaes_decrypt
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ @@
@@ AES key schedule @@
@ -460,50 +442,46 @@ _vpaes_decrypt:
@
@ Key schedule constants
@
.type _vpaes_key_consts,%object
.align 4
_vpaes_key_consts:
Lk_dksd:@ decryption key schedule: invskew x*D
.Lk_dksd:@ decryption key schedule: invskew x*D
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
Lk_dksb:@ decryption key schedule: invskew x*B
.Lk_dksb:@ decryption key schedule: invskew x*B
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
Lk_dkse:@ decryption key schedule: invskew x*E + 0x63
.Lk_dkse:@ decryption key schedule: invskew x*E + 0x63
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
Lk_dks9:@ decryption key schedule: invskew x*9
.Lk_dks9:@ decryption key schedule: invskew x*9
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
Lk_rcon:@ rcon
.Lk_rcon:@ rcon
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
Lk_opt:@ output transform
.Lk_opt:@ output transform
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
Lk_deskew:@ deskew tables: inverts the sbox's "skew"
.Lk_deskew:@ deskew tables: inverts the sbox's "skew"
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
.size _vpaes_key_consts,.-_vpaes_key_consts
#ifdef __thumb2__
.thumb_func _vpaes_key_preheat
#endif
.type _vpaes_key_preheat,%function
.align 4
_vpaes_key_preheat:
adr r11, Lk_rcon
vmov.i8 q12, #0x5b @ Lk_s63
adr r10, Lk_inv @ Must be aligned to 8 mod 16.
vmov.i8 q9, #0x0f @ Lk_s0F
vld1.64 {q10,q11}, [r10] @ Lk_inv
vld1.64 {q8}, [r11] @ Lk_rcon
adr r11, .Lk_rcon
vmov.i8 q12, #0x5b @ .Lk_s63
adr r10, .Lk_inv @ Must be aligned to 8 mod 16.
vmov.i8 q9, #0x0f @ .Lk_s0F
vld1.64 {q10,q11}, [r10] @ .Lk_inv
vld1.64 {q8}, [r11] @ .Lk_rcon
bx lr
.size _vpaes_key_preheat,.-_vpaes_key_preheat
#ifdef __thumb2__
.thumb_func _vpaes_schedule_core
#endif
.type _vpaes_schedule_core,%function
.align 4
_vpaes_schedule_core:
@ We only need to save lr, but ARM requires an 8-byte stack alignment,
@ -512,7 +490,7 @@ _vpaes_schedule_core:
bl _vpaes_key_preheat @ load the tables
adr r11, Lk_ipt @ Must be aligned to 8 mod 16.
adr r11, .Lk_ipt @ Must be aligned to 8 mod 16.
vld1.64 {q0}, [r0]! @ vmovdqu (%rdi), %xmm0 # load key (unaligned)
@ input transform
@ -520,18 +498,18 @@ _vpaes_schedule_core:
@ overlap table and destination.
vmov q4, q0 @ vmovdqa %xmm0, %xmm3
bl _vpaes_schedule_transform
adr r10, Lk_sr @ Must be aligned to 8 mod 16.
adr r10, .Lk_sr @ Must be aligned to 8 mod 16.
vmov q7, q0 @ vmovdqa %xmm0, %xmm7
add r8, r8, r10
tst r3, r3
bne Lschedule_am_decrypting
bne .Lschedule_am_decrypting
@ encrypting, output zeroth round key after transform
vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx)
b Lschedule_go
b .Lschedule_go
Lschedule_am_decrypting:
.Lschedule_am_decrypting:
@ decrypting, output zeroth round key after shiftrows
vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1
vtbl.8 d6, {q4}, d2 @ vpshufb %xmm1, %xmm3, %xmm3
@ -539,10 +517,10 @@ Lschedule_am_decrypting:
vst1.64 {q3}, [r2] @ vmovdqu %xmm3, (%rdx)
eor r8, r8, #0x30 @ xor $0x30, %r8
Lschedule_go:
.Lschedule_go:
cmp r1, #192 @ cmp $192, %esi
bhi Lschedule_256
beq Lschedule_192
bhi .Lschedule_256
beq .Lschedule_192
@ 128: fall though
@@
@ -553,15 +531,15 @@ Lschedule_go:
@@ This schedule is really simple, because all its parts
@@ are accomplished by the subroutines.
@@
Lschedule_128:
.Lschedule_128:
mov r0, #10 @ mov $10, %esi
Loop_schedule_128:
.Loop_schedule_128:
bl _vpaes_schedule_round
subs r0, r0, #1 @ dec %esi
beq Lschedule_mangle_last
beq .Lschedule_mangle_last
bl _vpaes_schedule_mangle @ write output
b Loop_schedule_128
b .Loop_schedule_128
@@
@@ .aes_schedule_192
@ -579,7 +557,7 @@ Loop_schedule_128:
@@ keys.
@@
.align 4
Lschedule_192:
.Lschedule_192:
sub r0, r0, #8
vld1.64 {q0}, [r0] @ vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
bl _vpaes_schedule_transform @ input transform
@ -588,7 +566,7 @@ Lschedule_192:
@ vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros
mov r0, #4 @ mov $4, %esi
Loop_schedule_192:
.Loop_schedule_192:
bl _vpaes_schedule_round
vext.8 q0, q6, q0, #8 @ vpalignr $8,%xmm6,%xmm0,%xmm0
bl _vpaes_schedule_mangle @ save key n
@ -596,10 +574,10 @@ Loop_schedule_192:
bl _vpaes_schedule_mangle @ save key n+1
bl _vpaes_schedule_round
subs r0, r0, #1 @ dec %esi
beq Lschedule_mangle_last
beq .Lschedule_mangle_last
bl _vpaes_schedule_mangle @ save key n+2
bl _vpaes_schedule_192_smear
b Loop_schedule_192
b .Loop_schedule_192
@@
@@ .aes_schedule_256
@ -612,19 +590,19 @@ Loop_schedule_192:
@@ high side's, except no rcon and no rotation.
@@
.align 4
Lschedule_256:
.Lschedule_256:
vld1.64 {q0}, [r0] @ vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
bl _vpaes_schedule_transform @ input transform
mov r0, #7 @ mov $7, %esi
Loop_schedule_256:
.Loop_schedule_256:
bl _vpaes_schedule_mangle @ output low result
vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6
@ high round
bl _vpaes_schedule_round
subs r0, r0, #1 @ dec %esi
beq Lschedule_mangle_last
beq .Lschedule_mangle_last
bl _vpaes_schedule_mangle
@ low round. swap xmm7 and xmm6
@ -635,7 +613,7 @@ Loop_schedule_256:
bl _vpaes_schedule_low_round
vmov q7, q5 @ vmovdqa %xmm5, %xmm7
b Loop_schedule_256
b .Loop_schedule_256
@@
@@ .aes_schedule_mangle_last
@ -648,23 +626,23 @@ Loop_schedule_256:
@@ Always called right before return... jumps to cleanup and exits
@@
.align 4
Lschedule_mangle_last:
.Lschedule_mangle_last:
@ schedule last round key from xmm0
adr r11, Lk_deskew @ lea Lk_deskew(%rip),%r11 # prepare to deskew
adr r11, .Lk_deskew @ lea .Lk_deskew(%rip),%r11 # prepare to deskew
tst r3, r3
bne Lschedule_mangle_last_dec
bne .Lschedule_mangle_last_dec
@ encrypting
vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10),%xmm1
adr r11, Lk_opt @ lea Lk_opt(%rip), %r11 # prepare to output transform
adr r11, .Lk_opt @ lea .Lk_opt(%rip), %r11 # prepare to output transform
add r2, r2, #32 @ add $32, %rdx
vmov q2, q0
vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0 # output permute
vtbl.8 d1, {q2}, d3
Lschedule_mangle_last_dec:
.Lschedule_mangle_last_dec:
sub r2, r2, #16 @ add $-16, %rdx
veor q0, q0, q12 @ vpxor Lk_s63(%rip), %xmm0, %xmm0
veor q0, q0, q12 @ vpxor .Lk_s63(%rip), %xmm0, %xmm0
bl _vpaes_schedule_transform @ output transform
vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx) # save last key
@ -678,7 +656,7 @@ Lschedule_mangle_last_dec:
veor q6, q6, q6 @ vpxor %xmm6, %xmm6, %xmm6
veor q7, q7, q7 @ vpxor %xmm7, %xmm7, %xmm7
ldmia sp!, {r3,pc} @ return
.size _vpaes_schedule_core,.-_vpaes_schedule_core
@@
@@ .aes_schedule_192_smear
@ -693,9 +671,7 @@ Lschedule_mangle_last_dec:
@@ q6: b+c+d b+c 0 0
@@ q0: b+c+d b+c b a
@@
#ifdef __thumb2__
.thumb_func _vpaes_schedule_192_smear
#endif
.type _vpaes_schedule_192_smear,%function
.align 4
_vpaes_schedule_192_smear:
vmov.i8 q1, #0
@ -708,7 +684,7 @@ _vpaes_schedule_192_smear:
vmov q0, q6 @ vmovdqa %xmm6, %xmm0
vmov d12, d2 @ vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros
bx lr
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
@@
@@ .aes_schedule_round
@ -728,9 +704,7 @@ _vpaes_schedule_192_smear:
@@ Returns results in q7 = q0.
@@ Clobbers q1-q4, r11.
@@
#ifdef __thumb2__
.thumb_func _vpaes_schedule_round
#endif
.type _vpaes_schedule_round,%function
.align 4
_vpaes_schedule_round:
@ extract rcon from xmm8
@ -749,7 +723,7 @@ _vpaes_schedule_round:
_vpaes_schedule_low_round:
@ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12.
@ We pin other values in _vpaes_key_preheat, so load them now.
adr r11, Lk_sb1
adr r11, .Lk_sb1
vld1.64 {q14,q15}, [r11]
@ smear xmm7
@ -769,7 +743,7 @@ _vpaes_schedule_low_round:
veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
vtbl.8 d9, {q10}, d3
veor q7, q7, q12 @ vpxor Lk_s63(%rip), %xmm7, %xmm7
veor q7, q7, q12 @ vpxor .Lk_s63(%rip), %xmm7, %xmm7
vtbl.8 d6, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak
vtbl.8 d7, {q10}, d7
veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
@ -787,7 +761,7 @@ _vpaes_schedule_low_round:
veor q0, q1, q7 @ vpxor %xmm7, %xmm1, %xmm0
veor q7, q1, q7 @ vmovdqa %xmm0, %xmm7
bx lr
.size _vpaes_schedule_round,.-_vpaes_schedule_round
@@
@@ .aes_schedule_transform
@ -798,9 +772,7 @@ _vpaes_schedule_low_round:
@@ Output in q0
@@ Clobbers q1, q2, q14, q15
@@
#ifdef __thumb2__
.thumb_func _vpaes_schedule_transform
#endif
.type _vpaes_schedule_transform,%function
.align 4
_vpaes_schedule_transform:
vld1.64 {q14,q15}, [r11] @ vmovdqa (%r11), %xmm2 # lo
@ -813,7 +785,7 @@ _vpaes_schedule_transform:
vtbl.8 d1, {q15}, d1
veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0
bx lr
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
@@
@@ .aes_schedule_mangle
@ -838,20 +810,18 @@ _vpaes_schedule_transform:
@@ Preserves q0
@@ Clobbers q1-q5
@@
#ifdef __thumb2__
.thumb_func _vpaes_schedule_mangle
#endif
.type _vpaes_schedule_mangle,%function
.align 4
_vpaes_schedule_mangle:
tst r3, r3
vmov q4, q0 @ vmovdqa %xmm0, %xmm4 # save xmm0 for later
adr r11, Lk_mc_forward @ Must be aligned to 8 mod 16.
vld1.64 {q5}, [r11] @ vmovdqa Lk_mc_forward(%rip),%xmm5
bne Lschedule_mangle_dec
adr r11, .Lk_mc_forward @ Must be aligned to 8 mod 16.
vld1.64 {q5}, [r11] @ vmovdqa .Lk_mc_forward(%rip),%xmm5
bne .Lschedule_mangle_dec
@ encrypting
@ Write to q2 so we do not overlap table and destination below.
veor q2, q0, q12 @ vpxor Lk_s63(%rip), %xmm0, %xmm4
veor q2, q0, q12 @ vpxor .Lk_s63(%rip), %xmm0, %xmm4
add r2, r2, #16 @ add $16, %rdx
vtbl.8 d8, {q2}, d10 @ vpshufb %xmm5, %xmm4, %xmm4
vtbl.8 d9, {q2}, d11
@ -863,11 +833,11 @@ _vpaes_schedule_mangle:
vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1
veor q3, q3, q4 @ vpxor %xmm4, %xmm3, %xmm3
b Lschedule_mangle_both
b .Lschedule_mangle_both
.align 4
Lschedule_mangle_dec:
.Lschedule_mangle_dec:
@ inverse mix columns
adr r11, Lk_dksd @ lea Lk_dksd(%rip),%r11
adr r11, .Lk_dksd @ lea .Lk_dksd(%rip),%r11
vshr.u8 q1, q4, #4 @ vpsrlb $4, %xmm4, %xmm1 # 1 = hi
vand q4, q4, q9 @ vpand %xmm9, %xmm4, %xmm4 # 4 = lo
@ -921,7 +891,7 @@ Lschedule_mangle_dec:
sub r2, r2, #16 @ add $-16, %rdx
Lschedule_mangle_both:
.Lschedule_mangle_both:
@ Write to q2 so table and destination do not overlap.
vtbl.8 d4, {q3}, d2 @ vpshufb %xmm1, %xmm3, %xmm3
vtbl.8 d5, {q3}, d3
@ -929,15 +899,13 @@ Lschedule_mangle_both:
and r8, r8, #~(1<<6) @ and $0x30, %r8
vst1.64 {q2}, [r2] @ vmovdqu %xmm3, (%rdx)
bx lr
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
.globl _vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key
#ifdef __thumb2__
.thumb_func _vpaes_set_encrypt_key
#endif
.globl vpaes_set_encrypt_key
.hidden vpaes_set_encrypt_key
.type vpaes_set_encrypt_key,%function
.align 4
_vpaes_set_encrypt_key:
vpaes_set_encrypt_key:
stmdb sp!, {r7,r8,r9,r10,r11, lr}
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ -952,15 +920,13 @@ _vpaes_set_encrypt_key:
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
.globl _vpaes_set_decrypt_key
.private_extern _vpaes_set_decrypt_key
#ifdef __thumb2__
.thumb_func _vpaes_set_decrypt_key
#endif
.globl vpaes_set_decrypt_key
.hidden vpaes_set_decrypt_key
.type vpaes_set_decrypt_key,%function
.align 4
_vpaes_set_decrypt_key:
vpaes_set_decrypt_key:
stmdb sp!, {r7,r8,r9,r10,r11, lr}
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ -979,10 +945,10 @@ _vpaes_set_decrypt_key:
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
@ Additional constants for converting to bsaes.
.type _vpaes_convert_consts,%object
.align 4
_vpaes_convert_consts:
@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear
@ -1016,25 +982,23 @@ _vpaes_convert_consts:
@ table[1] |= skew(opt(i<<4)) << (i*8)
@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[0]))
@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[1]))
Lk_opt_then_skew:
.Lk_opt_then_skew:
.quad 0x9cb8436798bc4763, 0x6440bb9f6044bf9b
.quad 0x1f30062936192f00, 0xb49bad829db284ab
@ .Lk_decrypt_transform is a permutation which performs an 8-bit left-rotation
@ followed by a byte-swap on each 32-bit word of a vector. E.g., 0x11223344
@ becomes 0x22334411 and then 0x11443322.
Lk_decrypt_transform:
.Lk_decrypt_transform:
.quad 0x0704050603000102, 0x0f0c0d0e0b08090a
.size _vpaes_convert_consts,.-_vpaes_convert_consts
@ void vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes);
.globl _vpaes_encrypt_key_to_bsaes
.private_extern _vpaes_encrypt_key_to_bsaes
#ifdef __thumb2__
.thumb_func _vpaes_encrypt_key_to_bsaes
#endif
.globl vpaes_encrypt_key_to_bsaes
.hidden vpaes_encrypt_key_to_bsaes
.type vpaes_encrypt_key_to_bsaes,%function
.align 4
_vpaes_encrypt_key_to_bsaes:
vpaes_encrypt_key_to_bsaes:
stmdb sp!, {r11, lr}
@ See _vpaes_schedule_core for the key schedule logic. In particular,
@ -1050,13 +1014,13 @@ _vpaes_encrypt_key_to_bsaes:
@ cost of extra REV and VREV32 operations in little-endian ARM.
vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform
adr r2, Lk_mc_forward @ Must be aligned to 8 mod 16.
add r3, r2, 0x90 @ Lk_sr+0x10-Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
adr r2, .Lk_mc_forward @ Must be aligned to 8 mod 16.
add r3, r2, 0x90 @ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
vld1.64 {q12}, [r2]
vmov.i8 q10, #0x5b @ Lk_s63 from vpaes-x86_64
adr r11, Lk_opt @ Must be aligned to 8 mod 16.
vmov.i8 q11, #0x63 @ LK_s63 without Lk_ipt applied
vmov.i8 q10, #0x5b @ .Lk_s63 from vpaes-x86_64
adr r11, .Lk_opt @ Must be aligned to 8 mod 16.
vmov.i8 q11, #0x63 @ .LK_s63 without .Lk_ipt applied
@ vpaes stores one fewer round count than bsaes, but the number of keys
@ is the same.
@ -1074,7 +1038,7 @@ _vpaes_encrypt_key_to_bsaes:
@ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied,
@ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63,
@ multiplies by the circulant 0,1,1,1, then applies ShiftRows.
Loop_enc_key_to_bsaes:
.Loop_enc_key_to_bsaes:
vld1.64 {q0}, [r1]!
@ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle
@ -1089,7 +1053,7 @@ Loop_enc_key_to_bsaes:
@ Handle the last key differently.
subs r2, r2, #1
beq Loop_enc_key_to_bsaes_last
beq .Loop_enc_key_to_bsaes_last
@ Multiply by the circulant. This is its own inverse.
vtbl.8 d2, {q0}, d24
@ -1107,9 +1071,9 @@ Loop_enc_key_to_bsaes:
bl _vpaes_schedule_transform
vrev32.8 q0, q0
vst1.64 {q0}, [r0]!
b Loop_enc_key_to_bsaes
b .Loop_enc_key_to_bsaes
Loop_enc_key_to_bsaes_last:
.Loop_enc_key_to_bsaes_last:
@ The final key does not have a basis transform (note
@ .Lschedule_mangle_last inverts the original transform). It only XORs
@ 0x63 and applies ShiftRows. The latter was already inverted in the
@ -1125,16 +1089,14 @@ Loop_enc_key_to_bsaes_last:
veor q2, q2, q2
ldmia sp!, {r11, pc} @ return
.size vpaes_encrypt_key_to_bsaes,.-vpaes_encrypt_key_to_bsaes
@ void vpaes_decrypt_key_to_bsaes(AES_KEY *vpaes, const AES_KEY *bsaes);
.globl _vpaes_decrypt_key_to_bsaes
.private_extern _vpaes_decrypt_key_to_bsaes
#ifdef __thumb2__
.thumb_func _vpaes_decrypt_key_to_bsaes
#endif
.globl vpaes_decrypt_key_to_bsaes
.hidden vpaes_decrypt_key_to_bsaes
.type vpaes_decrypt_key_to_bsaes,%function
.align 4
_vpaes_decrypt_key_to_bsaes:
vpaes_decrypt_key_to_bsaes:
stmdb sp!, {r11, lr}
@ See _vpaes_schedule_core for the key schedule logic. Note vpaes
@ -1151,9 +1113,9 @@ _vpaes_decrypt_key_to_bsaes:
@ byteswapped, as a convenience for (unsupported) big-endian ARM, at the
@ cost of extra REV and VREV32 operations in little-endian ARM.
adr r2, Lk_decrypt_transform
adr r3, Lk_sr+0x30
adr r11, Lk_opt_then_skew @ Input to _vpaes_schedule_transform.
adr r2, .Lk_decrypt_transform
adr r3, .Lk_sr+0x30
adr r11, .Lk_opt_then_skew @ Input to _vpaes_schedule_transform.
vld1.64 {q12}, [r2] @ Reuse q12 from encryption.
vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform
@ -1173,7 +1135,7 @@ _vpaes_decrypt_key_to_bsaes:
@ See _vpaes_schedule_mangle for the transform on the middle keys. Note
@ it simultaneously inverts MixColumns and the S-box affine transform.
@ See .Lk_dksd through .Lk_dks9.
Loop_dec_key_to_bsaes:
.Loop_dec_key_to_bsaes:
vld1.64 {q0}, [r1]!
@ Invert the ShiftRows step (see .Lschedule_mangle_both). Note going
@ -1188,7 +1150,7 @@ Loop_dec_key_to_bsaes:
@ Handle the last key differently.
subs r2, r2, #1
beq Loop_dec_key_to_bsaes_last
beq .Loop_dec_key_to_bsaes_last
@ Undo the basis change and reapply the S-box affine transform.
bl _vpaes_schedule_transform
@ -1201,9 +1163,9 @@ Loop_dec_key_to_bsaes:
vtbl.8 d3, {q0}, d25
vst1.64 {q1}, [r0]!
b Loop_dec_key_to_bsaes
b .Loop_dec_key_to_bsaes
Loop_dec_key_to_bsaes_last:
.Loop_dec_key_to_bsaes_last:
@ The final key only inverts ShiftRows (already done in the loop). See
@ .Lschedule_am_decrypting. Its basis is not transformed.
vrev32.8 q0, q0
@ -1215,14 +1177,12 @@ Loop_dec_key_to_bsaes_last:
veor q2, q2, q2
ldmia sp!, {r11, pc} @ return
.globl _vpaes_ctr32_encrypt_blocks
.private_extern _vpaes_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _vpaes_ctr32_encrypt_blocks
#endif
.size vpaes_decrypt_key_to_bsaes,.-vpaes_decrypt_key_to_bsaes
.globl vpaes_ctr32_encrypt_blocks
.hidden vpaes_ctr32_encrypt_blocks
.type vpaes_ctr32_encrypt_blocks,%function
.align 4
_vpaes_ctr32_encrypt_blocks:
vpaes_ctr32_encrypt_blocks:
mov ip, sp
stmdb sp!, {r7,r8,r9,r10,r11, lr}
@ This function uses q4-q7 (d8-d15), which are callee-saved.
@ -1231,7 +1191,7 @@ _vpaes_ctr32_encrypt_blocks:
cmp r2, #0
@ r8 is passed on the stack.
ldr r8, [ip]
beq Lctr32_done
beq .Lctr32_done
@ _vpaes_encrypt_core expects the key in r2, so swap r2 and r3.
mov r9, r3
@ -1245,9 +1205,9 @@ _vpaes_ctr32_encrypt_blocks:
bl _vpaes_preheat
rev r7, r7 @ The counter is big-endian.
Lctr32_loop:
.Lctr32_loop:
vmov q0, q7
vld1.8 {q6}, [r0]! @ Load input ahead of time
vld1.8 {q6}, [r0]! @ .Load input ahead of time
bl _vpaes_encrypt_core
veor q0, q0, q6 @ XOR input and result
vst1.8 {q0}, [r1]!
@ -1256,10 +1216,10 @@ Lctr32_loop:
add r7, r7, #1
rev r9, r7
vmov.32 d15[1], r9
bne Lctr32_loop
bne .Lctr32_loop
Lctr32_done:
.Lctr32_done:
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return
#endif // !OPENSSL_NO_ASM
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
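The .Lk_decrypt_transform comment above already gives a worked example of the per-word permutation (0x11223344 -> rotate left 8 -> 0x22334411 -> byte swap -> 0x11443322). A minimal C sketch of that transform, for reference only (the function names are illustrative and not part of the generated file):

#include <stdint.h>

static uint32_t rotl32(uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }

static uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0x0000ff00u) | ((x << 8) & 0x00ff0000u) | (x << 24);
}

/* Per-word effect of .Lk_decrypt_transform: an 8-bit left rotation followed
 * by a byte swap, e.g. 0x11223344 -> 0x22334411 -> 0x11443322. */
static uint32_t decrypt_transform_word(uint32_t x) {
  return bswap32(rotl32(x, 8));
}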

View File

@ -0,0 +1,368 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
.syntax unified
.arch armv7-a
.fpu vfp
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
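@ (64 bytes of d8-d15 plus 52 bytes of r0-r11,lr were pushed above;
@ 64+52+28 = 144 is a multiple of 8, so sp is realigned for the call.)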
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls .Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
.Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne .Lstack_args_loop
.Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ .Load argv into lr.
cmp r3, #3
bhi .Larg_r3
beq .Larg_r2
cmp r3, #1
bhi .Larg_r1
beq .Larg_r0
b .Largs_done
.Larg_r3:
ldr r3, [lr, #12] @ argv[3]
.Larg_r2:
ldr r2, [lr, #8] @ argv[2]
.Larg_r1:
ldr r1, [lr, #4] @ argv[1]
.Larg_r0:
ldr r0, [lr] @ argv[0]
.Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_r0, %function
.globl abi_test_clobber_r0
.hidden abi_test_clobber_r0
.align 4
abi_test_clobber_r0:
mov r0, #0
bx lr
.size abi_test_clobber_r0,.-abi_test_clobber_r0
.type abi_test_clobber_r1, %function
.globl abi_test_clobber_r1
.hidden abi_test_clobber_r1
.align 4
abi_test_clobber_r1:
mov r1, #0
bx lr
.size abi_test_clobber_r1,.-abi_test_clobber_r1
.type abi_test_clobber_r2, %function
.globl abi_test_clobber_r2
.hidden abi_test_clobber_r2
.align 4
abi_test_clobber_r2:
mov r2, #0
bx lr
.size abi_test_clobber_r2,.-abi_test_clobber_r2
.type abi_test_clobber_r3, %function
.globl abi_test_clobber_r3
.hidden abi_test_clobber_r3
.align 4
abi_test_clobber_r3:
mov r3, #0
bx lr
.size abi_test_clobber_r3,.-abi_test_clobber_r3
.type abi_test_clobber_r4, %function
.globl abi_test_clobber_r4
.hidden abi_test_clobber_r4
.align 4
abi_test_clobber_r4:
mov r4, #0
bx lr
.size abi_test_clobber_r4,.-abi_test_clobber_r4
.type abi_test_clobber_r5, %function
.globl abi_test_clobber_r5
.hidden abi_test_clobber_r5
.align 4
abi_test_clobber_r5:
mov r5, #0
bx lr
.size abi_test_clobber_r5,.-abi_test_clobber_r5
.type abi_test_clobber_r6, %function
.globl abi_test_clobber_r6
.hidden abi_test_clobber_r6
.align 4
abi_test_clobber_r6:
mov r6, #0
bx lr
.size abi_test_clobber_r6,.-abi_test_clobber_r6
.type abi_test_clobber_r7, %function
.globl abi_test_clobber_r7
.hidden abi_test_clobber_r7
.align 4
abi_test_clobber_r7:
mov r7, #0
bx lr
.size abi_test_clobber_r7,.-abi_test_clobber_r7
.type abi_test_clobber_r8, %function
.globl abi_test_clobber_r8
.hidden abi_test_clobber_r8
.align 4
abi_test_clobber_r8:
mov r8, #0
bx lr
.size abi_test_clobber_r8,.-abi_test_clobber_r8
.type abi_test_clobber_r9, %function
.globl abi_test_clobber_r9
.hidden abi_test_clobber_r9
.align 4
abi_test_clobber_r9:
mov r9, #0
bx lr
.size abi_test_clobber_r9,.-abi_test_clobber_r9
.type abi_test_clobber_r10, %function
.globl abi_test_clobber_r10
.hidden abi_test_clobber_r10
.align 4
abi_test_clobber_r10:
mov r10, #0
bx lr
.size abi_test_clobber_r10,.-abi_test_clobber_r10
.type abi_test_clobber_r11, %function
.globl abi_test_clobber_r11
.hidden abi_test_clobber_r11
.align 4
abi_test_clobber_r11:
mov r11, #0
bx lr
.size abi_test_clobber_r11,.-abi_test_clobber_r11
.type abi_test_clobber_r12, %function
.globl abi_test_clobber_r12
.hidden abi_test_clobber_r12
.align 4
abi_test_clobber_r12:
mov r12, #0
bx lr
.size abi_test_clobber_r12,.-abi_test_clobber_r12
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
.size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
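A rough sketch of how a test harness might drive this trampoline. The caller_state layout below is inferred from the vldmia/ldmia order above (d8-d15, then r4-r11, in the ELF build); it is an assumption for illustration, not BoringSSL's actual CallerState definition, and only the two extern symbols are taken from this file:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical mirror of the state block the trampoline reads before the call
 * and writes back afterwards; the real type lives in BoringSSL's abi_test
 * harness and may differ. */
typedef struct {
  uint64_t d[8]; /* d8..d15 */
  uint32_t r[8]; /* r4..r11 */
} caller_state;

extern uint32_t abi_test_trampoline(void (*func)(void), caller_state *state,
                                    const uint32_t *argv, size_t argc,
                                    int unwind);
extern void abi_test_clobber_r4(void);

/* Sketch: seed the callee-saved registers with known values, call the function
 * under test through the trampoline, then check the values survived.  Since
 * abi_test_clobber_r4 zeroes r4, this particular check should report failure. */
int check_preserves_callee_saved(void) {
  caller_state before, after;
  for (int i = 0; i < 8; i++) {
    before.d[i] = 0x0101010101010101u * (uint64_t)(i + 1);
    before.r[i] = 0x11111111u * (uint32_t)(i + 1);
  }
  after = before;
  abi_test_trampoline(abi_test_clobber_r4, &after, NULL, 0, 0);
  return memcmp(&before, &after, sizeof(before)) == 0;
}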

View File

@ -0,0 +1,961 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
.globl ChaCha20_ctr32_nohw
.hidden ChaCha20_ctr32_nohw
.type ChaCha20_ctr32_nohw,@function
.align 16
ChaCha20_ctr32_nohw:
.L_ChaCha20_ctr32_nohw_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L000entry
.align 16
.L001outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L000entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L002loop
.align 16
.L002loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L002loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L003tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L001outer_loop
jmp .L004done
.L003tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L005tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L005tail_loop
.L004done:
addl $132,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32_nohw,.-.L_ChaCha20_ctr32_nohw_begin
.globl ChaCha20_ctr32_ssse3
.hidden ChaCha20_ctr32_ssse3
.type ChaCha20_ctr32_ssse3,@function
.align 16
ChaCha20_ctr32_ssse3:
.L_ChaCha20_ctr32_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
call .Lpic_point
.Lpic_point:
popl %eax
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0061x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
movdqu (%edx),%xmm7
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
paddd 48(%eax),%xmm0
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
psubd 64(%eax),%xmm0
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,64(%ebp)
movdqa %xmm1,80(%ebp)
movdqa %xmm2,96(%ebp)
movdqa %xmm3,112(%ebp)
movdqu 16(%edx),%xmm3
movdqa %xmm4,-64(%ebp)
movdqa %xmm5,-48(%ebp)
movdqa %xmm6,-32(%ebp)
movdqa %xmm7,-16(%ebp)
movdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,(%ebp)
movdqa %xmm1,16(%ebp)
movdqa %xmm2,32(%ebp)
movdqa %xmm3,48(%ebp)
movdqa %xmm4,-128(%ebp)
movdqa %xmm5,-112(%ebp)
movdqa %xmm6,-96(%ebp)
movdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L007outer_loop
.align 16
.L007outer_loop:
movdqa -112(%ebp),%xmm1
movdqa -96(%ebp),%xmm2
movdqa -80(%ebp),%xmm3
movdqa -48(%ebp),%xmm5
movdqa -32(%ebp),%xmm6
movdqa -16(%ebp),%xmm7
movdqa %xmm1,-112(%ebx)
movdqa %xmm2,-96(%ebx)
movdqa %xmm3,-80(%ebx)
movdqa %xmm5,-48(%ebx)
movdqa %xmm6,-32(%ebx)
movdqa %xmm7,-16(%ebx)
movdqa 32(%ebp),%xmm2
movdqa 48(%ebp),%xmm3
movdqa 64(%ebp),%xmm4
movdqa 80(%ebp),%xmm5
movdqa 96(%ebp),%xmm6
movdqa 112(%ebp),%xmm7
paddd 64(%eax),%xmm4
movdqa %xmm2,32(%ebx)
movdqa %xmm3,48(%ebx)
movdqa %xmm4,64(%ebx)
movdqa %xmm5,80(%ebx)
movdqa %xmm6,96(%ebx)
movdqa %xmm7,112(%ebx)
movdqa %xmm4,64(%ebp)
movdqa -128(%ebp),%xmm0
movdqa %xmm4,%xmm6
movdqa -64(%ebp),%xmm3
movdqa (%ebp),%xmm4
movdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 16
.L008loop:
paddd %xmm3,%xmm0
movdqa %xmm3,%xmm2
pxor %xmm0,%xmm6
pshufb (%eax),%xmm6
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -48(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 80(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,64(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-64(%ebx)
paddd %xmm7,%xmm5
movdqa 32(%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -32(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 96(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,80(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,16(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-48(%ebx)
paddd %xmm6,%xmm4
movdqa 48(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -16(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 112(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,96(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-32(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa -48(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,%xmm6
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
pshufb (%eax),%xmm6
movdqa %xmm3,-16(%ebx)
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -32(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 64(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,112(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,32(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-48(%ebx)
paddd %xmm7,%xmm5
movdqa (%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -16(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 80(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,64(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,48(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-32(%ebx)
paddd %xmm6,%xmm4
movdqa 16(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -64(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 96(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,80(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-16(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 64(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,96(%ebx)
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
por %xmm1,%xmm3
decl %edx
jnz .L008loop
movdqa %xmm3,-64(%ebx)
movdqa %xmm4,(%ebx)
movdqa %xmm5,16(%ebx)
movdqa %xmm6,64(%ebx)
movdqa %xmm7,96(%ebx)
movdqa -112(%ebx),%xmm1
movdqa -96(%ebx),%xmm2
movdqa -80(%ebx),%xmm3
paddd -128(%ebp),%xmm0
paddd -112(%ebp),%xmm1
paddd -96(%ebp),%xmm2
paddd -80(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa -64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa -48(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa -32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa -16(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd -64(%ebp),%xmm0
paddd -48(%ebp),%xmm1
paddd -32(%ebp),%xmm2
paddd -16(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa (%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 16(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 48(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd (%ebp),%xmm0
paddd 16(%ebp),%xmm1
paddd 32(%ebp),%xmm2
paddd 48(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa 64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 80(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 96(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 112(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd 64(%ebp),%xmm0
paddd 80(%ebp),%xmm1
paddd 96(%ebp),%xmm2
paddd 112(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 208(%esi),%esi
pxor %xmm0,%xmm4
pxor %xmm1,%xmm5
pxor %xmm2,%xmm6
pxor %xmm3,%xmm7
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L007outer_loop
addl $256,%ecx
jz .L009done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
movd 64(%ebp),%xmm2
movdqu (%ebx),%xmm3
paddd 96(%eax),%xmm2
pand 112(%eax),%xmm3
por %xmm2,%xmm3
.L0061x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L010loop1x
.align 16
.L011outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L010loop1x
.align 16
.L010loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L010loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L012tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L011outer1x
jmp .L009done
.L012tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L009done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
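The immediates 1634760805, 857760878, 2036477234 and 1797285236 written at .L000entry are the little-endian words of the ChaCha constant "expand 32-byte k", and the roll $16/$12/$8/$7 sequence in .L002loop is the standard quarter-round. A minimal C sketch of that quarter-round, for reference only (RFC 8439 form; not part of the generated file):

#include <stdint.h>

static uint32_t rotl32(uint32_t v, int n) { return (v << n) | (v >> (32 - n)); }

/* One ChaCha20 quarter-round on state words a, b, c, d.  The roll $16, $12,
 * $8 and $7 immediates in .L002loop above are these four rotations. */
static void quarter_round(uint32_t x[16], int a, int b, int c, int d) {
  x[a] += x[b]; x[d] ^= x[a]; x[d] = rotl32(x[d], 16);
  x[c] += x[d]; x[b] ^= x[c]; x[b] = rotl32(x[b], 12);
  x[a] += x[b]; x[d] ^= x[a]; x[d] = rotl32(x[d], 8);
  x[c] += x[d]; x[b] ^= x[c]; x[b] = rotl32(x[b], 7);
}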

File diff suppressed because it is too large

View File

@ -0,0 +1,995 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
.globl bn_mul_add_words
.hidden bn_mul_add_words
.type bn_mul_add_words,@function
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
call .L000PIC_me_up
.L000PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L001maw_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
jmp .L002maw_sse2_entry
.align 16
.L003maw_sse2_unrolled:
movd (%eax),%mm3
paddq %mm3,%mm1
movd (%edx),%mm2
pmuludq %mm0,%mm2
movd 4(%edx),%mm4
pmuludq %mm0,%mm4
movd 8(%edx),%mm6
pmuludq %mm0,%mm6
movd 12(%edx),%mm7
pmuludq %mm0,%mm7
paddq %mm2,%mm1
movd 4(%eax),%mm3
paddq %mm4,%mm3
movd 8(%eax),%mm5
paddq %mm6,%mm5
movd 12(%eax),%mm4
paddq %mm4,%mm7
movd %mm1,(%eax)
movd 16(%edx),%mm2
pmuludq %mm0,%mm2
psrlq $32,%mm1
movd 20(%edx),%mm4
pmuludq %mm0,%mm4
paddq %mm3,%mm1
movd 24(%edx),%mm6
pmuludq %mm0,%mm6
movd %mm1,4(%eax)
psrlq $32,%mm1
movd 28(%edx),%mm3
addl $32,%edx
pmuludq %mm0,%mm3
paddq %mm5,%mm1
movd 16(%eax),%mm5
paddq %mm5,%mm2
movd %mm1,8(%eax)
psrlq $32,%mm1
paddq %mm7,%mm1
movd 20(%eax),%mm5
paddq %mm5,%mm4
movd %mm1,12(%eax)
psrlq $32,%mm1
paddq %mm2,%mm1
movd 24(%eax),%mm5
paddq %mm5,%mm6
movd %mm1,16(%eax)
psrlq $32,%mm1
paddq %mm4,%mm1
movd 28(%eax),%mm5
paddq %mm5,%mm3
movd %mm1,20(%eax)
psrlq $32,%mm1
paddq %mm6,%mm1
movd %mm1,24(%eax)
psrlq $32,%mm1
paddq %mm3,%mm1
movd %mm1,28(%eax)
leal 32(%eax),%eax
psrlq $32,%mm1
subl $8,%ecx
jz .L004maw_sse2_exit
.L002maw_sse2_entry:
testl $4294967288,%ecx
jnz .L003maw_sse2_unrolled
.align 4
.L005maw_sse2_loop:
movd (%edx),%mm2
movd (%eax),%mm3
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm3,%mm1
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
psrlq $32,%mm1
leal 4(%eax),%eax
jnz .L005maw_sse2_loop
.L004maw_sse2_exit:
movd %mm1,%eax
emms
ret
.align 16
.L001maw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 28(%esp),%ecx
movl 24(%esp),%ebx
andl $4294967288,%ecx
movl 32(%esp),%ebp
pushl %ecx
jz .L006maw_finish
.align 16
.L007maw_loop:
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
movl 28(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 28(%edi),%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
subl $8,%ecx
leal 32(%ebx),%ebx
leal 32(%edi),%edi
jnz .L007maw_loop
.L006maw_finish:
movl 32(%esp),%ecx
andl $7,%ecx
jnz .L008maw_finish2
jmp .L009maw_end
.L008maw_finish2:
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
jz .L009maw_end
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
jz .L009maw_end
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
jz .L009maw_end
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
jz .L009maw_end
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
jz .L009maw_end
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
jz .L009maw_end
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
.L009maw_end:
movl %esi,%eax
popl %ecx
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_add_words,.-.L_bn_mul_add_words_begin
.globl bn_mul_words
.hidden bn_mul_words
.type bn_mul_words,@function
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
call .L010PIC_me_up
.L010PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L011mw_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
.align 16
.L012mw_sse2_loop:
movd (%edx),%mm2
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
psrlq $32,%mm1
leal 4(%eax),%eax
jnz .L012mw_sse2_loop
movd %mm1,%eax
emms
ret
.align 16
.L011mw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ebp
movl 32(%esp),%ecx
andl $4294967288,%ebp
jz .L013mw_finish
.L014mw_loop:
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
movl 28(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
addl $32,%ebx
addl $32,%edi
subl $8,%ebp
jz .L013mw_finish
jmp .L014mw_loop
.L013mw_finish:
movl 28(%esp),%ebp
andl $7,%ebp
jnz .L015mw_finish2
jmp .L016mw_end
.L015mw_finish2:
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
.L016mw_end:
movl %esi,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_words,.-.L_bn_mul_words_begin
.globl bn_sqr_words
.hidden bn_sqr_words
.type bn_sqr_words,@function
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
call .L017PIC_me_up
.L017PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L018sqr_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
.align 16
.L019sqr_sse2_loop:
movd (%edx),%mm0
pmuludq %mm0,%mm0
leal 4(%edx),%edx
movq %mm0,(%eax)
subl $1,%ecx
leal 8(%eax),%eax
jnz .L019sqr_sse2_loop
emms
ret
.align 16
.L018sqr_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%ebx
andl $4294967288,%ebx
jz .L020sw_finish
.L021sw_loop:
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
movl %edx,4(%esi)
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
movl %edx,12(%esi)
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
movl %edx,20(%esi)
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
movl %edx,28(%esi)
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
movl %edx,36(%esi)
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
movl %edx,44(%esi)
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
movl 28(%edi),%eax
mull %eax
movl %eax,56(%esi)
movl %edx,60(%esi)
addl $32,%edi
addl $64,%esi
subl $8,%ebx
jnz .L021sw_loop
.L020sw_finish:
movl 28(%esp),%ebx
andl $7,%ebx
jz .L022sw_end
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
decl %ebx
movl %edx,4(%esi)
jz .L022sw_end
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
decl %ebx
movl %edx,12(%esi)
jz .L022sw_end
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
decl %ebx
movl %edx,20(%esi)
jz .L022sw_end
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
decl %ebx
movl %edx,28(%esi)
jz .L022sw_end
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
decl %ebx
movl %edx,36(%esi)
jz .L022sw_end
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
decl %ebx
movl %edx,44(%esi)
jz .L022sw_end
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
.L022sw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sqr_words,.-.L_bn_sqr_words_begin
.globl bn_div_words
.hidden bn_div_words
.type bn_div_words,@function
.align 16
bn_div_words:
.L_bn_div_words_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
movl 12(%esp),%ecx
divl %ecx
ret
.size bn_div_words,.-.L_bn_div_words_begin
.globl bn_add_words
.hidden bn_add_words
.type bn_add_words,@function
.align 16
bn_add_words:
.L_bn_add_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L023aw_finish
.L024aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L024aw_loop
.L023aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L025aw_end
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L025aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L025aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L025aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L025aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L025aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L025aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
.L025aw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_add_words,.-.L_bn_add_words_begin
.globl bn_sub_words
.hidden bn_sub_words
.type bn_sub_words,@function
.align 16
bn_sub_words:
.L_bn_sub_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L026aw_finish
.L027aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L027aw_loop
.L026aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L028aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L028aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L028aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L028aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L028aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L028aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L028aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
.L028aw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_words,.-.L_bn_sub_words_begin
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

File diff suppressed because it is too large

View File

@ -0,0 +1,292 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call .L000pic_point
.L000pic_point:
popl %eax
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call .L004pic_point
.L004pic_point:
popl %ebx
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
.L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz .L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
.align 16
.Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

View File

@ -0,0 +1,328 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L006done
movups (%edx),%xmm2
.L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

View File

@ -0,0 +1,686 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
.L000start:
movl %ecx,%edi
movl (%esi),%ebp
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae .L000start
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,706 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.align 64
.L_vpaes_consts:
.long 218628480,235210255,168496130,67568393
.long 252381056,17041926,33884169,51187212
.long 252645135,252645135,252645135,252645135
.long 1512730624,3266504856,1377990664,3401244816
.long 830229760,1275146365,2969422977,3447763452
.long 3411033600,2979783055,338359620,2782886510
.long 4209124096,907596821,221174255,1006095553
.long 191964160,3799684038,3164090317,1589111125
.long 182528256,1777043520,2877432650,3265356744
.long 1874708224,3503451415,3305285752,363511674
.long 1606117888,3487855781,1093350906,2384367825
.long 197121,67569157,134941193,202313229
.long 67569157,134941193,202313229,197121
.long 134941193,202313229,197121,67569157
.long 202313229,197121,67569157,134941193
.long 33619971,100992007,168364043,235736079
.long 235736079,33619971,100992007,168364043
.long 168364043,235736079,33619971,100992007
.long 100992007,168364043,235736079,33619971
.long 50462976,117835012,185207048,252579084
.long 252314880,51251460,117574920,184942860
.long 184682752,252054788,50987272,118359308
.long 118099200,185467140,251790600,50727180
.long 2946363062,528716217,1300004225,1881839624
.long 1532713819,1532713819,1532713819,1532713819
.long 3602276352,4288629033,3737020424,4153884961
.long 1354558464,32357713,2958822624,3775749553
.long 1201988352,132424512,1572796698,503232858
.long 2213177600,1597421020,4103937655,675398315
.long 2749646592,4273543773,1511898873,121693092
.long 3040248576,1103263732,2871565598,1608280554
.long 2236667136,2588920351,482954393,64377734
.long 3069987328,291237287,2117370568,3650299247
.long 533321216,3573750986,2572112006,1401264716
.long 1339849704,2721158661,548607111,3445553514
.long 2128193280,3054596040,2183486460,1257083700
.long 655635200,1165381986,3923443150,2344132524
.long 190078720,256924420,290342170,357187870
.long 1610966272,2263057382,4103205268,309794674
.long 2592527872,2233205587,1335446729,3402964816
.long 3973531904,3225098121,3002836325,1918774430
.long 3870401024,2102906079,2284471353,4117666579
.long 617007872,1021508343,366931923,691083277
.long 2528395776,3491914898,2968704004,1613121270
.long 3445188352,3247741094,844474987,4093578302
.long 651481088,1190302358,1689581232,574775300
.long 4289380608,206939853,2555985458,2489840491
.long 2130264064,327674451,3566485037,3349835193
.long 2470714624,316102159,3636825756,3393945945
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
.byte 118,101,114,115,105,116,121,41,0
.align 64
.hidden _vpaes_preheat
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
addl (%esp),%ebp
movdqa -48(%ebp),%xmm7
movdqa -16(%ebp),%xmm6
ret
.size _vpaes_preheat,.-_vpaes_preheat
.hidden _vpaes_encrypt_core
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
movl $16,%ecx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
movdqu (%edx),%xmm5
.byte 102,15,56,0,208
movdqa 16(%ebp),%xmm0
pxor %xmm5,%xmm2
psrld $4,%xmm1
addl $16,%edx
.byte 102,15,56,0,193
leal 192(%ebp),%ebx
pxor %xmm2,%xmm0
jmp .L000enc_entry
.align 16
.L001enc_loop:
movdqa 32(%ebp),%xmm4
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,226
.byte 102,15,56,0,195
pxor %xmm5,%xmm4
movdqa 64(%ebp),%xmm5
pxor %xmm4,%xmm0
movdqa -64(%ebx,%ecx,1),%xmm1
.byte 102,15,56,0,234
movdqa 80(%ebp),%xmm2
movdqa (%ebx,%ecx,1),%xmm4
.byte 102,15,56,0,211
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addl $16,%edx
pxor %xmm2,%xmm0
.byte 102,15,56,0,220
addl $16,%ecx
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andl $48,%ecx
subl $1,%eax
pxor %xmm3,%xmm0
.L000enc_entry:
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
.byte 102,15,56,0,232
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm7,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224
movdqa %xmm7,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
movdqu (%edx),%xmm5
pxor %xmm1,%xmm3
jnz .L001enc_loop
movdqa 96(%ebp),%xmm4
movdqa 112(%ebp),%xmm0
.byte 102,15,56,0,226
pxor %xmm5,%xmm4
.byte 102,15,56,0,195
movdqa 64(%ebx,%ecx,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
ret
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
.hidden _vpaes_decrypt_core
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
leal 608(%ebp),%ebx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
movl %eax,%ecx
psrld $4,%xmm1
movdqu (%edx),%xmm5
shll $4,%ecx
pand %xmm6,%xmm0
.byte 102,15,56,0,208
movdqa -48(%ebx),%xmm0
xorl $48,%ecx
.byte 102,15,56,0,193
andl $48,%ecx
pxor %xmm5,%xmm2
movdqa 176(%ebp),%xmm5
pxor %xmm2,%xmm0
addl $16,%edx
leal -352(%ebx,%ecx,1),%ecx
jmp .L002dec_entry
.align 16
.L003dec_loop:
movdqa -32(%ebx),%xmm4
movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa (%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 32(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 64(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
addl $16,%edx
.byte 102,15,58,15,237,12
pxor %xmm1,%xmm0
subl $1,%eax
.L002dec_entry:
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
psrld $4,%xmm1
.byte 102,15,56,0,208
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm7,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
movdqu (%edx),%xmm0
pxor %xmm1,%xmm3
jnz .L003dec_loop
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226
pxor %xmm0,%xmm4
movdqa 112(%ebx),%xmm0
movdqa (%ecx),%xmm2
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
ret
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
.hidden _vpaes_schedule_core
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
addl (%esp),%ebp
movdqu (%esi),%xmm0
movdqa 320(%ebp),%xmm2
movdqa %xmm0,%xmm3
leal (%ebp),%ebx
movdqa %xmm2,4(%esp)
call _vpaes_schedule_transform
movdqa %xmm0,%xmm7
testl %edi,%edi
jnz .L004schedule_am_decrypting
movdqu %xmm0,(%edx)
jmp .L005schedule_go
.L004schedule_am_decrypting:
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%edx)
xorl $48,%ecx
.L005schedule_go:
cmpl $192,%eax
ja .L006schedule_256
je .L007schedule_192
.L008schedule_128:
movl $10,%eax
.L009loop_schedule_128:
call _vpaes_schedule_round
decl %eax
jz .L010schedule_mangle_last
call _vpaes_schedule_mangle
jmp .L009loop_schedule_128
.align 16
.L007schedule_192:
movdqu 8(%esi),%xmm0
call _vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%eax
.L011loop_schedule_192:
call _vpaes_schedule_round
.byte 102,15,58,15,198,8
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
call _vpaes_schedule_mangle
call _vpaes_schedule_round
decl %eax
jz .L010schedule_mangle_last
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
jmp .L011loop_schedule_192
.align 16
.L006schedule_256:
movdqu 16(%esi),%xmm0
call _vpaes_schedule_transform
movl $7,%eax
.L012loop_schedule_256:
call _vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call _vpaes_schedule_round
decl %eax
jz .L010schedule_mangle_last
call _vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
movdqa %xmm7,20(%esp)
movdqa %xmm6,%xmm7
call .L_vpaes_schedule_low_round
movdqa 20(%esp),%xmm7
jmp .L012loop_schedule_256
.align 16
.L010schedule_mangle_last:
leal 384(%ebp),%ebx
testl %edi,%edi
jnz .L013schedule_mangle_last_dec
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,193
leal 352(%ebp),%ebx
addl $32,%edx
.L013schedule_mangle_last_dec:
addl $-16,%edx
pxor 336(%ebp),%xmm0
call _vpaes_schedule_transform
movdqu %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
.hidden _vpaes_schedule_192_smear
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
movhlps %xmm1,%xmm6
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
.hidden _vpaes_schedule_round
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
movdqa 8(%esp),%xmm2
pxor %xmm1,%xmm1
.byte 102,15,58,15,202,15
.byte 102,15,58,15,210,15
pxor %xmm1,%xmm7
pshufd $255,%xmm0,%xmm0
.byte 102,15,58,15,192,1
movdqa %xmm2,8(%esp)
.L_vpaes_schedule_low_round:
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor 336(%ebp),%xmm7
movdqa -16(%ebp),%xmm4
movdqa -48(%ebp),%xmm5
movdqa %xmm4,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm4,%xmm0
movdqa -32(%ebp),%xmm2
.byte 102,15,56,0,208
pxor %xmm1,%xmm0
movdqa %xmm5,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
movdqa %xmm5,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm5,%xmm2
.byte 102,15,56,0,211
pxor %xmm0,%xmm2
movdqa %xmm5,%xmm3
.byte 102,15,56,0,220
pxor %xmm1,%xmm3
movdqa 32(%ebp),%xmm4
.byte 102,15,56,0,226
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
ret
.size _vpaes_schedule_round,.-_vpaes_schedule_round
.hidden _vpaes_schedule_transform
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
movdqa -16(%ebp),%xmm2
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
movdqa (%ebx),%xmm2
.byte 102,15,56,0,208
movdqa 16(%ebx),%xmm0
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
ret
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
.hidden _vpaes_schedule_mangle
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
movdqa %xmm0,%xmm4
movdqa 128(%ebp),%xmm5
testl %edi,%edi
jnz .L014schedule_mangle_dec
addl $16,%edx
pxor 336(%ebp),%xmm4
.byte 102,15,56,0,229
movdqa %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
jmp .L015schedule_mangle_both
.align 16
.L014schedule_mangle_dec:
movdqa -16(%ebp),%xmm2
leal 416(%ebp),%esi
movdqa %xmm2,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm4
movdqa (%esi),%xmm2
.byte 102,15,56,0,212
movdqa 16(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 32(%esi),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 48(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 64(%esi),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 80(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 96(%esi),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 112(%esi),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
addl $-16,%edx
.L015schedule_mangle_both:
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217
addl $-16,%ecx
andl $48,%ecx
movdqu %xmm3,(%edx)
ret
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
.globl vpaes_set_encrypt_key
.hidden vpaes_set_encrypt_key
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
.L_vpaes_set_encrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L016pic
.L016pic:
popl %ebx
leal BORINGSSL_function_hit+5-.L016pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
movl $48,%ecx
movl $0,%edi
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
call _vpaes_schedule_core
.L017pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
.globl vpaes_set_decrypt_key
.hidden vpaes_set_decrypt_key
.type vpaes_set_decrypt_key,@function
.align 16
vpaes_set_decrypt_key:
.L_vpaes_set_decrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
shll $4,%ebx
leal 16(%edx,%ebx,1),%edx
movl $1,%edi
movl %eax,%ecx
shrl $1,%ecx
andl $32,%ecx
xorl $32,%ecx
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
call _vpaes_schedule_core
.L018pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
.globl vpaes_encrypt
.hidden vpaes_encrypt
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
.L_vpaes_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L019pic
.L019pic:
popl %ebx
leal BORINGSSL_function_hit+4-.L019pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
leal .L_vpaes_consts+0x30-.L020pic_point,%ebp
call _vpaes_preheat
.L020pic_point:
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call _vpaes_encrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_encrypt,.-.L_vpaes_encrypt_begin
.globl vpaes_decrypt
.hidden vpaes_decrypt
.type vpaes_decrypt,@function
.align 16
vpaes_decrypt:
.L_vpaes_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
call _vpaes_preheat
.L021pic_point:
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call _vpaes_decrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_decrypt,.-.L_vpaes_decrypt_begin
.globl vpaes_cbc_encrypt
.hidden vpaes_cbc_encrypt
.type vpaes_cbc_encrypt,@function
.align 16
vpaes_cbc_encrypt:
.L_vpaes_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
subl $16,%eax
jc .L022cbc_abort
leal -56(%esp),%ebx
movl 36(%esp),%ebp
andl $-16,%ebx
movl 40(%esp),%ecx
xchgl %esp,%ebx
movdqu (%ebp),%xmm1
subl %esi,%edi
movl %ebx,48(%esp)
movl %edi,(%esp)
movl %edx,4(%esp)
movl %ebp,8(%esp)
movl %eax,%edi
leal .L_vpaes_consts+0x30-.L023pic_point,%ebp
call _vpaes_preheat
.L023pic_point:
cmpl $0,%ecx
je .L024cbc_dec_loop
jmp .L025cbc_enc_loop
.align 16
.L025cbc_enc_loop:
movdqu (%esi),%xmm0
pxor %xmm1,%xmm0
call _vpaes_encrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
movdqa %xmm0,%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc .L025cbc_enc_loop
jmp .L026cbc_done
.align 16
.L024cbc_dec_loop:
movdqu (%esi),%xmm0
movdqa %xmm1,16(%esp)
movdqa %xmm0,32(%esp)
call _vpaes_decrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
pxor 16(%esp),%xmm0
movdqa 32(%esp),%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc .L024cbc_dec_loop
.L026cbc_done:
movl 8(%esp),%ebx
movl 48(%esp),%esp
movdqu %xmm1,(%ebx)
.L022cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

View File

@ -0,0 +1,482 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
.L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
call .L003PIC_me_up
.L003PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 16
.L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L006outer
emms
jmp .L008common_tail
.align 16
.L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0112ndmadd
.align 16
.L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 16
.L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0121stmadd
.align 16
.L009bn_sqr_mont:
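// Squaring shortcut, taken when the two input pointers alias: each cross
// product is computed once and doubled rather than multiplied twice.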
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 16
.L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0143rdmadd
.align 16
.L008common_tail:
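// Final reduction: conditionally subtract the modulus, then use the resulting
// borrow to build an all-zeros/all-ones mask and copy either the subtracted or
// the original value into rp, keeping the selection constant time.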
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L017sub
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp .L018copy
.align 16
.L018copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L018copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_mont,.-.L_bn_mul_mont_begin
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

View File

@ -0,0 +1,204 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
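// ABI-testing helpers (these appear to back BoringSSL's CHECK_ABI tests).
// abi_test_trampoline loads the callee-saved registers esi/edi/ebx/ebp from a
// caller-supplied state block, copies the argument words onto the stack, calls
// the target function pointer, and writes the registers back so the harness
// can check they were preserved. The abi_test_clobber_* routines and the
// direction-flag helpers below deliberately zero individual registers or
// toggle EFLAGS.DF for the same tests.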
.text
.globl abi_test_trampoline
.hidden abi_test_trampoline
.type abi_test_trampoline,@function
.align 16
abi_test_trampoline:
.L_abi_test_trampoline_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp),%ecx
movl (%ecx),%esi
movl 4(%ecx),%edi
movl 8(%ecx),%ebx
movl 12(%ecx),%ebp
subl $44,%esp
movl 72(%esp),%eax
xorl %ecx,%ecx
.L000loop:
cmpl 76(%esp),%ecx
jae .L001loop_done
movl (%eax,%ecx,4),%edx
movl %edx,(%esp,%ecx,4)
addl $1,%ecx
jmp .L000loop
.L001loop_done:
call *64(%esp)
addl $44,%esp
movl 24(%esp),%ecx
movl %esi,(%ecx)
movl %edi,4(%ecx)
movl %ebx,8(%ecx)
movl %ebp,12(%ecx)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size abi_test_trampoline,.-.L_abi_test_trampoline_begin
.globl abi_test_get_and_clear_direction_flag
.hidden abi_test_get_and_clear_direction_flag
.type abi_test_get_and_clear_direction_flag,@function
.align 16
abi_test_get_and_clear_direction_flag:
.L_abi_test_get_and_clear_direction_flag_begin:
pushfl
popl %eax
andl $1024,%eax
shrl $10,%eax
cld
ret
.size abi_test_get_and_clear_direction_flag,.-.L_abi_test_get_and_clear_direction_flag_begin
.globl abi_test_set_direction_flag
.hidden abi_test_set_direction_flag
.type abi_test_set_direction_flag,@function
.align 16
abi_test_set_direction_flag:
.L_abi_test_set_direction_flag_begin:
std
ret
.size abi_test_set_direction_flag,.-.L_abi_test_set_direction_flag_begin
.globl abi_test_clobber_eax
.hidden abi_test_clobber_eax
.type abi_test_clobber_eax,@function
.align 16
abi_test_clobber_eax:
.L_abi_test_clobber_eax_begin:
xorl %eax,%eax
ret
.size abi_test_clobber_eax,.-.L_abi_test_clobber_eax_begin
.globl abi_test_clobber_ebx
.hidden abi_test_clobber_ebx
.type abi_test_clobber_ebx,@function
.align 16
abi_test_clobber_ebx:
.L_abi_test_clobber_ebx_begin:
xorl %ebx,%ebx
ret
.size abi_test_clobber_ebx,.-.L_abi_test_clobber_ebx_begin
.globl abi_test_clobber_ecx
.hidden abi_test_clobber_ecx
.type abi_test_clobber_ecx,@function
.align 16
abi_test_clobber_ecx:
.L_abi_test_clobber_ecx_begin:
xorl %ecx,%ecx
ret
.size abi_test_clobber_ecx,.-.L_abi_test_clobber_ecx_begin
.globl abi_test_clobber_edx
.hidden abi_test_clobber_edx
.type abi_test_clobber_edx,@function
.align 16
abi_test_clobber_edx:
.L_abi_test_clobber_edx_begin:
xorl %edx,%edx
ret
.size abi_test_clobber_edx,.-.L_abi_test_clobber_edx_begin
.globl abi_test_clobber_edi
.hidden abi_test_clobber_edi
.type abi_test_clobber_edi,@function
.align 16
abi_test_clobber_edi:
.L_abi_test_clobber_edi_begin:
xorl %edi,%edi
ret
.size abi_test_clobber_edi,.-.L_abi_test_clobber_edi_begin
.globl abi_test_clobber_esi
.hidden abi_test_clobber_esi
.type abi_test_clobber_esi,@function
.align 16
abi_test_clobber_esi:
.L_abi_test_clobber_esi_begin:
xorl %esi,%esi
ret
.size abi_test_clobber_esi,.-.L_abi_test_clobber_esi_begin
.globl abi_test_clobber_ebp
.hidden abi_test_clobber_ebp
.type abi_test_clobber_ebp,@function
.align 16
abi_test_clobber_ebp:
.L_abi_test_clobber_ebp_begin:
xorl %ebp,%ebp
ret
.size abi_test_clobber_ebp,.-.L_abi_test_clobber_ebp_begin
.globl abi_test_clobber_xmm0
.hidden abi_test_clobber_xmm0
.type abi_test_clobber_xmm0,@function
.align 16
abi_test_clobber_xmm0:
.L_abi_test_clobber_xmm0_begin:
pxor %xmm0,%xmm0
ret
.size abi_test_clobber_xmm0,.-.L_abi_test_clobber_xmm0_begin
.globl abi_test_clobber_xmm1
.hidden abi_test_clobber_xmm1
.type abi_test_clobber_xmm1,@function
.align 16
abi_test_clobber_xmm1:
.L_abi_test_clobber_xmm1_begin:
pxor %xmm1,%xmm1
ret
.size abi_test_clobber_xmm1,.-.L_abi_test_clobber_xmm1_begin
.globl abi_test_clobber_xmm2
.hidden abi_test_clobber_xmm2
.type abi_test_clobber_xmm2,@function
.align 16
abi_test_clobber_xmm2:
.L_abi_test_clobber_xmm2_begin:
pxor %xmm2,%xmm2
ret
.size abi_test_clobber_xmm2,.-.L_abi_test_clobber_xmm2_begin
.globl abi_test_clobber_xmm3
.hidden abi_test_clobber_xmm3
.type abi_test_clobber_xmm3,@function
.align 16
abi_test_clobber_xmm3:
.L_abi_test_clobber_xmm3_begin:
pxor %xmm3,%xmm3
ret
.size abi_test_clobber_xmm3,.-.L_abi_test_clobber_xmm3_begin
.globl abi_test_clobber_xmm4
.hidden abi_test_clobber_xmm4
.type abi_test_clobber_xmm4,@function
.align 16
abi_test_clobber_xmm4:
.L_abi_test_clobber_xmm4_begin:
pxor %xmm4,%xmm4
ret
.size abi_test_clobber_xmm4,.-.L_abi_test_clobber_xmm4_begin
.globl abi_test_clobber_xmm5
.hidden abi_test_clobber_xmm5
.type abi_test_clobber_xmm5,@function
.align 16
abi_test_clobber_xmm5:
.L_abi_test_clobber_xmm5_begin:
pxor %xmm5,%xmm5
ret
.size abi_test_clobber_xmm5,.-.L_abi_test_clobber_xmm5_begin
.globl abi_test_clobber_xmm6
.hidden abi_test_clobber_xmm6
.type abi_test_clobber_xmm6,@function
.align 16
abi_test_clobber_xmm6:
.L_abi_test_clobber_xmm6_begin:
pxor %xmm6,%xmm6
ret
.size abi_test_clobber_xmm6,.-.L_abi_test_clobber_xmm6_begin
.globl abi_test_clobber_xmm7
.hidden abi_test_clobber_xmm7
.type abi_test_clobber_xmm7,@function
.align 16
abi_test_clobber_xmm7:
.L_abi_test_clobber_xmm7_begin:
pxor %xmm7,%xmm7
ret
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,883 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
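// Stitched AES-NI + PCLMULQDQ AES-GCM for x86-64 with AVX (this looks like the
// BoringSSL aesni-gcm-x86_64 module). _aesni_ctr32_ghash_6x interleaves
// CTR-mode encryption of six 16-byte blocks with GHASH over the six previous
// ciphertext blocks per iteration; aesni_gcm_encrypt and aesni_gcm_decrypt
// drive it over whole 96-byte groups and return in %rax the number of bytes
// actually processed, leaving any tail to the caller.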
.text
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%r10d
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
prefetcht0 512(%rdi)
prefetcht0 576(%rdi)
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%rax
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
ret
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
_CET_ENDBR
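// Refuse (returning 0 bytes processed) unless at least 96 bytes, one full
// six-block group, are available.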
xorq %rax,%rax
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
pushq %rbx
.cfi_offset %rbx,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
movq 16(%rbp),%r12
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r12),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32(%r9),%r9
movl 240-128(%rcx),%r10d
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
movq %rdi,%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %rax,%rax
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
movq 16(%rbp),%r12
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,(%r12)
vzeroupper
leaq -40(%rbp),%rsp
.cfi_def_cfa %rsp, 0x38
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
.Lgcm_dec_abort:
ret
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
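// Encrypt six consecutive counter blocks and XOR them with 96 bytes of input,
// advancing the input and output pointers; this is the plain CTR warm-up run
// before enough ciphertext exists to feed the stitched GHASH loop.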
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%r10),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
ret
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
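// The encrypt path needs at least 288 bytes: two six-block groups are first
// encrypted by _aesni_ctr32_6x, and the stitched encrypt+GHASH loop needs at
// least one more. Shorter inputs return 0 and are handled by the caller.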
xorq %rax,%rax
cmpq $288,%rdx
jb .Lgcm_enc_abort
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
pushq %rbx
.cfi_offset %rbx,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%r10d
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
movq %rsi,%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
movq 16(%rbp),%r12
leaq 32(%r9),%r9
vmovdqu (%r12),%xmm8
subq $12,%rdx
movq $192,%rax
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
movq 16(%rbp),%r12
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,(%r12)
vzeroupper
leaq -40(%rbp),%rsp
.cfi_def_cfa %rsp, 0x38
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
.Lgcm_enc_abort:
ret
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.section .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.text
#endif

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff