// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// detect_platform.h: Sets up macros that control architecture-specific
// features of gemmlowp's implementation.

#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// All GEMMLOWP_* platform tokens below, except GEMMLOWP_NOINLINE and
// GEMMLOWP_MARK_MEMORY_AS_INITIALIZED, are presence-only macros: they are
// defined with an empty expansion, so test them with #ifdef / defined(),
// never with a bare `#if TOKEN`.

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly(?).
#if defined(__GNUC__) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif

// Define macro statement that avoids inlining for GCC.
// For non-GCC, define as empty macro.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif

// Detect ARM, 32-bit or 64-bit.
#ifdef __arm__
#define GEMMLOWP_ARM_32
#endif

#ifdef __aarch64__
#define GEMMLOWP_ARM_64
#endif

// Umbrella token: defined whenever either ARM width was detected.
#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif

// Detect MIPS, 32-bit or 64-bit. The width is chosen by the presence or
// absence of __LP64__.
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

// Umbrella token: defined whenever either MIPS width was detected.
#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif

// Detect x86, 32-bit or 64-bit.
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

// Umbrella token: defined whenever either x86 width was detected.
#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif

// Detect WebAssembly SIMD.
#if defined(__wasm_simd128__)
#define GEMMLOWP_WASMSIMD
#endif

// Some of our optimized paths use inline assembly and for
// now we don't bother enabling some other optimized paths using intrinsics
// where we can't use inline assembly paths.
//
// Consequently everything down to the matching #endif (NEON, MSA, SSE/AVX2
// and the sanitizer hook) is only considered when GEMMLOWP_ALLOW_INLINE_ASM
// is defined.
#ifdef GEMMLOWP_ALLOW_INLINE_ASM

// Detect NEON. It's important to check for both tokens.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif

// Detect MIPS MSA.
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif

// x86 SIMD level selection. This is a single #if/#elif chain, so at most one
// of GEMMLOWP_AVX2, GEMMLOWP_SSE4 and GEMMLOWP_SSE3 is defined: the first
// condition that matches wins.
//
// Detect AVX2. AVX2 is opt-in: besides compiler support (__AVX2__) the build
// must pass the compiler define -D GEMMLOWP_ENABLE_AVX2.
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4.
// MSVC does not have __SSE4_1__ macro, but will enable SSE4
// when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif

// Convenience SSE4 tokens for 32-bit or 64-bit.
// GEMMLOWP_DISABLE_SSE4 suppresses only these width-specific tokens;
// GEMMLOWP_SSE4 itself stays defined.
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

// Convenience AVX2 token. Only a 64-bit variant is provided.
#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif

// Sanitizer hook. GEMMLOWP_MARK_MEMORY_AS_INITIALIZED is defined only when
// the compiler reports MemorySanitizer or AddressSanitizer through
// __has_feature; otherwise it is left undefined, so users must guard on it.
// The inner #if is nested (rather than joined with &&) so that
// __has_feature(...) is never evaluated by a preprocessor that lacks it.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif

#endif  // GEMMLOWP_ALLOW_INLINE_ASM

// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_