/*
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements which the compiler will not
 * remove or reorder during optimisation; this includes the __atomic and
 * __sync intrinsics, asm statements marked volatile with a "memory"
 * clobber, and variables declared volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching a variable and adding to
 * it quite likely will not be; so for (2) we need to ensure we use atomic
 * addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to bring a noticeable performance improvement, while being a
 * potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work,
 * always" approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * note that terminology may vary between sources.
 */
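/* Illustrative sketch (not part of this header): how the helpers below
 * might be combined when counting events from multiple threads. The names
 * pthread_worker, counter, NTHREADS and count_events are made up for this
 * example; real tests should normally use tst_checkpoint.h or
 * tst_fuzzy_sync.h rather than calling these primitives directly.
 *
 *	#include <pthread.h>
 *	#include "tst_atomic.h"
 *
 *	#define NTHREADS 2
 *	static int counter;
 *
 *	static void *pthread_worker(void *arg)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 1000; i++)
 *			tst_atomic_inc(&counter);
 *
 *		return arg;
 *	}
 *
 *	static int count_events(void)
 *	{
 *		pthread_t threads[NTHREADS];
 *		int i;
 *
 *		tst_atomic_store(0, &counter);
 *
 *		for (i = 0; i < NTHREADS; i++)
 *			pthread_create(&threads[i], NULL, pthread_worker, NULL);
 *		for (i = 0; i < NTHREADS; i++)
 *			pthread_join(threads[i], NULL);
 *
 *		// Every increment is atomic and globally visible after the
 *		// joins, so this always returns NTHREADS * 1000.
 *		return tst_atomic_load(&counter);
 *	}
 */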
#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

/* The __sync builtins provide no load or store primitive, so bracket a
 * plain access with full memory barriers.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add	%0,%1,%0\n"
		"	stwcx.	%0,0,%2\n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)
/* ARCv2 defines the smp barriers */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock	%[val], [%[ctr]]\n"
		"	add	%[val], %[val], %[i]\n"
		"	scond	%[val], [%[ctr]]\n"
		"	bnz	1b\n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		  [i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined (__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		"	prfm	pstl1strm, %2\n"
		"1:	ldaxr	%w0, %2\n"
		"	add	%w0, %w0, %w3\n"
		"	stlxr	%w1, %w0, %2\n"
		"	cbnz	%w1, 1b\n"
		"	dmb	ish\n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	return result;
}

/* We use the load and store exclusive (ldaxr & stlxr) instructions here to
 * try to prevent tst_atomic_load and, more likely, tst_atomic_store from
 * interfering with tst_atomic_add_return, which relies on exclusivity. It
 * is not clear whether this is a good idea, but it does mean that all
 * three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load\n"
		"	prfm	pstl1strm, %[v]\n"
		"1:	ldaxr	%w[ret], %[v]\n"
		"	stlxr	%w[tmp], %w[ret], %[v]\n"
		"	cbnz	%w[tmp], 1b\n"
		"	dmb	ish\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store\n"
		"	prfm	pstl1strm, %[v]\n"
		"1:	ldaxr	%w[tmp], %[v]\n"
		"	stlxr	%w[tmp], %w[i], %[v]\n"
		"	cbnz	%w[tmp], 1b\n"
		"	dmb	ish\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/\n"
		"1:	ldsw	[%[v]], %[ret];\n"
		"	add	%[ret], %[i], %[tmp];\n"
		"	cas	[%[v]], %[ret], %[tmp];\n"
		"	cmp	%[ret], %[tmp];\n"
		"	bne,pn	%%icc, 1b;\n"
		"	 nop;\n"
		"	add	%[ret], %[i], %[ret];\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
	and an LTP implementation is missing for your architecture.
#endif

#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
/* Generic fallback for the architectures above that only provide
 * tst_atomic_add_return: plain accesses bracketed by compiler barriers.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

/* Returns the value of *v after the increment. */
static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

/* Returns the value of *v after the decrement. */
static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif	/* TST_ATOMIC_H__ */