/*
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements which the compiler will not
 * remove or reorder during optimisation; this includes the __atomic and
 * __sync intrinsics, asm statements marked volatile with a "memory"
 * clobber, and variables declared volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching a variable and adding to
 * it quite likely will not be; so for (2) we need to ensure we use atomic
 * addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to bring a noticeable performance improvement, while being a
 * potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work,
 * always" approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * note that terminology may vary between sources.
 */
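/* Illustrative sketch (not part of this header): how the helpers below
 * might be combined when counting events from multiple threads. The names
 * pthread_worker, counter, NTHREADS and count_events are made up for this
 * example; real tests should normally use tst_checkpoint.h or
 * tst_fuzzy_sync.h rather than calling these primitives directly.
 *
 *	#include <pthread.h>
 *	#include "tst_atomic.h"
 *
 *	#define NTHREADS 2
 *	static int counter;
 *
 *	static void *pthread_worker(void *arg)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 1000; i++)
 *			tst_atomic_inc(&counter);
 *
 *		return arg;
 *	}
 *
 *	static int count_events(void)
 *	{
 *		pthread_t threads[NTHREADS];
 *		int i;
 *
 *		tst_atomic_store(0, &counter);
 *
 *		for (i = 0; i < NTHREADS; i++)
 *			pthread_create(&threads[i], NULL, pthread_worker, NULL);
 *		for (i = 0; i < NTHREADS; i++)
 *			pthread_join(threads[i], NULL);
 *
 *		// Every increment is atomic and globally visible after the
 *		// joins, so this always returns NTHREADS * 1000.
 *		return tst_atomic_load(&counter);
 *	}
 */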
#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

/* The __sync builtins provide no load or store primitive, so bracket a
 * plain access with full memory barriers.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add	%0,%1,%0\n"
		"	stwcx.	%0,0,%2\n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)
/* ARCv2 defines the smp barriers */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock	%[val], [%[ctr]]\n"
		"	add	%[val], %[val], %[i]\n"
		"	scond	%[val], [%[ctr]]\n"
		"	bnz	1b\n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		  [i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined (__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		"	prfm	pstl1strm, %2\n"
		"1:	ldaxr	%w0, %2\n"
		"	add	%w0, %w0, %w3\n"
		"	stlxr	%w1, %w0, %2\n"
		"	cbnz	%w1, 1b\n"
		"	dmb	ish\n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	return result;
}

/* We use the load and store exclusive (ldaxr & stlxr) instructions here to
 * try to prevent tst_atomic_load and, more likely, tst_atomic_store from
 * interfering with tst_atomic_add_return, which relies on exclusivity. It
 * is not clear whether this is a good idea, but it does mean that all
 * three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load\n"
		"	prfm	pstl1strm, %[v]\n"
		"1:	ldaxr	%w[ret], %[v]\n"
		"	stlxr	%w[tmp], %w[ret], %[v]\n"
		"	cbnz	%w[tmp], 1b\n"
		"	dmb	ish\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store\n"
		"	prfm	pstl1strm, %[v]\n"
		"1:	ldaxr	%w[tmp], %[v]\n"
		"	stlxr	%w[tmp], %w[i], %[v]\n"
		"	cbnz	%w[tmp], 1b\n"
		"	dmb	ish\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/\n"
		"1:	ldsw	[%[v]], %[ret];\n"
		"	add	%[ret], %[i], %[tmp];\n"
		"	cas	[%[v]], %[ret], %[tmp];\n"
		"	cmp	%[ret], %[tmp];\n"
		"	bne,pn	%%icc, 1b;\n"
		"	 nop;\n"
		"	add	%[ret], %[i], %[ret];\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
	and an LTP implementation is missing for your architecture.
#endif

#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
/* Generic fallback for the architectures above that only provide
 * tst_atomic_add_return: plain accesses bracketed by compiler barriers.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

/* Returns the value of *v after the increment. */
static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

/* Returns the value of *v after the decrement. */
static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif	/* TST_ATOMIC_H__ */