/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

#include <linux/stringify.h>

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 * 1) Completion barriers, which ensure that a memory operation has actually
 *    completed & often involve stalling the CPU pipeline to do so.
 *
 * 2) Ordering barriers, which only ensure that affected memory operations
 *    won't be reordered in the CPU pipeline in a manner that violates the
 *    restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 * a) Ordering barriers only require memory access instructions which precede
 *    them in program order (older instructions) to reach a point in the
 *    load/store datapath beyond which reordering is not possible before
 *    allowing memory access instructions which follow them (younger
 *    instructions) to be performed. That is, older instructions don't
 *    actually need to complete - they just need to get far enough that all
 *    other coherent CPUs will observe their completion before they observe
 *    the effects of younger instructions.
 *
 * b) Multiple variants of ordering barrier are provided which allow the
 *    effects to be restricted to different combinations of older or younger
 *    loads or stores. By way of example, if we only care that stores older
 *    than a barrier are observed prior to stores that are younger than a
 *    barrier & don't care about the ordering of loads then the 'wmb'
 *    ordering barrier can be used. Limiting the barrier's effects to stores
 *    allows loads to continue unaffected & potentially allows the CPU to
 *    make progress faster than if younger loads had to wait for older stores
 *    to complete.
 */
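
/*
 * As an illustration of the 'wmb' case above, consider a producer that
 * fills a buffer & then sets a ready flag (a hypothetical example, not
 * code from this header). Only the two stores need to be observed in
 * order, so a store-only ordering barrier suffices:
 *
 *	buf->data = value;	// older store
 *	wmb();			// store/store ordering barrier
 *	WRITE_ONCE(ready, 1);	// younger store
 *
 * A reader polling 'ready' would pair this with a read-side (rmb) barrier
 * between its own loads.
 */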

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14
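
/*
 * A sketch of the intended usage, as illustrative pseudo-assembly (the
 * real users live elsewhere in arch/mips):
 *
 *	ginvt	$a0, 0		# begin a global TLB invalidation
 *	sync	0x14		# __SYNC_ginv: wait until all coherent CPUs
 *				# have observed the invalidation
 */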

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any
 *    earlier memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and
 *    later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * Both of the above cases lead to an error in the cache coherence protocol:
 * the Invalidate from a competing LL-SC goes 'missing', so the SC
 * erroneously observes that its core still holds the line in the Exclusive
 * state & allows the SC to proceed.
 *
 * Therefore the error only occurs on SMP systems. Barrier placement for
 * both cases is illustrated in the sketch following this block.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif
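
/*
 * The sketch below shows where the workaround barriers land, using a
 * cmpxchg-style LL/SC loop as an example (illustrative pseudo-assembly,
 * not the real kernel implementation; see cmpxchg.h & atomic.h for the
 * actual loops):
 *
 *	__SYNC(full, loongson3_war)	# case 1: before the LL
 * 1:	ll	t0, (a0)
 *	bne	t0, a1, 2f		# mispredictable exit branch
 *	move	t0, a2
 *	sc	t0, (a0)
 *	beqz	t0, 1b
 * 2:	__SYNC(full, loongson3_war)	# case 2: at the branch target
 */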

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 *
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
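
/*
 * A worked example of the expression above: when type is __SYNC_wmb the
 * assembler evaluates (type == __SYNC_wmb) to -1, so the repeat count is
 * 1 - (-1) = 2 & the barrier is emitted twice. For every other type the
 * comparison evaluates to 0, giving a count of 1 - 0 = 1.
 */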

/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
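
/*
 * Example usage (a sketch; the real call sites live in barrier.h,
 * cmpxchg.h & friends). From C code the macro expands, via __stringify(),
 * to a string that forms part of an inline assembly template:
 *
 *	asm volatile(__SYNC(mb, always) ::: "memory");
 *
 * From assembly code it expands directly to the directives, e.g. to emit
 * a sync only on configs where LL/SC provide no implicit barrier:
 *
 *	__SYNC(full, weak_llsc)
 *
 * __SYNC_ELSE() additionally emits its third argument when the sync is
 * not needed - for example a nop, where surrounding code relies upon an
 * instruction occupying that slot.
 */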

#endif /* __MIPS_ASM_SYNC_H__ */