#ifndef __ASM_SH_UNALIGNED_SH4A_H
#define __ASM_SH_UNALIGNED_SH4A_H

/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is done through shifting and masking
 * relative to the endianness. Unaligned stores are not supported by the
 * instruction encoding, so these continue to use the packed struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
 *
 * NOTE: Because we require r0 encoding, care should be taken to avoid
 * mixing these heavily with other r0 consumers, such as the atomic ops.
 * Failure to adhere to this can result in the compiler running out of
 * spill registers and blowing up when building at low optimization
 * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
#include <linux/unaligned/packed_struct.h>
#include <linux/types.h>
#include <asm/byteorder.h>

static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
	return p[0] | p[1] << 8;
#else
	return p[0] << 8 | p[1];
#endif
}

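/*
 * movua.l is the only unaligned load the ISA provides: it reads 32 bits
 * from @Rm and can only target r0, which is why the asm below uses the
 * "=z" (r0-only) constraint for its output operand.
 */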
static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
{
	unsigned long unaligned;

	__asm__ __volatile__ (
		"movua.l	@%1, %0\n\t"
		 : "=z" (unaligned)
		 : "r" (p)
	);

	return unaligned;
}

/*
 * Even though movua.l supports auto-increment on the read side, it can
 * only store to r0 due to instruction encoding constraints, so just let
 * the compiler sort it out on its own.
 */
static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
	return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
		    sh4a_get_unaligned_cpu32(p);
#else
	return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
		    sh4a_get_unaligned_cpu32(p + 4);
#endif
}

static inline u16 get_unaligned_le16(const void *p)
{
	return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_le32(const void *p)
{
	return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_le64(const void *p)
{
	return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline u16 get_unaligned_be16(const void *p)
{
	return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_be32(const void *p)
{
	return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_be64(const void *p)
{
	return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

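/*
 * Illustrative use ("buf" is hypothetical): fetch a little-endian
 * 32-bit length field from an arbitrarily aligned buffer:
 *
 *	u32 len = get_unaligned_le32(buf + 3);
 *
 * On a little-endian build this should boil down to a single movua.l
 * rather than four byte loads and shifts.
 */
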
static inline void nonnative_put_le16(u16 val, u8 *p)
{
	*p++ = val;
	*p++ = val >> 8;
}

static inline void nonnative_put_le32(u32 val, u8 *p)
{
	nonnative_put_le16(val, p);
	nonnative_put_le16(val >> 16, p + 2);
}

static inline void nonnative_put_le64(u64 val, u8 *p)
{
	nonnative_put_le32(val, p);
	nonnative_put_le32(val >> 32, p + 4);
}

static inline void nonnative_put_be16(u16 val, u8 *p)
{
	*p++ = val >> 8;
	*p++ = val;
}

static inline void nonnative_put_be32(u32 val, u8 *p)
{
	nonnative_put_be16(val >> 16, p);
	nonnative_put_be16(val, p + 2);
}

static inline void nonnative_put_be64(u64 val, u8 *p)
{
	nonnative_put_be32(val >> 32, p);
	nonnative_put_be32(val, p + 4);
}

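/*
 * The put_unaligned_* wrappers below store through the packed-struct
 * helpers when the requested byte order matches the CPU's, and fall
 * back to the byte-at-a-time helpers above when it does not.
 */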
static inline void put_unaligned_le16(u16 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu16(val, p);
#else
	nonnative_put_le16(val, p);
#endif
}

static inline void put_unaligned_le32(u32 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu32(val, p);
#else
	nonnative_put_le32(val, p);
#endif
}

static inline void put_unaligned_le64(u64 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu64(val, p);
#else
	nonnative_put_le64(val, p);
#endif
}

static inline void put_unaligned_be16(u16 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu16(val, p);
#else
	nonnative_put_be16(val, p);
#endif
}

static inline void put_unaligned_be32(u32 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu32(val, p);
#else
	nonnative_put_be32(val, p);
#endif
}

static inline void put_unaligned_be64(u64 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu64(val, p);
#else
	nonnative_put_be64(val, p);
#endif
}

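/*
 * Illustrative use ("pkt" and "seq" are hypothetical): store a
 * big-endian sequence number at an odd offset in a packet header:
 *
 *	put_unaligned_be32(seq, pkt + 5);
 */
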
/*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap the non-prefixed
 * get/put_xxx variants provided above.
 */
#include <linux/unaligned/generic.h>

#ifdef __LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
#endif

#endif /* __ASM_SH_UNALIGNED_SH4A_H */