/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-require-effective-target lp64 } */

#include <stdint.h>

/* A one-byte set of a variable value should be a scalar store.
**set1byte:
**	strb	w1, \[x0\]
**	ret
*/
void __attribute__((__noinline__))
set1byte (int64_t *src, char c)
{
  /* Set the first byte at SRC to the run-time value C.  The length is
     the constant 1.  The noinline attribute keeps this as a standalone
     function so that its generated body can be matched on its own.  */
  __builtin_memset (src, c, 1);
}

/* Special cases for setting 0.  */
/* 1-byte should be STRB with wzr
**set0byte:
**	strb	wzr, \[x0\]
**	ret
*/
void __attribute__((__noinline__))
set0byte (int64_t *src)
{
  /* Zero the first byte at SRC.  Both the value (0) and the length (1)
     are compile-time constants, unlike set1byte where the value is a
     function argument.  */
  __builtin_memset (src, 0, 1);
}

/* 35 bytes would become 4 scalar instructions, so favour NEON.
**set0neon:
**	movi	v0.4s, 0
**	stp	q0, q0, \[x0\]
**	str	wzr, \[x0, 31\]
**	ret
*/
void __attribute__((__noinline__))
set0neon (int64_t *src)
{
  /* Zero the first 35 bytes at SRC.  The value and the (odd) length are
     both compile-time constants.  */
  __builtin_memset (src, 0, 35);
}

/* 36 bytes, however, should be scalar.
**set0scalar:
**	stp	xzr, xzr, \[x0\]
**	stp	xzr, xzr, \[x0, 16\]
**	str	wzr, \[x0, 32\]
**	ret
*/
void __attribute__((__noinline__))
set0scalar (int64_t *src)
{
  /* Zero the first 36 bytes at SRC: one byte more than set0neon, with
     the value and the length again compile-time constants.  */
  __builtin_memset (src, 0, 36);
}


/* A 256-byte set is expanded inline.
**set256byte:
**	dup	v0.16b, w1
**	stp	q0, q0, \[x0\]
**	stp	q0, q0, \[x0, 32\]
**	stp	q0, q0, \[x0, 64\]
**	stp	q0, q0, \[x0, 96\]
**	stp	q0, q0, \[x0, 128\]
**	stp	q0, q0, \[x0, 160\]
**	stp	q0, q0, \[x0, 192\]
**	stp	q0, q0, \[x0, 224\]
**	ret
*/
void __attribute__((__noinline__))
set256byte (int64_t *src, char c)
{
  /* Set the first 256 bytes at SRC to the run-time value C.  Only the
     length is a compile-time constant here.  */
  __builtin_memset (src, c, 256);
}

/* More than 256 bytes goes to memset
**set257byte:
**	mov	x2, 257
**	mov	w1, 99
**	b	memset
*/
void __attribute__((__noinline__))
set257byte (int64_t *src)
{
  /* Set the first 257 bytes at SRC to the character constant 'c' (the
     value 99 that the expected body loads into w1).  The length is one
     byte more than in set256byte.  */
  __builtin_memset (src, 'c', 257);
}

/* { dg-final { check-function-bodies "**" "" "" } } */
