/* { dg-do compile } */
/* { dg-options "-O" } */
/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */

#include <arm_neon.h>

/* External function with no definition in this file.  Declared with a
   full prototype: an empty parameter list is an obsolescent
   non-prototype declarator and does not state that the function takes
   no arguments.  */
void ext(void);

/*
** s32_1:
**	fmov	s0, w0
**	ins	v0\.s\[1\], w1
**	ret
*/
/* Return a two-lane vector built from the scalars A0 and A1.  The
   initializer order is reversed on big-endian targets (presumably so
   that the expected assembly holds for either endianness).  */
int32x2_t s32_1(int32_t a0, int32_t a1) {
  return (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
          ? (int32x2_t) { a1, a0 }
          : (int32x2_t) { a0, a1 });
}
/*
** s32_2:
**	fmov	s0, w0
**	ld1	{v0\.s}\[1\], \[x1\]
**	ret
*/
/* Return a two-lane vector built from the scalar A0 and the value
   loaded from PTR.  The initializer order is reversed on big-endian
   targets.  */
int32x2_t s32_2(int32_t a0, int32_t *ptr) {
  return (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
          ? (int32x2_t) { *ptr, a0 }
          : (int32x2_t) { a0, *ptr });
}
/*
** s32_3:
**	ldr	s0, \[x0\]
**	ins	v0\.s\[1\], w1
**	ret
*/
/* Return a two-lane vector built from the value loaded from PTR and
   the scalar A1.  The initializer order is reversed on big-endian
   targets.  */
int32x2_t s32_3(int32_t *ptr, int32_t a1) {
  return (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
          ? (int32x2_t) { a1, *ptr }
          : (int32x2_t) { *ptr, a1 });
}
/*
** s32_4:
**	stp	w1, w2, \[x0\]
**	ret
*/
/* Store the two-lane vector { A0, A1 } through RES.  No endian-specific
   ordering is applied here.  */
void s32_4(int32x2_t *res, int32_t a0, int32_t a1) {
  *res = (int32x2_t) { a0, a1 };
}
/*
** s32_5:
**	stp	w1, w2, \[x0, #?4\]
**	ret
*/
/* Treat RES as an address and store the two-lane vector { A0, A1 }
   four bytes beyond it.  */
void s32_5(uintptr_t res, int32_t a0, int32_t a1) {
  int32x2_t *dst = (int32x2_t *) (res + 4);
  *dst = (int32x2_t) { a0, a1 };
}
/* Build the vector { A0, A1 }, call ext, then return the vector, so the
   value must stay live over the call.  Currently uses d8 to hold the
   vector across the call.  */
int32x2_t s32_6(int32_t a0, int32_t a1) {
  int32x2_t kept = { a0, a1 };
  ext ();
  return kept;
}

/*
** f32_1:
**	ins	v0\.s\[1\], v1\.s\[0\]
**	ret
*/
/* Return a two-lane float vector built from the scalars A0 and A1.  The
   initializer order is reversed on big-endian targets (presumably so
   that the expected assembly holds for either endianness).  */
float32x2_t f32_1(float32_t a0, float32_t a1) {
  return (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
          ? (float32x2_t) { a1, a0 }
          : (float32x2_t) { a0, a1 });
}
/*
** f32_2:
**	ld1	{v0\.s}\[1\], \[x0\]
**	ret
*/
/* Return a two-lane float vector built from the scalar A0 and the value
   loaded from PTR.  The initializer order is reversed on big-endian
   targets.  */
float32x2_t f32_2(float32_t a0, float32_t *ptr) {
  return (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
          ? (float32x2_t) { *ptr, a0 }
          : (float32x2_t) { a0, *ptr });
}
/*
** f32_3:
**	ldr	s0, \[x0\]
**	ins	v0\.s\[1\], v1\.s\[0\]
**	ret
*/
/* Return a two-lane float vector built from the value loaded from PTR
   and the scalar A1.  A0 is not used in the body.  The initializer
   order is reversed on big-endian targets.  */
float32x2_t f32_3(float32_t a0, float32_t a1, float32_t *ptr) {
  return (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
          ? (float32x2_t) { a1, *ptr }
          : (float32x2_t) { *ptr, a1 });
}
/*
** f32_4:
**	stp	s0, s1, \[x0\]
**	ret
*/
/* Store the two-lane float vector { A0, A1 } through RES.  No
   endian-specific ordering is applied here.  */
void f32_4(float32x2_t *res, float32_t a0, float32_t a1) {
  *res = (float32x2_t) { a0, a1 };
}
/*
** f32_5:
**	stp	s0, s1, \[x0, #?4\]
**	ret
*/
/* Treat RES as an address and store the two-lane float vector
   { A0, A1 } four bytes beyond it.  */
void f32_5(uintptr_t res, float32_t a0, float32_t a1) {
  float32x2_t *dst = (float32x2_t *) (res + 4);
  *dst = (float32x2_t) { a0, a1 };
}
/* Build the float vector { A0, A1 }, call ext, then return the vector,
   so the value must stay live over the call.  Currently uses d8 to hold
   the vector across the call.  */
float32x2_t f32_6(float32_t a0, float32_t a1) {
  float32x2_t kept = { a0, a1 };
  ext ();
  return kept;
}
