/* { dg-do run } */
/* { dg-options "-O2 -mavx5124vnniw" } */
/* { dg-require-effective-target avx5124vnniw } */

#define DEFAULT_VALUE 0x7ffffffe

#define AVX5124VNNIW
#include "avx512f-helper.h"

#define SIZE (AVX512F_LEN / 32)

#include "avx512f-mask-type.h"

/* Scalar reference model: four chained signed word dot-product steps
   with saturating dword accumulation.

   SRC1..SRC4 are each read at indices 0 .. 2*SIZE-1, MULT at indices
   0 .. 7, PREV_DST at indices 0 .. SIZE-1; DST receives SIZE results.
   For step K (0..3) and element I the value
     SRCk[2*I] * MULT[2*K] + SRCk[2*I+1] * MULT[2*K+1]
   is added to the running dword, which is then clamped to the signed
   32-bit range before the next step is applied.  */
void
CALC (short *src1, short* src2, short *src3,
      short *src4, int* prev_dst, short *mult, int *dst)
{
  short *srcs[4] = { src1, src2, src3, src4 };
  int i, k;

  for (i = 0; i < SIZE; i++)
    {
      /* Accumulate in 64 bits: two word products can sum to 2^31, and
	 adding them to a dword can leave the int range in either
	 direction, so int arithmetic here would be undefined.  */
      long long acc = prev_dst[i];

      for (k = 0; k < 4; k++)
	{
	  acc += (long long) srcs[k][2 * i] * mult[2 * k];
	  acc += (long long) srcs[k][2 * i + 1] * mult[2 * k + 1];

	  /* Saturate after every step, at both ends of the range.  */
	  if (acc > 0x7fffffffLL)
	    acc = 0x7fffffffLL;
	  else if (acc < -0x7fffffffLL - 1)
	    acc = -0x7fffffffLL - 1;
	}

      /* ACC is within the int range here, so the conversion is exact.  */
      dst[i] = (int) acc;
    }
}

void
TEST (void)
{
  int i;
  UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
  UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
  UNION_TYPE (128, i_w) mult;
  MASK_TYPE mask = MASK_VALUE;
  int res_ref[SIZE];

  for (i = 0; i < SIZE * 2; i++)
    {
      src1.a[i] = 2 + 7 * i % 291;
      src2.a[i] = 3 + 11 * (i % 377) * i;
      src3.a[i] = src1.a[i] * src1.a[i];
      src4.a[i] = src2.a[i] * src2.a[i];
    }
  for (i = 0; i < 8; i++)
    mult.a[i] = 3 + i * 2;

  for (i = 0; i < SIZE; i++)
    src5.a[i] = DEFAULT_VALUE;

  CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);

  res1.x = INTRINSIC (_4dpwssds_epi32)	     (      src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
  res2.x = INTRINSIC (_mask_4dpwssds_epi32)  (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
  res3.x = INTRINSIC (_maskz_4dpwssds_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
