#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

/*
 * Exercises atomic_cmpxchg on local memory.
 *
 * Only work-group 0 does any work; every other group returns immediately.
 * Each work-item of group 0:
 *   1. copies G[global id] into L[local id],
 *   2. atomically increments L[0] with a compare-and-swap retry loop and
 *      remembers the value it replaced,
 *   3. stores that replaced value into L[local id].
 * Work-item 0 then writes the sum of L[0 .. local_size-1] to *S.
 *
 * G: global input, read once per work-item of group 0.
 * L: local scratch, indexed by local id; L[0] doubles as the shared counter.
 * S: local location that receives the final sum.
 */
kernel void test_atomic_fn(volatile global int *G, local int *L, local int *S) {
  const int lid = get_local_id(0);
  const int tid = get_global_id(0);
  const int gid = get_group_id(0);

  // The whole group leaves together, so the barriers below are still reached
  // by every work-item of the one group that keeps running.
  if (gid)
    return;

  // Copy from global to local.
  L[lid] = G[tid];
  barrier(CLK_LOCAL_MEM_FENCE);

  // Read the counter through a volatile alias so every retry re-loads it
  // instead of reusing a value cached in a register.
  volatile local int *counter = &L[0];
  int expected;
  int observed;

  // Retry until the exchange actually took place. atomic_cmpxchg returns the
  // value it found at *counter, so the swap happened iff that value is the
  // one we expected. Comparing against a fresh read of *counter instead
  // would let a work-item leave the loop after a failed exchange whenever
  // another work-item updated the counter in between.
  do {
    expected = *counter;
    observed = atomic_cmpxchg(counter, expected, expected + 1);
  } while (observed != expected);

  // Work-item 0 is about to overwrite L[0], which is the counter itself.
  // Wait until every work-item has finished its increment first.
  barrier(CLK_LOCAL_MEM_FENCE);

  L[lid] = observed;
  barrier(CLK_LOCAL_MEM_FENCE);

  // Only one WI continues.
  if (lid)
    return;

  // Aggregate all the values.
  const size_t count = get_local_size(0);
  *S = 0;
  for (size_t i = 0; i < count; i++)
    *S += L[i];
}
