#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable

/*
 * Exercises atomic_and on a word in local memory.
 *
 * Src   - global input; each work-item reads the element at its global id.
 * Dst   - global output; receives Local[0] at the index of the work-group id.
 * Local - local scratch buffer; written at each work-item's local id, so it
 *         must hold at least one int per work-item of the group.
 *
 * Each work-item clears a single bit of Local[0]; the resulting word is then
 * stored to Dst.
 */
kernel void test_atomic_fn(global int *Src, global int *Dst,
                           volatile local int *Local){
  const int globalId = get_global_id(0);
  const int localId = get_local_id(0);
  const int groupId = get_group_id(0);

  // Stage this work-item's source element in the local buffer and wait until
  // the whole group has done the same before anyone touches Local[0].
  Local[localId] = Src[globalId];
  barrier(CLK_LOCAL_MEM_FENCE);

  // Width, in bits, of one element of the local buffer.
  const size_t bitsPerWord = 8 * sizeof(*Local);

  // Bit this work-item is responsible for clearing.
  // NOTE(review): this equals the local id only when the work-group size is
  // bitsPerWord and no global offset is used; the shift below needs the value
  // to stay within [0, bitsPerWord) -- confirm against the host launch code.
  const int bitToClear = globalId - (groupId * bitsPerWord);
  const unsigned int clearMask = ~(1U << bitToClear);
  atomic_and(Local, clearMask);

  // Make sure every work-item's atomic update has landed before reading back.
  barrier(CLK_LOCAL_MEM_FENCE);

  // Every work-item of the group performs this store (there is no guard).
  Dst[groupId] = *Local;
}
