#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
/*
 * Exercise atomic_add on __local memory.
 *
 * src : one input value per work-group, indexed by get_group_id(0).
 * G   : one output value per work-group, indexed by get_group_id(0).
 * L   : local scratch buffer. It is indexed by the group id, so it must
 *       hold at least get_group_id(0) + 1 elements.
 *       NOTE(review): a single slot would suffice since local memory is
 *       per work-group; the indexing is kept as-is -- confirm with the host.
 *
 * Every work-item seeds the group's slot with src[gid], atomically adds 2
 * to it, and then writes the slot to G[gid]. The value written is therefore
 * src[gid] plus 2 for each work-item in the group.
 */
__kernel void test_atomic_fn(__global uint *src, __global uint *G,
  volatile __local uint *L)
{
  const size_t gid = get_group_id(0);

  // All work-items of the group store the same seed value into the slot.
  L[gid] = src[gid];
  // Make sure the seed is in place before any work-item increments it.
  barrier(CLK_LOCAL_MEM_FENCE);

  atomic_add(&L[gid], 2);

  // The increments target local memory, so a local fence is needed for the
  // read below to observe every work-item's atomic_add.
  barrier(CLK_LOCAL_MEM_FENCE);

  // Finally, write out the last value. The group is synced at this point,
  // so every work-item writes the same value.
  G[gid] = L[gid];
}
