
/*
 * Exercises async_work_group_strided_copy() in the global -> local direction
 * and writes what ended up in local memory back out to dst.
 *
 * src          : global input; each work-group reads starting at
 *                src + copiesPerWorkgroup*stride*get_group_id(0).
 * dst          : global output; each work-item stores the local-buffer
 *                slot(s) it owns.
 * localBuffer  : local scratch, indexed by get_local_id(0)*copiesPerWorkItem+i,
 *                so it needs at least one element per work-item here.
 * copiesPerWorkgroup, copiesPerWorkItem, stride :
 *                the values passed by the caller are ignored -- all three
 *                are overwritten with 1 on entry.
 *                NOTE(review): presumably a deliberately reduced test case;
 *                confirm before relying on these arguments.
 */
__kernel void test_fn( const __global long8 *src, __global long8 *dst, __local long8 *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )
{
    /* Force every count to 1, discarding whatever the host passed in. */
    stride = 1;
    copiesPerWorkItem = stride;
    copiesPerWorkgroup = copiesPerWorkItem;

    const size_t localId = get_local_id( 0 );
    const size_t localBase = localId * copiesPerWorkItem;

    /* Each work-item clears the local slot(s) it will later read back. */
    const long8 zero = (long8)(long)0;
    for( int slot = 0; slot < copiesPerWorkItem; slot++ )
    {
        localBuffer[ localBase + slot ] = zero;
    }

    /* All zeroing stores must be done before the async copy writes the buffer. */
    barrier( CLK_LOCAL_MEM_FENCE );

    /*
     * Work-group-wide copy of copiesPerWorkgroup element(s) from this group's
     * region of src into localBuffer. The trailing 0 supplies no existing
     * event, so the call hands back the event to wait on.
     */
    const size_t groupOffset = copiesPerWorkgroup * stride * get_group_id( 0 );
    event_t copyDone = async_work_group_strided_copy( (__local long8*)localBuffer,
                                                      (__global const long8*)( src + groupOffset ),
                                                      (size_t)copiesPerWorkgroup,
                                                      (size_t)stride,
                                                      0 );

    /* Block until the copy has landed in local memory. */
    wait_group_events( 1, &copyDone );

    /*
     * Publish the local contents to dst. Because only copiesPerWorkgroup (1)
     * element was requested per group, slots that were not the target of the
     * copy are still expected to hold the zero written above.
     */
    const size_t dstBase = get_global_id( 0 ) * copiesPerWorkItem * stride;
    for( int slot = 0; slot < copiesPerWorkItem; slot++ )
    {
        dst[ dstBase + slot * stride ] = localBuffer[ localBase + slot ];
    }
}
