/*!
[config]
name: Vector storea_half local float2,3,4,8,16
clc_version_min: 11

dimensions: 1
global_size: 1 0 0

[test]
name: vector storea_half local float2
kernel_name: vstorea_half2_local
arg_out: 0 buffer half[5] 0 0 28 46 0xdeadp1
arg_in: 0 buffer half[5] 0 0 0 0  0xdeadp1
arg_in:  1 buffer float2[1] 28 46

[test]
name: vector storea_half local offset float2
kernel_name: vstorea_half2_local_offset
arg_out: 0 buffer half[7] 0 0  28 46  28 46 0xdeadp1
arg_in: 0 buffer half[7] 0 0 0 0 0 0  0xdeadp1
arg_in: 1 buffer float2[1] 28 46

[test]
name: vector storea_half local float3
kernel_name: vstorea_half3_local
arg_out: 0 buffer half[8] 0 0 0 0 121 66 189 0xdeadp1
arg_in: 0 buffer half[8] 0 0 0 0 0 0 0  0xdeadp1
arg_in:  1 buffer float3[1] 121 66 189

[test]
name: vector storea_half local offset float3
kernel_name: vstorea_half3_local_offset
arg_out: 0 buffer half[12] 0 0 0 0  121 66 189 0  121 66 189 0xdeadp1
arg_in: 0 buffer half[12] 0 0 0 0 0 0 0 0 0 0 0  0xdeadp1
arg_in: 1 buffer float3[1] 121 66 189

[test]
name: vector storea_half local float4
kernel_name: vstorea_half4_local
arg_out: 0 buffer half[9] 0 0 0 0 120 155 52 202 0xdeadp1
arg_in: 0 buffer half[9] 0 0 0 0 0 0 0 0  0xdeadp1
arg_in:  1 buffer float4[1] 120 155 52 202

[test]
name: vector storea_half local offset float4
kernel_name: vstorea_half4_local_offset
arg_out: 0 buffer half[13] 0 0 0 0  120 155 52 202  120 155 52 202 0xdeadp1
arg_in: 0 buffer half[13] 0 0 0 0 0 0 0 0 0 0 0 0  0xdeadp1
arg_in: 1 buffer float4[1] 120 155 52 202

[test]
name: vector storea_half local float8
kernel_name: vstorea_half8_local
arg_out: 0 buffer half[17] 0 0 0 0 0 0 0 0 116 189 192 64 98 22 43 70 0xdeadp1
arg_in: 0 buffer half[17] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdeadp1
arg_in:  1 buffer float8[1] 116 189 192 64 98 22 43 70

[test]
name: vector storea_half local offset float8
kernel_name: vstorea_half8_local_offset
arg_out: 0 buffer half[25] 0 0 0 0 0 0 0 0  116 189 192 64 98 22 43 70  116 189 192 64 98 22 43 70 0xdeadp1
arg_in: 0 buffer half[25] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdeadp1
arg_in: 1 buffer float8[1] 116 189 192 64 98 22 43 70

[test]
name: vector storea_half local float16
kernel_name: vstorea_half16_local
arg_out: 0 buffer half[33] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171 0xdeadp1
arg_in: 0 buffer half[33] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdeadp1
arg_in:  1 buffer float16[1] 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

[test]
name: vector storea_half local offset float16
kernel_name: vstorea_half16_local_offset
arg_out: 0 buffer half[49] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171  185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171 0xdeadp1
arg_in: 0 buffer half[49] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdeadp1
arg_in: 1 buffer float16[1] 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

!*/

/*
 * Aligned half2 store into local memory.
 *
 * Reads one float2 from in[0], zero-fills a 4-element local scratch array,
 * stores the vector with vstorea_half2 at vector offset 0 relative to
 * loc + 2, then copies all 4 raw 16-bit words of the scratch array to out.
 *
 * out: receives 4 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half2_local(global half *out,
                                global float2 *in) {
    float2 tmp = in[0];
    /*
     * vstorea_half2 requires its destination to be aligned to
     * sizeof(half) * 2 = 4 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 2 is
     * then 4-byte aligned as well.
     */
    volatile local short loc[4] __attribute__((aligned(4)));
    for (int i = 0; i < 4; ++i) {
        loc[i] = 0;
    }

    vstorea_half2(tmp, 0, (local half*)loc + 2);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 4; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half2 stores into local memory at vector offsets 0 and 1.
 *
 * Reads one float2 from in[0], zero-fills a 6-element local scratch array,
 * stores the vector twice with vstorea_half2 (offsets 0 and 1 relative to
 * loc + 2), then copies all 6 raw 16-bit words of the scratch array to out.
 *
 * out: receives 6 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half2_local_offset(global half *out,
                                       global float2 *in) {
    float2 tmp = in[0];
    /*
     * vstorea_half2 requires its destination to be aligned to
     * sizeof(half) * 2 = 4 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 2 and
     * every vector offset from it are then 4-byte aligned as well.
     */
    volatile local short loc[6] __attribute__((aligned(4)));
    for (int i = 0; i < 6; ++i) {
        loc[i] = 0;
    }

    vstorea_half2(tmp, 0, (local half*)loc + 2);
    vstorea_half2(tmp, 1, (local half*)loc + 2);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 6; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half3 store into local memory.
 *
 * Reads one float3 from in[0], zero-fills a 7-element local scratch array,
 * stores the vector with vstorea_half3 at vector offset 0 relative to
 * loc + 4, then copies all 7 raw 16-bit words of the scratch array to out.
 *
 * out: receives 7 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half3_local(global half *out,
                                global float3 *in) {
    float3 tmp = in[0];
    /*
     * vstorea_half3 uses the 4-component layout: its destination must be
     * aligned to sizeof(half) * 4 = 8 bytes. A plain short array only
     * guarantees 2-byte alignment, so request the alignment explicitly;
     * loc + 4 is then 8-byte aligned as well.
     */
    volatile local short loc[7] __attribute__((aligned(8)));
    for (int i = 0; i < 7; ++i) {
        loc[i] = 0;
    }

    vstorea_half3(tmp, 0, (local half*)loc + 4);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 7; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half3 stores into local memory at vector offsets 0 and 1.
 *
 * Reads one float3 from in[0], zero-fills an 11-element local scratch
 * array, stores the vector twice with vstorea_half3 (offsets 0 and 1
 * relative to loc + 4), then copies all 11 raw 16-bit words of the scratch
 * array to out. The expected output in the test header leaves element 7
 * (the padding slot between the two vectors) at zero.
 *
 * out: receives 11 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half3_local_offset(global half *out,
                                       global float3 *in) {
    float3 tmp = in[0];
    /*
     * vstorea_half3 uses the 4-component layout: its destination must be
     * aligned to sizeof(half) * 4 = 8 bytes. A plain short array only
     * guarantees 2-byte alignment, so request the alignment explicitly;
     * loc + 4 and every vector offset from it are then 8-byte aligned.
     */
    volatile local short loc[11] __attribute__((aligned(8)));
    for (int i = 0; i < 11; ++i) {
        loc[i] = 0;
    }

    vstorea_half3(tmp, 0, (local half*)loc + 4);
    vstorea_half3(tmp, 1, (local half*)loc + 4);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 11; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half4 store into local memory.
 *
 * Reads one float4 from in[0], zero-fills an 8-element local scratch array,
 * stores the vector with vstorea_half4 at vector offset 0 relative to
 * loc + 4, then copies all 8 raw 16-bit words of the scratch array to out.
 *
 * out: receives 8 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half4_local(global half *out,
                                global float4 *in) {
    float4 tmp = in[0];
    /*
     * vstorea_half4 requires its destination to be aligned to
     * sizeof(half) * 4 = 8 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 4 is
     * then 8-byte aligned as well.
     */
    volatile local short loc[8] __attribute__((aligned(8)));
    for (int i = 0; i < 8; ++i) {
        loc[i] = 0;
    }

    vstorea_half4(tmp, 0, (local half*)loc + 4);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 8; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half4 stores into local memory at vector offsets 0 and 1.
 *
 * Reads one float4 from in[0], zero-fills a 12-element local scratch array,
 * stores the vector twice with vstorea_half4 (offsets 0 and 1 relative to
 * loc + 4), then copies all 12 raw 16-bit words of the scratch array to out.
 *
 * out: receives 12 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half4_local_offset(global half *out,
                                       global float4 *in) {
    float4 tmp = in[0];
    /*
     * vstorea_half4 requires its destination to be aligned to
     * sizeof(half) * 4 = 8 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 4 and
     * every vector offset from it are then 8-byte aligned as well.
     */
    volatile local short loc[12] __attribute__((aligned(8)));
    for (int i = 0; i < 12; ++i) {
        loc[i] = 0;
    }

    vstorea_half4(tmp, 0, (local half*)loc + 4);
    vstorea_half4(tmp, 1, (local half*)loc + 4);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 12; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half8 store into local memory.
 *
 * Reads one float8 from in[0], zero-fills a 16-element local scratch array,
 * stores the vector with vstorea_half8 at vector offset 0 relative to
 * loc + 8, then copies all 16 raw 16-bit words of the scratch array to out.
 *
 * out: receives 16 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half8_local(global half *out,
                                global float8 *in) {
    float8 tmp = in[0];
    /*
     * vstorea_half8 requires its destination to be aligned to
     * sizeof(half) * 8 = 16 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 8 is
     * then 16-byte aligned as well.
     */
    volatile local short loc[16] __attribute__((aligned(16)));
    for (int i = 0; i < 16; ++i) {
        loc[i] = 0;
    }

    vstorea_half8(tmp, 0, (local half*)loc + 8);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 16; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half8 stores into local memory at vector offsets 0 and 1.
 *
 * Reads one float8 from in[0], zero-fills a 24-element local scratch array,
 * stores the vector twice with vstorea_half8 (offsets 0 and 1 relative to
 * loc + 8), then copies all 24 raw 16-bit words of the scratch array to out.
 *
 * out: receives 24 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half8_local_offset(global half *out,
                                       global float8 *in) {
    float8 tmp = in[0];
    /*
     * vstorea_half8 requires its destination to be aligned to
     * sizeof(half) * 8 = 16 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 8 and
     * every vector offset from it are then 16-byte aligned as well.
     */
    volatile local short loc[24] __attribute__((aligned(16)));
    for (int i = 0; i < 24; ++i) {
        loc[i] = 0;
    }

    vstorea_half8(tmp, 0, (local half*)loc + 8);
    vstorea_half8(tmp, 1, (local half*)loc + 8);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 24; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half16 store into local memory.
 *
 * Reads one float16 from in[0], zero-fills a 32-element local scratch
 * array, stores the vector with vstorea_half16 at vector offset 0 relative
 * to loc + 16, then copies all 32 raw 16-bit words of the scratch array to
 * out.
 *
 * out: receives 32 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half16_local(global half *out,
                                 global float16 *in) {
    float16 tmp = in[0];
    /*
     * vstorea_half16 requires its destination to be aligned to
     * sizeof(half) * 16 = 32 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 16 is
     * then 32-byte aligned as well.
     */
    volatile local short loc[32] __attribute__((aligned(32)));
    for (int i = 0; i < 32; ++i) {
        loc[i] = 0;
    }

    vstorea_half16(tmp, 0, (local half*)loc + 16);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 32; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}

/*
 * Aligned half16 stores into local memory at vector offsets 0 and 1.
 *
 * Reads one float16 from in[0], zero-fills a 48-element local scratch
 * array, stores the vector twice with vstorea_half16 (offsets 0 and 1
 * relative to loc + 16), then copies all 48 raw 16-bit words of the scratch
 * array to out.
 *
 * out: receives 48 shorts, written through a global short * view.
 * in:  source vector; only in[0] is read.
 */
kernel void vstorea_half16_local_offset(global half *out,
                                        global float16 *in) {
    float16 tmp = in[0];
    /*
     * vstorea_half16 requires its destination to be aligned to
     * sizeof(half) * 16 = 32 bytes. A plain short array only guarantees
     * 2-byte alignment, so request the alignment explicitly; loc + 16 and
     * every vector offset from it are then 32-byte aligned as well.
     */
    volatile local short loc[48] __attribute__((aligned(32)));
    for (int i = 0; i < 48; ++i) {
        loc[i] = 0;
    }

    vstorea_half16(tmp, 0, (local half*)loc + 16);
    vstorea_half16(tmp, 1, (local half*)loc + 16);

    /* Copy the raw bit patterns so untouched elements can be checked too. */
    for (int i = 0; i < 48; ++i) {
        ((global short *)out)[i] = loc[i];
    }
}
