/*!
[config]
name: Vector load_half local float2,3,4,8,16
clc_version_min: 11

dimensions: 1
global_size: 1 0 0

[test]
name: vector load_half local float
kernel_name: vload_half_local
arg_in:  0 buffer half[2] 0 68
arg_out: 1 buffer float[2] 0  68

[test]
name: vector load_half local offset float
kernel_name: vload_half_local_offset
arg_in:  0 buffer half[3] 0 0 68
arg_out: 1 buffer float[2] 0  68

[test]
name: vector load_half local float2
kernel_name: vload_half2_local
arg_in:  0 buffer half[3] 0 28 46
arg_out: 1 buffer float2[2] 0 28 28 46

[test]
name: vector load_half local offset float2
kernel_name: vload_half2_local_offset
arg_in:  0 buffer half[5] 0 0 0 28 46
arg_out: 1 buffer float2[2] 0 28 28 46

[test]
name: vector load_half local float3
kernel_name: vload_half3_local
arg_in:  0 buffer half[4] 0 121 66 189
arg_out: 1 buffer float3[2] 0 121 66 121 66 189

[test]
name: vector load_half local offset float3
kernel_name: vload_half3_local_offset
arg_in:  0 buffer half[7] 0 0 0 0 121 66 189
arg_out: 1 buffer float3[2] 0 121 66 121 66 189

[test]
name: vector load_half local float4
kernel_name: vload_half4_local
arg_in:  0 buffer half[5] 0 120 155 52 202
arg_out: 1 buffer float4[2] 0 120 155 52 120 155 52 202

[test]
name: vector load_half local offset float4
kernel_name: vload_half4_local_offset
arg_in:  0 buffer half[9] 0 0 0 0 0 120 155 52 202
arg_out: 1 buffer float4[2] 0 120 155 52 120 155 52 202

[test]
name: vector load_half local float8
kernel_name: vload_half8_local
arg_in:  0 buffer half[9] 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer float8[2] 0 116 189 192 64 98 22 43 116 189 192 64 98 22 43 70

[test]
name: vector load_half local offset float8
kernel_name: vload_half8_local_offset
arg_in:  0 buffer half[17] 0 0 0 0 0 0 0 0 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer float8[2] 0 116 189 192 64 98 22 43 116 189 192 64 98 22 43 70

[test]
name: vector load_half local float16
kernel_name: vload_half16_local
arg_in:  0 buffer half[17] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer float16[2] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

[test]
name: vector load_half local offset float16
kernel_name: vload_half16_local_offset
arg_in:  0 buffer half[33] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer float16[2] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

!*/

/*
 * Scalar vload_half from local memory with offset 0.
 *
 * Stages two 16-bit values from `in` in a local array, then loads the
 * element at index 0 and the element at index 1 and stores the resulting
 * floats in out[0] and out[1].
 */
kernel void vload_half_local(global half *in, global float *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[2];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 2; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half(0, staged);
    out[1] = vload_half(0, staged + 1);
}

/*
 * Scalar vload_half from local memory with offset 1.
 *
 * Stages three 16-bit values from `in` in a local array. The first load
 * reads element 1 relative to the array start, the second reads element 1
 * relative to a base advanced by one element; results go to out[0] and
 * out[1].
 */
kernel void vload_half_local_offset(global half *in, global float *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[3];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 3; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half(1, staged);
    out[1] = vload_half(1, staged + 1);
}

/*
 * vload_half2 from local memory with offset 0.
 *
 * Stages three 16-bit values from `in` in a local array, then performs
 * two 2-wide loads: one starting at the array base and one starting one
 * element past it. The float2 results are written to out[0] and out[1].
 */
kernel void vload_half2_local(global half *in, global float2 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[3];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 3; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half2(0, staged);
    out[1] = vload_half2(0, staged + 1);
}

/*
 * vload_half2 from local memory with offset 1.
 *
 * Stages five 16-bit values from `in` in a local array, then performs
 * two 2-wide loads at vector offset 1: one relative to the array base and
 * one relative to a base advanced by one element. The float2 results are
 * written to out[0] and out[1].
 */
kernel void vload_half2_local_offset(global half *in, global float2 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[5];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 5; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half2(1, staged);
    out[1] = vload_half2(1, staged + 1);
}

/*
 * vload_half3 from local memory with offset 0.
 *
 * Stages four 16-bit values from `in` in a local array, then performs
 * two 3-wide loads: one starting at the array base and one starting one
 * element past it. The float3 results are written to out[0] and out[1].
 */
kernel void vload_half3_local(global half *in, global float3 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[4];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 4; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half3(0, staged);
    out[1] = vload_half3(0, staged + 1);
}

/*
 * vload_half3 from local memory with offset 1.
 *
 * Stages seven 16-bit values from `in` in a local array, then performs
 * two 3-wide loads at vector offset 1: one relative to the array base
 * (elements 3..5) and one relative to a base advanced by one element
 * (elements 4..6). The float3 results are written to out[0] and out[1].
 *
 * Exactly seven elements are staged: the test's input buffer is declared
 * as half[7], and element 6 is the highest index either load touches.
 * Copying an eighth element would read past the end of `in`.
 */
kernel void vload_half3_local_offset(global half *in, global float3 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[7];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 7; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half3(1, staged);
    out[1] = vload_half3(1, staged + 1);
}

/*
 * vload_half4 from local memory with offset 0.
 *
 * Stages five 16-bit values from `in` in a local array, then performs
 * two 4-wide loads: one starting at the array base and one starting one
 * element past it. The float4 results are written to out[0] and out[1].
 */
kernel void vload_half4_local(global half *in, global float4 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[5];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 5; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half4(0, staged);
    out[1] = vload_half4(0, staged + 1);
}

/*
 * vload_half4 from local memory with offset 1.
 *
 * Stages nine 16-bit values from `in` in a local array, then performs
 * two 4-wide loads at vector offset 1: one relative to the array base and
 * one relative to a base advanced by one element. The float4 results are
 * written to out[0] and out[1].
 */
kernel void vload_half4_local_offset(global half *in, global float4 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[9];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 9; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half4(1, staged);
    out[1] = vload_half4(1, staged + 1);
}

/*
 * vload_half8 from local memory with offset 0.
 *
 * Stages nine 16-bit values from `in` in a local array, then performs
 * two 8-wide loads: one starting at the array base and one starting one
 * element past it. The float8 results are written to out[0] and out[1].
 */
kernel void vload_half8_local(global half *in, global float8 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[9];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 9; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half8(0, staged);
    out[1] = vload_half8(0, staged + 1);
}

/*
 * vload_half8 from local memory with offset 1.
 *
 * Stages seventeen 16-bit values from `in` in a local array, then
 * performs two 8-wide loads at vector offset 1: one relative to the array
 * base and one relative to a base advanced by one element. The float8
 * results are written to out[0] and out[1].
 */
kernel void vload_half8_local_offset(global half *in, global float8 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[17];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 17; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half8(1, staged);
    out[1] = vload_half8(1, staged + 1);
}

/*
 * vload_half16 from local memory with offset 0.
 *
 * Stages seventeen 16-bit values from `in` in a local array, then
 * performs two 16-wide loads: one starting at the array base and one
 * starting one element past it. The float16 results are written to
 * out[0] and out[1].
 */
kernel void vload_half16_local(global half *in, global float16 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[17];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 17; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half16(0, staged);
    out[1] = vload_half16(0, staged + 1);
}

/*
 * vload_half16 from local memory with offset 1.
 *
 * Stages thirty-three 16-bit values from `in` in a local array, then
 * performs two 16-wide loads at vector offset 1: one relative to the
 * array base and one relative to a base advanced by one element. The
 * float16 results are written to out[0] and out[1].
 */
kernel void vload_half16_local_offset(global half *in, global float16 *out)
{
    /* NOTE(review): volatile presumably keeps the local round trip from
     * being optimized away -- confirm. */
    volatile local short loc[33];
    global short *src = (global short *)in;

    /* Copy the raw 16-bit patterns through short pointers. */
    for (int idx = 0; idx != 33; idx++) {
        loc[idx] = src[idx];
    }

    local half *staged = (local half *)loc;
    out[0] = vload_half16(1, staged);
    out[1] = vload_half16(1, staged + 1);
}
