/*!
[config]
name: Vector loada_half private float2,3,4,8,16
clc_version_min: 11

dimensions: 1
global_size: 1 0 0

[test]
name: vector loada_half private float2
kernel_name: vloada_half2_private
arg_in:  0 buffer half[4] 0 0 28 46
arg_out: 1 buffer float2[2] 0 0  28 46

[test]
name: vector loada_half private offset float2
kernel_name: vloada_half2_private_offset
arg_in:  0 buffer half[6] 0 0 0 0 28 46
arg_out: 1 buffer float2[2] 0 0  28 46

[test]
name: vector loada_half private float3
kernel_name: vloada_half3_private
arg_in:  0 buffer half[7] 0 0 0 0 121 66 189
arg_out: 1 buffer float3[2] 0 0 0  121 66 189

[test]
name: vector loada_half private offset float3
kernel_name: vloada_half3_private_offset
arg_in:  0 buffer half[11] 0 0 0 0 0 0 0 0 121 66 189
arg_out: 1 buffer float3[2] 0 0 0  121 66 189

[test]
name: vector loada_half private float4
kernel_name: vloada_half4_private
arg_in:  0 buffer half[8] 0 0 0 0 120 155 52 202
arg_out: 1 buffer float4[2] 0 0 0 0  120 155 52 202

[test]
name: vector loada_half private offset float4
kernel_name: vloada_half4_private_offset
arg_in:  0 buffer half[12] 0 0 0 0 0 0 0 0 120 155 52 202
arg_out: 1 buffer float4[2] 0 0 0 0  120 155 52 202

[test]
name: vector loada_half private float8
kernel_name: vloada_half8_private
arg_in:  0 buffer half[16] 0 0 0 0 0 0 0 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer float8[2] 0 0 0 0 0 0 0 0  116 189 192 64 98 22 43 70

[test]
name: vector loada_half private offset float8
kernel_name: vloada_half8_private_offset
arg_in:  0 buffer half[24] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer float8[2] 0 0 0 0 0 0 0 0  116 189 192 64 98 22 43 70

[test]
name: vector loada_half private float16
kernel_name: vloada_half16_private
arg_in:  0 buffer half[32] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer float16[2] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

[test]
name: vector loada_half private offset float16
kernel_name: vloada_half16_private_offset
arg_in:  0 buffer half[48] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer float16[2] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

!*/

/*
 * Stage four 16-bit words from `in` in a private array, then read them
 * back as two half2 vectors with vloada_half2 and store the resulting
 * float2 values to out[0] and out[1].
 */
kernel void vloada_half2_private(global half *in,
                                 global float2 *out) {
    /*
     * vloada_half2 requires its source address to be aligned to
     * sizeof(half) * 2 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 4 bytes explicitly.
     */
    volatile private short loc[4] __attribute__((aligned(4)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 4; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    out[0] = vloada_half2(0, (private half *)loc);     /* loc[0..1] */
    out[1] = vloada_half2(0, (private half *)loc + 2); /* loc[2..3] */
}

/*
 * Stage six 16-bit words from `in` in a private array, then read two
 * half2 vectors with vloada_half2 using a vector offset of 1 and store
 * the resulting float2 values to out[0] and out[1].
 */
kernel void vloada_half2_private_offset(global half *in,
                                        global float2 *out) {
    /*
     * vloada_half2 requires its source address to be aligned to
     * sizeof(half) * 2 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 4 bytes explicitly.
     */
    volatile private short loc[6] __attribute__((aligned(4)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 6; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    /* An offset of 1 skips one whole half2 (two elements) past the pointer. */
    out[0] = vloada_half2(1, (private half *)loc);     /* loc[2..3] */
    out[1] = vloada_half2(1, (private half *)loc + 2); /* loc[4..5] */
}

/*
 * Stage seven 16-bit words from `in` in a private array, then read them
 * back as two half3 vectors with vloada_half3 and store the resulting
 * float3 values to out[0] and out[1].
 */
kernel void vloada_half3_private(global half *in,
                                 global float3 *out) {
    /*
     * vloada_half3 treats the buffer as 4-element slots and requires its
     * source address to be aligned to sizeof(half) * 4 bytes. A plain
     * short array only guarantees 2-byte alignment, so request 8 bytes
     * explicitly.
     */
    volatile private short loc[7] __attribute__((aligned(8)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 7; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    /* The second vector starts one 4-element slot after the first. */
    out[0] = vloada_half3(0, (private half *)loc);     /* loc[0..2] */
    out[1] = vloada_half3(0, (private half *)loc + 4); /* loc[4..6] */
}

/*
 * Stage eleven 16-bit words from `in` in a private array, then read two
 * half3 vectors with vloada_half3 using a vector offset of 1 and store
 * the resulting float3 values to out[0] and out[1].
 */
kernel void vloada_half3_private_offset(global half *in,
                                        global float3 *out) {
    /*
     * vloada_half3 treats the buffer as 4-element slots and requires its
     * source address to be aligned to sizeof(half) * 4 bytes. A plain
     * short array only guarantees 2-byte alignment, so request 8 bytes
     * explicitly.
     */
    volatile private short loc[11] __attribute__((aligned(8)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 11; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    /* An offset of 1 skips one 4-element slot past the pointer. */
    out[0] = vloada_half3(1, (private half *)loc);     /* loc[4..6]  */
    out[1] = vloada_half3(1, (private half *)loc + 4); /* loc[8..10] */
}

/*
 * Stage eight 16-bit words from `in` in a private array, then read them
 * back as two half4 vectors with vloada_half4 and store the resulting
 * float4 values to out[0] and out[1].
 */
kernel void vloada_half4_private(global half *in,
                                 global float4 *out) {
    /*
     * vloada_half4 requires its source address to be aligned to
     * sizeof(half) * 4 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 8 bytes explicitly.
     */
    volatile private short loc[8] __attribute__((aligned(8)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 8; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    out[0] = vloada_half4(0, (private half *)loc);     /* loc[0..3] */
    out[1] = vloada_half4(0, (private half *)loc + 4); /* loc[4..7] */
}

/*
 * Stage twelve 16-bit words from `in` in a private array, then read two
 * half4 vectors with vloada_half4 using a vector offset of 1 and store
 * the resulting float4 values to out[0] and out[1].
 */
kernel void vloada_half4_private_offset(global half *in,
                                        global float4 *out) {
    /*
     * vloada_half4 requires its source address to be aligned to
     * sizeof(half) * 4 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 8 bytes explicitly.
     */
    volatile private short loc[12] __attribute__((aligned(8)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 12; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    /* An offset of 1 skips one whole half4 (four elements) past the pointer. */
    out[0] = vloada_half4(1, (private half *)loc);     /* loc[4..7]  */
    out[1] = vloada_half4(1, (private half *)loc + 4); /* loc[8..11] */
}

/*
 * Stage sixteen 16-bit words from `in` in a private array, then read them
 * back as two half8 vectors with vloada_half8 and store the resulting
 * float8 values to out[0] and out[1].
 */
kernel void vloada_half8_private(global half *in,
                                 global float8 *out) {
    /*
     * vloada_half8 requires its source address to be aligned to
     * sizeof(half) * 8 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 16 bytes explicitly.
     */
    volatile private short loc[16] __attribute__((aligned(16)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 16; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    out[0] = vloada_half8(0, (private half *)loc);     /* loc[0..7]  */
    out[1] = vloada_half8(0, (private half *)loc + 8); /* loc[8..15] */
}

/*
 * Stage twenty-four 16-bit words from `in` in a private array, then read
 * two half8 vectors with vloada_half8 using a vector offset of 1 and
 * store the resulting float8 values to out[0] and out[1].
 */
kernel void vloada_half8_private_offset(global half *in,
                                        global float8 *out) {
    /*
     * vloada_half8 requires its source address to be aligned to
     * sizeof(half) * 8 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 16 bytes explicitly.
     */
    volatile private short loc[24] __attribute__((aligned(16)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 24; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    /* An offset of 1 skips one whole half8 (eight elements) past the pointer. */
    out[0] = vloada_half8(1, (private half *)loc);     /* loc[8..15]  */
    out[1] = vloada_half8(1, (private half *)loc + 8); /* loc[16..23] */
}

/*
 * Stage thirty-two 16-bit words from `in` in a private array, then read
 * them back as two half16 vectors with vloada_half16 and store the
 * resulting float16 values to out[0] and out[1].
 */
kernel void vloada_half16_private(global half *in,
                                  global float16 *out) {
    /*
     * vloada_half16 requires its source address to be aligned to
     * sizeof(half) * 16 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 32 bytes explicitly.
     */
    volatile private short loc[32] __attribute__((aligned(32)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 32; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    out[0] = vloada_half16(0, (private half *)loc);      /* loc[0..15]  */
    out[1] = vloada_half16(0, (private half *)loc + 16); /* loc[16..31] */
}

/*
 * Stage forty-eight 16-bit words from `in` in a private array, then read
 * two half16 vectors with vloada_half16 using a vector offset of 1 and
 * store the resulting float16 values to out[0] and out[1].
 */
kernel void vloada_half16_private_offset(global half *in,
                                         global float16 *out) {
    /*
     * vloada_half16 requires its source address to be aligned to
     * sizeof(half) * 16 bytes. A plain short array only guarantees
     * 2-byte alignment, so request 32 bytes explicitly.
     */
    volatile private short loc[48] __attribute__((aligned(32)));

    /* Copy the raw 16-bit patterns; no numeric conversion takes place. */
    for (int i = 0; i < 48; ++i) {
        loc[i] = ((global short *)in)[i];
    }

    /* An offset of 1 skips one whole half16 (sixteen elements) past the pointer. */
    out[0] = vloada_half16(1, (private half *)loc);      /* loc[16..31] */
    out[1] = vloada_half16(1, (private half *)loc + 16); /* loc[32..47] */
}
