/*!
[config]
name: Vector load private double2,3,4,8,16
clc_version_min: 11

dimensions: 1
global_size: 1 0 0
require_device_extensions: cl_khr_fp64
[test]
name: vector load private double2
kernel_name: vload2_private
arg_in:  0 buffer double[3] 0 28 46
arg_out: 1 buffer double2[2] 0 28 28 46

[test]
name: vector load private offset double2
kernel_name: vload2_private_offset
arg_in:  0 buffer double[5] 0 0 0 28 46
arg_out: 1 buffer double2[2] 0 28 28 46

[test]
name: vector load private double3
kernel_name: vload3_private
arg_in:  0 buffer double[4] 0 121 66 189
arg_out: 1 buffer double3[2] 0 121 66 121 66 189

[test]
name: vector load private offset double3
kernel_name: vload3_private_offset
arg_in:  0 buffer double[7] 0 0 0 0 121 66 189
arg_out: 1 buffer double3[2] 0 121 66 121 66 189

[test]
name: vector load private double4
kernel_name: vload4_private
arg_in:  0 buffer double[5] 0 120 155 52 202
arg_out: 1 buffer double4[2] 0 120 155 52 120 155 52 202

[test]
name: vector load private offset double4
kernel_name: vload4_private_offset
arg_in:  0 buffer double[9] 0 0 0 0 0 120 155 52 202
arg_out: 1 buffer double4[2] 0 120 155 52 120 155 52 202

[test]
name: vector load private double8
kernel_name: vload8_private
arg_in:  0 buffer double[9] 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer double8[2] 0 116 189 192 64 98 22 43 116 189 192 64 98 22 43 70

[test]
name: vector load private offset double8
kernel_name: vload8_private_offset
arg_in:  0 buffer double[17] 0 0 0 0 0 0 0 0 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer double8[2] 0 116 189 192 64 98 22 43 116 189 192 64 98 22 43 70

[test]
name: vector load private double16
kernel_name: vload16_private
arg_in:  0 buffer double[17] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer double16[2] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

[test]
name: vector load private offset double16
kernel_name: vload16_private_offset
arg_in:  0 buffer double[33] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer double16[2] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

!*/

#pragma OPENCL EXTENSION cl_khr_fp64: enable

/*
 * Stages the first 3 doubles of `in` in a private array, then writes two
 * double2 values to `out`, both loaded with vload2 at vector offset 0:
 *   out[0] <- loc[0..1]
 *   out[1] <- loc[1..2]   (base pointer advanced by one element)
 */
kernel void vload2_private(global double *in,
                             global double2 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[3];
    int idx = 0;
    while (idx < 3) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload2(0, base);
    out[1] = vload2(0, base + 1);
}

/*
 * Stages the first 5 doubles of `in` in a private array, then writes two
 * double2 values to `out`, both loaded with vload2 at vector offset 1
 * (i.e. two elements past the pointer passed in):
 *   out[0] <- loc[2..3]
 *   out[1] <- loc[3..4]   (base pointer advanced by one element)
 */
kernel void vload2_private_offset(global double *in,
                                    global double2 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[5];
    int idx = 0;
    while (idx < 5) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload2(1, base);
    out[1] = vload2(1, base + 1);
}

/*
 * Stages the first 4 doubles of `in` in a private array, then writes two
 * double3 values to `out`, both loaded with vload3 at vector offset 0:
 *   out[0] <- loc[0..2]
 *   out[1] <- loc[1..3]   (base pointer advanced by one element)
 */
kernel void vload3_private(global double *in,
                             global double3 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[4];
    int idx = 0;
    while (idx < 4) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload3(0, base);
    out[1] = vload3(0, base + 1);
}

/*
 * Stages the first 7 doubles of `in` in a private array, then writes two
 * double3 values to `out`, both loaded with vload3 at vector offset 1
 * (vload3 addresses p + offset * 3, i.e. three elements past the pointer):
 *   out[0] <- loc[3..5]
 *   out[1] <- loc[4..6]   (base pointer advanced by one element)
 *
 * Only loc[0..6] is ever needed, and the test header supplies a 7-element
 * input buffer, so exactly 7 elements are copied; copying 8 would read
 * in[7], which lies past the end of that buffer.
 */
kernel void vload3_private_offset(global double *in,
                                    global double3 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[7];
    for (int i = 0; i < 7; ++i)
        loc[i] = in[i];

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    out[0] = vload3(1, (private double*)loc);
    out[1] = vload3(1, (private double*)loc + 1);
}

/*
 * Stages the first 5 doubles of `in` in a private array, then writes two
 * double4 values to `out`, both loaded with vload4 at vector offset 0:
 *   out[0] <- loc[0..3]
 *   out[1] <- loc[1..4]   (base pointer advanced by one element)
 */
kernel void vload4_private(global double *in,
                             global double4 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[5];
    int idx = 0;
    while (idx < 5) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload4(0, base);
    out[1] = vload4(0, base + 1);
}

/*
 * Stages the first 9 doubles of `in` in a private array, then writes two
 * double4 values to `out`, both loaded with vload4 at vector offset 1
 * (i.e. four elements past the pointer passed in):
 *   out[0] <- loc[4..7]
 *   out[1] <- loc[5..8]   (base pointer advanced by one element)
 */
kernel void vload4_private_offset(global double *in,
                                    global double4 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[9];
    int idx = 0;
    while (idx < 9) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload4(1, base);
    out[1] = vload4(1, base + 1);
}

/*
 * Stages the first 9 doubles of `in` in a private array, then writes two
 * double8 values to `out`, both loaded with vload8 at vector offset 0:
 *   out[0] <- loc[0..7]
 *   out[1] <- loc[1..8]   (base pointer advanced by one element)
 */
kernel void vload8_private(global double *in,
                             global double8 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[9];
    int idx = 0;
    while (idx < 9) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload8(0, base);
    out[1] = vload8(0, base + 1);
}

/*
 * Stages the first 17 doubles of `in` in a private array, then writes two
 * double8 values to `out`, both loaded with vload8 at vector offset 1
 * (i.e. eight elements past the pointer passed in):
 *   out[0] <- loc[8..15]
 *   out[1] <- loc[9..16]   (base pointer advanced by one element)
 */
kernel void vload8_private_offset(global double *in,
                                    global double8 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[17];
    int idx = 0;
    while (idx < 17) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload8(1, base);
    out[1] = vload8(1, base + 1);
}

/*
 * Stages the first 17 doubles of `in` in a private array, then writes two
 * double16 values to `out`, both loaded with vload16 at vector offset 0:
 *   out[0] <- loc[0..15]
 *   out[1] <- loc[1..16]   (base pointer advanced by one element)
 */
kernel void vload16_private(global double *in,
                             global double16 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[17];
    int idx = 0;
    while (idx < 17) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload16(0, base);
    out[1] = vload16(0, base + 1);
}

/*
 * Stages the first 33 doubles of `in` in a private array, then writes two
 * double16 values to `out`, both loaded with vload16 at vector offset 1
 * (i.e. sixteen elements past the pointer passed in):
 *   out[0] <- loc[16..31]
 *   out[1] <- loc[17..32]   (base pointer advanced by one element)
 */
kernel void vload16_private_offset(global double *in,
                                    global double16 *out) {
    /* NOTE(review): volatile presumably keeps the private array from being
     * optimized away -- confirm against the test generator. */
    volatile private double loc[33];
    int idx = 0;
    while (idx < 33) {
        loc[idx] = in[idx];
        ++idx;
    }

    /* vloadn takes a non-volatile pointer, so the qualifier is cast away. */
    private double *base = (private double*)loc;
    out[0] = vload16(1, base);
    out[1] = vload16(1, base + 1);
}
