/*******************************************************************************
* Copyright (C) 2018 Intel Corporation
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
 *
 *  Content:
 *            Touch test for all available oneMKL VM OMP offload math functions
 *            with ILP64 API:
 *
 *            HA (High Accuracy), LA (Low Accuracy), EP (Enhanced Performance)
 *            single, double, complex precision function different variants:
 *            basic, explicit mode, strided and strided with explicit mode
 *            are being called.
 *
 *******************************************************************************/

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <float.h>
#include <complex.h>
#include <omp.h>
#if (defined __INTEL_COMPILER) || (defined __INTEL_LLVM_COMPILER)
  #include <mathimf.h>
#else
  #include <math.h>
#endif

#include "mkl.h"
#include "mkl_omp_offload.h"

/**
 * Common constants:
 */
/* Number of inputs evaluated per oneMKL VM call (vector length) */
static const int64_t length = 1000;

/* Element stride passed to the strided API tests */
static const int64_t stride = 3;

/* Upper bound on errors printed per function; keeps the log short */
static const int max_printed = 1;

/*
 * Maximum allowed error in ulp's, one table per precision with three
 * entries each (presumably ordered HA, LA, EP - confirm against the users
 * of these tables). The first two double-precision entries carry an extra
 * +1.0 because the reference double precision function is itself rounded.
 */
static const float s_allowed_ulp[] = {
    1.0f,
    4.0f,
    5000.0f
};
static const float c_allowed_ulp[] = {
    1.0f,
    4.0f,
    9000.0f
};
static const double d_allowed_ulp[] = {
    1.0 + 1.0,
    4.0 + 1.0,
    7.0e7
};
static const double z_allowed_ulp[] = {
    1.0 + 1.0,
    4.0 + 1.0,
    1.0e8
};

/* Number of the offload device used in every device() clause */
static const int devnum = 0;

/* Value used for fixed (non-vector) arguments */
static const double fixed = 3.14;

/* Non-zero enables the errors printout */
static const int print_err = 1;

/* Non-zero enables the warnings printout */
static const int print_warn = 0;

/**
 * Mapping to oneMKL VM accuracy mode constants:
 */
/* Indexed by the launchers' acc argument: slot 0 -> VML_HA, 1 -> VML_LA, 2 -> VML_EP */
static const unsigned int vm_mode[] = {
    VML_HA,
    VML_LA,
    VML_EP
};

/*
 * Maximum function full name length.
 * NOTE(review): the use sites are not visible in this part of the file, so
 * whether the terminating NUL is counted in this value should be confirmed
 * there.
 */
#define NAME_LEN 64

/**
 * Available function API variants:
 */
enum VmApi
{
    kFunc   = 0,  /* Basic call */
    kMFunc  = 1,  /* Call with explicit mode argument */
    kFuncI  = 2,  /* Strided call */
    kMFuncI = 3,  /* Strided call with explicit mode argument */
    kApiNum = 4   /* Count of API variants; sizes the per-variant result arrays */
};

/**
 * Available function accuracies:
 */
enum VmAccuracy
{
    kHA     = 0,  /* HA accuracy */
    kLA     = 1,  /* LA accuracy */
    kEP     = 2,  /* EP accuracy */
    kAccNum = 3   /* Count of accuracies */
};

/**
 * Available function precisions:
 */
enum VmPrecision
{
    kSP = 0,  /* Real single precision */
    kDP = 1,  /* Real double precision */
    kCP = 2,  /* Complex single precision */
    kZP = 3   /* Complex double precision */
};

/**
 * Available function argument layouts:
 */
enum VmLayout
{
    kVtoV   = 0,  /* one vector argument            -> one vector result  */
    kVVtoV  = 1,  /* two vector arguments           -> one vector result  */
    kVtoVV  = 2,  /* one vector argument            -> two vector results */
    kVXtoV  = 3,  /* one vector + one fixed argument   -> one vector result */
    kVVXtoV = 4,  /* two vector + four fixed arguments -> one vector result */
    kVCtoVR = 5,  /* one complex argument           -> one real result    */
    kVRtoVC = 6   /* one real argument              -> one complex result */
};

/**
 * Input arguments:
 */
typedef struct
{
    /* Argument arrays, two per precision */
    float          *sarg1;
    float          *sarg2;
    double         *darg1;
    double         *darg2;
    float complex  *carg1;
    float complex  *carg2;
    double complex *zarg1;
    double complex *zarg2;

    /* Reference arrays, two per precision; all held in double / double complex */
    double         *sref1;
    double         *sref2;
    double         *dref1;
    double         *dref2;
    double complex *cref1;
    double complex *cref2;
    double complex *zref1;
    double complex *zref2;
} VmInputData;

/**
 * Output results:
 */
typedef struct
{
    /* Each member holds one result pointer per API variant (indexed by enum VmApi) */
    float          *sres1[kApiNum];
    float          *sres2[kApiNum];
    double         *dres1[kApiNum];
    double         *dres2[kApiNum];
    float complex  *cres1[kApiNum];
    float complex  *cres2[kApiNum];
    double complex *zres1[kApiNum];
    double complex *zres2[kApiNum];
} VmOutputData;

/**
 * Types for pointers to real reference functions :
 */
/* One double in, one double out */
typedef double (*RefVtoV)(double x);
/* Two doubles in, one double out */
typedef double (*RefVVtoV)(double x, double y);
/* One double in, two values written through the pointer parameters */
typedef void   (*RefVtoVV)(double x, double *r1, double *r2);
/* Six doubles in, one double out (presumably 2 vector + 4 fixed values, as in kVVXtoV - confirm order) */
typedef double (*RefVVXtoV)(double v0, double v1, double v2, double v3, double v4, double v5);

/**
 * Types for pointers to complex reference functions :
 */
/* Complex in, complex out */
typedef double complex (*CRefVtoV)(double complex z);
/* Complex in, real out */
typedef double         (*CRefCtoR)(double complex z);
/* Real in, complex out */
typedef double complex (*CRefRtoC)(double x);
/* Two complex in, complex out */
typedef double complex (*CRefVVtoV)(double complex z1, double complex z2);

/**
 * Type for pointer to VM functions launcher:
 */
/* Signature shared by the own_vm_* launchers: (accuracy, inputs, outputs) */
typedef void (*VmFunc)(int acc, VmInputData *in, VmOutputData *out);

/**
 * @brief VM functions family launchers
 *
 * Run all variants of VM function family
 *
 * @param[in] acc          Accuracy
 * @param[in] in           Input and reference result arrays
 * @param[out] out         Output arrays
 *
 */
 /**
 * Abs
 */
static void own_vm_abs (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Abs variant (basic, strided, explicit mode, strided with
     * explicit mode) for the s, d, c and z precisions on device devnum.
     * The c and z calls receive their complex output buffers through
     * float* / double* casts.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real single precision: argument and one result buffer per API variant */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Real double precision */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Complex single precision, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the calls do not write keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vsAbs_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAbsI_64(n_strided, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAbs_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAbsI_64(n_strided, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcAbs_64(length, carg, (float*)cres);
        #pragma omp dispatch device(devnum)
        vcAbsI_64(n_strided, carg, stride, (float*)cires, stride);
        #pragma omp dispatch device(devnum)
        vzAbs_64(length, zarg, (double*)zres);
        #pragma omp dispatch device(devnum)
        vzAbsI_64(n_strided, zarg, stride, (double*)zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmsAbs_64(length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsAbsI_64(n_strided, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAbs_64(length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdAbsI_64(n_strided, darg, stride, dmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAbs_64(length, carg, (float*)cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcAbsI_64(n_strided, carg, stride, (float*)cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAbs_64(length, zarg, (double*)zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzAbsI_64(n_strided, zarg, stride, (double*)zmires, stride, mode);
    }
} /* own_vm_abs */

 /**
 * Arg
 */
static void own_vm_arg (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Arg variant (basic, strided, explicit mode, strided with
     * explicit mode) for the c and z precisions on device devnum. The calls
     * receive their complex output buffers through float* / double* casts.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Complex single precision: argument and one result buffer per API variant */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the calls do not write keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: carg[0:length], zarg[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vcArg_64(length, carg, (float*)cres);
        #pragma omp dispatch device(devnum)
        vcArgI_64(n_strided, carg, stride, (float*)cires, stride);
        #pragma omp dispatch device(devnum)
        vzArg_64(length, zarg, (double*)zres);
        #pragma omp dispatch device(devnum)
        vzArgI_64(n_strided, zarg, stride, (double*)zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmcArg_64(length, carg, (float*)cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcArgI_64(n_strided, carg, stride, (float*)cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzArg_64(length, zarg, (double*)zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzArgI_64(n_strided, zarg, stride, (double*)zmires, stride, mode);
    }
} /* own_vm_arg */

 /**
 * CIS
 */
static void own_vm_cis (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every CIS variant (basic, strided, explicit mode, strided with
     * explicit mode) on device devnum. The c calls take the float argument
     * array and the z calls take the double argument array; both write into
     * complex result buffers.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real arguments */
    float  *sarg = in->sarg1;
    double *darg = in->darg1;

    /* Complex single precision results, one buffer per API variant */
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision results */
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the strided calls skip keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vcCIS_64(length, sarg, cres);
        #pragma omp dispatch device(devnum)
        vcCISI_64(n_strided, sarg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzCIS_64(length, darg, zres);
        #pragma omp dispatch device(devnum)
        vzCISI_64(n_strided, darg, stride, zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmcCIS_64(length, sarg, cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcCISI_64(n_strided, sarg, stride, cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzCIS_64(length, darg, zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzCISI_64(n_strided, darg, stride, zmires, stride, mode);
    }
} /* own_vm_cis */

/**
 * Acos
 */
static void own_vm_acos (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Acos variant (basic, strided, explicit mode, strided with
     * explicit mode) for the s, d, c and z precisions on device devnum.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real single precision: argument and one result buffer per API variant */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Real double precision */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Complex single precision, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the strided calls skip keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vsAcos_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAcosI_64(n_strided, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAcos_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAcosI_64(n_strided, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcAcos_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcAcosI_64(n_strided, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzAcos_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzAcosI_64(n_strided, zarg, stride, zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmsAcos_64(length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsAcosI_64(n_strided, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAcos_64(length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdAcosI_64(n_strided, darg, stride, dmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAcos_64(length, carg, cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcAcosI_64(n_strided, carg, stride, cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAcos_64(length, zarg, zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzAcosI_64(n_strided, zarg, stride, zmires, stride, mode);
    }
} /* own_vm_acos */

/**
 * Atan
 */
static void own_vm_atan (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Atan variant (basic, strided, explicit mode, strided with
     * explicit mode) for the s, d, c and z precisions on device devnum.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real single precision: argument and one result buffer per API variant */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Real double precision */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Complex single precision, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the strided calls skip keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vsAtan_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAtanI_64(n_strided, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAtan_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAtanI_64(n_strided, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcAtan_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcAtanI_64(n_strided, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzAtan_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzAtanI_64(n_strided, zarg, stride, zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmsAtan_64(length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsAtanI_64(n_strided, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan_64(length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdAtanI_64(n_strided, darg, stride, dmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAtan_64(length, carg, cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcAtanI_64(n_strided, carg, stride, cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAtan_64(length, zarg, zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzAtanI_64(n_strided, zarg, stride, zmires, stride, mode);
    }
} /* own_vm_atan */

/**
 * Asin
 */
static void own_vm_asin (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Asin variant (basic, strided, explicit mode, strided with
     * explicit mode) for the s, d, c and z precisions on device devnum.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real single precision: argument and one result buffer per API variant */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Real double precision */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Complex single precision, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the strided calls skip keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vsAsin_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAsinI_64(n_strided, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAsin_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAsinI_64(n_strided, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcAsin_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcAsinI_64(n_strided, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzAsin_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzAsinI_64(n_strided, zarg, stride, zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmsAsin_64(length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsAsinI_64(n_strided, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAsin_64(length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdAsinI_64(n_strided, darg, stride, dmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAsin_64(length, carg, cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcAsinI_64(n_strided, carg, stride, cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAsin_64(length, zarg, zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzAsinI_64(n_strided, zarg, stride, zmires, stride, mode);
    }
} /* own_vm_asin */

/**
 * Acosh
 */
static void own_vm_acosh (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Acosh variant (basic, strided, explicit mode, strided
     * with explicit mode) for the s, d, c and z precisions on device devnum.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real single precision: argument and one result buffer per API variant */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Real double precision */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Complex single precision, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the strided calls skip keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vsAcosh_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAcoshI_64(n_strided, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAcosh_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAcoshI_64(n_strided, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcAcosh_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcAcoshI_64(n_strided, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzAcosh_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzAcoshI_64(n_strided, zarg, stride, zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmsAcosh_64(length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsAcoshI_64(n_strided, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAcosh_64(length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdAcoshI_64(n_strided, darg, stride, dmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAcosh_64(length, carg, cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcAcoshI_64(n_strided, carg, stride, cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAcosh_64(length, zarg, zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzAcoshI_64(n_strided, zarg, stride, zmires, stride, mode);
    }
} /* own_vm_acosh */

/**
 * Asinh
 */
static void own_vm_asinh (int acc, VmInputData* in, VmOutputData* out)
{
    /*
     * Launches every Asinh variant (basic, strided, explicit mode, strided
     * with explicit mode) for the s, d, c and z precisions on device devnum.
     *
     * acc selects the vm_mode[] entry used as the accuracy mode.
     */
    const int64_t n_strided = length / stride;  /* element count for the *I calls */
    int mode = vm_mode[acc];

    /* Real single precision: argument and one result buffer per API variant */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Real double precision */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Complex single precision, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /*
     * Arguments are mapped to the device; result buffers are mapped tofrom
     * (presumably so that elements the strided calls skip keep their host
     * contents - confirm).
     */
    #pragma omp target data device(devnum) \
            map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants (no mode argument) */
        #pragma omp dispatch device(devnum)
        vsAsinh_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAsinhI_64(n_strided, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAsinh_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAsinhI_64(n_strided, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcAsinh_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcAsinhI_64(n_strided, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzAsinh_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzAsinhI_64(n_strided, zarg, stride, zires, stride);

        /* Explicit-mode variants (mode passed on every call) */
        #pragma omp dispatch device(devnum)
        vmsAsinh_64(length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsAsinhI_64(n_strided, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAsinh_64(length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdAsinhI_64(n_strided, darg, stride, dmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAsinh_64(length, carg, cmres, mode);
        #pragma omp dispatch device(devnum)
        vmcAsinhI_64(n_strided, carg, stride, cmires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAsinh_64(length, zarg, zmres, mode);
        #pragma omp dispatch device(devnum)
        vmzAsinhI_64(n_strided, zarg, stride, zmires, stride, mode);
    }
} /* own_vm_asinh */

/**
 * Atanh
 *
 * Calls all Atanh flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float, double, MKL_Complex8 and MKL_Complex16
 * via OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1, darg1, carg1, zarg1
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1/cres1/zres1 are written by the basic,
 *         strided, explicit-mode and strided explicit-mode calls
 */
static void own_vm_atanh (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* single precision complex */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* double precision complex */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_src[0:length], d_src[0:length], \
                                        c_src[0:length], z_src[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                            map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                            map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAtanh_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsAtanhI_64(cnt, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtanh_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdAtanhI_64(cnt, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcAtanh_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcAtanhI_64(cnt, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzAtanh_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzAtanhI_64(cnt, z_src, stride, z_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAtanh_64(length, s_src, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAtanhI_64(cnt, s_src, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtanh_64(length, d_src, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAtanhI_64(cnt, d_src, stride, d_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAtanh_64(length, c_src, c_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmcAtanhI_64(cnt, c_src, stride, c_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAtanh_64(length, z_src, z_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmzAtanhI_64(cnt, z_src, stride, z_dst_mi, stride, mode);
    }
} /* own_vm_atanh */

/**
 * Add
 *
 * Calls all Add flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float, double, MKL_Complex8 and MKL_Complex16
 * via OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2, darg1/darg2,
 *         carg1/carg2 and zarg1/zarg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1/cres1/zres1 are written by the basic,
 *         strided, explicit-mode and strided explicit-mode calls
 */
static void own_vm_add (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* single precision complex */
    MKL_Complex8 *c_a      = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_b      = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* double precision complex */
    MKL_Complex16 *z_a      = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_b      = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], d_a[0:length], d_b[0:length], \
                                        c_a[0:length], c_b[0:length], z_a[0:length], z_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                            map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                            map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAdd_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsAddI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdAdd_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdAddI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcAdd_64(length, c_a, c_b, c_dst);
        #pragma omp dispatch device(devnum)
        vcAddI_64(cnt, c_a, stride, c_b, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzAdd_64(length, z_a, z_b, z_dst);
        #pragma omp dispatch device(devnum)
        vzAddI_64(cnt, z_a, stride, z_b, stride, z_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAdd_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAddI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAdd_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAddI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAdd_64(length, c_a, c_b, c_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmcAddI_64(cnt, c_a, stride, c_b, stride, c_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAdd_64(length, z_a, z_b, z_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmzAddI_64(cnt, z_a, stride, z_b, stride, z_dst_mi, stride, mode);
    }
} /* own_vm_add */

/**
 * Sub
 *
 * Calls all Sub flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float, double, MKL_Complex8 and MKL_Complex16
 * via OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2, darg1/darg2,
 *         carg1/carg2 and zarg1/zarg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1/cres1/zres1 are written by the basic,
 *         strided, explicit-mode and strided explicit-mode calls
 */
static void own_vm_sub (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* single precision complex */
    MKL_Complex8 *c_a      = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_b      = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* double precision complex */
    MKL_Complex16 *z_a      = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_b      = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], d_a[0:length], d_b[0:length], \
                                        c_a[0:length], c_b[0:length], z_a[0:length], z_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                            map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                            map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSub_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsSubI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdSub_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdSubI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcSub_64(length, c_a, c_b, c_dst);
        #pragma omp dispatch device(devnum)
        vcSubI_64(cnt, c_a, stride, c_b, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzSub_64(length, z_a, z_b, z_dst);
        #pragma omp dispatch device(devnum)
        vzSubI_64(cnt, z_a, stride, z_b, stride, z_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSub_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsSubI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSub_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdSubI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcSub_64(length, c_a, c_b, c_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmcSubI_64(cnt, c_a, stride, c_b, stride, c_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzSub_64(length, z_a, z_b, z_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmzSubI_64(cnt, z_a, stride, z_b, stride, z_dst_mi, stride, mode);
    }
} /* own_vm_sub */

/**
 * Mul
 *
 * Calls all Mul flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float, double, MKL_Complex8 and MKL_Complex16
 * via OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2, darg1/darg2,
 *         carg1/carg2 and zarg1/zarg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1/cres1/zres1 are written by the basic,
 *         strided, explicit-mode and strided explicit-mode calls
 */
static void own_vm_mul (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* single precision complex */
    MKL_Complex8 *c_a      = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_b      = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* double precision complex */
    MKL_Complex16 *z_a      = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_b      = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], d_a[0:length], d_b[0:length], \
                                        c_a[0:length], c_b[0:length], z_a[0:length], z_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                            map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                            map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsMul_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsMulI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdMul_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdMulI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcMul_64(length, c_a, c_b, c_dst);
        #pragma omp dispatch device(devnum)
        vcMulI_64(cnt, c_a, stride, c_b, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzMul_64(length, z_a, z_b, z_dst);
        #pragma omp dispatch device(devnum)
        vzMulI_64(cnt, z_a, stride, z_b, stride, z_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsMul_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsMulI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdMul_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdMulI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcMul_64(length, c_a, c_b, c_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmcMulI_64(cnt, c_a, stride, c_b, stride, c_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzMul_64(length, z_a, z_b, z_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmzMulI_64(cnt, z_a, stride, z_b, stride, z_dst_mi, stride, mode);
    }
} /* own_vm_mul */

/**
 * Atan2
 *
 * Calls all Atan2 flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_atan2 (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAtan2_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsAtan2I_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtan2_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdAtan2I_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAtan2_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAtan2I_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2I_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_atan2 */

/**
 * Atan2pi
 *
 * Calls all Atan2pi flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_atan2pi (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAtan2pi_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsAtan2piI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtan2pi_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdAtan2piI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAtan2pi_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAtan2piI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2pi_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2piI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_atan2pi */

/**
 * CopySign
 *
 * Calls all CopySign flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_copysign (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCopySign_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsCopySignI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCopySign_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdCopySignI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsCopySign_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsCopySignI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdCopySign_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdCopySignI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_copysign */

/**
 * Fdim
 *
 * Calls all Fdim flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_fdim (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFdim_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsFdimI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdFdim_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdFdimI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsFdim_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFdimI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFdim_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFdimI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_fdim */

/**
 * Fmax
 *
 * Calls all Fmax flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_fmax (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFmax_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsFmaxI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdFmax_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdFmaxI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsFmax_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFmaxI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFmax_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFmaxI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_fmax */

/**
 * Fmin
 *
 * Calls all Fmin flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_fmin (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFmin_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsFminI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdFmin_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdFminI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsFmin_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFminI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFmin_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFminI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_fmin */

/**
 * MaxMag
 *
 * Calls all MaxMag flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_maxmag (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsMaxMag_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsMaxMagI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdMaxMag_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdMaxMagI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsMaxMag_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsMaxMagI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdMaxMag_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdMaxMagI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_maxmag */

/**
 * MinMag
 *
 * Calls all MinMag flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_minmag (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsMinMag_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsMinMagI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdMinMag_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdMinMagI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsMinMag_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsMinMagI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdMinMag_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdMinMagI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_minmag */

/**
 * Fmod
 *
 * Calls all Fmod flavours - basic, strided (I), explicit mode (vm*) and
 * strided explicit mode - for float and double via OpenMP dispatch on
 * device devnum.
 *
 *   acc - index into vm_mode[]; the value found there is passed to
 *         vmlSetMode() and to every vm* call
 *   in  - supplies the argument arrays sarg1/sarg2 and darg1/darg2
 *   out - supplies the result arrays; entries kFunc, kFuncI, kMFunc and
 *         kMFuncI of sres1/dres1 are written by the basic, strided,
 *         explicit-mode and strided explicit-mode calls
 */
static void own_vm_fmod (int acc, VmInputData* in, VmOutputData* out)
{
    int     mode = vm_mode[acc];
    int64_t cnt  = length / stride;  /* count passed to the strided (I) calls */

    /* single precision */
    float *s_a      = in->sarg1;
    float *s_b      = in->sarg2;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* double precision */
    double *d_a      = in->darg1;
    double *d_b      = in->darg2;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device, result buffers in both directions. */
    #pragma omp target data device(devnum) \
                            map(to:     s_a[0:length], s_b[0:length], \
                                        d_a[0:length], d_b[0:length]) \
                            map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                            map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFmod_64(length, s_a, s_b, s_dst);
        #pragma omp dispatch device(devnum)
        vsFmodI_64(cnt, s_a, stride, s_b, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdFmod_64(length, d_a, d_b, d_dst);
        #pragma omp dispatch device(devnum)
        vdFmodI_64(cnt, d_a, stride, d_b, stride, d_dst_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsFmod_64(length, s_a, s_b, s_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFmodI_64(cnt, s_a, stride, s_b, stride, s_dst_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFmod_64(length, d_a, d_b, d_dst_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFmodI_64(cnt, d_a, stride, d_b, stride, d_dst_mi, stride, mode);
    }
} /* own_vm_fmod */

/**
 * Hypot
 *
 * Issues the single and double precision Hypot calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?Hypot_64   - `length` elements,
 *   v?HypotI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?Hypot_64  - like the first, mode passed as the last argument,
 *   vm?HypotI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1/sarg2 and darg1/darg2
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_hypot (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsHypot_64(length, s_x, s_y, s_out);
        #pragma omp dispatch device(devnum)
        vsHypotI_64(n_strided, s_x, stride, s_y, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdHypot_64(length, d_x, d_y, d_out);
        #pragma omp dispatch device(devnum)
        vdHypotI_64(n_strided, d_x, stride, d_y, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsHypot_64(length, s_x, s_y, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsHypotI_64(n_strided, s_x, stride, s_y, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdHypot_64(length, d_x, d_y, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdHypotI_64(n_strided, d_x, stride, d_y, stride, d_out_mi, stride, mode);
    }
} /* own_vm_hypot */

/**
 * NextAfter
 *
 * Issues the single and double precision NextAfter calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?NextAfter_64   - `length` elements,
 *   v?NextAfterI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?NextAfter_64  - like the first, mode passed as the last argument,
 *   vm?NextAfterI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1/sarg2 and darg1/darg2
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_nextafter (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsNextAfter_64(length, s_x, s_y, s_out);
        #pragma omp dispatch device(devnum)
        vsNextAfterI_64(n_strided, s_x, stride, s_y, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdNextAfter_64(length, d_x, d_y, d_out);
        #pragma omp dispatch device(devnum)
        vdNextAfterI_64(n_strided, d_x, stride, d_y, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsNextAfter_64(length, s_x, s_y, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsNextAfterI_64(n_strided, s_x, stride, s_y, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdNextAfter_64(length, d_x, d_y, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdNextAfterI_64(n_strided, d_x, stride, d_y, stride, d_out_mi, stride, mode);
    }
} /* own_vm_nextafter */

/**
 * Powr
 *
 * Issues the single and double precision Powr calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?Powr_64   - `length` elements,
 *   v?PowrI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?Powr_64  - like the first, mode passed as the last argument,
 *   vm?PowrI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1/sarg2 and darg1/darg2
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_powr (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsPowr_64(length, s_x, s_y, s_out);
        #pragma omp dispatch device(devnum)
        vsPowrI_64(n_strided, s_x, stride, s_y, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdPowr_64(length, d_x, d_y, d_out);
        #pragma omp dispatch device(devnum)
        vdPowrI_64(n_strided, d_x, stride, d_y, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsPowr_64(length, s_x, s_y, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsPowrI_64(n_strided, s_x, stride, s_y, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPowr_64(length, d_x, d_y, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdPowrI_64(n_strided, d_x, stride, d_y, stride, d_out_mi, stride, mode);
    }
} /* own_vm_powr */

/**
 * Pow2o3
 *
 * Issues the single and double precision Pow2o3 calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?Pow2o3_64   - `length` elements,
 *   v?Pow2o3I_64  - length/stride elements, both buffers stepped by `stride`,
 *   vm?Pow2o3_64  - like the first, mode passed as the last argument,
 *   vm?Pow2o3I_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1 and darg1
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_pow2o3 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsPow2o3_64(length, s_x, s_out);
        #pragma omp dispatch device(devnum)
        vsPow2o3I_64(n_strided, s_x, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdPow2o3_64(length, d_x, d_out);
        #pragma omp dispatch device(devnum)
        vdPow2o3I_64(n_strided, d_x, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsPow2o3_64(length, s_x, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsPow2o3I_64(n_strided, s_x, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPow2o3_64(length, d_x, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdPow2o3I_64(n_strided, d_x, stride, d_out_mi, stride, mode);
    }
} /* own_vm_pow2o3 */

/**
 * Pow3o2
 *
 * Issues the single and double precision Pow3o2 calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?Pow3o2_64   - `length` elements,
 *   v?Pow3o2I_64  - length/stride elements, both buffers stepped by `stride`,
 *   vm?Pow3o2_64  - like the first, mode passed as the last argument,
 *   vm?Pow3o2I_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1 and darg1
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_pow3o2 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsPow3o2_64(length, s_x, s_out);
        #pragma omp dispatch device(devnum)
        vsPow3o2I_64(n_strided, s_x, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdPow3o2_64(length, d_x, d_out);
        #pragma omp dispatch device(devnum)
        vdPow3o2I_64(n_strided, d_x, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsPow3o2_64(length, s_x, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsPow3o2I_64(n_strided, s_x, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPow3o2_64(length, d_x, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdPow3o2I_64(n_strided, d_x, stride, d_out_mi, stride, mode);
    }
} /* own_vm_pow3o2 */

/**
 * Cbrt
 *
 * Issues the single and double precision Cbrt calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?Cbrt_64   - `length` elements,
 *   v?CbrtI_64  - length/stride elements, both buffers stepped by `stride`,
 *   vm?Cbrt_64  - like the first, mode passed as the last argument,
 *   vm?CbrtI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1 and darg1
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_cbrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsCbrt_64(length, s_x, s_out);
        #pragma omp dispatch device(devnum)
        vsCbrtI_64(n_strided, s_x, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdCbrt_64(length, d_x, d_out);
        #pragma omp dispatch device(devnum)
        vdCbrtI_64(n_strided, d_x, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsCbrt_64(length, s_x, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsCbrtI_64(n_strided, s_x, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdCbrt_64(length, d_x, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdCbrtI_64(n_strided, d_x, stride, d_out_mi, stride, mode);
    }
} /* own_vm_cbrt */

/**
 * InvCbrt
 *
 * Issues the single and double precision InvCbrt calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?InvCbrt_64   - `length` elements,
 *   v?InvCbrtI_64  - length/stride elements, both buffers stepped by `stride`,
 *   vm?InvCbrt_64  - like the first, mode passed as the last argument,
 *   vm?InvCbrtI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1 and darg1
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_invcbrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsInvCbrt_64(length, s_x, s_out);
        #pragma omp dispatch device(devnum)
        vsInvCbrtI_64(n_strided, s_x, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdInvCbrt_64(length, d_x, d_out);
        #pragma omp dispatch device(devnum)
        vdInvCbrtI_64(n_strided, d_x, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsInvCbrt_64(length, s_x, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsInvCbrtI_64(n_strided, s_x, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdInvCbrt_64(length, d_x, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdInvCbrtI_64(n_strided, d_x, stride, d_out_mi, stride, mode);
    }
} /* own_vm_invcbrt */

/**
 * InvSqrt
 *
 * Issues the single and double precision InvSqrt calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?InvSqrt_64   - `length` elements,
 *   v?InvSqrtI_64  - length/stride elements, both buffers stepped by `stride`,
 *   vm?InvSqrt_64  - like the first, mode passed as the last argument,
 *   vm?InvSqrtI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1 and darg1
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_invsqrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsInvSqrt_64(length, s_x, s_out);
        #pragma omp dispatch device(devnum)
        vsInvSqrtI_64(n_strided, s_x, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdInvSqrt_64(length, d_x, d_out);
        #pragma omp dispatch device(devnum)
        vdInvSqrtI_64(n_strided, d_x, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsInvSqrt_64(length, s_x, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsInvSqrtI_64(n_strided, s_x, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdInvSqrt_64(length, d_x, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdInvSqrtI_64(n_strided, d_x, stride, d_out_mi, stride, mode);
    }
} /* own_vm_invsqrt */

/**
 * Remainder
 *
 * Issues the single and double precision Remainder calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?Remainder_64   - `length` elements,
 *   v?RemainderI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?Remainder_64  - like the first, mode passed as the last argument,
 *   vm?RemainderI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1/sarg2 and darg1/darg2
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_remainder (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsRemainder_64(length, s_x, s_y, s_out);
        #pragma omp dispatch device(devnum)
        vsRemainderI_64(n_strided, s_x, stride, s_y, stride, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdRemainder_64(length, d_x, d_y, d_out);
        #pragma omp dispatch device(devnum)
        vdRemainderI_64(n_strided, d_x, stride, d_y, stride, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsRemainder_64(length, s_x, s_y, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsRemainderI_64(n_strided, s_x, stride, s_y, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdRemainder_64(length, d_x, d_y, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdRemainderI_64(n_strided, d_x, stride, d_y, stride, d_out_mi, stride, mode);
    }
} /* own_vm_remainder */

/**
 * Div
 *
 * Issues the single, double, complex single and complex double precision
 * Div calls through "omp dispatch" on device devnum, each in four flavours:
 *   v?Div_64   - `length` elements,
 *   v?DivI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?Div_64  - like the first, mode passed as the last argument,
 *   vm?DivI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors {s,d,c,z}arg1 and {s,d,c,z}arg2;
 *       the complex ones are cast to the MKL complex types
 * out - provides the result buffers {s,d,c,z}res1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_div (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision arguments and per-flavour result buffers */
    MKL_Complex8  *c_x      = (MKL_Complex8*)in->carg1;
    MKL_Complex8  *c_y      = (MKL_Complex8*)in->carg2;
    MKL_Complex8  *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8  *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8  *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8  *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision arguments and per-flavour result buffers */
    MKL_Complex16 *z_x      = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_y      = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(to:c_x[0:length]) map(to:z_x[0:length]) \
        map(to:c_y[0:length]) map(to:z_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Real types: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsDiv_64(length, s_x, s_y, s_out);
        #pragma omp dispatch device(devnum)
        vsDivI_64(n_strided, s_x, stride, s_y, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdDiv_64(length, d_x, d_y, d_out);
        #pragma omp dispatch device(devnum)
        vdDivI_64(n_strided, d_x, stride, d_y, stride, d_out_i, stride);

        /* Complex types: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vcDiv_64(length, c_x, c_y, c_out);
        #pragma omp dispatch device(devnum)
        vcDivI_64(n_strided, c_x, stride, c_y, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzDiv_64(length, z_x, z_y, z_out);
        #pragma omp dispatch device(devnum)
        vzDivI_64(n_strided, z_x, stride, z_y, stride, z_out_i, stride);

        /* Same eight calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsDiv_64(length, s_x, s_y, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsDivI_64(n_strided, s_x, stride, s_y, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdDiv_64(length, d_x, d_y, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdDivI_64(n_strided, d_x, stride, d_y, stride, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcDiv_64(length, c_x, c_y, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcDivI_64(n_strided, c_x, stride, c_y, stride, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzDiv_64(length, z_x, z_y, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzDivI_64(n_strided, z_x, stride, z_y, stride, z_out_mi, stride, mode);
    }
} /* own_vm_div */

/**
 * Pow
 *
 * Issues the single, double, complex single and complex double precision
 * Pow calls through "omp dispatch" on device devnum, each in four flavours:
 *   v?Pow_64   - `length` elements,
 *   v?PowI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?Pow_64  - like the first, mode passed as the last argument,
 *   vm?PowI_64 - like the second, mode passed as the last argument.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors {s,d,c,z}arg1 and {s,d,c,z}arg2;
 *       the complex ones are cast to the MKL complex types
 * out - provides the result buffers {s,d,c,z}res1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_pow (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision arguments and per-flavour result buffers */
    MKL_Complex8  *c_x      = (MKL_Complex8*)in->carg1;
    MKL_Complex8  *c_y      = (MKL_Complex8*)in->carg2;
    MKL_Complex8  *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8  *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8  *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8  *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision arguments and per-flavour result buffers */
    MKL_Complex16 *z_x      = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_y      = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(to:c_x[0:length]) map(to:z_x[0:length]) \
        map(to:c_y[0:length]) map(to:z_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Real types: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsPow_64(length, s_x, s_y, s_out);
        #pragma omp dispatch device(devnum)
        vsPowI_64(n_strided, s_x, stride, s_y, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdPow_64(length, d_x, d_y, d_out);
        #pragma omp dispatch device(devnum)
        vdPowI_64(n_strided, d_x, stride, d_y, stride, d_out_i, stride);

        /* Complex types: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vcPow_64(length, c_x, c_y, c_out);
        #pragma omp dispatch device(devnum)
        vcPowI_64(n_strided, c_x, stride, c_y, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzPow_64(length, z_x, z_y, z_out);
        #pragma omp dispatch device(devnum)
        vzPowI_64(n_strided, z_x, stride, z_y, stride, z_out_i, stride);

        /* Same eight calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsPow_64(length, s_x, s_y, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsPowI_64(n_strided, s_x, stride, s_y, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPow_64(length, d_x, d_y, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdPowI_64(n_strided, d_x, stride, d_y, stride, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcPow_64(length, c_x, c_y, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcPowI_64(n_strided, c_x, stride, c_y, stride, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzPow_64(length, z_x, z_y, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzPowI_64(n_strided, z_x, stride, z_y, stride, z_out_mi, stride, mode);
    }
} /* own_vm_pow */

/**
 * Powx
 *
 * Issues the single, double, complex single and complex double precision
 * Powx calls through "omp dispatch" on device devnum, each in four flavours:
 *   v?Powx_64   - `length` elements,
 *   v?PowxI_64  - length/stride elements, both buffers stepped by `stride`,
 *   vm?Powx_64  - like the first, mode passed as the last argument,
 *   vm?PowxI_64 - like the second, mode passed as the last argument.
 * The second operand of every call is a scalar initialised from `fixed`
 * ({fixed, fixed} for the complex types), not a vector.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors {s,d,c,z}arg1; the complex ones
 *       are cast to the MKL complex types
 * out - provides the result buffers {s,d,c,z}res1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_powx (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision: vector argument, scalar operand, result buffers */
    float  *s_x      = in->sarg1;
    float   s_pow    = fixed;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision: vector argument, scalar operand, result buffers */
    double *d_x      = in->darg1;
    double  d_pow    = fixed;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision: vector argument, scalar operand, result buffers */
    MKL_Complex8  *c_x      = (MKL_Complex8*)in->carg1;
    MKL_Complex8   c_pow    = {fixed, fixed};
    MKL_Complex8  *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8  *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8  *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8  *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision: vector argument, scalar operand, result buffers */
    MKL_Complex16 *z_x      = (MKL_Complex16*)in->zarg1;
    MKL_Complex16  z_pow    = {fixed, fixed};
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:c_x[0:length]) map(to:z_x[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Real types: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsPowx_64(length, s_x, s_pow, s_out);
        #pragma omp dispatch device(devnum)
        vsPowxI_64(n_strided, s_x, stride, s_pow, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdPowx_64(length, d_x, d_pow, d_out);
        #pragma omp dispatch device(devnum)
        vdPowxI_64(n_strided, d_x, stride, d_pow, d_out_i, stride);

        /* Complex types: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vcPowx_64(length, c_x, c_pow, c_out);
        #pragma omp dispatch device(devnum)
        vcPowxI_64(n_strided, c_x, stride, c_pow, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzPowx_64(length, z_x, z_pow, z_out);
        #pragma omp dispatch device(devnum)
        vzPowxI_64(n_strided, z_x, stride, z_pow, z_out_i, stride);

        /* Same eight calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsPowx_64(length, s_x, s_pow, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsPowxI_64(n_strided, s_x, stride, s_pow, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPowx_64(length, d_x, d_pow, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdPowxI_64(n_strided, d_x, stride, d_pow, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcPowx_64(length, c_x, c_pow, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcPowxI_64(n_strided, c_x, stride, c_pow, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzPowx_64(length, z_x, z_pow, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzPowxI_64(n_strided, z_x, stride, z_pow, z_out_mi, stride, mode);
    }
} /* own_vm_powx */

/**
 * LinearFrac
 *
 * Issues the single and double precision LinearFrac calls through
 * "omp dispatch" on device devnum, each in four flavours:
 *   v?LinearFrac_64   - `length` elements,
 *   v?LinearFracI_64  - length/stride elements, every buffer stepped by `stride`,
 *   vm?LinearFrac_64  - like the first, mode passed as the last argument,
 *   vm?LinearFracI_64 - like the second, mode passed as the last argument.
 * All four scalar parameters of every call are set to `fixed`, converted
 * to the precision of the call.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode()
 *       and to every vm* call
 * in  - provides the argument vectors sarg1/sarg2 and darg1/darg2
 * out - provides the result buffers sres1[] and dres1[], one entry
 *       (kFunc, kFuncI, kMFunc, kMFuncI) per flavour
 */
static void own_vm_linearfrac (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and per-flavour result buffers */
    float  *s_x      = in->sarg1;
    float  *s_y      = in->sarg2;
    float  *s_out    = out->sres1[kFunc];
    float  *s_out_i  = out->sres1[kFuncI];
    float  *s_out_m  = out->sres1[kMFunc];
    float  *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and per-flavour result buffers */
    double *d_x      = in->darg1;
    double *d_y      = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* The one scalar value reused for all four scalar parameters */
    float  s_k = (float)fixed;
    double d_k = (double)fixed;

    /* Element count used by the strided (I) calls */
    const int64_t n_strided = length / stride;
    int mode = vm_mode[acc];

    #pragma omp target data \
        map(to:s_x[0:length]) map(to:d_x[0:length]) \
        map(to:s_y[0:length]) map(to:d_y[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        device(devnum)
    {
        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Single precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vsLinearFrac_64(length, s_x, s_y, s_k, s_k, s_k, s_k, s_out);
        #pragma omp dispatch device(devnum)
        vsLinearFracI_64(n_strided, s_x, stride, s_y, stride, s_k, s_k, s_k, s_k, s_out_i, stride);

        /* Double precision: whole vector, then strided */
        #pragma omp dispatch device(devnum)
        vdLinearFrac_64(length, d_x, d_y, d_k, d_k, d_k, d_k, d_out);
        #pragma omp dispatch device(devnum)
        vdLinearFracI_64(n_strided, d_x, stride, d_y, stride, d_k, d_k, d_k, d_k, d_out_i, stride);

        /* Same four calls with the mode passed explicitly */
        #pragma omp dispatch device(devnum)
        vmsLinearFrac_64(length, s_x, s_y, s_k, s_k, s_k, s_k, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsLinearFracI_64(n_strided, s_x, stride, s_y, stride, s_k, s_k, s_k, s_k, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdLinearFrac_64(length, d_x, d_y, d_k, d_k, d_k, d_k, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdLinearFracI_64(n_strided, d_x, stride, d_y, stride, d_k, d_k, d_k, d_k, d_out_mi, stride, mode);
    }
} /* own_vm_linearfrac */

/**
 * Sqrt
 *
 * Issues every Sqrt call for single, double and both complex precisions -
 * plain, strided ("I"), explicit-mode ("vm" prefix) and strided
 * explicit-mode - through OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1, darg1, carg1 and zarg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1,
 *         dres1, cres1 and zres1 receive the results
 */
static void own_vm_sqrt (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision real buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision real buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Single precision complex buffers */
    MKL_Complex8 *c_src   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dsti  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dstm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dstmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_src   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dsti  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dstm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dstmi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length]) \
            map(tofrom: c_dst[0:length], c_dsti[0:length], c_dstm[0:length], c_dstmi[0:length]) \
            map(tofrom: z_dst[0:length], z_dsti[0:length], z_dstm[0:length], z_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSqrt_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSqrtI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdSqrt_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSqrtI_64(n_strided, d_src, stride, d_dsti, stride);
        #pragma omp dispatch device(devnum)
        vcSqrt_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcSqrtI_64(n_strided, c_src, stride, c_dsti, stride);
        #pragma omp dispatch device(devnum)
        vzSqrt_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzSqrtI_64(n_strided, z_src, stride, z_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSqrt_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsSqrtI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSqrt_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdSqrtI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcSqrt_64(length, c_src, c_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmcSqrtI_64(n_strided, c_src, stride, c_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzSqrt_64(length, z_src, z_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmzSqrtI_64(n_strided, z_src, stride, z_dstmi, stride, mode);
    }
} /* own_vm_sqrt */

/**
 * Ln
 *
 * Issues every Ln call for single, double and both complex precisions -
 * plain, strided ("I"), explicit-mode ("vm" prefix) and strided
 * explicit-mode - through OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1, darg1, carg1 and zarg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1,
 *         dres1, cres1 and zres1 receive the results
 */
static void own_vm_ln (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision real buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision real buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Single precision complex buffers */
    MKL_Complex8 *c_src   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dsti  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dstm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dstmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_src   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dsti  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dstm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dstmi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length]) \
            map(tofrom: c_dst[0:length], c_dsti[0:length], c_dstm[0:length], c_dstmi[0:length]) \
            map(tofrom: z_dst[0:length], z_dsti[0:length], z_dstm[0:length], z_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsLn_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsLnI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdLn_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdLnI_64(n_strided, d_src, stride, d_dsti, stride);
        #pragma omp dispatch device(devnum)
        vcLn_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcLnI_64(n_strided, c_src, stride, c_dsti, stride);
        #pragma omp dispatch device(devnum)
        vzLn_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzLnI_64(n_strided, z_src, stride, z_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsLn_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsLnI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdLn_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdLnI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcLn_64(length, c_src, c_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmcLnI_64(n_strided, c_src, stride, c_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzLn_64(length, z_src, z_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmzLnI_64(n_strided, z_src, stride, z_dstmi, stride, mode);
    }
} /* own_vm_ln */

/**
 * Sind
 *
 * Issues the single and double precision Sind calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1 and
 *         dres1 receive the results
 */
static void own_vm_sind (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSind_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSindI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdSind_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSindI_64(n_strided, d_src, stride, d_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSind_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsSindI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSind_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdSindI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
    }
} /* own_vm_sind */

/**
 * Cosd
 *
 * Issues the single and double precision Cosd calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1 and
 *         dres1 receive the results
 */
static void own_vm_cosd (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCosd_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCosdI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdCosd_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCosdI_64(n_strided, d_src, stride, d_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsCosd_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsCosdI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdCosd_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdCosdI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
    }
} /* own_vm_cosd */

/**
 * Tand
 *
 * Issues the single and double precision Tand calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1 and
 *         dres1 receive the results
 */
static void own_vm_tand (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTand_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsTandI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdTand_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdTandI_64(n_strided, d_src, stride, d_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsTand_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsTandI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdTand_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdTandI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
    }
} /* own_vm_tand */

/**
 * Sinpi
 *
 * Issues the single and double precision Sinpi calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1 and
 *         dres1 receive the results
 */
static void own_vm_sinpi (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSinpi_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSinpiI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdSinpi_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSinpiI_64(n_strided, d_src, stride, d_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSinpi_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsSinpiI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSinpi_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdSinpiI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
    }
} /* own_vm_sinpi */

/**
 * Cospi
 *
 * Issues the single and double precision Cospi calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1 and
 *         dres1 receive the results
 */
static void own_vm_cospi (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCospi_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCospiI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdCospi_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCospiI_64(n_strided, d_src, stride, d_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsCospi_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsCospiI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdCospi_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdCospiI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
    }
} /* own_vm_cospi */

/**
 * Tanpi
 *
 * Issues the single and double precision Tanpi calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1 and
 *         dres1 receive the results
 */
static void own_vm_tanpi (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTanpi_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsTanpiI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdTanpi_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdTanpiI_64(n_strided, d_src, stride, d_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsTanpi_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsTanpiI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdTanpi_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdTanpiI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
    }
} /* own_vm_tanpi */

/**
 * SinCos
 *
 * Issues the single and double precision SinCos calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.  Each call takes one input and two outputs.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1,
 *         sres2, dres1 and dres2 receive the results
 */
static void own_vm_sincos (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision: input, first outputs, second outputs */
    float *s_src    = in->sarg1;
    float *s_dst1   = out->sres1[kFunc];
    float *s_dsti1  = out->sres1[kFuncI];
    float *s_dstm1  = out->sres1[kMFunc];
    float *s_dstmi1 = out->sres1[kMFuncI];
    float *s_dst2   = out->sres2[kFunc];
    float *s_dsti2  = out->sres2[kFuncI];
    float *s_dstm2  = out->sres2[kMFunc];
    float *s_dstmi2 = out->sres2[kMFuncI];

    /* Double precision: input, first outputs, second outputs */
    double *d_src    = in->darg1;
    double *d_dst1   = out->dres1[kFunc];
    double *d_dsti1  = out->dres1[kFuncI];
    double *d_dstm1  = out->dres1[kMFunc];
    double *d_dstmi1 = out->dres1[kMFuncI];
    double *d_dst2   = out->dres2[kFunc];
    double *d_dsti2  = out->dres2[kFuncI];
    double *d_dstm2  = out->dres2[kMFunc];
    double *d_dstmi2 = out->dres2[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst1[0:length], s_dsti1[0:length], s_dstm1[0:length], s_dstmi1[0:length]) \
            map(tofrom: d_dst1[0:length], d_dsti1[0:length], d_dstm1[0:length], d_dstmi1[0:length]) \
            map(tofrom: s_dst2[0:length], s_dsti2[0:length], s_dstm2[0:length], s_dstmi2[0:length]) \
            map(tofrom: d_dst2[0:length], d_dsti2[0:length], d_dstm2[0:length], d_dstmi2[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSinCos_64(length, s_src, s_dst1, s_dst2);
        #pragma omp dispatch device(devnum)
        vsSinCosI_64(n_strided, s_src, stride, s_dsti1, stride, s_dsti2, stride);
        #pragma omp dispatch device(devnum)
        vdSinCos_64(length, d_src, d_dst1, d_dst2);
        #pragma omp dispatch device(devnum)
        vdSinCosI_64(n_strided, d_src, stride, d_dsti1, stride, d_dsti2, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSinCos_64(length, s_src, s_dstm1, s_dstm2, mode);
        #pragma omp dispatch device(devnum)
        vmsSinCosI_64(n_strided, s_src, stride, s_dstmi1, stride, s_dstmi2, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSinCos_64(length, d_src, d_dstm1, d_dstm2, mode);
        #pragma omp dispatch device(devnum)
        vmdSinCosI_64(n_strided, d_src, stride, d_dstmi1, stride, d_dstmi2, stride, mode);
    }
} /* own_vm_sincos */

/**
 * Modf
 *
 * Issues the single and double precision Modf calls - plain, strided ("I"),
 * explicit-mode ("vm" prefix) and strided explicit-mode - through OpenMP
 * dispatch on device devnum.  Each call takes one input and two outputs.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1 and darg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1,
 *         sres2, dres1 and dres2 receive the results
 */
static void own_vm_modf (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision: input, first outputs, second outputs */
    float *s_src    = in->sarg1;
    float *s_dst1   = out->sres1[kFunc];
    float *s_dsti1  = out->sres1[kFuncI];
    float *s_dstm1  = out->sres1[kMFunc];
    float *s_dstmi1 = out->sres1[kMFuncI];
    float *s_dst2   = out->sres2[kFunc];
    float *s_dsti2  = out->sres2[kFuncI];
    float *s_dstm2  = out->sres2[kMFunc];
    float *s_dstmi2 = out->sres2[kMFuncI];

    /* Double precision: input, first outputs, second outputs */
    double *d_src    = in->darg1;
    double *d_dst1   = out->dres1[kFunc];
    double *d_dsti1  = out->dres1[kFuncI];
    double *d_dstm1  = out->dres1[kMFunc];
    double *d_dstmi1 = out->dres1[kMFuncI];
    double *d_dst2   = out->dres2[kFunc];
    double *d_dsti2  = out->dres2[kFuncI];
    double *d_dstm2  = out->dres2[kMFunc];
    double *d_dstmi2 = out->dres2[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length]) \
            map(tofrom: s_dst1[0:length], s_dsti1[0:length], s_dstm1[0:length], s_dstmi1[0:length]) \
            map(tofrom: d_dst1[0:length], d_dsti1[0:length], d_dstm1[0:length], d_dstmi1[0:length]) \
            map(tofrom: s_dst2[0:length], s_dsti2[0:length], s_dstm2[0:length], s_dstmi2[0:length]) \
            map(tofrom: d_dst2[0:length], d_dsti2[0:length], d_dstm2[0:length], d_dstmi2[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsModf_64(length, s_src, s_dst1, s_dst2);
        #pragma omp dispatch device(devnum)
        vsModfI_64(n_strided, s_src, stride, s_dsti1, stride, s_dsti2, stride);
        #pragma omp dispatch device(devnum)
        vdModf_64(length, d_src, d_dst1, d_dst2);
        #pragma omp dispatch device(devnum)
        vdModfI_64(n_strided, d_src, stride, d_dsti1, stride, d_dsti2, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsModf_64(length, s_src, s_dstm1, s_dstm2, mode);
        #pragma omp dispatch device(devnum)
        vmsModfI_64(n_strided, s_src, stride, s_dstmi1, stride, s_dstmi2, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdModf_64(length, d_src, d_dstm1, d_dstm2, mode);
        #pragma omp dispatch device(devnum)
        vmdModfI_64(n_strided, d_src, stride, d_dstmi1, stride, d_dstmi2, stride, mode);
    }
} /* own_vm_modf */

/**
 * Conj
 *
 * Issues the single and double precision complex Conj calls - plain,
 * strided ("I"), explicit-mode ("vm" prefix) and strided explicit-mode -
 * through OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; carg1 and zarg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of cres1 and
 *         zres1 receive the results
 */
static void own_vm_conj (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision complex buffers */
    MKL_Complex8 *c_src   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dsti  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dstm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dstmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_src   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dsti  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dstm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dstmi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: c_src[0:length], z_src[0:length]) \
            map(tofrom: c_dst[0:length], c_dsti[0:length], c_dstm[0:length], c_dstmi[0:length]) \
            map(tofrom: z_dst[0:length], z_dsti[0:length], z_dstm[0:length], z_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vcConj_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcConjI_64(n_strided, c_src, stride, c_dsti, stride);
        #pragma omp dispatch device(devnum)
        vzConj_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzConjI_64(n_strided, z_src, stride, z_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmcConj_64(length, c_src, c_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmcConjI_64(n_strided, c_src, stride, c_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzConj_64(length, z_src, z_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmzConjI_64(n_strided, z_src, stride, z_dstmi, stride, mode);
    }
} /* own_vm_conj */

/**
 * MulByConj
 *
 * Issues the single and double precision complex MulByConj calls - plain,
 * strided ("I"), explicit-mode ("vm" prefix) and strided explicit-mode -
 * through OpenMP dispatch on device devnum.  Each call takes two inputs.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; carg1, carg2, zarg1 and zarg2 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of cres1 and
 *         zres1 receive the results
 */
static void own_vm_mulbyconj (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision complex buffers */
    MKL_Complex8 *c_src1  = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_src2  = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_dst   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dsti  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dstm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dstmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_src1  = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_src2  = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_dst   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dsti  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dstm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dstmi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: c_src1[0:length], z_src1[0:length], c_src2[0:length], z_src2[0:length]) \
            map(tofrom: c_dst[0:length], c_dsti[0:length], c_dstm[0:length], c_dstmi[0:length]) \
            map(tofrom: z_dst[0:length], z_dsti[0:length], z_dstm[0:length], z_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vcMulByConj_64(length, c_src1, c_src2, c_dst);
        #pragma omp dispatch device(devnum)
        vcMulByConjI_64(n_strided, c_src1, stride, c_src2, stride, c_dsti, stride);
        #pragma omp dispatch device(devnum)
        vzMulByConj_64(length, z_src1, z_src2, z_dst);
        #pragma omp dispatch device(devnum)
        vzMulByConjI_64(n_strided, z_src1, stride, z_src2, stride, z_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmcMulByConj_64(length, c_src1, c_src2, c_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmcMulByConjI_64(n_strided, c_src1, stride, c_src2, stride, c_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzMulByConj_64(length, z_src1, z_src2, z_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmzMulByConjI_64(n_strided, z_src1, stride, z_src2, stride, z_dstmi, stride, mode);
    }
} /* own_vm_mulbyconj */

/**
 * Sin
 *
 * Issues every Sin call for single, double and both complex precisions -
 * plain, strided ("I"), explicit-mode ("vm" prefix) and strided
 * explicit-mode - through OpenMP dispatch on device devnum.
 *
 *   acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *         every explicit-mode call
 *   in  - input data; sarg1, darg1, carg1 and zarg1 are read
 *   out - output data; slots kFunc, kFuncI, kMFunc and kMFuncI of sres1,
 *         dres1, cres1 and zres1 receive the results
 */
static void own_vm_sin (int acc, VmInputData* in, VmOutputData* out)
{
    const int     mode      = vm_mode[acc];
    const int64_t n_strided = length / stride;   /* element count passed to the strided calls */

    /* Single precision real buffers */
    float *s_src   = in->sarg1;
    float *s_dst   = out->sres1[kFunc];
    float *s_dsti  = out->sres1[kFuncI];
    float *s_dstm  = out->sres1[kMFunc];
    float *s_dstmi = out->sres1[kMFuncI];

    /* Double precision real buffers */
    double *d_src   = in->darg1;
    double *d_dst   = out->dres1[kFunc];
    double *d_dsti  = out->dres1[kFuncI];
    double *d_dstm  = out->dres1[kMFunc];
    double *d_dstmi = out->dres1[kMFuncI];

    /* Single precision complex buffers */
    MKL_Complex8 *c_src   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dsti  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dstm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dstmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_src   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dsti  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dstm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dstmi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only, results in both directions */
    #pragma omp target data device(devnum) \
            map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
            map(tofrom: s_dst[0:length], s_dsti[0:length], s_dstm[0:length], s_dstmi[0:length]) \
            map(tofrom: d_dst[0:length], d_dsti[0:length], d_dstm[0:length], d_dstmi[0:length]) \
            map(tofrom: c_dst[0:length], c_dsti[0:length], c_dstm[0:length], c_dstmi[0:length]) \
            map(tofrom: z_dst[0:length], z_dsti[0:length], z_dstm[0:length], z_dstmi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSin_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSinI_64(n_strided, s_src, stride, s_dsti, stride);
        #pragma omp dispatch device(devnum)
        vdSin_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSinI_64(n_strided, d_src, stride, d_dsti, stride);
        #pragma omp dispatch device(devnum)
        vcSin_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcSinI_64(n_strided, c_src, stride, c_dsti, stride);
        #pragma omp dispatch device(devnum)
        vzSin_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzSinI_64(n_strided, z_src, stride, z_dsti, stride);

        /* Calls taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSin_64(length, s_src, s_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmsSinI_64(n_strided, s_src, stride, s_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSin_64(length, d_src, d_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmdSinI_64(n_strided, d_src, stride, d_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcSin_64(length, c_src, c_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmcSinI_64(n_strided, c_src, stride, c_dstmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzSin_64(length, z_src, z_dstm, mode);
        #pragma omp dispatch device(devnum)
        vmzSinI_64(n_strided, z_src, stride, z_dstmi, stride, mode);
    }
} /* own_vm_sin */

/**
 * Cos
 *
 * Calls every oneMKL VM Cos entry point of the ILP64 API through OpenMP
 * offload for float, double, MKL_Complex8 and MKL_Complex16 data: basic,
 * strided (I suffix), explicit mode (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1, darg1, carg1 and zarg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[],
 *       dres1[], cres1[] and zres1[] are passed as result buffers of the
 *       respective variants
 */
static void own_vm_cos (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* MKL_Complex8 argument and result buffers */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* MKL_Complex16 argument and result buffers */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
                map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCos_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsCosI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdCos_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdCosI_64(length/stride, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcCos_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcCosI_64(length/stride, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzCos_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzCosI_64(length/stride, zarg, stride, zires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsCos_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsCosI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCos_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCosI_64(length/stride, darg, stride, dmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcCos_64(length, carg, cmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcCosI_64(length/stride, carg, stride, cmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzCos_64(length, zarg, zmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzCosI_64(length/stride, zarg, stride, zmires, stride, vml_mode);
    }
} /* own_vm_cos */

/**
 * Tan
 *
 * Calls every oneMKL VM Tan entry point of the ILP64 API through OpenMP
 * offload for float, double, MKL_Complex8 and MKL_Complex16 data: basic,
 * strided (I suffix), explicit mode (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1, darg1, carg1 and zarg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[],
 *       dres1[], cres1[] and zres1[] are passed as result buffers of the
 *       respective variants
 */
static void own_vm_tan (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* MKL_Complex8 argument and result buffers */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* MKL_Complex16 argument and result buffers */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
                map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTan_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsTanI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdTan_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdTanI_64(length/stride, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcTan_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcTanI_64(length/stride, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzTan_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzTanI_64(length/stride, zarg, stride, zires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsTan_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsTanI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTan_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTanI_64(length/stride, darg, stride, dmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcTan_64(length, carg, cmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcTanI_64(length/stride, carg, stride, cmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzTan_64(length, zarg, zmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzTanI_64(length/stride, zarg, stride, zmires, stride, vml_mode);
    }
} /* own_vm_tan */

/**
 * Inv
 *
 * Calls every float and double oneMKL VM Inv entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_inv (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsInv_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsInvI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdInv_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdInvI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsInv_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsInvI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdInv_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdInvI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_inv */

/**
 * Sqr
 *
 * Calls every float and double oneMKL VM Sqr entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_sqr (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSqr_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsSqrI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdSqr_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdSqrI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsSqr_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsSqrI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSqr_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSqrI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_sqr */

/**
 * Frac
 *
 * Calls every float and double oneMKL VM Frac entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_frac (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFrac_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsFracI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdFrac_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdFracI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsFrac_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsFracI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFrac_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFracI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_frac */

/**
 * Ceil
 *
 * Calls every float and double oneMKL VM Ceil entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_ceil (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCeil_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsCeilI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdCeil_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdCeilI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsCeil_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsCeilI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCeil_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCeilI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_ceil */

/**
 * Floor
 *
 * Calls every float and double oneMKL VM Floor entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_floor (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFloor_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsFloorI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdFloor_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdFloorI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsFloor_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsFloorI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFloor_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFloorI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_floor */

/**
 * Round
 *
 * Calls every float and double oneMKL VM Round entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_round (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsRound_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsRoundI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdRound_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdRoundI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsRound_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsRoundI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRound_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRoundI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_round */

/**
 * Trunc
 *
 * Calls every float and double oneMKL VM Trunc entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_trunc (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTrunc_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsTruncI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdTrunc_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdTruncI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsTrunc_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsTruncI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTrunc_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTruncI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_trunc */

/**
 * Rint
 *
 * Calls every float and double oneMKL VM Rint entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_rint (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsRint_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsRintI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdRint_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdRintI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsRint_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsRintI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRint_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRintI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_rint */

/**
 * NearbyInt
 *
 * Calls every float and double oneMKL VM NearbyInt entry point of the ILP64
 * API through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_nearbyint (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsNearbyInt_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsNearbyIntI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdNearbyInt_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdNearbyIntI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsNearbyInt_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsNearbyIntI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdNearbyInt_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdNearbyIntI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_nearbyint */

/**
 * Acospi
 *
 * Calls every float and double oneMKL VM Acospi entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_acospi (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAcospi_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAcospiI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAcospi_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAcospiI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsAcospi_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsAcospiI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAcospi_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAcospiI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_acospi */

/**
 * Asinpi
 *
 * Calls every float and double oneMKL VM Asinpi entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_asinpi (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAsinpi_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAsinpiI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAsinpi_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAsinpiI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsAsinpi_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsAsinpiI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinpi_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinpiI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_asinpi */

/**
 * Atanpi
 *
 * Calls every float and double oneMKL VM Atanpi entry point of the ILP64 API
 * through OpenMP offload: basic, strided (I suffix), explicit mode
 * (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1 and darg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[]
 *       and dres1[] are passed as result buffers of the respective variants
 */
static void own_vm_atanpi (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAtanpi_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsAtanpiI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdAtanpi_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdAtanpiI_64(length/stride, darg, stride, dires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsAtanpi_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsAtanpiI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAtanpi_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAtanpiI_64(length/stride, darg, stride, dmires, stride, vml_mode);
    }
} /* own_vm_atanpi */

/**
 * Sinh
 *
 * Calls every oneMKL VM Sinh entry point of the ILP64 API through OpenMP
 * offload for float, double, MKL_Complex8 and MKL_Complex16 data: basic,
 * strided (I suffix), explicit mode (vm prefix) and strided with explicit mode.
 *
 * acc - index into vm_mode[] selecting the mode value used for the calls
 * in  - input data; sarg1, darg1, carg1 and zarg1 are passed as arguments
 * out - output data; entries kFunc, kFuncI, kMFunc and kMFuncI of sres1[],
 *       dres1[], cres1[] and zres1[] are passed as result buffers of the
 *       respective variants
 */
static void own_vm_sinh (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode = vm_mode[acc];

    /* float argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* double argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* MKL_Complex8 argument and result buffers */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* MKL_Complex16 argument and result buffers */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped "to" the device, result buffers "tofrom" */
    #pragma omp target data device(devnum) \
                map(to: sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
                map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
                map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSinh_64(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsSinhI_64(length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdSinh_64(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdSinhI_64(length/stride, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcSinh_64(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcSinhI_64(length/stride, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzSinh_64(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzSinhI_64(length/stride, zarg, stride, zires, stride);

        /* Variants taking the mode explicitly as the last argument */
        #pragma omp dispatch device(devnum)
        vmsSinh_64(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsSinhI_64(length/stride, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSinh_64(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSinhI_64(length/stride, darg, stride, dmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcSinh_64(length, carg, cmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcSinhI_64(length/stride, carg, stride, cmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzSinh_64(length, zarg, zmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzSinhI_64(length/stride, zarg, stride, zmires, stride, vml_mode);
    }
} /* own_vm_sinh */

/**
 * Cosh
 *
 * Touches every ILP64 (_64) VM Cosh entry point on device `devnum` via
 * `omp dispatch`: basic (v?Cosh), strided (v?CoshI), explicit mode (vm?Cosh)
 * and strided with explicit mode (vm?CoshI), for s, d, c and z data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only the ?arg1 arrays are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_cosh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Complex single precision */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsCosh_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCoshI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCosh_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCoshI_64(n_strided, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcCosh_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcCoshI_64(n_strided, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzCosh_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzCoshI_64(n_strided, z_src, stride, z_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsCosh_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsCoshI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdCosh_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdCoshI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcCosh_64(length, c_src, c_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcCoshI_64(n_strided, c_src, stride, c_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzCosh_64(length, z_src, z_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzCoshI_64(n_strided, z_src, stride, z_dst_mi, stride, acc_mode);
    }
} /* own_vm_cosh */

/**
 * Tanh
 *
 * Touches every ILP64 (_64) VM Tanh entry point on device `devnum` via
 * `omp dispatch`: basic (v?Tanh), strided (v?TanhI), explicit mode (vm?Tanh)
 * and strided with explicit mode (vm?TanhI), for s, d, c and z data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only the ?arg1 arrays are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_tanh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Complex single precision */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsTanh_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsTanhI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdTanh_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdTanhI_64(n_strided, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcTanh_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcTanhI_64(n_strided, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzTanh_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzTanhI_64(n_strided, z_src, stride, z_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsTanh_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsTanhI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdTanh_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdTanhI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcTanh_64(length, c_src, c_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcTanhI_64(n_strided, c_src, stride, c_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzTanh_64(length, z_src, z_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzTanhI_64(n_strided, z_src, stride, z_dst_mi, stride, acc_mode);
    }
} /* own_vm_tanh */

/**
 * Exp
 *
 * Touches every ILP64 (_64) VM Exp entry point on device `devnum` via
 * `omp dispatch`: basic (v?Exp), strided (v?ExpI), explicit mode (vm?Exp)
 * and strided with explicit mode (vm?ExpI), for s, d, c and z data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only the ?arg1 arrays are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_exp (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Complex single precision */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsExp_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsExpI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdExp_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdExpI_64(n_strided, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcExp_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcExpI_64(n_strided, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzExp_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzExpI_64(n_strided, z_src, stride, z_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsExp_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsExpI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExp_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExpI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcExp_64(length, c_src, c_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcExpI_64(n_strided, c_src, stride, c_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzExp_64(length, z_src, z_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzExpI_64(n_strided, z_src, stride, z_dst_mi, stride, acc_mode);
    }
} /* own_vm_exp */

/**
 * Exp2
 *
 * Touches every ILP64 (_64) VM Exp2 entry point on device `devnum` via
 * `omp dispatch`: basic (v?Exp2), strided (v?Exp2I), explicit mode (vm?Exp2)
 * and strided with explicit mode (vm?Exp2I), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_exp2 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsExp2_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsExp2I_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdExp2_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdExp2I_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsExp2_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsExp2I_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExp2_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExp2I_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_exp2 */

/**
 * Exp10
 *
 * Touches every ILP64 (_64) VM Exp10 entry point on device `devnum` via
 * `omp dispatch`: basic (v?Exp10), strided (v?Exp10I), explicit mode
 * (vm?Exp10) and strided with explicit mode (vm?Exp10I), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_exp10 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsExp10_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsExp10I_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdExp10_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdExp10I_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsExp10_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsExp10I_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExp10_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExp10I_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_exp10 */

/**
 * Expm1
 *
 * Touches every ILP64 (_64) VM Expm1 entry point on device `devnum` via
 * `omp dispatch`: basic (v?Expm1), strided (v?Expm1I), explicit mode
 * (vm?Expm1) and strided with explicit mode (vm?Expm1I), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_expm1 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsExpm1_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsExpm1I_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdExpm1_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdExpm1I_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsExpm1_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsExpm1I_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExpm1_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdExpm1I_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_expm1 */

/**
 * Log2
 *
 * Touches every ILP64 (_64) VM Log2 entry point on device `devnum` via
 * `omp dispatch`: basic (v?Log2), strided (v?Log2I), explicit mode (vm?Log2)
 * and strided with explicit mode (vm?Log2I), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_log2 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsLog2_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsLog2I_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdLog2_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdLog2I_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsLog2_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsLog2I_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdLog2_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdLog2I_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_log2 */

/**
 * Log10
 *
 * Touches every ILP64 (_64) VM Log10 entry point on device `devnum` via
 * `omp dispatch`: basic (v?Log10), strided (v?Log10I), explicit mode
 * (vm?Log10) and strided with explicit mode (vm?Log10I), for s, d, c and z
 * data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only the ?arg1 arrays are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_log10 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Complex single precision */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length], c_src[0:length], z_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length]) \
                map(tofrom: c_dst[0:length], c_dst_i[0:length], c_dst_m[0:length], c_dst_mi[0:length]) \
                map(tofrom: z_dst[0:length], z_dst_i[0:length], z_dst_m[0:length], z_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsLog10_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsLog10I_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdLog10_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdLog10I_64(n_strided, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcLog10_64(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcLog10I_64(n_strided, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzLog10_64(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzLog10I_64(n_strided, z_src, stride, z_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsLog10_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsLog10I_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdLog10_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdLog10I_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcLog10_64(length, c_src, c_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmcLog10I_64(n_strided, c_src, stride, c_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzLog10_64(length, z_src, z_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmzLog10I_64(n_strided, z_src, stride, z_dst_mi, stride, acc_mode);
    }
} /* own_vm_log10 */

/**
 * Log1p
 *
 * Touches every ILP64 (_64) VM Log1p entry point on device `devnum` via
 * `omp dispatch`: basic (v?Log1p), strided (v?Log1pI), explicit mode
 * (vm?Log1p) and strided with explicit mode (vm?Log1pI), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_log1p (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsLog1p_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsLog1pI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdLog1p_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdLog1pI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsLog1p_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsLog1pI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdLog1p_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdLog1pI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_log1p */

/**
 * Erf
 *
 * Touches every ILP64 (_64) VM Erf entry point on device `devnum` via
 * `omp dispatch`: basic (v?Erf), strided (v?ErfI), explicit mode (vm?Erf)
 * and strided with explicit mode (vm?ErfI), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_erf (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsErf_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsErfI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdErf_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdErfI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsErf_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsErfI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErf_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_erf */

/**
 * Erfc
 *
 * Touches every ILP64 (_64) VM Erfc entry point on device `devnum` via
 * `omp dispatch`: basic (v?Erfc), strided (v?ErfcI), explicit mode (vm?Erfc)
 * and strided with explicit mode (vm?ErfcI), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_erfc (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsErfc_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsErfcI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdErfc_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdErfcI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsErfc_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsErfcI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfc_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfcI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_erfc */

/**
 * ErfInv
 *
 * Touches every ILP64 (_64) VM ErfInv entry point on device `devnum` via
 * `omp dispatch`: basic (v?ErfInv), strided (v?ErfInvI), explicit mode
 * (vm?ErfInv) and strided with explicit mode (vm?ErfInvI), for s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_erfinv (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsErfInv_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsErfInvI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdErfInv_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdErfInvI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsErfInv_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsErfInvI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfInv_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfInvI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_erfinv */

/**
 * ErfcInv
 *
 * Touches every ILP64 (_64) VM ErfcInv entry point on device `devnum` via
 * `omp dispatch`: basic (v?ErfcInv), strided (v?ErfcInvI), explicit mode
 * (vm?ErfcInv) and strided with explicit mode (vm?ErfcInvI), for s and d
 * data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_erfcinv (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsErfcInv_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsErfcInvI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdErfcInv_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdErfcInvI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsErfcInv_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsErfcInvI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfcInv_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdErfcInvI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_erfcinv */

/**
 * CdfNorm
 *
 * Touches every ILP64 (_64) VM CdfNorm entry point on device `devnum` via
 * `omp dispatch`: basic (v?CdfNorm), strided (v?CdfNormI), explicit mode
 * (vm?CdfNorm) and strided with explicit mode (vm?CdfNormI), for s and d
 * data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_cdfnorm (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsCdfNorm_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCdfNormI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCdfNorm_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCdfNormI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsCdfNorm_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsCdfNormI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNorm_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNormI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_cdfnorm */

/**
 * CdfNormInv
 *
 * Touches every ILP64 (_64) VM CdfNormInv entry point on device `devnum` via
 * `omp dispatch`: basic (v?CdfNormInv), strided (v?CdfNormInvI), explicit
 * mode (vm?CdfNormInv) and strided with explicit mode (vm?CdfNormInvI), for
 * s and d data.
 *
 *   acc - index into vm_mode[] that selects the VM mode
 *   in  - source vectors; only sarg1 and darg1 are read here
 *   out - destination vectors; ?res1[kFunc], ?res1[kFuncI], ?res1[kMFunc]
 *         and ?res1[kMFuncI] receive the results of the four variants
 */
static void own_vm_cdfnorminv (int acc, VmInputData* in, VmOutputData* out)
{
    /* Real single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Real double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, destinations are mapped both ways */
    #pragma omp target data device(devnum) \
                map(to: s_src[0:length], d_src[0:length]) \
                map(tofrom: s_dst[0:length], s_dst_i[0:length], s_dst_m[0:length], s_dst_mi[0:length]) \
                map(tofrom: d_dst[0:length], d_dst_i[0:length], d_dst_m[0:length], d_dst_mi[0:length])
    {
        const int     acc_mode  = vm_mode[acc];
        /* Element count handed to the strided (I) variants */
        const int64_t n_strided = length / stride;

        #pragma omp dispatch device(devnum)
        vmlSetMode(acc_mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsCdfNormInv_64(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCdfNormInvI_64(n_strided, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCdfNormInv_64(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCdfNormInvI_64(n_strided, d_src, stride, d_dst_i, stride);

        /* Variants taking the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsCdfNormInv_64(length, s_src, s_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmsCdfNormInvI_64(n_strided, s_src, stride, s_dst_mi, stride, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNormInv_64(length, d_src, d_dst_m, acc_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNormInvI_64(n_strided, d_src, stride, d_dst_mi, stride, acc_mode);
    }
} /* own_vm_cdfnorminv */

/**
 * LGamma
 *
 * Launches every real LGamma variant (single and double precision,
 * contiguous and strided, with and without explicit mode argument)
 * through OpenMP dispatch on device devnum.
 *
 * @param[in] acc          Index into vm_mode selecting the accuracy mode
 * @param[in] in           Input arrays (sarg1 and darg1 are read)
 * @param[out] out         Output arrays (sres1 and dres1 of each API variant are written)
 *
 */
static void own_vm_lgamma (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-API result buffers */
    float  *sarg   = in->sarg1;
    float  *sres   = out->sres1[kFunc];
    float  *sires  = out->sres1[kFuncI];
    float  *smres  = out->sres1[kMFunc];
    float  *smires = out->sres1[kMFuncI];

    /* Double precision argument and per-API result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    #pragma omp target data map(to:sarg[0:length], darg[0:length]) \
                            map(tofrom:sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom:dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                            device(devnum)
    {
        const int64_t strided_len = length / stride; /* element count of the strided calls */
        int mode = vm_mode[acc];

        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsLGamma_64   (length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsLGammaI_64  (strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdLGamma_64   (length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdLGammaI_64  (strided_len, darg, stride, dires, stride);

        /* Variants receiving the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsLGamma_64  (length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsLGammaI_64 (strided_len, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdLGamma_64  (length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdLGammaI_64 (strided_len, darg, stride, dmires, stride, mode);
    }
    // #pragma omp taskwait
} /* own_vm_lgamma */

/**
 * TGamma
 *
 * Launches every real TGamma variant (single and double precision,
 * contiguous and strided, with and without explicit mode argument)
 * through OpenMP dispatch on device devnum.
 *
 * @param[in] acc          Index into vm_mode selecting the accuracy mode
 * @param[in] in           Input arrays (sarg1 and darg1 are read)
 * @param[out] out         Output arrays (sres1 and dres1 of each API variant are written)
 *
 */
static void own_vm_tgamma (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-API result buffers */
    float  *sarg   = in->sarg1;
    float  *sres   = out->sres1[kFunc];
    float  *sires  = out->sres1[kFuncI];
    float  *smres  = out->sres1[kMFunc];
    float  *smires = out->sres1[kMFuncI];

    /* Double precision argument and per-API result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    #pragma omp target data map(to:sarg[0:length], darg[0:length]) \
                            map(tofrom:sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom:dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                            device(devnum)
    {
        const int64_t strided_len = length / stride; /* element count of the strided calls */
        int mode = vm_mode[acc];

        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsTGamma_64   (length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsTGammaI_64  (strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdTGamma_64   (length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdTGammaI_64  (strided_len, darg, stride, dires, stride);

        /* Variants receiving the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsTGamma_64  (length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsTGammaI_64 (strided_len, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdTGamma_64  (length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdTGammaI_64 (strided_len, darg, stride, dmires, stride, mode);
    }
    // #pragma omp taskwait
} /* own_vm_tgamma */

/**
 * ExpInt1
 *
 * Launches every real ExpInt1 variant (single and double precision,
 * contiguous and strided, with and without explicit mode argument)
 * through OpenMP dispatch on device devnum.
 *
 * @param[in] acc          Index into vm_mode selecting the accuracy mode
 * @param[in] in           Input arrays (sarg1 and darg1 are read)
 * @param[out] out         Output arrays (sres1 and dres1 of each API variant are written)
 *
 */
static void own_vm_expint1 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and per-API result buffers */
    float  *sarg   = in->sarg1;
    float  *sres   = out->sres1[kFunc];
    float  *sires  = out->sres1[kFuncI];
    float  *smres  = out->sres1[kMFunc];
    float  *smires = out->sres1[kMFuncI];

    /* Double precision argument and per-API result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    #pragma omp target data map(to:sarg[0:length], darg[0:length]) \
                            map(tofrom:sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom:dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                            device(devnum)
    {
        const int64_t strided_len = length / stride; /* element count of the strided calls */
        int mode = vm_mode[acc];

        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Variants without an explicit mode argument */
        #pragma omp dispatch device(devnum)
        vsExpInt1_64   (length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsExpInt1I_64  (strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdExpInt1_64   (length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdExpInt1I_64  (strided_len, darg, stride, dires, stride);

        /* Variants receiving the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsExpInt1_64  (length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsExpInt1I_64 (strided_len, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdExpInt1_64  (length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdExpInt1I_64 (strided_len, darg, stride, dmires, stride, mode);
    }
    // #pragma omp taskwait
} /* own_vm_expint1 */

/**
 * Custom reference functions absent in LIBM:
 */
static double own_frac (double x) { return (x - trunc (x)); } /* signed fractional part */
static double own_invcbrt (double x) { return pow(x, -(1.0/3.0)); } /* inverse cube root */
static double own_pow2o3 (double x) { return pow(x, 2.0 / 3.0); } /* power x^(2/3) */
static double own_pow3o2 (double x) { return pow(x, 3.0 / 2.0); } /* power x^(3/2) */
static double own_inv (double x) { return 1.0 / x; } /* inverse 1/x */
static double own_sqr (double x) { return x * x; } /* square x*x */
static double own_add (double x, double y) { return x + y; } /* addition x+y */
static double own_sub (double x, double y) { return x - y; } /* subtraction x-y */
static double own_mul (double x, double y) { return x * y; } /* multiplication x-y */
static double own_div (double x, double y) { return x / y; } /* division x/y */
/* exponential integral - exists in VM only, so a one-element VML_HA call provides the reference */
static double own_expint1 (double x)
{
    double r;

    vmdExpInt1 (1, &x, &r, VML_HA);
    return r;
}
static void   own_modf (double x, double* r1, double* r2) { *r2 = modf(x, r1); } /* truncated integer value |x| and the remaining fraction part x-|x| */
static double own_linearfrac (double x, double y, double a, double b, double c, double d) { return (x * a + b)/(y * c + d); } /* linear fraction (x*a + b)/(y*c + d)*/
static double own_minmag (double x, double y) { return (fabs(x) < fabs(y))?x:y; } /* min(|x|,|y|) */
static double own_maxmag (double x, double y) { return (fabs(x) > fabs(y))?x:y; } /* max(|x|,|y|) */
static void   own_sincos (double x, double* r1, double* r2) { *r1 = sin(x); *r2 = cos(x); return; } /* sin & cos */
static double own_invsqrt (double x) { return 1.0 / sqrt(x); } /* 1/sqrt(x) */

static double complex own_cadd (double complex x, double complex y) { return x + y; } /* complex x+y */
static double complex own_csub (double complex x, double complex y) { return x - y; } /* complex x+y */
static double complex own_cmul (double complex x, double complex y) { return x * y; } /* complex x*y */
static double complex own_cdiv (double complex x, double complex y) { return x / y; } /* complex x+y */
static double own_cabs (double complex x) { return hypot(creal(x),cimag(x)); } /* |x| */
static double own_carg (double complex x) { return atan2(cimag(x),creal(x)); } /* complex argument (atan2) */
static double complex own_cis (double x) { double r1, r2; own_sincos(x, &r2, &r1); double complex r = r1 + I * r2; return r; } /* complex CIS (sincos) */
static double complex own_cmulbyconj (double complex x, double complex y) { return x * conj(y); } /* complex x*conj(y) */

/**
 * @brief Safe malloc
 *
 * own_safe_malloc allocates memory and checks the resulting pointer.
 * Reports an error and exits the application if unsuccessful.
 *
 * The size is taken as size_t so that byte counts computed by callers as
 * (count * sizeof (type)) are not truncated or sign-converted on the way in.
 * A zero-byte request is served with a minimal allocation, so the returned
 * pointer is never NULL and can always be released later.
 *
 * @param[in] size          Size in bytes
 * @return                  Pointer to allocated memory (never NULL)
 *
 */
static void* own_safe_malloc(size_t size)
{
    /* malloc (0) may legitimately return NULL; that is not an allocation failure */
    void* ptr = malloc ((size != 0) ? size : 1);

    if (ptr == NULL)
    {
       fprintf (stderr, "\t\tERROR: %zu bytes allocated unsuccessfully\n", size);
       exit(-1);
    }

    return ptr;
} /* own_safe_malloc */

/**
 * @brief Safe free
 *
 * own_safe_free deallocates memory.
 * Reports an error and exits the application if a NULL pointer is passed.
 *
 * @param[in] ptr          Pointer to memory
 *
 */
static void own_safe_free(void *ptr)
{
    /* A NULL pointer here is treated as a fatal error */
    if (ptr == NULL)
    {
       fprintf (stderr, "\t\tERROR: NULL pointer cannot be deallocated\n");
       exit(-1);
    }

    free (ptr);
} /* own_safe_free */

/**
 * @brief Allocate all input, reference result and output arrays
 *
 * Every array is obtained through own_safe_malloc. Reference arrays for
 * single precision are sized with double / double complex elements, the same
 * as the double precision ones.
 *
 * @param[in] len           Arrays length
 * @param[in, out] in       Input and reference result arrays
 * @param[in, out] out      Output arrays
 *
 */
static void own_allocate_data (int len, VmInputData* in, VmOutputData* out)
{
    /* Byte sizes of one array of each element type */
    const size_t s_bytes = len * sizeof (float);
    const size_t d_bytes = len * sizeof (double);
    const size_t c_bytes = len * sizeof (float complex);
    const size_t z_bytes = len * sizeof (double complex);

    /* Arguments */
    in->sarg1 = own_safe_malloc (s_bytes);
    in->sarg2 = own_safe_malloc (s_bytes);
    in->darg1 = own_safe_malloc (d_bytes);
    in->darg2 = own_safe_malloc (d_bytes);
    in->carg1 = own_safe_malloc (c_bytes);
    in->carg2 = own_safe_malloc (c_bytes);
    in->zarg1 = own_safe_malloc (z_bytes);
    in->zarg2 = own_safe_malloc (z_bytes);

    /* Reference results */
    in->sref1 = own_safe_malloc (d_bytes);
    in->sref2 = own_safe_malloc (d_bytes);
    in->dref1 = own_safe_malloc (d_bytes);
    in->dref2 = own_safe_malloc (d_bytes);
    in->cref1 = own_safe_malloc (z_bytes);
    in->cref2 = own_safe_malloc (z_bytes);
    in->zref1 = own_safe_malloc (z_bytes);
    in->zref2 = own_safe_malloc (z_bytes);

    /* One set of result arrays per API variant */
    for (int api = kFunc; api < kApiNum; api++)
    {
        out->sres1[api] = own_safe_malloc (s_bytes);
        out->sres2[api] = own_safe_malloc (s_bytes);
        out->dres1[api] = own_safe_malloc (d_bytes);
        out->dres2[api] = own_safe_malloc (d_bytes);
        out->cres1[api] = own_safe_malloc (c_bytes);
        out->cres2[api] = own_safe_malloc (c_bytes);
        out->zres1[api] = own_safe_malloc (z_bytes);
        out->zres2[api] = own_safe_malloc (z_bytes);
    }
} /* own_allocate_data */

/**
 * @brief Deallocate all input, reference result and output arrays
 *
 * Every array is released through own_safe_free, in the order:
 * arguments, reference results, then the result arrays of each API variant.
 *
 * @param[in, out] in       Input and reference result arrays
 * @param[in, out] out      Output arrays
 *
 */
static void own_deallocate_data (VmInputData* in, VmOutputData* out)
{
    /* Arguments followed by reference results */
    void* const in_arrays[] =
    {
        in->sarg1, in->sarg2, in->darg1, in->darg2,
        in->carg1, in->carg2, in->zarg1, in->zarg2,
        in->sref1, in->sref2, in->dref1, in->dref2,
        in->cref1, in->cref2, in->zref1, in->zref2
    };
    const size_t in_count = sizeof (in_arrays) / sizeof (in_arrays[0]);

    for (size_t k = 0; k < in_count; k++)
    {
        own_safe_free (in_arrays[k]);
    }

    /* Result arrays of every API variant */
    for (int api = kFunc; api <= kMFuncI; api++)
    {
        void* const out_arrays[] =
        {
            out->sres1[api], out->sres2[api], out->dres1[api], out->dres2[api],
            out->cres1[api], out->cres2[api], out->zres1[api], out->zres2[api]
        };
        const size_t out_count = sizeof (out_arrays) / sizeof (out_arrays[0]);

        for (size_t k = 0; k < out_count; k++)
        {
            own_safe_free (out_arrays[k]);
        }
    }
} /* own_deallocate_data */

/**
 * @brief Single precision ULP calculation
 *
 * Computes ULP between result and reference value.
 *
 * For a finite reference the difference is scaled by 2^(ex-24), where ex is
 * the binary exponent of the reference reported by frexp. A non-finite or
 * out-of-float-range ULP is reported as FLT_MAX.
 *
 * For a non-finite reference the result matches (ULP 0) only when it is of
 * the same class and, for infinities, of the same sign; otherwise FLT_MAX
 * is returned. The sign of a NaN is not compared.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP
 *
 */
static float own_s_compute_ulp (float res, double ref )
{
    if (isfinite (ref))
    {
        int    ex  = 0;
        double den = 1.0;
        double ulp = 0.0;

        frexp (ref, &ex);                              /* ex: integral power of two of ref */
        den = ldexp (1.0, ex - 24);                    /* den: ulp's denominator 2^(ex-p+1) */
        if (den == 0.0) { den = 0x1.p-149; }           /* if den=0 then replace by EPS to avoid divbyzero */
        ulp = fabs (((double)(res) - ref) / den);      /* |res-ref|/2^(ex-24) */

        /* Clamp NaN, infinite and above-float-range values before narrowing to float */
        if (!(ulp <= (double)FLT_MAX)) { return FLT_MAX; }
        return (float)ulp;
    }

    /* Reference is Inf or NaN: result has to be of the same class */
    if (fpclassify (res) != fpclassify (ref)) { return FLT_MAX; }

    /* Infinities of opposite sign are a mismatch even though the class is equal */
    if (isinf (ref) && ((signbit (res) != 0) != (signbit (ref) != 0))) { return FLT_MAX; }

    return 0.0f;
} /* own_s_compute_ulp */

/**
 * @brief Complex single precision ULP calculation
 *
 * Computes ULP between result and reference value as the larger of the
 * real part ULP and the imaginary part ULP.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP
 *
 */
static float own_c_compute_ulp (float complex res, double complex ref )
{
    const float re_ulp = own_s_compute_ulp (creal (res), creal (ref));
    const float im_ulp = own_s_compute_ulp (cimag (res), cimag (ref));

    return fmaxf (re_ulp, im_ulp);
}

/**
 * @brief Double precision ULP calculation
 *
 * Computes ULP between result and reference value.
 *
 * For a finite reference the difference is scaled by 2^(ex-53), where ex is
 * the binary exponent of the reference reported by frexp. A non-finite ULP
 * is reported as DBL_MAX.
 *
 * For a non-finite reference the result matches (ULP 0) only when it is of
 * the same class and, for infinities, of the same sign; otherwise DBL_MAX
 * is returned. The sign of a NaN is not compared.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP
 *
 */
static double own_d_compute_ulp (double res, double ref )
{
    if (isfinite (ref))
    {
        int    ex  = 0;
        double den = 1.0;
        double ulp = 0.0;

        frexp (ref, &ex);                              /* ex: integral power of two of ref */
        den = ldexp (1.0, ex - 53);                    /* den: ulp's denominator 2^(ex-p+1) */
        if (den == 0.0) { den = 0x1.p-1074; }          /* if den=0 then replace by EPS to avoid divbyzero */
        ulp = fabs ((res - ref) / den);                /* |res-ref|/2^(ex-53) */

        /* replace infinite or NaN ulp by big finite double number */
        return isfinite (ulp) ? ulp : DBL_MAX;
    }

    /* Reference is Inf or NaN: result has to be of the same class */
    if (fpclassify (res) != fpclassify (ref)) { return DBL_MAX; }

    /* Infinities of opposite sign are a mismatch even though the class is equal */
    if (isinf (ref) && ((signbit (res) != 0) != (signbit (ref) != 0))) { return DBL_MAX; }

    return 0.0;
} /* own_d_compute_ulp */

/**
 * @brief Complex double precision ULP calculation
 *
 * Computes ULP between result and reference value as the larger of the
 * real part ULP and the imaginary part ULP.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP
 *
 */
static double own_z_compute_ulp (double complex res, double complex ref )
{
    const double re_ulp = own_d_compute_ulp (creal (res), creal (ref));
    const double im_ulp = own_d_compute_ulp (cimag (res), cimag (ref));

    return fmax (re_ulp, im_ulp);
}

/**
 * @brief Fill inputs
 *
 * Fills input vectors by pseudo-random numbers. The generator is re-seeded
 * with the same constant on every call, so each call produces the same
 * sequence for the same range.
 *
 * @param[in] layout       Function arguments layout (not used by this routine)
 * @param[in] beg          Begin of input ranges
 * @param[in] end          End of input ranges
 * @param[out] in          Input arrays
 *
 */
static void own_fill_input (int layout, double beg, double end, VmInputData* in)
{
    const double span = end - beg;

    srand(777);

    for (int i = 0; i < length; i++)
    {
        /**
         * Four random numbers in [beg, end] range per element
         * (end is reached when rand () returns RAND_MAX):
         * re1/re2 feed the real arguments and the real parts,
         * im1/im2 feed the imaginary parts of the complex arguments
         */
        const double re1 = (beg + span * (double)(rand ()) / (double)(RAND_MAX));
        const double re2 = (beg + span * (double)(rand ()) / (double)(RAND_MAX));
        const double im1 = (beg + span * (double)(rand ()) / (double)(RAND_MAX));
        const double im2 = (beg + span * (double)(rand ()) / (double)(RAND_MAX));

        /* Real arguments: single precision is the rounded double value */
        in->darg1[i] = re1;
        in->sarg1[i] = (float)re1;
        in->darg2[i] = re2;
        in->sarg2[i] = (float)re2;

        /* Complex arguments */
        in->zarg1[i] = re1 + I * im1;
        in->carg1[i] = (float)re1 + I * (float)im1;
        in->zarg2[i] = re2 + I * im2;
        in->carg2[i] = (float)re2 + I * (float)im2;
    }
} /* own_fill_input */

/**
 * @brief Fill reference vectors
 *
 * Compute reference results by calling the scalar reference functions
 * element by element. The function pointers are passed as void* and cast to
 * the signature that corresponds to the layout.
 *
 * @param[in] layout       Function arguments layout
 * @param[in] reffunc      Real reference function (NULL if absent)
 * @param[in] creffunc     Complex reference function (NULL if absent)
 * @param[out] in          Input and reference results arrays
 *
 */
static void own_fill_reference (int layout, void* reffunc, void* creffunc, VmInputData* in)
{
    const int has_real    = (reffunc  != NULL) ? 1 : 0;
    const int has_complex = (creffunc != NULL) ? 1 : 0;

    /* Layouts whose real reference takes a single real argument */
    const int single_real_arg = (layout == kVtoV) || (layout == kVCtoVR) || (layout == kVRtoVC);

    /* Scalar operand of the kVXtoV and kVVXtoV layouts in every precision */
    const float          sfixed = (float)fixed;
    const double         dfixed = (double)fixed;
    const double complex cfixed = sfixed + I * sfixed;
    const double complex zfixed = dfixed + I * dfixed;

    for (int i = 0; i < length; i++)
    {
        if (single_real_arg)
        {
            if (has_real)
            {
                in->sref1[i] = ((RefVtoV)reffunc) (in->sarg1[i]);
                in->dref1[i] = ((RefVtoV)reffunc) (in->darg1[i]);
            }

            if (!has_complex) { continue; }

            if (layout == kVtoV)
            {
                /* complex -> complex */
                in->cref1[i] = ((CRefVtoV)creffunc) ((double complex) (in->carg1[i]));
                in->zref1[i] = ((CRefVtoV)creffunc) ( in->zarg1[i]);
            }
            else if (layout == kVCtoVR)
            {
                /* complex -> real: use complex array containers to accept real results */
                ((double*)(in->cref1))[i] = ((CRefCtoR)creffunc) ((double complex) (in->carg1[i]));
                ((double*)(in->zref1))[i] = ((CRefCtoR)creffunc) (in->zarg1[i]);
            }
            else
            {
                /* kVRtoVC, real -> complex: arguments come from the real arrays */
                in->cref1[i] = ((CRefRtoC)creffunc) (in->sarg1[i]);
                in->zref1[i] = ((CRefRtoC)creffunc) (in->darg1[i]);
            }
        }
        else if (layout == kVVtoV)
        {
            if (has_real)
            {
                in->sref1[i] = ((RefVVtoV)reffunc) (in->sarg1[i], in->sarg2[i]);
                in->dref1[i] = ((RefVVtoV)reffunc) (in->darg1[i], in->darg2[i]);
            }
            if (has_complex)
            {
                in->cref1[i] = ((CRefVVtoV)creffunc) ((double complex) (in->carg1[i]), (double complex) (in->carg2[i]));
                in->zref1[i] = ((CRefVVtoV)creffunc) (in->zarg1[i], in->zarg2[i]);
            }
        }
        else if (layout == kVXtoV)
        {
            /* Second operand is the fixed scalar */
            if (has_real)
            {
                in->sref1[i] = ((RefVVtoV)reffunc) (in->sarg1[i], sfixed);
                in->dref1[i] = ((RefVVtoV)reffunc) (in->darg1[i], dfixed);
            }
            if (has_complex)
            {
                in->cref1[i] = ((CRefVVtoV)creffunc) ((double complex) (in->carg1[i]), cfixed);
                in->zref1[i] = ((CRefVVtoV)creffunc) (in->zarg1[i], zfixed);
            }
        }
        else if (layout == kVtoVV)
        {
            /* Two outputs per input; reffunc is called unconditionally here */
            ((RefVtoVV)reffunc) (in->sarg1[i], &(in->sref1[i]), &(in->sref2[i]));
            ((RefVtoVV)reffunc) (in->darg1[i], &(in->dref1[i]), &(in->dref2[i]));
        }
        else if (layout == kVVXtoV)
        {
            /* Two vector inputs plus four fixed scalars; reffunc is called unconditionally here */
            in->sref1[i] = ((RefVVXtoV)reffunc) (in->sarg1[i], in->sarg2[i], sfixed, sfixed, sfixed, sfixed);
            in->dref1[i] = ((RefVVXtoV)reffunc) (in->darg1[i], in->darg2[i], dfixed, dfixed, dfixed, dfixed);
        }
    } /* for (int i = 0; i < length; i++) */
} /* own_fill_reference */


/**
 * @brief Full VM function name for printout
 *
 * Construct full VM function name with precision, api and accuracy suffixes:
 * <api prefix><precision letter><base name><api suffix>_<accuracy> followed
 * by one space. The text is truncated to fit maxlen bytes.
 *
 * @param[out] buff        Pointer to output string buffer
 * @param[in] maxlen       Maximum string length
 * @param[in] fname        Base function name
 * @param[in] prec         Precision (index into the precision letter table)
 * @param[in] api          API variant (index into the prefix and suffix tables)
 * @param[in] acc          Accuracy (index into the accuracy suffix table)
 * @return                 Pointer to constructed name
 *
 */
static char* own_full_name(char* buff, int maxlen, char* fname, int prec, int api, int acc)
{
    static const char* const prefix_by_api[]   = {"v", "vm", "v", "vm"};
    static const char* const suffix_by_api[]   = {"", "", "I", "I"};
    static const char* const letter_by_prec[]  = {"s", "d", "c", "z"};
    static const char* const suffix_by_acc[]   = {"HA", "LA", "EP"};

    const char* prefix   = prefix_by_api[api];
    const char* letter   = letter_by_prec[prec];
    const char* variant  = suffix_by_api[api];
    const char* accuracy = suffix_by_acc[acc];

    snprintf (buff, maxlen, "%s%s%s%s_%s ", prefix, letter, fname, variant, accuracy);

    return buff;
} /* own_full_name */

/**
 * @brief Printout ULP value
 *
 * Print arguments, results, expected (reference) values and ULP difference
 * of one vector element to stderr as a single ULP_OVER_BOUND line.
 * Every value is printed twice: in decimal and, in braces, in hexadecimal
 * floating-point form. Nothing is printed for an unrecognized precision.
 *
 * @param[in] fname        Function name
 * @param[in] prec         Function precision
 * @param[in] layout       Function arguments layout
 * @param[in] acc          Function accuracy
 * @param[in] api          Function API variant
 * @param[in] idx          Vector index
 * @param[in] ulp          ULP value to print
 * @param[in] in           Input and reference result arrays
 * @param[in] out          Output arrays (only read here)
 *
 */
static void own_print_ulp (char* fname, int prec, int layout, int acc, int api, int idx, double ulp, VmInputData* in, VmOutputData* out)
{
    /* Buffer for the full function name built by own_full_name */
    char strbuff[NAME_LEN] = {0};

    /* Real single precision */
    if (prec == kSP)
    {
        fprintf (stderr, "\t\tULP_OVER_BOUND: %s[%d](", own_full_name (strbuff, NAME_LEN, fname, prec, api, acc), idx);
        fprintf (stderr, "%.3g {%a}", in->sarg1[idx], in->sarg1[idx]);
        /* Second argument is printed for two-vector layouts only */
        if ((layout == kVVtoV) || (layout == kVVXtoV)) fprintf (stderr, ", %.3g {%a}", in->sarg2[idx], in->sarg2[idx]);
        fprintf (stderr, ") = %.3g {%a}", out->sres1[api][idx], out->sres1[api][idx]);
        /* Second result and second reference are printed for the two-output layout only */
        if (layout == kVtoVV) fprintf (stderr, ", %.3g {%a}", out->sres2[api][idx], out->sres2[api][idx]);
        fprintf (stderr, ", expected = %.3lg {%la}", in->sref1[idx], in->sref1[idx]);
        if (layout == kVtoVV) fprintf (stderr, ", %.3lg {%la}", in->sref2[idx], in->sref2[idx]);
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
    }
    /* Real double precision */
    else if (prec == kDP)
    {
        fprintf (stderr, "\t\tULP_OVER_BOUND: %s[%d](", own_full_name (strbuff, NAME_LEN, fname, prec, api, acc), idx);
        fprintf (stderr, "%.3lg {%la}",in->darg1[idx],in->darg1[idx]);
        if ((layout == kVVtoV) || (layout == kVVXtoV)) fprintf (stderr, ", %.3lg {%la}", in->darg2[idx], in->darg2[idx]);
        fprintf (stderr, ") = %.3lg {%la}", out->dres1[api][idx], out->dres1[api][idx]);
        if (layout == kVtoVV) fprintf (stderr, ", %.3lg {%la}", out->dres2[api][idx], out->dres2[api][idx]);
        fprintf (stderr, ", expected = %.3lg {%la}", in->dref1[idx], in->dref1[idx]);
        if (layout == kVtoVV) fprintf (stderr, ", %.3lg {%la}", in->dref2[idx], in->dref2[idx]);
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
   }
    /* Complex single precision */
    else if (prec == kCP)
    {
        fprintf (stderr, "\t\tULP_OVER_BOUND: %s[%d](", own_full_name(strbuff, NAME_LEN, fname, prec, api, acc), idx);
        /* Real-to-complex layout takes its argument from the real array */
        if (layout == kVRtoVC) fprintf (stderr, "%.3g {%a}", in->sarg1[idx], in->sarg1[idx]);
        else                  fprintf (stderr, "%.3g+i*%.3g {%a+i*%a}",
                              creal(in->carg1[idx]), cimag(in->carg1[idx]), creal(in->carg1[idx]), cimag(in->carg1[idx]));
        if (layout == kVVtoV)  fprintf (stderr, ", %.3g+i*%.3g {%a+i*%a}",
                              creal(in->carg2[idx]), cimag(in->carg2[idx]), creal(in->carg2[idx]), cimag(in->carg2[idx]));
        /* Complex-to-real layout: the complex result array is read as an array of real values */
        if (layout == kVCtoVR) fprintf (stderr, ") = %.3g {%a}", ((float*)(out->cres1[api]))[idx], ((float*)(out->cres1[api]))[idx]);
        else                  fprintf (stderr, ") = %.3g+i*%.3g {%a+i*%a}",
                              creal(out->cres1[api][idx]), cimag(out->cres1[api][idx]), creal(out->cres1[api][idx]), cimag(out->cres1[api][idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3g+i*%.3g {%a+i*%a}",
                              creal(out->cres2[api][idx]), cimag(out->cres2[api][idx]), creal(out->cres2[api][idx]), cimag(out->cres2[api][idx]));
        /* Complex-to-real layout: the complex reference array is read as an array of doubles */
        if (layout == kVCtoVR) fprintf (stderr, ", expected = %.3lg {%la}", ((double*)(in->cref1))[idx], ((double*)(in->cref1))[idx]);
        else                  fprintf (stderr, ", expected = %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->cref1[idx]), cimag(in->cref1[idx]), creal(in->cref1[idx]), cimag(in->cref1[idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3g+i*%.3g {%la+i*%la}", creal(in->cref2[idx]),
                              cimag(in->cref2[idx]), creal(in->cref2[idx]), cimag(in->cref2[idx]));
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
    }
    /* Complex double precision */
    else if (prec == kZP)
    {
        fprintf (stderr, "\t\tULP_OVER_BOUND: %s[%d](", own_full_name(strbuff, NAME_LEN, fname, prec, api, acc), idx);
        /* Real-to-complex layout takes its argument from the real array */
        if (layout == kVRtoVC) fprintf (stderr, "%.3lg {%la}", in->darg1[idx], in->darg1[idx]);
        else                  fprintf (stderr, "%.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zarg1[idx]), cimag(in->zarg1[idx]), creal(in->zarg1[idx]), cimag(in->zarg1[idx]));
        if (layout == kVVtoV)  fprintf (stderr, ", %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zarg2[idx]), cimag(in->zarg2[idx]), creal(in->zarg2[idx]), cimag(in->zarg2[idx]));
        /* Complex-to-real layout: the complex result array is read as an array of doubles */
        if (layout == kVCtoVR) fprintf (stderr, ") = %.3lg {%la}", ((double*)(out->zres1[api]))[idx], ((double*)(out->zres1[api]))[idx]);
        else                  fprintf (stderr, ") = %.3lg+i*%.3lg {%la+i*%la}",
                              creal(out->zres1[api][idx]), cimag(out->zres1[api][idx]), creal(out->zres1[api][idx]), cimag(out->zres1[api][idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3lg+i*%.3lg {%la+i*%la}",
                              creal(out->zres2[api][idx]), cimag(out->zres2[api][idx]), creal(out->zres2[api][idx]), cimag(out->zres2[api][idx]));
        /* Complex-to-real layout: the complex reference array is read as an array of doubles */
        if (layout == kVCtoVR) fprintf (stderr, ", expected = %.3lg {%la}", ((double*)(in->zref1))[idx], ((double*)(in->zref1))[idx]);
        else                  fprintf (stderr, ", expected = %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zref1[idx]), cimag(in->zref1[idx]), creal(in->zref1[idx]), cimag(in->zref1[idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zref2[idx]), cimag(in->zref2[idx]), creal(in->zref2[idx]), cimag(in->zref2[idx]));
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
    }

    /* Push the report out immediately */
    fflush (stderr);
    return;
} /* own_print_ulp */

/**
 * @brief Evaluation of one VM functions family
 *
 * Measure accuracy on VM functions family in comparison to reference scalar implementations.
 *
 * @param[in] fname        Function name
 * @param[in] beg          Begin of input ranges
 * @param[in] end          End of input ranges
 * @param[in] vmfunc       Pointer to VM functions launcher
 * @param[in] reffunc      Real reference function
 * @param[in] creffunc     Complex reference function
 * @param[in] layout       Function arguments layout
 * @param[in] in           Input and reference result arrays
 * @param[out] out         Output arrays
 * @return                 Total number of errors
 *
 */
static int own_evaluate_func (char* fname, double beg, double end,
                              VmFunc vmfunc, void* reffunc, void* creffunc,
                              int layout, VmInputData* in, VmOutputData* out)
{
    int printed = 0;
    int err = 0, warn = 0;
    int serr  = 0, derr  = 0, cerr  = 0, zerr  = 0,
        swarn = 0, dwarn = 0, cwarn = 0, zwarn = 0;
    float  sulp[kApiNum][kAccNum]    = {0}, culp[kApiNum][kAccNum]    = {0},
           smaxulp[kApiNum][kAccNum] = {0}, cmaxulp[kApiNum][kAccNum] = {0},
           sresulp[kAccNum]          = {0}, cresulp[kAccNum]          = {0};
    double dulp[kApiNum][kAccNum]    = {0}, zulp[kApiNum][kAccNum]    = {0},
           dmaxulp[kApiNum][kAccNum] = {0}, zmaxulp[kApiNum][kAccNum] = {0},
           dresulp[kAccNum]          = {0}, zresulp[kAccNum]          = {0};
    int has_real = (reffunc != NULL)?1:0, has_complex = (creffunc != NULL)?1:0;

    own_fill_input (layout, beg, end, in);
    own_fill_reference (layout, reffunc, creffunc, in);

    for (int a = kHA; a < kAccNum; a = a + 1)
    {
        /* Launch all API's of function family */
        vmfunc (a, in, out);

        for (int v = kFunc; v < kApiNum; v = v + 1)
        {
            printed = 0;

            if (has_real)
            {
                for (int i = 0; i < length; i = i + 1)
                {
                    warn = 0; err = 0;
                    /* Use stride increment for evaluating strided functions */
                    if (((v == kFuncI) || (v == kMFuncI)) && ((i % stride) || (i >= length/stride))) continue;

                    /* Compute ULP */
                    sulp[v][a] = own_s_compute_ulp (out->sres1[v][i], in->sref1[i]);
                    dulp[v][a] = own_d_compute_ulp (out->dres1[v][i], in->dref1[i]);
                    if (layout == kVtoVV)
                    {
                        sulp[v][a] = fmaxf (sulp[v][a], own_s_compute_ulp (out->sres2[v][i], in->sref2[i]));
                        dulp[v][a] = fmax  (dulp[v][a], own_d_compute_ulp (out->dres1[v][i], in->dref1[i]));
                    }

                    /* Check if ULP is greater than allowed */
                    if (sulp[v][a] > s_allowed_ulp[a])
                    {
                        /* Allows HA/LA linearfrac functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if ((layout == kVVXtoV) && (sulp[v][a] < s_allowed_ulp[kEP])) warn = 1;
                        else err = 1;

                        swarn += warn; serr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kSP, layout, a, v, i, sulp[v][a], in, out);
                            printed++;
                        }
                    }

                    /* Check if ULP is greater than allowed */
                    if (dulp[v][a] > d_allowed_ulp[a])
                    {
                        /* Allows HA/LA linearfrac functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if ((layout == kVVXtoV) && (dulp[v][a] < d_allowed_ulp[kEP])) warn = 1;
                        else err = 1;

                        dwarn += warn; derr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kDP, layout, a, v, i, dulp[v][a], in, out);
                            printed++;
                        }
                    }
                    smaxulp[v][a] = fmaxf (smaxulp[v][a], sulp[v][a]);
                    dmaxulp[v][a] = fmax  (dmaxulp[v][a], dulp[v][a]);
                } /* for (int i = 0; i < length; i++) */
                sresulp[a] = fmaxf (sresulp[a], smaxulp[v][a]);
                dresulp[a] = fmax  (dresulp[a], dmaxulp[v][a]);
            } /* if (has_real) */

            if (has_complex)
            {
                for (int i = 0; i < length; i++)
                {
                    /* Use stride increment for evaluating strided functions */
                    if (((v == kFuncI) || (v == kMFuncI)) && ((i % stride) || (i >= length/stride))) continue;

                    if (layout == kVCtoVR)
                    {
                        float  *sres1 = (float*)(out->cres1[v]);
                        double *sref1 = (double*)(in->cref1);
                        double *dres1 = (double*)(out->zres1[v]), *dref1 = (double*)(in->zref1);
                        culp[v][a] = own_s_compute_ulp (sres1[i], sref1[i]);
                        zulp[v][a] = own_d_compute_ulp (dres1[i], dref1[i]);
                    }
                    else
                    {
                        culp[v][a] = own_c_compute_ulp (out->cres1[v][i], in->cref1[i]);
                        zulp[v][a] = own_z_compute_ulp (out->zres1[v][i], in->zref1[i]);
                    }

                    if (culp[v][a] > c_allowed_ulp[a])
                    {
                        /* Allows HA/LA complex functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if (culp[v][a] < c_allowed_ulp[kEP]) warn = 1;
                        else err = 1;

                        cwarn += warn; cerr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kCP, layout, a, v, i, culp[v][a], in, out);
                            printed++;
                        }
                    }
                    if (zulp[v][a] > z_allowed_ulp[a])
                    {
                        /* Allows HA/LA complex functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if (zulp[v][a] < z_allowed_ulp[kEP]) warn = 1;
                        else err = 1;

                        zwarn += warn; zerr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kZP, layout, a, v, i, zulp[v][a], in, out);
                            printed++;
                        }
                    }
                    cmaxulp[v][a] = fmaxf (cmaxulp[v][a], culp[v][a]);
                    zmaxulp[v][a] = fmax  (zmaxulp[v][a], zulp[v][a]);
                } /* for (int i = 0; i < length; i++) */
                cresulp[a] = fmaxf (cresulp[a], cmaxulp[v][a]);
                zresulp[a] = fmax  (zresulp[a], zmaxulp[v][a]);
            } /* if (has_complex) */
        } /* for (int v = kFunc; v <= kMFuncI; v++) */
    } /* for (int a = kHA; a <= kEP; a++) */

    if (has_real)
    {
        fprintf (stdout, "\ts%-11s, ha:,%7.2g, la:, %7.2g, ep:, %7.2g, %s\n",
                 fname, sresulp[kHA], sresulp[kLA], sresulp[kEP], (serr)?"OVER":(swarn)?"WARN":"NORM");
        fprintf (stdout, "\td%-11s, ha:,%7.2lg, la:, %7.2lg, ep:, %7.2lg, %s\n",
                 fname, dresulp[kHA], dresulp[kLA], dresulp[kEP], (derr)?"OVER":(dwarn)?"WARN":"NORM");
    }
    if (has_complex)
    {
        fprintf (stdout, "\tc%-11s, ha:,%7.2g, la:, %7.2g, ep:, %7.2g, %s\n",
                 fname, cresulp[kHA], cresulp[kLA], cresulp[kEP], (cerr)?"OVER":(cwarn)?"WARN":"NORM");
        fprintf (stdout, "\tz%-11s, ha:,%7.2lg, la:, %7.2lg, ep:, %7.2lg, %s\n",
                 fname, zresulp[kHA], zresulp[kLA], zresulp[kEP], (zerr)?"OVER":(zwarn)?"WARN":"NORM");
    }

    fflush (stdout);
    // Return total number of errors
    return (serr + derr + cerr + zerr);
} /* own_evaluate_func */

/**
 * @brief Provide string description of VML status code
 *
 * vml_status_string maps VML status code st to the name of the matching
 * VML_STATUS_* constant by searching a small lookup table.
 *
 * @param[in] st  VML status code
 * @return         const char* with text of corresponding code,
 *                 "VML_STATUS_UNKNOWN" when the code is not in the table
 *
 */
static const char* vml_status_string(int st) {

    /* Known status codes paired with their printable names */
    static const struct { int code; const char *text; } known[] = {
        { VML_STATUS_OK,              "VML_STATUS_OK"              },
        { VML_STATUS_BADSIZE,         "VML_STATUS_BADSIZE"         },
        { VML_STATUS_BADMEM,          "VML_STATUS_BADMEM"          },
        { VML_STATUS_ERRDOM,          "VML_STATUS_ERRDOM"          },
        { VML_STATUS_SING,            "VML_STATUS_SING"            },
        { VML_STATUS_OVERFLOW,        "VML_STATUS_OVERFLOW"        },
        { VML_STATUS_UNDERFLOW,       "VML_STATUS_UNDERFLOW"       },
        { VML_STATUS_ACCURACYWARNING, "VML_STATUS_ACCURACYWARNING" },
    };
    const size_t count = sizeof (known) / sizeof (known[0]);

    for (size_t k = 0; k < count; k++) {
        if (known[k].code == st) {
            return known[k].text;
        }
    }

    return "VML_STATUS_UNKNOWN";
}

/**
 * @brief Main function for VM API testing
 *
 * Main performs accuracy testing of all VM OMP offload math functions
 *
 * @param[in] argc         Number of arguments
 * @param[in] argv         Pointer to argument strings
 * @return                 0
 *
 */
int main (int argc, char **argv)
{
    /* Total errors */
    int err = 0;

    /* Error satatus */
    int st = VML_STATUS_OK;

    VmInputData  in;
    VmOutputData out;

    fprintf (stdout, "OpenMP Offload C vm_all_funcs_64: started...\n"); fflush (stdout);

    own_allocate_data (length, &in, &out);

    vmlClearErrStatus();

    fprintf (stdout, "\t===========================================================\n");
    err += own_evaluate_func ("Asin",       -0.9,   0.9,      own_vm_asin,       asin,           casin,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Acos",       -0.9,   0.9,      own_vm_acos,       acos,           cacos,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Atan",       -10000, 10000,    own_vm_atan,       atan,           catan,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Atan2",      -10000, 10000,    own_vm_atan2,      atan2,          NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Asinh",      -10000, 10000,    own_vm_asinh,      asinh,          casinh,         kVtoV,      &in, &out);
    err += own_evaluate_func ("Acosh",       1.01,  1000,     own_vm_acosh,      acosh,          cacosh,         kVtoV,      &in, &out);
    err += own_evaluate_func ("Atanh",      -0.9,   0.9,      own_vm_atanh,      atanh,          catanh,         kVtoV,      &in, &out);
    err += own_evaluate_func ("Sin",        -10,    10,       own_vm_sin,        sin,            csin,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Cos",        -10,    10,       own_vm_cos,        cos,            ccos,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Tan",        -10,    10,       own_vm_tan,        tan,            ctan,           kVtoV,      &in, &out);
    err += own_evaluate_func ("SinCos",     -10000, 10000,    own_vm_sincos,     own_sincos,     NULL,           kVtoVV,     &in, &out);
    err += own_evaluate_func ("Sinh",       -50,    50,       own_vm_sinh,       sinh,           csinh,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Cosh",       -50,    50,       own_vm_cosh,       cosh,           ccosh,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Tanh",       -5,     5,        own_vm_tanh,       tanh,           ctanh,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Exp",        -75,    75,       own_vm_exp,        exp,            cexp,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Exp2",       -30,    30,       own_vm_exp2,       exp2,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Expm1",      -30,    30,       own_vm_expm1,      expm1,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Ln",          1.01,  100000,   own_vm_ln,         log,            clog,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Log2",        1.01,  100000,   own_vm_log2,       log2,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Log1p",       0.01,  100000,   own_vm_log1p,      log1p,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Pow",         0.1,   10,       own_vm_pow,        pow,            cpow,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Powr",        0.1,   10,       own_vm_powr,       pow,            NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Powx",        0.1,   10,       own_vm_powx,       pow,            cpow,           kVXtoV,     &in, &out);
    err += own_evaluate_func ("Pow2o3",      0.1,   10,       own_vm_pow2o3,     own_pow2o3,     NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Pow3o2",      0.1,   10,       own_vm_pow3o2,     own_pow3o2,     NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Sqrt",        0.1,   100,      own_vm_sqrt,       sqrt,           csqrt,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Cbrt",        0.1,   10000,    own_vm_cbrt,       cbrt,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("InvSqrt",     0.1,   10000,    own_vm_invsqrt,    own_invsqrt,    NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("InvCbrt",     0.1,   10000,    own_vm_invcbrt,    own_invcbrt,    NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Hypot",      -10000, 10000,    own_vm_hypot,      hypot,          NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Erf",        -5,     5,        own_vm_erf,        erf,            NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Erfc",       -2,     5,        own_vm_erfc,       erfc,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("LGamma",      0,     5,        own_vm_lgamma,     lgamma,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("TGamma",      0,     5,        own_vm_tgamma,     tgamma,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("ExpInt1",     0.1,   5,        own_vm_expint1,    own_expint1,    NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Ceil",       -10000, 10000,    own_vm_ceil,       ceil,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Floor",      -10000, 10000,    own_vm_floor,      floor,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Round",      -10000, 10000,    own_vm_round,      round,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Trunc",      -10000, 10000,    own_vm_trunc,      trunc,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Rint",       -10000, 10000,    own_vm_rint,       rint,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("NearbyInt",  -10000, 10000,    own_vm_nearbyint,  nearbyint,      NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Remainder",  -10000, 10000,    own_vm_remainder,  remainder,      NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("LinearFrac", -1000,  1000,     own_vm_linearfrac, own_linearfrac, NULL,           kVVXtoV,    &in, &out);
    err += own_evaluate_func ("Add",        -10000, 10000,    own_vm_add,        own_add,        own_cadd,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Sub",        -10000, 10000,    own_vm_sub,        own_sub,        own_csub,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Mul",        -10000, 10000,    own_vm_mul,        own_mul,        own_cmul,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Div",        -10000, 10000,    own_vm_div,        own_div,        own_cdiv,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Sqr",        -10000, 10000,    own_vm_sqr,        own_sqr,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Inv",        -10000, 10000,    own_vm_inv,        own_inv,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Modf",       -10000, 10000,    own_vm_modf,       own_modf,       NULL,           kVtoVV,     &in, &out);
    err += own_evaluate_func ("Fmod",       -10000, 10000,    own_vm_fmod,       fmod,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Fdim",       -10000, 10000,    own_vm_fdim,       fdim,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Fmax",       -10000, 10000,    own_vm_fmax,       fmax,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Fmin",       -10000, 10000,    own_vm_fmin,       fmin,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("MaxMag",     -10000, 10000,    own_vm_maxmag,     own_maxmag,     NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("MinMag",     -10000, 10000,    own_vm_minmag,     own_minmag,     NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("NextAfter",  -10000, 10000,    own_vm_nextafter,  nextafter,      NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("CopySign",   -10000, 10000,    own_vm_copysign,   copysign,       NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Frac",       -10000, 10000,    own_vm_frac,       own_frac,       NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Conj",       -10000, 10000,    own_vm_conj,       NULL,           conj,           kVtoV,      &in, &out);
    err += own_evaluate_func ("MulByConj",  -10000, 10000,    own_vm_mulbyconj,  NULL,           own_cmulbyconj, kVVtoV,     &in, &out);
    err += own_evaluate_func ("CIS",        -10000, 10000,    own_vm_cis,        NULL,           own_cis,        kVRtoVC,    &in, &out);
    err += own_evaluate_func ("Arg",        -10000, 10000,    own_vm_arg,        NULL,           own_carg,       kVCtoVR,    &in, &out);
    err += own_evaluate_func ("Abs",        -10000, 10000,    own_vm_abs,        fabs,           own_cabs,       kVCtoVR,    &in, &out);
    /* Functions with Intel-specific reference LIBM implementations */
#if (defined __INTEL_COMPILER) || (defined __INTEL_LLVM_COMPILER)
    err += own_evaluate_func ("Asinpi",     -0.9,   0.9,      own_vm_asinpi,     asinpi,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Acospi",     -0.9,   0.9,      own_vm_acospi,     acospi,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Atanpi",     -10000, 10000,    own_vm_atanpi,     atanpi,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Atan2pi",    -10000, 10000,    own_vm_atan2pi,    atan2pi,        NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Sind",       -10000, 10000,    own_vm_sind,       sind,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Cosd",       -10000, 10000,    own_vm_cosd,       cosd,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Tand",       -10000, 10000,    own_vm_tand,       tand,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Sinpi",      -10000, 10000,    own_vm_sinpi,      sinpi,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Cospi",      -10000, 10000,    own_vm_cospi,      cospi,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Tanpi",      -10000, 10000,    own_vm_tanpi,      tanpi,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Exp10",      -30,    30,       own_vm_exp10,      exp10,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Log10",       1.01,  100,      own_vm_log10,      log10,          clog10,         kVtoV,      &in, &out);
    err += own_evaluate_func ("ErfInv",     -0.9,   0.9,      own_vm_erfinv,     erfinv,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("ErfcInv",    -0.1,   1.9,      own_vm_erfcinv,    erfcinv,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("CdfNorm",    -4,     4,        own_vm_cdfnorm,    cdfnorm,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("CdfNormInv", -0.1,   0.9,      own_vm_cdfnorminv, cdfnorminv,     NULL,           kVtoV,      &in, &out);
#endif

    st   = vmlGetErrStatus();
    fprintf (stdout, "\t===========================================================\n");
    fprintf (stdout, "OpenMP Offload C vm_all_funcs_64: status: %s[%s], accuracy: %s\n\n",
        vml_status_string(st),
        (st >= VML_STATUS_OK) ? "expected" : "unexpected",
        err > 0 ? "over bounds" : "normal"
    );

    own_deallocate_data (&in, &out);

    return 0;
} /* main */

