v811_spc009/external/OpenCL-CTS/test_common/harness/rounding_mode.cpp

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "rounding_mode.h"

#if (defined(__arm__) || defined(__aarch64__))
// Bit positions inside the ARM floating-point control register (FPSCR on
// AArch32, FPCR on AArch64) that this file manipulates.
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode field (bits 23:22)

#define _ARM_FE_FTZ 0x1000000
#define _ARM_FE_NFTZ 0x0

// _FPU_GETCW(cw) reads the control register into the lvalue cw;
// _FPU_SETCW(cw) writes the value cw into the control register.
//
// Both are volatile asm: an asm with output operands is otherwise treated as
// a pure computation, so a read could be merged with an earlier read or moved
// across a write of the same register. The write takes its operand with a
// plain "r" constraint because neither "MSR FPCR" nor "VMSR FPSCR" has an
// immediate form; an "i" alternative would produce unassemblable code if the
// operand ever folded to a constant.
#if defined(__aarch64__)
#define _FPU_GETCW(cw) __asm__ __volatile__("MRS %0,FPCR" : "=r"(cw))
#define _FPU_SETCW(cw) __asm__ __volatile__("MSR FPCR,%0" : : "r"(cw))
#else
#define _FPU_GETCW(cw) __asm__ __volatile__("VMRS %0,FPSCR" : "=r"(cw))
#define _FPU_SETCW(cw) __asm__ __volatile__("VMSR FPSCR,%0" : : "r"(cw))
#endif
#endif

#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
// Rounding-mode encodings, expressed as the two-bit value placed at bit 22
// (the field selected by FPSCR_ROUND_MASK).
#define _ARM_FE_TONEAREST (0 << 22)
#define _ARM_FE_UPWARD (1 << 22)
#define _ARM_FE_DOWNWARD (2 << 22)
#define _ARM_FE_TOWARDZERO (3 << 22)
// Programs the ARM floating-point control register with the hardware rounding
// mode that corresponds to r, and returns the mode that was active before.
//
// r        index into the encoding tables below.
// outType  kfloat/kdouble select the floating-point table; anything else
//          selects the integer table.
//
// The two tables differ only in entry 0 (to-nearest for floating-point
// outputs, toward-zero for integer outputs) -- presumably the "default"
// rounding mode; confirm against the RoundingMode enum.
RoundingMode set_round(RoundingMode r, Type outType)
{
    static const int float_modes[kRoundingModeCount] = {
        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
        _ARM_FE_TOWARDZERO
    };
    static const int integer_modes[kRoundingModeCount] = {
        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
        _ARM_FE_TOWARDZERO
    };
    const int *modes =
        (outType == kfloat || outType == kdouble) ? float_modes : integer_modes;

    // Record the outgoing mode before the register is modified.
    const RoundingMode previous = get_round();

    // Replace only the rounding field; every other control bit is kept.
    int control = 0;
    _FPU_GETCW(control);
    control &= ~FPSCR_ROUND_MASK;
    control |= modes[r];
    _FPU_SETCW(control);

    return previous;
}

// Reads the ARM floating-point control register and translates its rounding
// field into a RoundingMode. Falls back to kDefaultRoundingMode if the field
// matches none of the four known encodings.
RoundingMode get_round(void)
{
    int control;
    _FPU_GETCW(control);

    // Isolate the rounding field; the remaining control bits are irrelevant.
    const int mode_bits = control & FPSCR_ROUND_MASK;

    if (mode_bits == _ARM_FE_TONEAREST) return kRoundToNearestEven;
    if (mode_bits == _ARM_FE_UPWARD) return kRoundUp;
    if (mode_bits == _ARM_FE_DOWNWARD) return kRoundDown;
    if (mode_bits == _ARM_FE_TOWARDZERO) return kRoundTowardZero;

    return kDefaultRoundingMode;
}

#elif !(defined(_WIN32) && defined(_MSC_VER))
// Installs, via fesetround(), the <fenv.h> rounding mode that corresponds to
// r, and returns the mode that was active before the call.
//
// r        index into the encoding tables below.
// outType  kfloat/kdouble select the floating-point table; anything else
//          selects the integer table.
//
// The two tables differ only in entry 0 (to-nearest for floating-point
// outputs, toward-zero for integer outputs) -- presumably the "default"
// rounding mode; confirm against the RoundingMode enum.
//
// Calls abort() if fegetround() reported a mode that is not one of the four
// FE_* values handled here.
RoundingMode set_round(RoundingMode r, Type outType)
{
    static const int float_modes[kRoundingModeCount] = {
        FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    static const int integer_modes[kRoundingModeCount] = {
        FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    const int *modes =
        (outType == kfloat || outType == kdouble) ? float_modes : integer_modes;

    // Sample the outgoing mode first, then switch to the requested one.
    const int previous = fegetround();
    fesetround(modes[r]);

    if (previous == FE_TONEAREST) return kRoundToNearestEven;
    if (previous == FE_UPWARD) return kRoundUp;
    if (previous == FE_DOWNWARD) return kRoundDown;
    if (previous == FE_TOWARDZERO) return kRoundTowardZero;

    abort(); // the previous mode was not recognised
    return kDefaultRoundingMode; // not reached
}

// Translates the current <fenv.h> rounding mode into a RoundingMode. Returns
// kDefaultRoundingMode when fegetround() yields a value other than the four
// FE_* modes handled here.
RoundingMode get_round(void)
{
    const int current = fegetround();

    if (current == FE_TONEAREST) return kRoundToNearestEven;
    if (current == FE_UPWARD) return kRoundUp;
    if (current == FE_DOWNWARD) return kRoundDown;
    if (current == FE_TOWARDZERO) return kRoundTowardZero;

    return kDefaultRoundingMode;
}

#else
// Installs, via _controlfp_s(), the rounding-control value that corresponds
// to r, and returns the mode that was active before the call.
//
// r        index into the encoding tables below.
// outType  kfloat/kdouble select the floating-point table; anything else
//          selects the integer table.
//
// The two tables differ only in entry 0 (_RC_NEAR for floating-point outputs,
// _RC_CHOP for integer outputs) -- presumably the "default" rounding mode;
// confirm against the RoundingMode enum.
//
// If the control word cannot be read, an error is logged, the rounding mode
// is left unchanged and kDefaultRoundingMode is returned.
RoundingMode set_round(RoundingMode r, Type outType)
{
    static const int float_modes[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
                                                         _RC_UP, _RC_DOWN,
                                                         _RC_CHOP };
    static const int integer_modes[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
                                                           _RC_UP, _RC_DOWN,
                                                           _RC_CHOP };
    const int *modes = integer_modes;
    if (outType == kfloat || outType == kdouble) modes = float_modes;

    // A zero mask makes _controlfp_s() a pure query of the control word.
    unsigned int control;
    if (_controlfp_s(&control, 0, 0))
    {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
                   __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    // Decode the rounding-control bits of the outgoing control word.
    RoundingMode previous;
    switch (control & _MCW_RC)
    {
        case _RC_NEAR: previous = kRoundToNearestEven; break;
        case _RC_UP: previous = kRoundUp; break;
        case _RC_DOWN: previous = kRoundDown; break;
        case _RC_CHOP: previous = kRoundTowardZero; break;
        default: previous = kDefaultRoundingMode; break;
    }

    // Update only the rounding-control bits with the requested mode.
    _controlfp_s(&control, modes[r], _MCW_RC);
    return previous;
}

// Translates the rounding-control bits of the current floating-point control
// word into a RoundingMode.
//
// Returns kDefaultRoundingMode when the bits match none of the four _RC_*
// values, or when the control word cannot be read (an error is logged in
// that case, mirroring set_round()).
RoundingMode get_round(void)
{
    // Initialised so that a failed query can never leak an indeterminate
    // value into the decoding below.
    unsigned int control = 0;

    // A zero mask makes _controlfp_s() a pure query of the control word.
    if (_controlfp_s(&control, 0, 0) != 0)
    {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
                   __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    switch (control & _MCW_RC)
    {
        case _RC_NEAR: return kRoundToNearestEven;
        case _RC_UP: return kRoundUp;
        case _RC_DOWN: return kRoundDown;
        case _RC_CHOP: return kRoundTowardZero;
        default: break;
    }

    return kDefaultRoundingMode;
}

#endif

//
// FlushToZero() sets the host processor into ftz mode.  It is intended to have
// a remote effect on the behavior of the code in basic_test_conversions.c. Some
// host processors may not support this mode, in which case you'll need to do
// some clamping in software by testing against FLT_MIN or DBL_MIN in that file.
//
// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT*
// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
// zero for subnormal inputs or outputs when FTZ mode is on like other basic
// operators do (e.g. add, subtract, multiply, divide, etc.)
//
// Configuring hardware to FTZ mode varies by platform.
// CAUTION: Some C implementations may also fail to behave properly in this
// mode.
//
//  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
//  provided that SSE/SSE2 is used for floating point computation! If your OS
//  uses x87, you'll need to figure out how to turn that off for the
//  conversions code in basic_test_conversions.c so that they flush to zero
//  properly. Otherwise, you'll need to add appropriate software clamping to
//  basic_test_conversions.c, in which case these functions are at liberty to
//  do nothing.
//
#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
#include <xmmintrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
#endif
// Switches the host floating-point unit into flush-to-zero mode.
//
// Returns an opaque value to hand to UnFlushToZero(): on x86 it carries the
// MXCSR contents read before the change; on ARM and PowerPC it is NULL.
void *FlushToZero(void)
{
#if !(defined(__APPLE__) || defined(__linux__) || defined(_WIN32))
#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
    // The previous MXCSR travels back to the caller inside the pointer-sized
    // return value, so the int and the pointer share storage.
    union {
        int csr;
        void *token;
    } saved = { _mm_getcsr() };
    // 0x8040 turns on the FZ and DAZ bits described in the comment above.
    _mm_setcsr(saved.csr | 0x8040);
    return saved.token;
#elif defined(__arm__) || defined(__aarch64__)
    // Set the FZ bit, leaving the rest of the control register as it was.
    int control;
    _FPU_GETCW(control);
    _FPU_SETCW(control | FPSCR_FZ);
    return NULL;
#elif defined(__PPC__)
    // Set the NI (non-IEEE) bit in the FPSCR.
    fpu_control_t control = 0;
    _FPU_GETCW(control);
    control |= _FPU_MASK_NI;
    _FPU_SETCW(control);
    return NULL;
#else
#error Unknown arch
#endif
}

// Reverses FlushToZero().
//
// p  the value FlushToZero() returned. On x86 it is reinterpreted as the
//    saved MXCSR contents and written back verbatim; on ARM and PowerPC it is
//    not consulted and the flush-to-zero bit is simply cleared.
void UnFlushToZero(void *p)
{
#if !(defined(__APPLE__) || defined(__linux__) || defined(_WIN32))
#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
    // Recover the MXCSR value that FlushToZero() packed into the pointer.
    union {
        void *token;
        int csr;
    } saved = { p };
    _mm_setcsr(saved.csr);
#elif defined(__arm__) || defined(__aarch64__)
    // Clear the FZ bit, leaving the rest of the control register as it was.
    int control;
    _FPU_GETCW(control);
    _FPU_SETCW(control & ~FPSCR_FZ);
#elif defined(__PPC__)
    // Clear the NI (non-IEEE) bit in the FPSCR.
    fpu_control_t control = 0;
    _FPU_GETCW(control);
    control &= ~_FPU_MASK_NI;
    _FPU_SETCW(control);
#else
#error Unknown arch
#endif
}
v811_spc009_project 4 months ago			`//`
			`// Copyright (c) 2017 The Khronos Group Inc.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`//`
			`#include "rounding_mode.h"`

			`#if (defined(__arm__) \|\| defined(__aarch64__))`
			`#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode`
			`#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:`

			`#define _ARM_FE_FTZ 0x1000000`
			`#define _ARM_FE_NFTZ 0x0`
			`#if defined(__aarch64__)`
			`#define _FPU_GETCW(cw) __asm__("MRS %0,FPCR" : "=r"(cw))`
			`#define _FPU_SETCW(cw) __asm__("MSR FPCR,%0" : : "ri"(cw))`
			`#else`
			`#define _FPU_GETCW(cw) __asm__("VMRS %0,FPSCR" : "=r"(cw))`
			`#define _FPU_SETCW(cw) __asm__("VMSR FPSCR,%0" : : "ri"(cw))`
			`#endif`
			`#endif`

			`#if (defined(__arm__) \|\| defined(__aarch64__)) && defined(__GNUC__)`
			`#define _ARM_FE_TONEAREST 0x0`
			`#define _ARM_FE_UPWARD 0x400000`
			`#define _ARM_FE_DOWNWARD 0x800000`
			`#define _ARM_FE_TOWARDZERO 0xc00000`
			`RoundingMode set_round(RoundingMode r, Type outType)`
			`{`
			`static const int flt_rounds[kRoundingModeCount] = {`
			`_ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,`
			`_ARM_FE_TOWARDZERO`
			`};`
			`static const int int_rounds[kRoundingModeCount] = {`
			`_ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,`
			`_ARM_FE_TOWARDZERO`
			`};`
			`const int *p = int_rounds;`
			`if (outType == kfloat \|\| outType == kdouble) p = flt_rounds;`

			`int fpscr = 0;`
			`RoundingMode oldRound = get_round();`

			`_FPU_GETCW(fpscr);`
			`_FPU_SETCW(p[r] \| (fpscr & ~FPSCR_ROUND_MASK));`

			`return oldRound;`
			`}`

			`RoundingMode get_round(void)`
			`{`
			`int fpscr;`
			`int oldRound;`

			`_FPU_GETCW(fpscr);`
			`oldRound = (fpscr & FPSCR_ROUND_MASK);`

			`switch (oldRound)`
			`{`
			`case _ARM_FE_TONEAREST: return kRoundToNearestEven;`
			`case _ARM_FE_UPWARD: return kRoundUp;`
			`case _ARM_FE_DOWNWARD: return kRoundDown;`
			`case _ARM_FE_TOWARDZERO: return kRoundTowardZero;`
			`}`

			`return kDefaultRoundingMode;`
			`}`

			`#elif !(defined(_WIN32) && defined(_MSC_VER))`
			`RoundingMode set_round(RoundingMode r, Type outType)`
			`{`
			`static const int flt_rounds[kRoundingModeCount] = {`
			`FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO`
			`};`
			`static const int int_rounds[kRoundingModeCount] = {`
			`FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO`
			`};`
			`const int *p = int_rounds;`
			`if (outType == kfloat \|\| outType == kdouble) p = flt_rounds;`
			`int oldRound = fegetround();`
			`fesetround(p[r]);`

			`switch (oldRound)`
			`{`
			`case FE_TONEAREST: return kRoundToNearestEven;`
			`case FE_UPWARD: return kRoundUp;`
			`case FE_DOWNWARD: return kRoundDown;`
			`case FE_TOWARDZERO: return kRoundTowardZero;`
			`default: abort(); // ??!`
			`}`
			`return kDefaultRoundingMode; // never happens`
			`}`

			`RoundingMode get_round(void)`
			`{`
			`int oldRound = fegetround();`

			`switch (oldRound)`
			`{`
			`case FE_TONEAREST: return kRoundToNearestEven;`
			`case FE_UPWARD: return kRoundUp;`
			`case FE_DOWNWARD: return kRoundDown;`
			`case FE_TOWARDZERO: return kRoundTowardZero;`
			`}`

			`return kDefaultRoundingMode;`
			`}`

			`#else`
			`RoundingMode set_round(RoundingMode r, Type outType)`
			`{`
			`static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,`
			`_RC_UP, _RC_DOWN,`
			`_RC_CHOP };`
			`static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,`
			`_RC_UP, _RC_DOWN,`
			`_RC_CHOP };`
			`const int *p =`
			`(outType == kfloat \|\| outType == kdouble) ? flt_rounds : int_rounds;`
			`unsigned int oldRound;`

			`int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound`
			`if (err)`
			`{`
			`vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",`
			`__FILE__, __LINE__);`
			`return kDefaultRoundingMode; // what else never happens`
			`}`

			`oldRound &= _MCW_RC;`

			`RoundingMode old = (oldRound == _RC_NEAR)`
			`? kRoundToNearestEven`
			`: (oldRound == _RC_UP) ? kRoundUp`
			`: (oldRound == _RC_DOWN)`
			`? kRoundDown`
			`: (oldRound == _RC_CHOP) ? kRoundTowardZero`
			`: kDefaultRoundingMode;`

			`_controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode`
			`return old; // returning old rounding mode`
			`}`

			`RoundingMode get_round(void)`
			`{`
			`unsigned int oldRound;`

			`int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound`
			`oldRound &= _MCW_RC;`
			`return (oldRound == _RC_NEAR)`
			`? kRoundToNearestEven`
			`: (oldRound == _RC_UP) ? kRoundUp`
			`: (oldRound == _RC_DOWN)`
			`? kRoundDown`
			`: (oldRound == _RC_CHOP) ? kRoundTowardZero`
			`: kDefaultRoundingMode;`
			`}`

			`#endif`

			`//`
			`// FlushToZero() sets the host processor into ftz mode. It is intended to have`
			`// a remote effect on the behavior of the code in basic_test_conversions.c. Some`
			`// host processors may not support this mode, which case you'll need to do some`
			`// clamping in software by testing against FLT_MIN or DBL_MIN in that file.`
			`//`
			`// Note: IEEE-754 says conversions are basic operations. As such they do NOT`
			`// have the behavior in section 7.5.3 of the OpenCL spec. They ALWAYS flush to`
			`// zero for subnormal inputs or outputs when FTZ mode is on like other basic`
			`// operators do (e.g. add, subtract, multiply, divide, etc.)`
			`//`
			`// Configuring hardware to FTZ mode varies by platform.`
			`// CAUTION: Some C implementations may also fail to behave properly in this`
			`// mode.`
			`//`
			`// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.`
			`// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --`
			`// provided that SSE/SSE2`
			`// is used for floating point computation! If your OS uses x87, you'll`
			`// need to figure out how to turn that off for the conversions code in`
			`// basic_test_conversions.c so that they flush to zero properly.`
			`// Otherwise, you'll need to add appropriate software clamping to`
			`// basic_test_conversions.c in which case, these function are at`
			`// liberty to do nothing.`
			`//`
			`#if defined(__i386__) \|\| defined(__x86_64__) \|\| defined(_WIN32)`
			`#include <xmmintrin.h>`
			`#elif defined(__PPC__)`
			`#include <fpu_control.h>`
			`#endif`
			`void *FlushToZero(void)`
			`{`
			`#if defined(__APPLE__) \|\| defined(__linux__) \|\| defined(_WIN32)`
			`#if defined(__i386__) \|\| defined(__x86_64__) \|\| defined(_MSC_VER)`
			`union {`
			`int i;`
			`void *p;`
			`} u = { _mm_getcsr() };`
			`_mm_setcsr(u.i \| 0x8040);`
			`return u.p;`
			`#elif defined(__arm__) \|\| defined(__aarch64__)`
			`int fpscr;`
			`_FPU_GETCW(fpscr);`
			`_FPU_SETCW(fpscr \| FPSCR_FZ);`
			`return NULL;`
			`#elif defined(__PPC__)`
			`fpu_control_t flags = 0;`
			`_FPU_GETCW(flags);`
			`flags \|= _FPU_MASK_NI;`
			`_FPU_SETCW(flags);`
			`return NULL;`
			`#else`
			`#error Unknown arch`
			`#endif`
			`#else`
			`#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.`
			`#endif`
			`}`

			`// Undo the effects of FlushToZero above, restoring the host to default`
			`// behavior, using the information passed in p.`
			`void UnFlushToZero(void *p)`
			`{`
			`#if defined(__APPLE__) \|\| defined(__linux__) \|\| defined(_WIN32)`
			`#if defined(__i386__) \|\| defined(__x86_64__) \|\| defined(_MSC_VER)`
			`union {`
			`void *p;`
			`int i;`
			`} u = { p };`
			`_mm_setcsr(u.i);`
			`#elif defined(__arm__) \|\| defined(__aarch64__)`
			`int fpscr;`
			`_FPU_GETCW(fpscr);`
			`_FPU_SETCW(fpscr & ~FPSCR_FZ);`
			`#elif defined(__PPC__)`
			`fpu_control_t flags = 0;`
			`_FPU_GETCW(flags);`
			`flags &= ~_FPU_MASK_NI;`
			`_FPU_SETCW(flags);`
			`#else`
			`#error Unknown arch`
			`#endif`
			`#else`
			`#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.`
			`#endif`
			`}`