You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
230 lines
8.3 KiB
230 lines
8.3 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include <stdint.h>
|
|
#include <math.h>
|
|
#include "fplib.h"
|
|
|
|
#if !defined(FLT_MANT_DIG)
|
|
#define FLT_MANT_DIG 24
|
|
#endif
|
|
#define as_float(x) (*((float *)(&x)))
|
|
#define as_long(x) (*((int64_t *)(&x)))
|
|
|
|
static uint32_t clz(uint64_t value)
|
|
{
|
|
uint32_t num_zeros;
|
|
|
|
for( num_zeros = 0; num_zeros < (sizeof(uint64_t)*8); num_zeros++)
|
|
{
|
|
volatile uint64_t v = 0x8000000000000000ull & (value << num_zeros);
|
|
if (v) break;
|
|
}
|
|
return num_zeros;
|
|
}
|
|
|
|
float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
|
|
{
|
|
switch (rnd) {
|
|
case qcomRTZ: {
|
|
int sign = 0;
|
|
if (!data)
|
|
return 0.0f;
|
|
if (data < 0){
|
|
data = - data;
|
|
sign = 1;
|
|
}
|
|
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
|
|
int mantShift = 40 - clz(data);
|
|
uint32_t mantissa;
|
|
if (mantShift >= 0)
|
|
mantissa = (uint32_t)((uint64_t)data >> mantShift);
|
|
else
|
|
mantissa = (uint32_t)((uint64_t)data << -mantShift);
|
|
mantissa &= 0x7fffff;//mask off the leading 1
|
|
|
|
uint32_t result = exponent | mantissa;
|
|
if (sign)
|
|
result |= 0x80000000;
|
|
return as_float(result);
|
|
break;
|
|
}
|
|
case qcomRTE: return (float)(data); break;
|
|
case qcomRTP: {
|
|
int sign = 0;
|
|
int inExact = 0;
|
|
uint32_t f = 0xdf000000;
|
|
if (!data)
|
|
return 0.0f;
|
|
if (data == 0x8000000000000000)
|
|
return as_float(f);
|
|
if (data < 0){
|
|
data = - data;
|
|
sign = 1;
|
|
}
|
|
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
|
|
int mantShift = 40 - clz(data);
|
|
uint32_t mantissa;
|
|
if (mantShift >= 0){
|
|
uint64_t temp = (uint64_t)data >> mantShift;
|
|
uint64_t mask = (1 << mantShift) - 1;
|
|
if ((temp << mantShift) != data)
|
|
inExact = 1;
|
|
mantissa = (uint32_t)temp;
|
|
}
|
|
else
|
|
{
|
|
mantissa = (uint32_t)((uint64_t)data << -mantShift);
|
|
}
|
|
mantissa &= 0x7fffff;//mask off the leading 1
|
|
|
|
uint32_t result = exponent | mantissa;
|
|
if (sign)
|
|
result |= 0x80000000;
|
|
if (sign)
|
|
return as_float(result); // for negative inputs return rtz results
|
|
else
|
|
{
|
|
if(inExact)
|
|
{ // for positive inputs return higher next fp
|
|
uint32_t high_float = 0x7f7fffff;
|
|
return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
|
|
}
|
|
else
|
|
return as_float(result);
|
|
}
|
|
}
|
|
break;
|
|
case qcomRTN: {
|
|
int sign = 0;
|
|
int inExact = 0;
|
|
uint32_t f = 0xdf000000;
|
|
if (!data)
|
|
return 0.0f;
|
|
if (data == 0x8000000000000000)
|
|
return as_float(f);
|
|
if (data < 0){
|
|
data = - data;
|
|
sign = 1;
|
|
}
|
|
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
|
|
int mantShift = 40 - clz(data);
|
|
uint32_t mantissa;
|
|
if (mantShift >= 0){
|
|
uint64_t temp = (uint64_t)data >> mantShift;
|
|
uint64_t mask = (1 << mantShift) - 1;
|
|
if (temp << mantShift != data)
|
|
inExact = 1;
|
|
mantissa = (uint32_t)temp;
|
|
}
|
|
else
|
|
mantissa = (uint32_t)((uint64_t)data << -mantShift);
|
|
mantissa &= 0x7fffff;//mask off the leading 1
|
|
|
|
uint32_t result = exponent | mantissa;
|
|
if (sign)
|
|
result |= 0x80000000;
|
|
if (!sign)
|
|
return as_float(result); // for positive inputs return RTZ result
|
|
else{
|
|
if(inExact){ // for negative inputs find the lower next fp number
|
|
uint32_t low_float = 0xff7fffff;
|
|
return nextafterf(as_float(result), as_float(low_float)); // could be simplified with some inc and carry operation
|
|
}
|
|
else
|
|
return as_float(result);
|
|
}
|
|
}
|
|
case qcomRoundingModeCount: {
|
|
break; // Avoid build error for unhandled enum value
|
|
}
|
|
}
|
|
return 0.0f;
|
|
}
|
|
|
|
float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
|
|
{
|
|
switch (rnd) {
|
|
case qcomRTZ: {
|
|
if (!data)
|
|
return 0.0f;
|
|
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
|
|
int mantShift = 40 - clz(data);
|
|
uint32_t mantissa;
|
|
if (mantShift >= 0)
|
|
mantissa = (uint32_t)(data >> mantShift);
|
|
else
|
|
mantissa = (uint32_t)(data << -mantShift);
|
|
mantissa &= 0x7fffff;//mask off the leading 1
|
|
|
|
uint32_t result = exponent | mantissa;
|
|
return as_float(result);
|
|
break;
|
|
}
|
|
case qcomRTE: return (float)(data); break;
|
|
case qcomRTP: {
|
|
int inExact = 0;
|
|
if (!data)
|
|
return 0.0f;
|
|
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
|
|
int mantShift = 40 - clz(data);
|
|
uint32_t mantissa;
|
|
if (mantShift >= 0){
|
|
uint64_t temp = data >> mantShift;
|
|
uint64_t mask = (1 << mantShift) - 1;
|
|
if (temp << mantShift != data)
|
|
inExact = 1;
|
|
mantissa = (uint32_t)temp;
|
|
}
|
|
else
|
|
mantissa = (uint32_t)(data << -mantShift);
|
|
mantissa &= 0x7fffff;//mask off the leading 1
|
|
|
|
uint32_t result = exponent | mantissa;
|
|
if(inExact){ // for positive inputs return higher next fp
|
|
uint32_t high_float = 0x7f7fffff;
|
|
return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
|
|
}
|
|
else
|
|
return as_float(result);
|
|
}
|
|
case qcomRTN: {
|
|
int inExact = 0;
|
|
if (!data)
|
|
return 0.0f;
|
|
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
|
|
int mantShift = 40 - clz(data);
|
|
uint32_t mantissa;
|
|
if (mantShift >= 0){
|
|
uint64_t temp = (uint64_t)data >> mantShift;
|
|
uint64_t mask = (1 << mantShift) - 1;
|
|
if (temp << mantShift != data)
|
|
inExact = 1;
|
|
mantissa = (uint32_t)temp;
|
|
}
|
|
else
|
|
mantissa = (uint32_t)((uint64_t)data << -mantShift);
|
|
mantissa &= 0x7fffff;//mask off the leading 1
|
|
|
|
uint32_t result = exponent | mantissa;
|
|
return as_float(result); // for positive inputs return RTZ result
|
|
}
|
|
case qcomRoundingModeCount: {
|
|
break; // Avoid build error for unhandled enum value
|
|
}
|
|
}
|
|
return 0.0f;
|
|
}
|