// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include #include #include "fplib.h" #if !defined(FLT_MANT_DIG) #define FLT_MANT_DIG 24 #endif #define as_float(x) (*((float *)(&x))) #define as_long(x) (*((int64_t *)(&x))) static uint32_t clz(uint64_t value) { uint32_t num_zeros; for( num_zeros = 0; num_zeros < (sizeof(uint64_t)*8); num_zeros++) { volatile uint64_t v = 0x8000000000000000ull & (value << num_zeros); if (v) break; } return num_zeros; } float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd) { switch (rnd) { case qcomRTZ: { int sign = 0; if (!data) return 0.0f; if (data < 0){ data = - data; sign = 1; } uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers int mantShift = 40 - clz(data); uint32_t mantissa; if (mantShift >= 0) mantissa = (uint32_t)((uint64_t)data >> mantShift); else mantissa = (uint32_t)((uint64_t)data << -mantShift); mantissa &= 0x7fffff;//mask off the leading 1 uint32_t result = exponent | mantissa; if (sign) result |= 0x80000000; return as_float(result); break; } case qcomRTE: return (float)(data); break; case qcomRTP: { int sign = 0; int inExact = 0; uint32_t f = 0xdf000000; if (!data) return 0.0f; if (data == 0x8000000000000000) return as_float(f); if (data < 0){ data = - data; sign = 1; } uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers int mantShift = 40 - clz(data); uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; uint64_t mask = (1 << mantShift) - 1; if ((temp << mantShift) != data) inExact = 1; mantissa = (uint32_t)temp; } else { mantissa = (uint32_t)((uint64_t)data << -mantShift); } mantissa &= 0x7fffff;//mask off the leading 1 uint32_t result = exponent | mantissa; if (sign) result |= 0x80000000; if (sign) return as_float(result); // for negative inputs return rtz results else { if(inExact) { // for positive inputs return higher next fp uint32_t high_float = 0x7f7fffff; return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation } else return as_float(result); } } break; case qcomRTN: { int sign = 0; int inExact = 0; uint32_t f = 0xdf000000; if (!data) return 0.0f; if (data == 0x8000000000000000) return as_float(f); if (data < 0){ data = - data; sign = 1; } uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers int mantShift = 40 - clz(data); uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; } else mantissa = (uint32_t)((uint64_t)data << -mantShift); mantissa &= 0x7fffff;//mask off the leading 1 uint32_t result = exponent | mantissa; if (sign) result |= 0x80000000; if (!sign) return as_float(result); // for positive inputs return RTZ result else{ if(inExact){ // for negative inputs find the lower next fp number uint32_t low_float = 0xff7fffff; return nextafterf(as_float(result), as_float(low_float)); // could be simplified with some inc and carry operation } else return as_float(result); } } case qcomRoundingModeCount: { break; // Avoid build error for unhandled enum value } } return 0.0f; } float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd) { switch (rnd) { case qcomRTZ: { if (!data) return 0.0f; uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers int mantShift = 40 - clz(data); uint32_t mantissa; if (mantShift >= 0) mantissa = (uint32_t)(data >> mantShift); else mantissa = (uint32_t)(data << -mantShift); mantissa &= 0x7fffff;//mask off the leading 1 uint32_t result = exponent | mantissa; return as_float(result); break; } case qcomRTE: return (float)(data); break; case qcomRTP: { int inExact = 0; if (!data) return 0.0f; uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers int mantShift = 40 - clz(data); uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = data >> mantShift; uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; } else mantissa = (uint32_t)(data << -mantShift); mantissa &= 0x7fffff;//mask off the leading 1 uint32_t result = exponent | mantissa; if(inExact){ // for positive inputs return higher next fp uint32_t high_float = 0x7f7fffff; return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation } else return as_float(result); } case qcomRTN: { int inExact = 0; if (!data) return 0.0f; uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers int mantShift = 40 - clz(data); uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; } else mantissa = (uint32_t)((uint64_t)data << -mantShift); mantissa &= 0x7fffff;//mask off the leading 1 uint32_t result = exponent | mantissa; return as_float(result); // for positive inputs return RTZ result } case qcomRoundingModeCount: { break; // Avoid build error for unhandled enum value } } return 0.0f; }