/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/

/**
*******************************************************************************
* @file
*  ihevc_quant_iquant_ssd.c
*
* @brief
*  Contains function definitions for quantization, followed by Inverse
*  quantization to find transform domain SSD
*
* @author
*  100453, 100578
*
* @par List of Functions:
*  - ihevc_quant_iquant_ssd()
*  - ihevc_quant_iquant_ssd_flat_scale_mat()
*
* @remarks
*  None
*
*******************************************************************************
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "ihevc_typedefs.h"
#include "ihevc_macros.h"
#include "ihevc_platform_macros.h"
#include "ihevc_defs.h"
#include "ihevc_debug.h"
#include "ihevc_trans_tables.h"
#include "ihevc_quant_iquant_ssd.h"
#include "ihevc_func_selector.h"
#include "ihevc_trans_macros.h"

#include <assert.h>

/*****************************************************************************/
/* Globals                                                                   */
/*****************************************************************************/

/**
*******************************************************************************
*
* @brief
*  This function performs quantization, followed by Inverse
*  quantization to find transform domain SSD
*
* @par Description:
*  Performs quantization on coeffs
*
* @param[in] pi2_coeffs
*  4x4 Coeffs
*
* @param[in] pi2_quant_coeff
*  Scaling Matrix
*
* @param[out] pi2_dst
*  Output 4x4 coefficients
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_strd
*  Output Stride
*
* @param[out] csbf
*  coded sub block flag
*
* @param[in] csbf_strd
*  coded sub block flag buffer stride
*
* @param[out] zero_col
*  zero column flag
*
* @param[out] zero_row
*  zero row flag
*
* @returns cbf
*  coded block flag
*
* @remarks
*  None
*
*******************************************************************************
*/

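/*
 * Implementation note (a hedged sketch of the per-coefficient flow below; the
 * exact arithmetic lives in the QUANT and IQUANT macros of
 * ihevc_trans_macros.h, and an inline expansion of the flat-matrix variant
 * appears further down in this file):
 *
 *   level = QUANT(coeff)  : roughly sign(coeff) * ((abs(coeff) *
 *           pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem] + rounding(q_add))
 *           >> q_bits), with q_bits derived from qp_div and the transform size
 *   recon = IQUANT(level) : level rescaled by pi2_dequant_coeff[j] *
 *           g_ihevc_iquant_scales[qp_rem], with qp_div scaling and a rounding
 *           right shift by shift_iq
 *   *pi8_cost accumulates (coeff - recon)^2 over the whole block
 *
 * zero_col and zero_row are written as complemented masks: a set bit in the
 * stored value marks a group of four columns / rows whose quantized
 * coefficients are all zero.
 */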
WORD32 ihevc_quant_iquant_ssd
(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    WORD32 ssd_cost = 0;

    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;
    pi2_q_dst_orig = pi2_q_dst;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            /* Back up the coefficients before Quantization */
            i2_temp = pi2_coeffs[j];

            /* Quantization */
            QUANT(pi2_q_dst[j], pi2_coeffs[j],
                  pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, q_add);

            /* Inverse Quantization */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j] * g_ihevc_iquant_scales[qp_rem],
                   /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation */
            val = i2_temp - pi2_iq_dst[j];
            ssd_cost += val * val;
        }

        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update */ /* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 columns of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }

    return cbf;
}

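/*
 * Illustrative call for a single 4x4 block (hypothetical values; the buffer
 * contents, qp and q_add are placeholders, not taken from the encoder):
 *
 *   WORD16 coeffs[16], q_out[16], iq_out[16];
 *   WORD16 quant_mat[16], dequant_mat[16];    caller-supplied scaling matrices
 *   UWORD8 csbf[1];
 *   WORD32 zero_col, zero_row;
 *   LWORD64 cost;
 *
 *   WORD32 cbf = ihevc_quant_iquant_ssd(coeffs, quant_mat, q_out, iq_out,
 *                                       4, qp / 6, qp % 6, q_add,
 *                                       NULL, NULL, 4, 4, 4,
 *                                       csbf, 1, &zero_col, &zero_row,
 *                                       dequant_mat, &cost);
 *
 * The two rounding-factor pointers may be NULL for this variant, since it
 * only casts them to void and never dereferences them.
 */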
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* This function performs quantization, followed by Inverse
|
|
* quantization
|
|
*
|
|
* @par Description:
|
|
* Performs quantization on coeffs
|
|
*
|
|
* @param[in] pi2_coeffs
|
|
* 4x4 Coeffs
|
|
*
|
|
* @param[in] pi2_quant_coeff
|
|
* Scaling Matrix
|
|
*
|
|
* @param[out] pi2_dst
|
|
* Output 4x4 coefficients
|
|
*
|
|
* @param[in] qp_div
|
|
* Quantization parameter / 6
|
|
*
|
|
* @param[in] qp_rem
|
|
* Quantization parameter % 6
|
|
*
|
|
* @param[in] src_strd
|
|
* Input stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* Output Stride
|
|
*
|
|
* @param[out] csbf
|
|
* coded sub block flag
|
|
*
|
|
* @param[in] csbf_strd
|
|
* coded sub block flag buffer stride
|
|
*
|
|
* @param[out] zero_col
|
|
* zero column flag
|
|
*
|
|
* @param[out] zero_row
|
|
* zero row flag
|
|
*
|
|
* @returns cbf
|
|
* coded block flag
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
WORD32 ihevc_quant_iquant
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD16 i2_temp;
|
|
|
|
(void)pi8_cost;
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/* Quantization */
|
|
QUANT(pi2_q_dst[j], pi2_coeffs[j],
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
|
|
shift_iq,
|
|
qp_div);
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* This function performs quantization, followed by Inverse
|
|
* quantization to find transform domain SSD
|
|
*
|
|
* @par Description:
|
|
* Performs quantization on coeffs
|
|
*
|
|
* @param[in] pi2_coeffs
|
|
* 4x4 Coeffs
|
|
*
|
|
* @param[in] pi2_quant_coeff
|
|
* Scaling Matrix
|
|
*
|
|
* @param[out] pi2_dst
|
|
* Output 4x4 coefficients
|
|
*
|
|
* @param[in] qp_div
|
|
* Quantization parameter / 6
|
|
*
|
|
* @param[in] qp_rem
|
|
* Quantization parameter % 6
|
|
*
|
|
* @param[in] src_strd
|
|
* Input stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* Output Stride
|
|
*
|
|
* @param[out] csbf
|
|
* coded sub block flag
|
|
*
|
|
* @param[in] csbf_strd
|
|
* coded sub block flag buffer stride
|
|
*
|
|
* @param[out] zero_col
|
|
* zero column flag
|
|
*
|
|
* @param[out] zero_row
|
|
* zero row flag
|
|
*
|
|
* @returns cbf
|
|
* coded block flag
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
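/*
 * Note on this RDOQ variant: the loop below first quantizes with the caller's
 * q_add rounding and, whenever the resulting level has a magnitude greater
 * than one, re-quantizes the backed-up coefficient with a fixed half rounding
 * factor, (1 << QUANT_ROUND_FACTOR_Q) / 2, before inverse quantization and
 * SSD accumulation.
 */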
WORD32 ihevc_quant_iquant_ssd_rdoq
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD32 val;
|
|
WORD16 i2_temp;
|
|
WORD32 ssd_cost = 0;
|
|
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/* Quantization */
|
|
QUANT(pi2_q_dst[j], pi2_coeffs[j],
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
|
|
|
|
if (abs(pi2_q_dst[j]) > 1)
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
|
|
}
|
|
|
|
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
|
|
/*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
|
|
/* SSD Computation & Accumulation */
|
|
val = i2_temp - pi2_iq_dst[j];
|
|
ssd_cost += val*val;
|
|
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
/* Store the cost */
|
|
*pi8_cost = ssd_cost;
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
WORD32 ihevc_quant_iquant_rdoq
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD16 i2_temp;
|
|
|
|
(void)pi8_cost;
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/* Quantization */
|
|
QUANT(pi2_q_dst[j], pi2_coeffs[j],
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
|
|
if (abs(pi2_q_dst[j]) > 1)
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
}
|
|
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
|
|
shift_iq,
|
|
qp_div);
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* This function performs quantization(using flat scale matrix), followed by
|
|
* inverse quantization to find transform domain SSD
|
|
*
|
|
* @par Description:
|
|
* Performs quantization on coeffs
|
|
*
|
|
* @param[in] pi2_coeffs
|
|
* 4x4 Coeffs
|
|
*
|
|
* @param[in] pi2_quant_coeff
|
|
* Scaling Matrix
|
|
*
|
|
* @param[out] pi2_dst
|
|
* Output 4x4 coefficients
|
|
*
|
|
* @param[in] qp_div
|
|
* Quantization parameter / 6
|
|
*
|
|
* @param[in] qp_rem
|
|
* Quantization parameter % 6
|
|
*
|
|
* @param[in] src_strd
|
|
* Input stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* Output Stride
|
|
*
|
|
* @param[out] csbf
|
|
* coded sub block flag
|
|
*
|
|
* @param[in] csbf_strd
|
|
* coded sub block flag buffer stride
|
|
*
|
|
* @param[out] zero_col
|
|
* zero column flag
|
|
*
|
|
* @param[out] zero_row
|
|
* zero row flag
|
|
*
|
|
* @returns cbf
|
|
* coded block flag
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
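/*
 * Note on the flat-scale-matrix variants below: quantization uses
 * QUANT_NO_WEIGHTMAT, i.e. only g_ihevc_quant_scales[qp_rem] without the
 * per-coefficient pi2_quant_coeff[] weighting, and inverse quantization is
 * skipped (the reconstructed value is forced to zero) whenever the quantized
 * level is zero.
 */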
WORD32 ihevc_quant_iquant_ssd_flat_scale_mat
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD32 val;
|
|
WORD16 i2_temp;
|
|
/* Initialize cost to zero */
|
|
WORD32 ssd_cost = 0;
|
|
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/*QUANT(pi2_dst[j], pi2_coeffs[j],
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);*/
|
|
|
|
/* modified by 1028 */
|
|
/* Quantization */
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
|
|
if(pi2_q_dst[j] == 0)
|
|
{
|
|
pi2_iq_dst[j] = 0;
|
|
}
|
|
else
|
|
{
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
}
|
|
|
|
/* SSD Computation & Accumulation */
|
|
val = i2_temp - pi2_iq_dst[j];
|
|
ssd_cost += val*val;
|
|
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
/* Store the cost */
|
|
*pi8_cost = ssd_cost;
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
WORD32 ihevc_quant_iquant_flat_scale_mat
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD16 i2_temp;
|
|
|
|
(void)pi8_cost;
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/* Quantization */
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
|
|
if(pi2_q_dst[j] == 0)
|
|
{
|
|
pi2_iq_dst[j] = 0;
|
|
}
|
|
else
|
|
{
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
}
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* This function performs quantization(using flat scale matrix), followed by
|
|
* inverse quantization to find transform domain SSD; when we perform RDOQ.
|
|
* In case the quantized value turns out to be greater than 1, we then
* requantize using half rounding.
|
|
*
|
|
* @par Description:
|
|
* Performs quantization on coeffs
|
|
*
|
|
* @param[in] pi2_coeffs
|
|
* 4x4 Coeffs
|
|
*
|
|
* @param[in] pi2_quant_coeff
|
|
* Scaling Matrix
|
|
*
|
|
* @param[out] pi2_dst
|
|
* Output 4x4 coefficients
|
|
*
|
|
* @param[in] qp_div
|
|
* Quantization parameter / 6
|
|
*
|
|
* @param[in] qp_rem
|
|
* Quantization parameter % 6
|
|
*
|
|
* @param[in] src_strd
|
|
* Input stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* Output Stride
|
|
*
|
|
* @param[out] csbf
|
|
* coded sub block flag
|
|
*
|
|
* @param[in] csbf_strd
|
|
* coded sub block flag buffer stride
|
|
*
|
|
* @param[out] zero_col
|
|
* zero column flag
|
|
*
|
|
* @param[out] zero_row
|
|
* zero row flag
|
|
*
|
|
* @returns cbf
|
|
* coded block flag
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
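/*
 * Arithmetic sketch for QUANT_NO_WEIGHTMAT, inferred from the inline
 * expansion kept in the unreachable else-branch inside this function; treat
 * it as a reading aid rather than the authoritative macro definition:
 *
 *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
 *   q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT
 *            - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT
 *   level  = sign(coeff) * ((abs(coeff) * quant_scale +
 *            (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits),
 *            clipped to the WORD16 range
 */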
WORD32 ihevc_quant_iquant_ssd_flat_scale_mat_rdoq
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD32 val;
|
|
WORD16 i2_temp;
|
|
/* Initialize cost to zero */
|
|
WORD32 ssd_cost = 0;
|
|
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
WORD16 i2_temp1;
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/*QUANT(pi2_dst[j], pi2_coeffs[j],
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);*/
|
|
|
|
/* modified by 1028 */
|
|
/* Quantization */
|
|
|
|
if (1)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
}
|
|
else
|
|
{ \
|
|
WORD16 inp = pi2_coeffs[j],out = pi2_q_dst[j];
|
|
WORD32 quant_coeff = g_ihevc_quant_scales[qp_rem];
|
|
WORD32 log2_trans_size = log2_size;
|
|
WORD32 tmp; \
|
|
WORD32 sign; \
|
|
WORD32 bit_depth,transform_shift; \
|
|
WORD32 q_bits, quant_multiplier; \
|
|
\
|
|
/* q_bits and q_add calculation*/ \
|
|
/* To be moved outside in neon. To be computed once per transform call */ \
|
|
bit_depth = 8; \
|
|
transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
|
|
quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */ \
|
|
q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
|
|
\
|
|
sign = (inp)<0 ? -1:1; \
|
|
\
|
|
tmp = (WORD32)(abs(inp)); \
|
|
tmp = tmp * (quant_coeff); \
|
|
tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
|
|
tmp = tmp >> q_bits; \
|
|
\
|
|
tmp = tmp * sign; \
|
|
out = (WORD16) CLIP_S16(tmp); \
|
|
}
|
|
i2_temp1 = pi2_q_dst[j];
|
|
if (abs(pi2_q_dst[j]) > 1)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
}
|
|
|
|
|
|
ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
|
|
ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j]));
|
|
|
|
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
|
|
/* SSD Computation & Accumulation */
|
|
val = i2_temp - pi2_iq_dst[j];
|
|
ssd_cost += val*val;
|
|
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
|
|
}
|
|
/* Store the cost */
|
|
*pi8_cost = ssd_cost;
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
return cbf;
|
|
}
|
|
|
|
WORD32 ihevc_quant_iquant_flat_scale_mat_rdoq
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD16 i2_temp;
|
|
|
|
(void)pi8_cost;
|
|
(void)pi4_quant_round_factor_0_1;
|
|
(void)pi4_quant_round_factor_1_2;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
WORD16 i2_temp1;
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);
|
|
|
|
i2_temp1 = pi2_q_dst[j];
|
|
|
|
if (abs(pi2_q_dst[j]) > 1)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
}
|
|
|
|
ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
|
|
ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j]));
|
|
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* This function performs quantization, followed by Inverse
|
|
* quantization to find transform domain SSD
|
|
*
|
|
* @par Description:
|
|
* Performs quantization on coeffs
|
|
*
|
|
* @param[in] pi2_coeffs
|
|
* 4x4 Coeffs
|
|
*
|
|
* @param[in] pi2_quant_coeff
|
|
* Scaling Matrix
|
|
*
|
|
* @param[out] pi2_dst
|
|
* Output 4x4 coefficients
|
|
*
|
|
* @param[in] qp_div
|
|
* Quantization parameter / 6
|
|
*
|
|
* @param[in] qp_rem
|
|
* Quantization parameter % 6
|
|
*
|
|
* @param[in] src_strd
|
|
* Input stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* Output Stride
|
|
*
|
|
* @param[out] csbf
|
|
* coded sub block flag
|
|
*
|
|
* @param[in] csbf_strd
|
|
* coded sub block flag buffer stride
|
|
*
|
|
* @param[out] zero_col
|
|
* zero column flag
|
|
*
|
|
* @param[out] zero_row
|
|
* zero row flag
|
|
*
|
|
* @returns cbf
|
|
* coded block flag
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
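/*
 * Note on the variable-rounding-factor variants below: each coefficient is
 * first quantized with a zero rounding offset to classify its level; a level
 * magnitude of two or more is then re-quantized with the fixed half rounding
 * factor, a magnitude of one uses *pi4_quant_round_factor_1_2 and a magnitude
 * of zero uses *pi4_quant_round_factor_0_1, with both pointers advanced once
 * per coefficient.
 */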
WORD32 ihevc_q_iq_ssd_var_rnd_fact
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD32 val;
|
|
WORD16 i2_temp;
|
|
//WORD16 i2_temp_1;
|
|
/* Initialize cost to zero */
|
|
WORD32 ssd_cost = 0;
|
|
|
|
(void)q_add;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
|
|
{
|
|
/* Quantization */
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, 0);
|
|
if (abs(pi2_q_dst[j]) >= 2)
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
|
|
}
|
|
else if (abs(pi2_q_dst[j]) >= 1)
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_1_2);
|
|
}
|
|
|
|
else
|
|
{
|
|
/* Quantization */
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_0_1);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
|
|
/*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
|
|
/* SSD Computation & Accumulation */
|
|
val = i2_temp - pi2_iq_dst[j];
|
|
ssd_cost += val*val;
|
|
|
|
pi4_quant_round_factor_0_1++;
|
|
pi4_quant_round_factor_1_2++;
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
/* Store the cost */
|
|
*pi8_cost = ssd_cost;
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
WORD32 ihevc_q_iq_var_rnd_fact
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD16 i2_temp;
|
|
|
|
(void)q_add;
|
|
(void)pi8_cost;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, 0);
|
|
|
|
if (abs(pi2_q_dst[j]) >= 2)
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
}
|
|
else if (abs(pi2_q_dst[j]) >= 1)
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_1_2);
|
|
}
|
|
else
|
|
{
|
|
QUANT(pi2_q_dst[j],i2_temp,
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_0_1);
|
|
}
|
|
}
|
|
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
|
|
shift_iq,
|
|
qp_div);
|
|
|
|
pi4_quant_round_factor_0_1++;
|
|
pi4_quant_round_factor_1_2++;
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
}
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
|
|
return cbf;
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* This function performs quantization(using flat scale matrix), followed by
|
|
* inverse quantization to find transform domain SSD; when we perform RDOQ.
|
|
* In case the quantized value turns out to be greater than 1, we then
* requantize using half rounding.
|
|
*
|
|
* @par Description:
|
|
* Performs quantization on coeffs
|
|
*
|
|
* @param[in] pi2_coeffs
|
|
* 4x4 Coeffs
|
|
*
|
|
* @param[in] pi2_quant_coeff
|
|
* Scaling Matrix
|
|
*
|
|
* @param[out] pi2_dst
|
|
* Output 4x4 coefficients
|
|
*
|
|
* @param[in] qp_div
|
|
* Quantization parameter / 6
|
|
*
|
|
* @param[in] qp_rem
|
|
* Quantization parameter % 6
|
|
*
|
|
* @param[in] src_strd
|
|
* Input stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* Output Stride
|
|
*
|
|
* @param[out] csbf
|
|
* coded sub block flag
|
|
*
|
|
* @param[in] csbf_strd
|
|
* coded sub block flag buffer stride
|
|
*
|
|
* @param[out] zero_col
|
|
* zero column flag
|
|
*
|
|
* @param[out] zero_row
|
|
* zero row flag
|
|
*
|
|
* @returns cbf
|
|
* coded block flag
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
WORD32 ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD32 val;
|
|
WORD16 i2_temp;
|
|
/* Initialize cost to zero */
|
|
WORD32 ssd_cost = 0;
|
|
|
|
(void)q_add;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
/* Quant initialization */
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
WORD16 i2_temp1;
|
|
/* Back up the coefficients before Quantization */
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
/*QUANT(pi2_dst[j], pi2_coeffs[j],
|
|
pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, q_add);*/
|
|
|
|
/* modified by 1028 */
|
|
/* Quantization */
|
|
|
|
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, 0);
|
|
|
|
i2_temp1 = pi2_q_dst[j];
|
|
|
|
if (abs(pi2_q_dst[j]) >= 2)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
}
|
|
else if (abs(pi2_q_dst[j]) >= 1)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_1_2);
|
|
}
|
|
|
|
else
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_0_1);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
|
|
|
|
|
|
/* Inverse Quantization */
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
|
|
shift_iq,
|
|
qp_div);
|
|
|
|
/* SSD Computation & Accumulation */
|
|
val = i2_temp - pi2_iq_dst[j];
|
|
ssd_cost += val*val;
|
|
|
|
pi4_quant_round_factor_0_1++;
|
|
pi4_quant_round_factor_1_2++;
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
|
|
}
|
|
/* Store the cost */
|
|
*pi8_cost = ssd_cost;
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
return cbf;
|
|
}
|
|
|
|
WORD32 ihevc_q_iq_flat_scale_mat_var_rnd_fact
|
|
(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div,/* qpscaled / 6 */
|
|
WORD32 qp_rem,/* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost
|
|
)
|
|
{
|
|
WORD32 i, j;
|
|
WORD32 log2_size;
|
|
WORD16 *pi2_q_dst_orig;
|
|
WORD32 cbf = 0;
|
|
WORD32 bit_depth,shift_iq;
|
|
WORD16 i2_temp;
|
|
|
|
(void)q_add;
|
|
(void)pi8_cost;
|
|
pi2_q_dst_orig = pi2_q_dst;
|
|
|
|
GETRANGE(log2_size, trans_size);
|
|
log2_size -= 1;
|
|
|
|
bit_depth = 8 + 0;
|
|
shift_iq = bit_depth + log2_size - 5;
|
|
|
|
for(i = 0; i < trans_size; i++)
|
|
{
|
|
for(j = 0; j < trans_size; j++)
|
|
{
|
|
WORD16 i2_temp1;
|
|
|
|
i2_temp = pi2_coeffs[j];
|
|
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, 0);
|
|
|
|
i2_temp1 = pi2_q_dst[j];
|
|
|
|
if (abs(pi2_q_dst[j]) >= 2)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
|
|
}
|
|
else if (abs(pi2_q_dst[j]) >= 1)
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_1_2);
|
|
}
|
|
else
|
|
{
|
|
QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
|
|
g_ihevc_quant_scales[qp_rem], qp_div,
|
|
log2_size, *pi4_quant_round_factor_0_1);
|
|
}
|
|
}
|
|
|
|
ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
|
|
|
|
IQUANT(pi2_iq_dst[j],
|
|
pi2_q_dst[j], /*pi2_src[index*src_strd]*/
|
|
pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
|
|
shift_iq,
|
|
qp_div);
|
|
|
|
pi4_quant_round_factor_0_1++;
|
|
pi4_quant_round_factor_1_2++;
|
|
}
|
|
|
|
pi2_q_dst += dst_q_strd;
|
|
pi2_iq_dst += dst_iq_strd;
|
|
pi2_quant_coeff += trans_size;
|
|
pi2_coeffs += src_strd;
|
|
pi2_dequant_coeff += trans_size;
|
|
|
|
}
|
|
|
|
/* CSBF update */
|
|
{
|
|
WORD32 block_row, block_col;
|
|
WORD32 row, col;
|
|
WORD16 *pi2_block;
|
|
UWORD32 temp_zero_col = 0;
|
|
UWORD32 temp_zero_row = 0;
|
|
|
|
pi2_q_dst = pi2_q_dst_orig;
|
|
|
|
for(block_row = 0; block_row < trans_size; block_row += 4)
|
|
{
|
|
//block_col is incrementing by 1 for easy update of csbf pointer
|
|
for(block_col = 0; block_col < trans_size / 4; block_col++)
|
|
{
|
|
pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
|
|
*(csbf + block_col) = 0;
|
|
|
|
for(row = 0; row < 4; row++)
|
|
{
|
|
for(col = 0; col < 4; col++)
|
|
{
|
|
if(pi2_block[row * dst_q_strd + col] != 0)
|
|
{
|
|
*(csbf + block_col) = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(*(csbf + block_col) == 1)
|
|
{
|
|
/* zero_col update *//* temp_zero_col = ~zero_col */
|
|
temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
|
|
// zero col can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 colums of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
/* zero row update */ /* temp_zero_row = ~zero_row */
|
|
temp_zero_row = (temp_zero_row) | (0xFU << block_row);
|
|
// zero row can be optimized further. Now clearing the
|
|
// entire 4 bits corresponding to 4 rows of 4x4 block
|
|
// even if any 4x4 csbf is set
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
cbf = cbf || (*(csbf + block_col)); // cbf update
|
|
}
|
|
csbf += csbf_strd;
|
|
}
|
|
|
|
*zero_col = ~temp_zero_col; //final zero_col storing
|
|
*zero_row = ~temp_zero_row; //final zero_row storing
|
|
}
|
|
return cbf;
|
|
}
|