You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2218 lines
66 KiB

/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* ihevc_quant_iquant_ssd.c
*
* @brief
* Contains function definitions for quantization, followed by Inverse
* quantization to find transform domain SSD
*
* @author
* 100453, 100578
*
* @par List of Functions:
* - ihevc_quant_iquant_ssd()
* - ihevc_quant_iquant_ssd_flat_scale_mat()
*
* @remarks
* None
*
*******************************************************************************
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ihevc_typedefs.h"
#include "ihevc_macros.h"
#include "ihevc_platform_macros.h"
#include "ihevc_defs.h"
#include "ihevc_debug.h"
#include "ihevc_trans_tables.h"
#include "ihevc_quant_iquant_ssd.h"
#include "ihevc_func_selector.h"
#include "ihevc_trans_macros.h"
#include <assert.h>
/*****************************************************************************/
/* Globals */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
*  Quantizes a square block of transform coefficients with a scaling matrix,
*  inverse quantizes the result and computes the transform domain SSD
*
* @par Description:
*  Each coefficient is quantized with QUANT() using
*  pi2_quant_coeff[] * g_ihevc_quant_scales[qp_rem] and reconstructed with
*  IQUANT() using pi2_dequant_coeff[] * g_ihevc_iquant_scales[qp_rem].
*  The squared difference between every input coefficient and its
*  reconstruction is summed into *pi8_cost. A second pass over the quantized
*  block derives one coded sub block flag per 4x4 sub block together with the
*  zero row / zero column masks. Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Quantization scaling matrix, row stride trans_size
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to QUANT()
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Sum of squared differences between input and inverse quantized coefficients
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_ssd
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    /* 64 bit accumulator: a single squared difference of two 16 bit values */
    /* can already exceed the range of a signed 32 bit integer              */
    LWORD64 ssd_cost = 0;

    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_qmat_row = pi2_quant_coeff + i * trans_size;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            /* Back up the coefficient before quantization; presumably the */
            /* output buffers may overlap the input                        */
            i2_temp = pi2_src_row[j];

            /* Quantization */
            QUANT(pi2_q_row[j], pi2_src_row[j],
                  pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, q_add);

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation (product formed in 64 bits) */
            val = i2_temp - pi2_iq_row[j];
            ssd_cost += (LWORD64)val * val;
        }
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update */
    {
        /* Bit n set => column / row n belongs to a coded 4x4 sub block */
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;
        WORD32 block_row, block_col;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            /* block_col counts sub blocks so that it indexes csbf directly */
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                const WORD16 *pi2_block =
                                pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                UWORD8 u1_coded = 0;
                WORD32 row, col;

                /* Stop scanning at the first non-zero coefficient */
                for(row = 0; (row < 4) && (0 == u1_coded); row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(0 != pi2_block[row * dst_q_strd + col])
                        {
                            u1_coded = 1;
                            break;
                        }
                    }
                }

                csbf[block_col] = u1_coded;
                if(u1_coded)
                {
                    /* All 4 columns and all 4 rows of a coded sub block */
                    /* are marked, even if only one coefficient is set   */
                    temp_zero_col |= (0xFU << (block_col * 4));
                    temp_zero_row |= (0xFU << block_row);
                    cbf = 1;
                }
            }
            csbf += csbf_strd;
        }

        /* The masks are stored inverted: set bit => all zero */
        *zero_col = ~temp_zero_col;
        *zero_row = ~temp_zero_row;
    }

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  Quantizes a square block of transform coefficients with a scaling matrix
*  and inverse quantizes the result (no SSD computation)
*
* @par Description:
*  Each coefficient is quantized with QUANT() using
*  pi2_quant_coeff[] * g_ihevc_quant_scales[qp_rem] and reconstructed with
*  IQUANT() using pi2_dequant_coeff[] * g_ihevc_iquant_scales[qp_rem].
*  A second pass over the quantized block derives one coded sub block flag
*  per 4x4 sub block together with the zero row / zero column masks.
*  Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Quantization scaling matrix, row stride trans_size
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to QUANT()
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Not used by this function (no cost is written)
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;

    (void)pi8_cost;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_qmat_row = pi2_quant_coeff + i * trans_size;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            /* Quantization (no input backup needed: no SSD is computed) */
            QUANT(pi2_q_row[j], pi2_src_row[j],
                  pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, q_add);

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);
        }
    }

    /* CSBF update */
    {
        /* Bit n set => column / row n belongs to a coded 4x4 sub block */
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;
        WORD32 block_row, block_col;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            /* block_col counts sub blocks so that it indexes csbf directly */
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                const WORD16 *pi2_block =
                                pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                UWORD8 u1_coded = 0;
                WORD32 row, col;

                /* Stop scanning at the first non-zero coefficient */
                for(row = 0; (row < 4) && (0 == u1_coded); row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(0 != pi2_block[row * dst_q_strd + col])
                        {
                            u1_coded = 1;
                            break;
                        }
                    }
                }

                csbf[block_col] = u1_coded;
                if(u1_coded)
                {
                    /* All 4 columns and all 4 rows of a coded sub block */
                    /* are marked, even if only one coefficient is set   */
                    temp_zero_col |= (0xFU << (block_col * 4));
                    temp_zero_row |= (0xFU << block_row);
                    cbf = 1;
                }
            }
            csbf += csbf_strd;
        }

        /* The masks are stored inverted: set bit => all zero */
        *zero_col = ~temp_zero_col;
        *zero_row = ~temp_zero_row;
    }

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  RDOQ flavour of quantization with a scaling matrix, followed by inverse
*  quantization and transform domain SSD computation
*
* @par Description:
*  Each coefficient is first quantized with QUANT() using q_add as rounding
*  term. If the magnitude of the resulting level is greater than 1, the
*  backed up input coefficient is quantized again with a rounding term of
*  (1 << QUANT_ROUND_FACTOR_Q) / 2. The final level is reconstructed with
*  IQUANT() and the squared difference to the input is summed into
*  *pi8_cost. A second pass over the quantized block derives one coded sub
*  block flag per 4x4 sub block together with the zero row / zero column
*  masks. Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Quantization scaling matrix, row stride trans_size
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to the first QUANT() call
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Sum of squared differences between input and inverse quantized coefficients
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_ssd_rdoq
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    /* 64 bit accumulator: a single squared difference of two 16 bit values */
    /* can already exceed the range of a signed 32 bit integer              */
    LWORD64 ssd_cost = 0;

    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_qmat_row = pi2_quant_coeff + i * trans_size;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            /* Back up the coefficient: it is needed for the second */
            /* quantization and for the SSD                         */
            i2_temp = pi2_src_row[j];

            /* Quantization with the caller supplied rounding */
            QUANT(pi2_q_row[j], pi2_src_row[j],
                  pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, q_add);

            /* Levels above 1 are requantized with half rounding */
            if (abs(pi2_q_row[j]) > 1)
            {
                QUANT(pi2_q_row[j],i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation (product formed in 64 bits) */
            val = i2_temp - pi2_iq_row[j];
            ssd_cost += (LWORD64)val * val;
        }
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update */
    {
        /* Bit n set => column / row n belongs to a coded 4x4 sub block */
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;
        WORD32 block_row, block_col;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            /* block_col counts sub blocks so that it indexes csbf directly */
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                const WORD16 *pi2_block =
                                pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                UWORD8 u1_coded = 0;
                WORD32 row, col;

                /* Stop scanning at the first non-zero coefficient */
                for(row = 0; (row < 4) && (0 == u1_coded); row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(0 != pi2_block[row * dst_q_strd + col])
                        {
                            u1_coded = 1;
                            break;
                        }
                    }
                }

                csbf[block_col] = u1_coded;
                if(u1_coded)
                {
                    /* All 4 columns and all 4 rows of a coded sub block */
                    /* are marked, even if only one coefficient is set   */
                    temp_zero_col |= (0xFU << (block_col * 4));
                    temp_zero_row |= (0xFU << block_row);
                    cbf = 1;
                }
            }
            csbf += csbf_strd;
        }

        /* The masks are stored inverted: set bit => all zero */
        *zero_col = ~temp_zero_col;
        *zero_row = ~temp_zero_row;
    }

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  RDOQ flavour of quantization with a scaling matrix, followed by inverse
*  quantization (no SSD computation)
*
* @par Description:
*  Each coefficient is first quantized with QUANT() using q_add as rounding
*  term. If the magnitude of the resulting level is greater than 1, the
*  backed up input coefficient is quantized again with a rounding term of
*  (1 << QUANT_ROUND_FACTOR_Q) / 2. The final level is reconstructed with
*  IQUANT(). A second pass over the quantized block derives one coded sub
*  block flag per 4x4 sub block together with the zero row / zero column
*  masks. Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Quantization scaling matrix, row stride trans_size
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to the first QUANT() call
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Not used by this function (no cost is written)
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_rdoq
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 bit_depth, shift_iq;
    WORD32 cbf = 0;
    WORD16 i2_temp;
    /* Bit n set => column / row n belongs to a coded 4x4 sub block */
    UWORD32 u4_coded_cols = 0;
    UWORD32 u4_coded_rows = 0;
    WORD32 blk_y, blk_x;

    (void)pi8_cost;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Derive the size dependent shifts; bit depth is fixed at 8 */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    /* Pass 1: quantize and inverse quantize row by row */
    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_qmat_row = pi2_quant_coeff + i * trans_size;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            /* Keep the input around for a possible second quantization */
            i2_temp = pi2_src_row[j];

            /* First quantization uses the caller supplied rounding */
            QUANT(pi2_q_row[j], pi2_src_row[j],
                  pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, q_add);

            /* Levels above 1 are requantized with half rounding */
            if (abs(pi2_q_row[j]) > 1)
            {
                QUANT(pi2_q_row[j],i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Reconstruct the coefficient from the final level */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);
        }
    }

    /* Pass 2: coded sub block flags and zero row / column masks */
    for(blk_y = 0; blk_y < trans_size; blk_y += 4)
    {
        /* blk_x counts sub blocks so that it indexes csbf directly */
        for(blk_x = 0; blk_x < trans_size / 4; blk_x++)
        {
            const WORD16 *pi2_blk = pi2_q_dst + blk_y * dst_q_strd + blk_x * 4;
            UWORD8 u1_coded = 0;
            WORD32 y, x;

            /* Scan stops at the first non-zero level of the sub block */
            for(y = 0; (y < 4) && (0 == u1_coded); y++)
            {
                for(x = 0; x < 4; x++)
                {
                    if(0 != pi2_blk[y * dst_q_strd + x])
                    {
                        u1_coded = 1;
                        break;
                    }
                }
            }

            csbf[blk_x] = u1_coded;
            if(u1_coded)
            {
                /* Whole groups of 4 columns / 4 rows are marked at once */
                u4_coded_cols |= (0xFU << (blk_x * 4));
                u4_coded_rows |= (0xFU << blk_y);
                cbf = 1;
            }
        }
        csbf += csbf_strd;
    }

    /* The masks are stored inverted: set bit => all zero */
    *zero_col = ~u4_coded_cols;
    *zero_row = ~u4_coded_rows;

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  Quantizes a square block of transform coefficients assuming a flat scaling
*  matrix, inverse quantizes the result and computes the transform domain SSD
*
* @par Description:
*  Each coefficient is quantized with QUANT_NO_WEIGHTMAT() using only
*  g_ihevc_quant_scales[qp_rem]; pi2_quant_coeff is not read. Levels that
*  quantize to zero are reconstructed as zero directly, all others go through
*  IQUANT() using pi2_dequant_coeff[] * g_ihevc_iquant_scales[qp_rem].
*  The squared difference between every input coefficient and its
*  reconstruction is summed into *pi8_cost. A second pass over the quantized
*  block derives one coded sub block flag per 4x4 sub block together with the
*  zero row / zero column masks. Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Not used by this function (flat scaling matrix)
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to QUANT_NO_WEIGHTMAT()
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Sum of squared differences between input and inverse quantized coefficients
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_ssd_flat_scale_mat
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    /* 64 bit accumulator: a single squared difference of two 16 bit values */
    /* can already exceed the range of a signed 32 bit integer              */
    LWORD64 ssd_cost = 0;

    /* The flat scaling matrix variant never reads the quant matrix */
    (void)pi2_quant_coeff;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            /* Back up the coefficient before quantization; presumably the */
            /* output buffers may overlap the input                        */
            i2_temp = pi2_src_row[j];

            /* Quantization without a weighting matrix */
            QUANT_NO_WEIGHTMAT(pi2_q_row[j], pi2_src_row[j],
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, q_add);

            if(pi2_q_row[j] == 0)
            {
                /* A zero level reconstructs to zero, IQUANT() is skipped */
                pi2_iq_row[j] = 0;
            }
            else
            {
                /* Inverse Quantization */
                IQUANT(pi2_iq_row[j],
                       pi2_q_row[j],
                       pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                       shift_iq,
                       qp_div);
            }

            /* SSD Computation & Accumulation (product formed in 64 bits) */
            val = i2_temp - pi2_iq_row[j];
            ssd_cost += (LWORD64)val * val;
        }
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update */
    {
        /* Bit n set => column / row n belongs to a coded 4x4 sub block */
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;
        WORD32 block_row, block_col;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            /* block_col counts sub blocks so that it indexes csbf directly */
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                const WORD16 *pi2_block =
                                pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                UWORD8 u1_coded = 0;
                WORD32 row, col;

                /* Stop scanning at the first non-zero coefficient */
                for(row = 0; (row < 4) && (0 == u1_coded); row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(0 != pi2_block[row * dst_q_strd + col])
                        {
                            u1_coded = 1;
                            break;
                        }
                    }
                }

                csbf[block_col] = u1_coded;
                if(u1_coded)
                {
                    /* All 4 columns and all 4 rows of a coded sub block */
                    /* are marked, even if only one coefficient is set   */
                    temp_zero_col |= (0xFU << (block_col * 4));
                    temp_zero_row |= (0xFU << block_row);
                    cbf = 1;
                }
            }
            csbf += csbf_strd;
        }

        /* The masks are stored inverted: set bit => all zero */
        *zero_col = ~temp_zero_col;
        *zero_row = ~temp_zero_row;
    }

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  Quantizes a square block of transform coefficients assuming a flat scaling
*  matrix and inverse quantizes the result (no SSD computation)
*
* @par Description:
*  Each coefficient is quantized with QUANT_NO_WEIGHTMAT() using only
*  g_ihevc_quant_scales[qp_rem]; pi2_quant_coeff is not read. Levels that
*  quantize to zero are reconstructed as zero directly, all others go through
*  IQUANT() using pi2_dequant_coeff[] * g_ihevc_iquant_scales[qp_rem].
*  A second pass over the quantized block derives one coded sub block flag
*  per 4x4 sub block together with the zero row / zero column masks.
*  Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Not used by this function (flat scaling matrix)
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to QUANT_NO_WEIGHTMAT()
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Not used by this function (no cost is written)
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_flat_scale_mat
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;

    /* The flat scaling matrix variant never reads the quant matrix */
    (void)pi2_quant_coeff;
    (void)pi8_cost;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            /* Quantization without a weighting matrix */
            /* (no input backup needed: no SSD is computed) */
            QUANT_NO_WEIGHTMAT(pi2_q_row[j], pi2_src_row[j],
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, q_add);

            if(pi2_q_row[j] == 0)
            {
                /* A zero level reconstructs to zero, IQUANT() is skipped */
                pi2_iq_row[j] = 0;
            }
            else
            {
                /* Inverse Quantization */
                IQUANT(pi2_iq_row[j],
                       pi2_q_row[j],
                       pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                       shift_iq,
                       qp_div);
            }
        }
    }

    /* CSBF update */
    {
        /* Bit n set => column / row n belongs to a coded 4x4 sub block */
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;
        WORD32 block_row, block_col;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            /* block_col counts sub blocks so that it indexes csbf directly */
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                const WORD16 *pi2_block =
                                pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                UWORD8 u1_coded = 0;
                WORD32 row, col;

                /* Stop scanning at the first non-zero coefficient */
                for(row = 0; (row < 4) && (0 == u1_coded); row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(0 != pi2_block[row * dst_q_strd + col])
                        {
                            u1_coded = 1;
                            break;
                        }
                    }
                }

                csbf[block_col] = u1_coded;
                if(u1_coded)
                {
                    /* All 4 columns and all 4 rows of a coded sub block */
                    /* are marked, even if only one coefficient is set   */
                    temp_zero_col |= (0xFU << (block_col * 4));
                    temp_zero_row |= (0xFU << block_row);
                    cbf = 1;
                }
            }
            csbf += csbf_strd;
        }

        /* The masks are stored inverted: set bit => all zero */
        *zero_col = ~temp_zero_col;
        *zero_row = ~temp_zero_row;
    }

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  RDOQ flavour of quantization assuming a flat scaling matrix, followed by
*  inverse quantization and transform domain SSD computation
*
* @par Description:
*  Each coefficient is first quantized with QUANT_NO_WEIGHTMAT() using q_add
*  as rounding term; pi2_quant_coeff is not read. If the magnitude of the
*  resulting level is greater than 1, the backed up input coefficient is
*  quantized again with a rounding term of (1 << QUANT_ROUND_FACTOR_Q) / 2.
*  Two ASSERT()s check that the second quantization changed the level by at
*  most one and did not reduce its magnitude. The final level is
*  reconstructed with IQUANT() and the squared difference to the input is
*  summed into *pi8_cost. A second pass over the quantized block derives one
*  coded sub block flag per 4x4 sub block together with the zero row / zero
*  column masks. Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Not used by this function (flat scaling matrix)
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to the first QUANT_NO_WEIGHTMAT() call
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Sum of squared differences between input and inverse quantized coefficients
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_ssd_flat_scale_mat_rdoq
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 cbf = 0;
    WORD32 bit_depth, shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    /* 64 bit accumulator: a single squared difference of two 16 bit values */
    /* can already exceed the range of a signed 32 bit integer              */
    LWORD64 ssd_cost = 0;

    /* The flat scaling matrix variant never reads the quant matrix */
    (void)pi2_quant_coeff;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Quant initialization */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            WORD16 i2_temp1;

            /* Back up the coefficient: it is needed for the second */
            /* quantization and for the SSD                         */
            i2_temp = pi2_src_row[j];

            /* Quantization with the caller supplied rounding */
            QUANT_NO_WEIGHTMAT(pi2_q_row[j], pi2_src_row[j],
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, q_add);

            /* Remember the first-pass level for the sanity checks below */
            i2_temp1 = pi2_q_row[j];

            /* Levels above 1 are requantized with half rounding */
            if (abs(pi2_q_row[j]) > 1)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            ASSERT(abs(i2_temp1-pi2_q_row[j]) <= 1);
            ASSERT(abs(i2_temp1) <= abs(pi2_q_row[j]));

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation (product formed in 64 bits) */
            val = i2_temp - pi2_iq_row[j];
            ssd_cost += (LWORD64)val * val;
        }
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update */
    {
        /* Bit n set => column / row n belongs to a coded 4x4 sub block */
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;
        WORD32 block_row, block_col;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            /* block_col counts sub blocks so that it indexes csbf directly */
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                const WORD16 *pi2_block =
                                pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                UWORD8 u1_coded = 0;
                WORD32 row, col;

                /* Stop scanning at the first non-zero coefficient */
                for(row = 0; (row < 4) && (0 == u1_coded); row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(0 != pi2_block[row * dst_q_strd + col])
                        {
                            u1_coded = 1;
                            break;
                        }
                    }
                }

                csbf[block_col] = u1_coded;
                if(u1_coded)
                {
                    /* All 4 columns and all 4 rows of a coded sub block */
                    /* are marked, even if only one coefficient is set   */
                    temp_zero_col |= (0xFU << (block_col * 4));
                    temp_zero_row |= (0xFU << block_row);
                    cbf = 1;
                }
            }
            csbf += csbf_strd;
        }

        /* The masks are stored inverted: set bit => all zero */
        *zero_col = ~temp_zero_col;
        *zero_row = ~temp_zero_row;
    }

    return cbf;
}
/**
*******************************************************************************
*
* @brief
*  RDOQ flavour of quantization assuming a flat scaling matrix, followed by
*  inverse quantization (no SSD computation)
*
* @par Description:
*  Each coefficient is first quantized with QUANT_NO_WEIGHTMAT() using q_add
*  as rounding term; pi2_quant_coeff is not read. If the magnitude of the
*  resulting level is greater than 1, the backed up input coefficient is
*  quantized again with a rounding term of (1 << QUANT_ROUND_FACTOR_Q) / 2.
*  Two ASSERT()s check that the second quantization changed the level by at
*  most one and did not reduce its magnitude. The final level is
*  reconstructed with IQUANT(). A second pass over the quantized block
*  derives one coded sub block flag per 4x4 sub block together with the zero
*  row / zero column masks. Bit depth is fixed at 8.
*
* @param[in] pi2_coeffs
*  Input coefficients, trans_size x trans_size, row stride src_strd
*
* @param[in] pi2_quant_coeff
*  Not used by this function (flat scaling matrix)
*
* @param[out] pi2_q_dst
*  Quantized coefficients, row stride dst_q_strd
*
* @param[out] pi2_iq_dst
*  Inverse quantized coefficients, row stride dst_iq_strd
*
* @param[in] trans_size
*  Width and height of the block (scanned in 4x4 sub blocks)
*
* @param[in] qp_div
*  Quantization parameter / 6
*
* @param[in] qp_rem
*  Quantization parameter % 6
*
* @param[in] q_add
*  Rounding term handed to the first QUANT_NO_WEIGHTMAT() call
*
* @param[in] pi4_quant_round_factor_0_1
*  Not used by this function
*
* @param[in] pi4_quant_round_factor_1_2
*  Not used by this function
*
* @param[in] src_strd
*  Input stride
*
* @param[in] dst_q_strd
*  Stride of the quantized output
*
* @param[in] dst_iq_strd
*  Stride of the inverse quantized output
*
* @param[out] csbf
*  Coded sub block flags, one byte per 4x4 sub block (1 when the sub block
*  holds a non-zero quantized coefficient, 0 otherwise)
*
* @param[in] csbf_strd
*  Increment applied to csbf after each row of 4x4 sub blocks
*
* @param[out] zero_col
*  Column mask: bit n is cleared when column n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[out] zero_row
*  Row mask: bit n is cleared when row n lies in a coded 4x4 sub block,
*  every other bit is set
*
* @param[in] pi2_dequant_coeff
*  Inverse quantization scaling matrix, row stride trans_size
*
* @param[out] pi8_cost
*  Not used by this function (no cost is written)
*
* @returns cbf
*  Coded block flag: 1 if any 4x4 sub block is coded, 0 otherwise
*
* @remarks
*  None
*
*******************************************************************************
*/
WORD32 ihevc_quant_iquant_flat_scale_mat_rdoq
    (
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
    )
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 bit_depth, shift_iq;
    WORD32 cbf = 0;
    WORD16 i2_temp;
    /* Bit n set => column / row n belongs to a coded 4x4 sub block */
    UWORD32 u4_coded_cols = 0;
    UWORD32 u4_coded_rows = 0;
    WORD32 blk_y, blk_x;

    /* The quant matrix is never dereferenced in the flat variant */
    (void)pi2_quant_coeff;
    (void)pi8_cost;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;

    /* Derive the size dependent shifts; bit depth is fixed at 8 */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    /* Pass 1: quantize and inverse quantize row by row */
    for(i = 0; i < trans_size; i++)
    {
        const WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        const WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;

        for(j = 0; j < trans_size; j++)
        {
            WORD16 i2_temp1;

            /* Keep the input around for a possible second quantization */
            i2_temp = pi2_src_row[j];

            /* First quantization uses the caller supplied rounding */
            QUANT_NO_WEIGHTMAT(pi2_q_row[j], pi2_src_row[j],
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, q_add);

            /* Remember the first-pass level for the sanity checks below */
            i2_temp1 = pi2_q_row[j];

            /* Levels above 1 are requantized with half rounding */
            if (abs(pi2_q_row[j]) > 1)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            ASSERT(abs(i2_temp1-pi2_q_row[j]) <= 1);
            ASSERT(abs(i2_temp1) <= abs(pi2_q_row[j]));

            /* Reconstruct the coefficient from the final level */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);
        }
    }

    /* Pass 2: coded sub block flags and zero row / column masks */
    for(blk_y = 0; blk_y < trans_size; blk_y += 4)
    {
        /* blk_x counts sub blocks so that it indexes csbf directly */
        for(blk_x = 0; blk_x < trans_size / 4; blk_x++)
        {
            const WORD16 *pi2_blk = pi2_q_dst + blk_y * dst_q_strd + blk_x * 4;
            UWORD8 u1_coded = 0;
            WORD32 y, x;

            /* Scan stops at the first non-zero level of the sub block */
            for(y = 0; (y < 4) && (0 == u1_coded); y++)
            {
                for(x = 0; x < 4; x++)
                {
                    if(0 != pi2_blk[y * dst_q_strd + x])
                    {
                        u1_coded = 1;
                        break;
                    }
                }
            }

            csbf[blk_x] = u1_coded;
            if(u1_coded)
            {
                /* Whole groups of 4 columns / 4 rows are marked at once */
                u4_coded_cols |= (0xFU << (blk_x * 4));
                u4_coded_rows |= (0xFU << blk_y);
                cbf = 1;
            }
        }
        csbf += csbf_strd;
    }

    /* The masks are stored inverted: set bit => all zero */
    *zero_col = ~u4_coded_cols;
    *zero_row = ~u4_coded_rows;

    return cbf;
}
/**
 *******************************************************************************
 *
 * @brief
 *  Quantizes a transform block using a scaling matrix and magnitude dependent
 *  rounding, inverse quantizes the levels and computes the transform domain
 *  SSD between the input and the reconstructed coefficients.
 *
 * @par Description:
 *  Each coefficient is first quantized with a rounding offset of zero. The
 *  magnitude of that provisional level selects the rounding offset used for
 *  the final quantization of the same input coefficient:
 *   - magnitude 0    : the coefficient's entry in pi4_quant_round_factor_0_1
 *   - magnitude 1    : the coefficient's entry in pi4_quant_round_factor_1_2
 *   - magnitude >= 2 : half rounding, (1 << QUANT_ROUND_FACTOR_Q) / 2
 *  The final level is inverse quantized, and the squared difference between
 *  the input coefficient and the inverse quantized value is added to the
 *  cost. Afterwards the coded sub block flags, the zero row / zero column
 *  masks and the coded block flag are derived from the quantized levels.
 *
 * @param[in] pi2_coeffs
 *  Input transform coefficients (trans_size x trans_size)
 *
 * @param[in] pi2_quant_coeff
 *  Quantization scaling matrix; consecutive rows are trans_size entries apart
 *
 * @param[out] pi2_q_dst
 *  Output quantized levels
 *
 * @param[out] pi2_iq_dst
 *  Output inverse quantized coefficients
 *
 * @param[in] trans_size
 *  Width and height of the transform block. The sub block scan walks the
 *  block in 4x4 units.
 *
 * @param[in] qp_div
 *  Quantization parameter / 6
 *
 * @param[in] qp_rem
 *  Quantization parameter % 6
 *
 * @param[in] q_add
 *  Unused by this function
 *
 * @param[in] pi4_quant_round_factor_0_1
 *  Rounding offsets applied when the provisional level is 0. One entry per
 *  coefficient, read in raster order without any row padding.
 *
 * @param[in] pi4_quant_round_factor_1_2
 *  Rounding offsets applied when the provisional level magnitude is 1. Same
 *  layout as pi4_quant_round_factor_0_1.
 *
 * @param[in] src_strd
 *  Row stride of pi2_coeffs, in elements
 *
 * @param[in] dst_q_strd
 *  Row stride of pi2_q_dst, in elements
 *
 * @param[in] dst_iq_strd
 *  Row stride of pi2_iq_dst, in elements
 *
 * @param[out] csbf
 *  Coded sub block flags, one per 4x4 sub block (1 if the sub block holds a
 *  non-zero level, else 0)
 *
 * @param[in] csbf_strd
 *  Stride between consecutive rows of sub block flags in csbf
 *
 * @param[out] zero_col
 *  Zero column mask. Bit n is cleared when column n lies inside a coded 4x4
 *  sub block; every other bit is set.
 *
 * @param[out] zero_row
 *  Zero row mask. Bit n is cleared when row n lies inside a coded 4x4 sub
 *  block; every other bit is set.
 *
 * @param[in] pi2_dequant_coeff
 *  Inverse quantization scaling matrix; consecutive rows are trans_size
 *  entries apart
 *
 * @param[out] pi8_cost
 *  Transform domain SSD of the block
 *
 * @returns cbf
 *  Coded block flag: 1 if any 4x4 sub block is coded, else 0
 *
 * @remarks
 *  The bit depth is fixed to 8.
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_ssd_var_rnd_fact(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 bit_depth, shift_iq;
    WORD32 cbf = 0;
    WORD32 val;
    WORD32 rnd_idx = 0;
    WORD16 i2_temp;
    UWORD32 u4_coded_cols = 0;
    UWORD32 u4_coded_rows = 0;
    /* Accumulated in 64 bits: a single squared difference of two 16 bit */
    /* values can already exceed the range of a signed 32 bit integer    */
    LWORD64 ssd_cost = 0;

    (void)q_add;

    /* Quant initialization */
    /* NOTE(review): GETRANGE is assumed to return the bit length of */
    /* trans_size, so that log2_size ends up as log2(trans_size)     */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;
        WORD16 *pi2_qmat_row = pi2_quant_coeff + i * trans_size;
        WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;

        for(j = 0; j < trans_size; j++, rnd_idx++)
        {
            WORD32 abs_level;

            /* Back up the coefficient; every pass below quantizes it */
            i2_temp = pi2_src_row[j];

            /* Provisional level: quantization without any rounding */
            QUANT(pi2_q_row[j], i2_temp,
                  pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, 0);

            /* Final level: rounding chosen from the provisional magnitude */
            abs_level = abs(pi2_q_row[j]);
            if(0 == abs_level)
            {
                QUANT(pi2_q_row[j], i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, pi4_quant_round_factor_0_1[rnd_idx]);
            }
            else if(1 == abs_level)
            {
                QUANT(pi2_q_row[j], i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, pi4_quant_round_factor_1_2[rnd_idx]);
            }
            else
            {
                QUANT(pi2_q_row[j], i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation (widen before multiplying) */
            val = i2_temp - pi2_iq_row[j];
            ssd_cost += (LWORD64)val * val;
        }
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update: scan the levels one 4x4 sub block at a time */
    for(i = 0; i < trans_size; i += 4)
    {
        /* j counts sub blocks so that it directly indexes the csbf row */
        for(j = 0; j < trans_size / 4; j++)
        {
            const WORD16 *pi2_blk = pi2_q_dst + i * dst_q_strd + j * 4;
            UWORD8 u1_coded = 0;
            WORD32 r, c;

            /* Stop at the first non-zero level of the sub block */
            for(r = 0; (r < 4) && (0 == u1_coded); r++)
            {
                for(c = 0; c < 4; c++)
                {
                    if(0 != pi2_blk[r * dst_q_strd + c])
                    {
                        u1_coded = 1;
                        break;
                    }
                }
            }

            csbf[j] = u1_coded;
            if(u1_coded)
            {
                /* Mark all 4 columns and all 4 rows of the sub block as */
                /* coded; the masks are complemented when stored         */
                u4_coded_cols |= (0xFU << (j * 4));
                u4_coded_rows |= (0xFU << i);
                cbf = 1;
            }
        }
        csbf += csbf_strd;
    }

    *zero_col = ~u4_coded_cols;
    *zero_row = ~u4_coded_rows;

    return cbf;
}
/**
 *******************************************************************************
 *
 * @brief
 *  Quantizes a transform block using a scaling matrix and magnitude dependent
 *  rounding, then inverse quantizes the levels. No SSD is computed.
 *
 * @par Description:
 *  Each coefficient is first quantized with a rounding offset of zero. The
 *  magnitude of that provisional level selects the rounding offset used for
 *  the final quantization of the same input coefficient:
 *   - magnitude 0    : the coefficient's entry in pi4_quant_round_factor_0_1
 *   - magnitude 1    : the coefficient's entry in pi4_quant_round_factor_1_2
 *   - magnitude >= 2 : half rounding, (1 << QUANT_ROUND_FACTOR_Q) / 2
 *  The final level is inverse quantized. Afterwards the coded sub block
 *  flags, the zero row / zero column masks and the coded block flag are
 *  derived from the quantized levels.
 *
 * @param[in] pi2_coeffs
 *  Input transform coefficients (trans_size x trans_size)
 *
 * @param[in] pi2_quant_coeff
 *  Quantization scaling matrix; consecutive rows are trans_size entries apart
 *
 * @param[out] pi2_q_dst
 *  Output quantized levels
 *
 * @param[out] pi2_iq_dst
 *  Output inverse quantized coefficients
 *
 * @param[in] trans_size
 *  Width and height of the transform block. The sub block scan walks the
 *  block in 4x4 units.
 *
 * @param[in] qp_div
 *  Quantization parameter / 6
 *
 * @param[in] qp_rem
 *  Quantization parameter % 6
 *
 * @param[in] q_add
 *  Unused by this function
 *
 * @param[in] pi4_quant_round_factor_0_1
 *  Rounding offsets applied when the provisional level is 0. One entry per
 *  coefficient, read in raster order without any row padding.
 *
 * @param[in] pi4_quant_round_factor_1_2
 *  Rounding offsets applied when the provisional level magnitude is 1. Same
 *  layout as pi4_quant_round_factor_0_1.
 *
 * @param[in] src_strd
 *  Row stride of pi2_coeffs, in elements
 *
 * @param[in] dst_q_strd
 *  Row stride of pi2_q_dst, in elements
 *
 * @param[in] dst_iq_strd
 *  Row stride of pi2_iq_dst, in elements
 *
 * @param[out] csbf
 *  Coded sub block flags, one per 4x4 sub block (1 if the sub block holds a
 *  non-zero level, else 0)
 *
 * @param[in] csbf_strd
 *  Stride between consecutive rows of sub block flags in csbf
 *
 * @param[out] zero_col
 *  Zero column mask. Bit n is cleared when column n lies inside a coded 4x4
 *  sub block; every other bit is set.
 *
 * @param[out] zero_row
 *  Zero row mask. Bit n is cleared when row n lies inside a coded 4x4 sub
 *  block; every other bit is set.
 *
 * @param[in] pi2_dequant_coeff
 *  Inverse quantization scaling matrix; consecutive rows are trans_size
 *  entries apart
 *
 * @param[out] pi8_cost
 *  Unused by this function; nothing is written through it
 *
 * @returns cbf
 *  Coded block flag: 1 if any 4x4 sub block is coded, else 0
 *
 * @remarks
 *  The bit depth is fixed to 8.
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_var_rnd_fact(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 bit_depth, shift_iq;
    WORD32 cbf = 0;
    WORD32 rnd_idx = 0;
    WORD16 i2_temp;
    UWORD32 u4_coded_cols = 0;
    UWORD32 u4_coded_rows = 0;

    (void)q_add;
    (void)pi8_cost;

    /* Quant initialization */
    /* NOTE(review): GETRANGE is assumed to return the bit length of */
    /* trans_size, so that log2_size ends up as log2(trans_size)     */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;
        WORD16 *pi2_qmat_row = pi2_quant_coeff + i * trans_size;
        WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;

        for(j = 0; j < trans_size; j++, rnd_idx++)
        {
            WORD32 abs_level;

            /* Back up the coefficient; every pass below quantizes it */
            i2_temp = pi2_src_row[j];

            /* Provisional level: quantization without any rounding */
            QUANT(pi2_q_row[j], i2_temp,
                  pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                  log2_size, 0);

            /* Final level: rounding chosen from the provisional magnitude */
            abs_level = abs(pi2_q_row[j]);
            if(0 == abs_level)
            {
                QUANT(pi2_q_row[j], i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, pi4_quant_round_factor_0_1[rnd_idx]);
            }
            else if(1 == abs_level)
            {
                QUANT(pi2_q_row[j], i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, pi4_quant_round_factor_1_2[rnd_idx]);
            }
            else
            {
                QUANT(pi2_q_row[j], i2_temp,
                      pi2_qmat_row[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);
        }
    }

    /* CSBF update: scan the levels one 4x4 sub block at a time */
    for(i = 0; i < trans_size; i += 4)
    {
        /* j counts sub blocks so that it directly indexes the csbf row */
        for(j = 0; j < trans_size / 4; j++)
        {
            const WORD16 *pi2_blk = pi2_q_dst + i * dst_q_strd + j * 4;
            UWORD8 u1_coded = 0;
            WORD32 r, c;

            /* Stop at the first non-zero level of the sub block */
            for(r = 0; (r < 4) && (0 == u1_coded); r++)
            {
                for(c = 0; c < 4; c++)
                {
                    if(0 != pi2_blk[r * dst_q_strd + c])
                    {
                        u1_coded = 1;
                        break;
                    }
                }
            }

            csbf[j] = u1_coded;
            if(u1_coded)
            {
                /* Mark all 4 columns and all 4 rows of the sub block as */
                /* coded; the masks are complemented when stored         */
                u4_coded_cols |= (0xFU << (j * 4));
                u4_coded_rows |= (0xFU << i);
                cbf = 1;
            }
        }
        csbf += csbf_strd;
    }

    *zero_col = ~u4_coded_cols;
    *zero_row = ~u4_coded_rows;

    return cbf;
}
/**
 *******************************************************************************
 *
 * @brief
 *  Quantizes a transform block with a flat scaling matrix and magnitude
 *  dependent rounding, inverse quantizes the levels and computes the
 *  transform domain SSD between the input and the reconstructed coefficients.
 *
 * @par Description:
 *  Each coefficient is first quantized with a rounding offset of zero. The
 *  magnitude of that provisional level selects the rounding offset used for
 *  the final quantization of the same input coefficient:
 *   - magnitude 0    : the coefficient's entry in pi4_quant_round_factor_0_1
 *   - magnitude 1    : the coefficient's entry in pi4_quant_round_factor_1_2
 *   - magnitude >= 2 : half rounding, (1 << QUANT_ROUND_FACTOR_Q) / 2
 *  The final level is inverse quantized, and the squared difference between
 *  the input coefficient and the inverse quantized value is added to the
 *  cost. Afterwards the coded sub block flags, the zero row / zero column
 *  masks and the coded block flag are derived from the quantized levels.
 *
 * @param[in] pi2_coeffs
 *  Input transform coefficients (trans_size x trans_size)
 *
 * @param[in] pi2_quant_coeff
 *  Unused by this function; the forward quantization assumes a flat scaling
 *  matrix and only applies g_ihevc_quant_scales[qp_rem]
 *
 * @param[out] pi2_q_dst
 *  Output quantized levels
 *
 * @param[out] pi2_iq_dst
 *  Output inverse quantized coefficients
 *
 * @param[in] trans_size
 *  Width and height of the transform block. The sub block scan walks the
 *  block in 4x4 units.
 *
 * @param[in] qp_div
 *  Quantization parameter / 6
 *
 * @param[in] qp_rem
 *  Quantization parameter % 6
 *
 * @param[in] q_add
 *  Unused by this function
 *
 * @param[in] pi4_quant_round_factor_0_1
 *  Rounding offsets applied when the provisional level is 0. One entry per
 *  coefficient, read in raster order without any row padding.
 *
 * @param[in] pi4_quant_round_factor_1_2
 *  Rounding offsets applied when the provisional level magnitude is 1. Same
 *  layout as pi4_quant_round_factor_0_1.
 *
 * @param[in] src_strd
 *  Row stride of pi2_coeffs, in elements
 *
 * @param[in] dst_q_strd
 *  Row stride of pi2_q_dst, in elements
 *
 * @param[in] dst_iq_strd
 *  Row stride of pi2_iq_dst, in elements
 *
 * @param[out] csbf
 *  Coded sub block flags, one per 4x4 sub block (1 if the sub block holds a
 *  non-zero level, else 0)
 *
 * @param[in] csbf_strd
 *  Stride between consecutive rows of sub block flags in csbf
 *
 * @param[out] zero_col
 *  Zero column mask. Bit n is cleared when column n lies inside a coded 4x4
 *  sub block; every other bit is set.
 *
 * @param[out] zero_row
 *  Zero row mask. Bit n is cleared when row n lies inside a coded 4x4 sub
 *  block; every other bit is set.
 *
 * @param[in] pi2_dequant_coeff
 *  Inverse quantization scaling matrix; consecutive rows are trans_size
 *  entries apart
 *
 * @param[out] pi8_cost
 *  Transform domain SSD of the block
 *
 * @returns cbf
 *  Coded block flag: 1 if any 4x4 sub block is coded, else 0
 *
 * @remarks
 *  The bit depth is fixed to 8.
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 bit_depth, shift_iq;
    WORD32 cbf = 0;
    WORD32 val;
    WORD32 rnd_idx = 0;
    WORD16 i2_temp;
    UWORD32 u4_coded_cols = 0;
    UWORD32 u4_coded_rows = 0;
    /* Accumulated in 64 bits: a single squared difference of two 16 bit */
    /* values can already exceed the range of a signed 32 bit integer    */
    LWORD64 ssd_cost = 0;

    (void)q_add;
    (void)pi2_quant_coeff;

    /* Quant initialization */
    /* NOTE(review): GETRANGE is assumed to return the bit length of */
    /* trans_size, so that log2_size ends up as log2(trans_size)     */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;
        WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;

        for(j = 0; j < trans_size; j++, rnd_idx++)
        {
            WORD16 i2_temp1;
            WORD32 abs_level;

            /* Back up the coefficient. Every pass below quantizes this   */
            /* copy instead of re-reading the input, which would already  */
            /* hold the provisional level if pi2_q_dst overlaps the input */
            i2_temp = pi2_src_row[j];

            /* Provisional level: quantization without any rounding */
            QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, 0);
            i2_temp1 = pi2_q_row[j];

            /* Final level: rounding chosen from the provisional magnitude */
            abs_level = abs(pi2_q_row[j]);
            if(0 == abs_level)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, pi4_quant_round_factor_0_1[rnd_idx]);
            }
            else if(1 == abs_level)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, pi4_quant_round_factor_1_2[rnd_idx]);
            }
            else
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Rounding may move the level by at most one step */
            ASSERT(abs(i2_temp1-pi2_q_row[j]) <= 1);

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation (widen before multiplying) */
            val = i2_temp - pi2_iq_row[j];
            ssd_cost += (LWORD64)val * val;
        }
    }

    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update: scan the levels one 4x4 sub block at a time */
    for(i = 0; i < trans_size; i += 4)
    {
        /* j counts sub blocks so that it directly indexes the csbf row */
        for(j = 0; j < trans_size / 4; j++)
        {
            const WORD16 *pi2_blk = pi2_q_dst + i * dst_q_strd + j * 4;
            UWORD8 u1_coded = 0;
            WORD32 r, c;

            /* Stop at the first non-zero level of the sub block */
            for(r = 0; (r < 4) && (0 == u1_coded); r++)
            {
                for(c = 0; c < 4; c++)
                {
                    if(0 != pi2_blk[r * dst_q_strd + c])
                    {
                        u1_coded = 1;
                        break;
                    }
                }
            }

            csbf[j] = u1_coded;
            if(u1_coded)
            {
                /* Mark all 4 columns and all 4 rows of the sub block as */
                /* coded; the masks are complemented when stored         */
                u4_coded_cols |= (0xFU << (j * 4));
                u4_coded_rows |= (0xFU << i);
                cbf = 1;
            }
        }
        csbf += csbf_strd;
    }

    *zero_col = ~u4_coded_cols;
    *zero_row = ~u4_coded_rows;

    return cbf;
}
/**
 *******************************************************************************
 *
 * @brief
 *  Quantizes a transform block with a flat scaling matrix and magnitude
 *  dependent rounding, then inverse quantizes the levels. No SSD is computed.
 *
 * @par Description:
 *  Each coefficient is first quantized with a rounding offset of zero. The
 *  magnitude of that provisional level selects the rounding offset used for
 *  the final quantization of the same input coefficient:
 *   - magnitude 0    : the coefficient's entry in pi4_quant_round_factor_0_1
 *   - magnitude 1    : the coefficient's entry in pi4_quant_round_factor_1_2
 *   - magnitude >= 2 : half rounding, (1 << QUANT_ROUND_FACTOR_Q) / 2
 *  The final level is inverse quantized. Afterwards the coded sub block
 *  flags, the zero row / zero column masks and the coded block flag are
 *  derived from the quantized levels.
 *
 * @param[in] pi2_coeffs
 *  Input transform coefficients (trans_size x trans_size)
 *
 * @param[in] pi2_quant_coeff
 *  Unused by this function; the forward quantization assumes a flat scaling
 *  matrix and only applies g_ihevc_quant_scales[qp_rem]
 *
 * @param[out] pi2_q_dst
 *  Output quantized levels
 *
 * @param[out] pi2_iq_dst
 *  Output inverse quantized coefficients
 *
 * @param[in] trans_size
 *  Width and height of the transform block. The sub block scan walks the
 *  block in 4x4 units.
 *
 * @param[in] qp_div
 *  Quantization parameter / 6
 *
 * @param[in] qp_rem
 *  Quantization parameter % 6
 *
 * @param[in] q_add
 *  Unused by this function
 *
 * @param[in] pi4_quant_round_factor_0_1
 *  Rounding offsets applied when the provisional level is 0. One entry per
 *  coefficient, read in raster order without any row padding.
 *
 * @param[in] pi4_quant_round_factor_1_2
 *  Rounding offsets applied when the provisional level magnitude is 1. Same
 *  layout as pi4_quant_round_factor_0_1.
 *
 * @param[in] src_strd
 *  Row stride of pi2_coeffs, in elements
 *
 * @param[in] dst_q_strd
 *  Row stride of pi2_q_dst, in elements
 *
 * @param[in] dst_iq_strd
 *  Row stride of pi2_iq_dst, in elements
 *
 * @param[out] csbf
 *  Coded sub block flags, one per 4x4 sub block (1 if the sub block holds a
 *  non-zero level, else 0)
 *
 * @param[in] csbf_strd
 *  Stride between consecutive rows of sub block flags in csbf
 *
 * @param[out] zero_col
 *  Zero column mask. Bit n is cleared when column n lies inside a coded 4x4
 *  sub block; every other bit is set.
 *
 * @param[out] zero_row
 *  Zero row mask. Bit n is cleared when row n lies inside a coded 4x4 sub
 *  block; every other bit is set.
 *
 * @param[in] pi2_dequant_coeff
 *  Inverse quantization scaling matrix; consecutive rows are trans_size
 *  entries apart
 *
 * @param[out] pi8_cost
 *  Unused by this function; nothing is written through it
 *
 * @returns cbf
 *  Coded block flag: 1 if any 4x4 sub block is coded, else 0
 *
 * @remarks
 *  The bit depth is fixed to 8.
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_flat_scale_mat_var_rnd_fact(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD32 bit_depth, shift_iq;
    WORD32 cbf = 0;
    WORD32 rnd_idx = 0;
    WORD16 i2_temp;
    UWORD32 u4_coded_cols = 0;
    UWORD32 u4_coded_rows = 0;

    (void)q_add;
    (void)pi8_cost;
    (void)pi2_quant_coeff;

    /* Quant initialization */
    /* NOTE(review): GETRANGE is assumed to return the bit length of */
    /* trans_size, so that log2_size ends up as log2(trans_size)     */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        WORD16 *pi2_src_row = pi2_coeffs + i * src_strd;
        WORD16 *pi2_q_row = pi2_q_dst + i * dst_q_strd;
        WORD16 *pi2_iq_row = pi2_iq_dst + i * dst_iq_strd;
        WORD16 *pi2_dqmat_row = pi2_dequant_coeff + i * trans_size;

        for(j = 0; j < trans_size; j++, rnd_idx++)
        {
            WORD16 i2_temp1;
            WORD32 abs_level;

            /* Back up the coefficient. Every pass below quantizes this   */
            /* copy instead of re-reading the input, which would already  */
            /* hold the provisional level if pi2_q_dst overlaps the input */
            i2_temp = pi2_src_row[j];

            /* Provisional level: quantization without any rounding */
            QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, 0);
            i2_temp1 = pi2_q_row[j];

            /* Final level: rounding chosen from the provisional magnitude */
            abs_level = abs(pi2_q_row[j]);
            if(0 == abs_level)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, pi4_quant_round_factor_0_1[rnd_idx]);
            }
            else if(1 == abs_level)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, pi4_quant_round_factor_1_2[rnd_idx]);
            }
            else
            {
                QUANT_NO_WEIGHTMAT(pi2_q_row[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Rounding may move the level by at most one step */
            ASSERT(abs(i2_temp1-pi2_q_row[j]) <= 1);

            /* Inverse Quantization */
            IQUANT(pi2_iq_row[j],
                   pi2_q_row[j],
                   pi2_dqmat_row[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);
        }
    }

    /* CSBF update: scan the levels one 4x4 sub block at a time */
    for(i = 0; i < trans_size; i += 4)
    {
        /* j counts sub blocks so that it directly indexes the csbf row */
        for(j = 0; j < trans_size / 4; j++)
        {
            const WORD16 *pi2_blk = pi2_q_dst + i * dst_q_strd + j * 4;
            UWORD8 u1_coded = 0;
            WORD32 r, c;

            /* Stop at the first non-zero level of the sub block */
            for(r = 0; (r < 4) && (0 == u1_coded); r++)
            {
                for(c = 0; c < 4; c++)
                {
                    if(0 != pi2_blk[r * dst_q_strd + c])
                    {
                        u1_coded = 1;
                        break;
                    }
                }
            }

            csbf[j] = u1_coded;
            if(u1_coded)
            {
                /* Mark all 4 columns and all 4 rows of the sub block as */
                /* coded; the masks are complemented when stored         */
                u4_coded_cols |= (0xFU << (j * 4));
                u4_coded_rows |= (0xFU << i);
                cbf = 1;
            }
        }
        csbf += csbf_strd;
    }

    *zero_col = ~u4_coded_cols;
    *zero_row = ~u4_coded_rows;

    return cbf;
}