You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1171 lines
48 KiB

/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
* ihevcd_iquant_itrans_recon_ctb.c
*
* @brief
* Contains functions for inverse quantization, inverse transform and recon
*
* @author
* Ittiam
*
* @par List of Functions:
* - ihevcd_iquant_itrans_recon_ctb()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "ihevc_typedefs.h"
#include "iv.h"
#include "ivd.h"
#include "ihevcd_cxa.h"
#include "ihevc_defs.h"
#include "ihevc_debug.h"
#include "ihevc_structs.h"
#include "ihevc_cabac_tables.h"
#include "ihevc_macros.h"
#include "ihevc_platform_macros.h"
#include "ihevcd_defs.h"
#include "ihevcd_function_selector.h"
#include "ihevcd_structs.h"
#include "ihevcd_error.h"
#include "ihevcd_bitstream.h"
#include "ihevc_common_tables.h"
/* Intra pred includes */
#include "ihevc_intra_pred.h"
/* Inverse transform common module includes */
#include "ihevc_trans_tables.h"
#include "ihevc_trans_macros.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_recon.h"
#include "ihevc_chroma_itrans_recon.h"
#include "ihevc_chroma_recon.h"
/* Decoder includes */
#include "ihevcd_common_tables.h"
#include "ihevcd_iquant_itrans_recon_ctb.h"
#include "ihevcd_debug.h"
#include "ihevcd_profile.h"
#include "ihevcd_statistics.h"
#include "ihevcd_itrans_recon_dc.h"
/* 4-bit bit-reversal lookup: entry i holds i with its four bits mirrored */
/* (e.g. 1 = 0b0001 -> 8 = 0b1000, 3 = 0b0011 -> 12 = 0b1100).            */
/* ihevcd_get_intra_nbr_flag() uses it to store the Left and Bottom-Left  */
/* availability nibbles in reversed bit order.                            */
static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
/* Globals */
/* Maps an intra prediction mode number (array index) to the index of the   */
/* prediction routine that handles it. Several modes share one routine;     */
/* modes 18 and 34 both map to IP_FUNC_MODE_18_34. The result is used to    */
/* index the codec's apf_intra_pred_luma[] / apf_intra_pred_chroma[] tables. */
static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
{ IP_FUNC_MODE_0, /* Mode 0 */
IP_FUNC_MODE_1, /* Mode 1 */
IP_FUNC_MODE_2, /* Mode 2 */
IP_FUNC_MODE_3TO9, /* Mode 3 */
IP_FUNC_MODE_3TO9, /* Mode 4 */
IP_FUNC_MODE_3TO9, /* Mode 5 */
IP_FUNC_MODE_3TO9, /* Mode 6 */
IP_FUNC_MODE_3TO9, /* Mode 7 */
IP_FUNC_MODE_3TO9, /* Mode 8 */
IP_FUNC_MODE_3TO9, /* Mode 9 */
IP_FUNC_MODE_10, /* Mode 10 */
IP_FUNC_MODE_11TO17, /* Mode 11 */
IP_FUNC_MODE_11TO17, /* Mode 12 */
IP_FUNC_MODE_11TO17, /* Mode 13 */
IP_FUNC_MODE_11TO17, /* Mode 14 */
IP_FUNC_MODE_11TO17, /* Mode 15 */
IP_FUNC_MODE_11TO17, /* Mode 16 */
IP_FUNC_MODE_11TO17, /* Mode 17 */
IP_FUNC_MODE_18_34, /* Mode 18 */
IP_FUNC_MODE_19TO25, /* Mode 19 */
IP_FUNC_MODE_19TO25, /* Mode 20 */
IP_FUNC_MODE_19TO25, /* Mode 21 */
IP_FUNC_MODE_19TO25, /* Mode 22 */
IP_FUNC_MODE_19TO25, /* Mode 23 */
IP_FUNC_MODE_19TO25, /* Mode 24 */
IP_FUNC_MODE_19TO25, /* Mode 25 */
IP_FUNC_MODE_26, /* Mode 26 */
IP_FUNC_MODE_27TO33, /* Mode 27 */
IP_FUNC_MODE_27TO33, /* Mode 28 */
IP_FUNC_MODE_27TO33, /* Mode 29 */
IP_FUNC_MODE_27TO33, /* Mode 30 */
IP_FUNC_MODE_27TO33, /* Mode 31 */
IP_FUNC_MODE_27TO33, /* Mode 32 */
IP_FUNC_MODE_27TO33, /* Mode 33 */
IP_FUNC_MODE_18_34, /* Mode 34 */
};
/* Pointers to the first element of each transform coefficient matrix, in   */
/* the order: 4x4 DST, then 4x4, 8x8, 16x16 and 32x32 transform tables.     */
/* The array has external linkage; it is not referenced elsewhere in the    */
/* part of this file that is visible here.                                  */
const WORD16 *g_ai2_ihevc_trans_tables[] =
{ &g_ai2_ihevc_trans_dst_4[0][0],
&g_ai2_ihevc_trans_4[0][0],
&g_ai2_ihevc_trans_8[0][0],
&g_ai2_ihevc_trans_16[0][0],
&g_ai2_ihevc_trans_32[0][0]
};
/*****************************************************************************/
/* Function Prototypes */
/*****************************************************************************/
/* Unpacks (and, unless bypassed, dequantizes) the coefficients of one TU; returns the coefficient-data pointer advanced past the bytes it consumed */
/**
 * @brief
 *  Expands the packed coefficient data of one transform unit into a raster
 *  ordered trans_size x trans_size block, applying inverse quantization
 *  unless trans_quant_bypass is set.
 *
 * @par Packed layout read from pu1_tu_coeff_data
 *  byte 0 : number of coded 4x4 sub-blocks
 *  byte 1 : bit 0 = transform-skip flag, remaining bits = scan type
 *  then one tu_sblk_coeff_data_t record per coded sub-block:
 *    u2_subblk_pos    : low byte = x, high byte = y (in 4x4 sub-block units)
 *    u2_sig_coeff_map : one bit per scan position holding a coefficient
 *    ai2_level[]      : one level per set bit of the map, consumed from the
 *                       highest set scan position downwards
 *
 * @param[out] pi2_tu_coeff       Destination block, row stride = trans_size.
 *                                In the DC-only case only element 0 is
 *                                guaranteed to be written (plus column 0
 *                                cleared when bypassing, or the whole block
 *                                cleared when transform-skip is set).
 * @param[in]  log2_trans_size    log2 of the transform size.
 * @param[in]  pu1_tu_coeff_data  Packed coefficient data (layout above).
 * @param[in]  pi2_dequant_matrix Scaling matrix, indexed in raster order
 *                                with row stride trans_size.
 * @param[in]  qp_rem             Index into g_ihevc_iquant_scales.
 * @param[in]  qp_div             Passed through to IQUANT / IQUANT_4x4.
 * @param[in]  e_trans_type       Transform type; DST_4x4 never takes the
 *                                DC-only shortcut.
 * @param[in]  trans_quant_bypass Non-zero: copy levels without scaling.
 * @param[out] pu4_zero_cols      Bit n stays set when column n has no coeff.
 * @param[out] pu4_zero_rows      Bit n stays set when row n has no coeff.
 * @param[out] pu4_coeff_type     1 when only the DC coefficient is coded
 *                                (and type is not DST_4x4), otherwise 0.
 * @param[out] pi2_coeff_value    DC value; written only when *pu4_coeff_type
 *                                is set to 1.
 *
 * @return Pointer to the first byte following the data consumed for this TU.
 */
UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
                             WORD32 log2_trans_size,
                             UWORD8 *pu1_tu_coeff_data,
                             WORD16 *pi2_dequant_matrix,
                             WORD32 qp_rem,
                             WORD32 qp_div,
                             TRANSFORM_TYPE e_trans_type,
                             WORD32 trans_quant_bypass,
                             UWORD32 *pu4_zero_cols,
                             UWORD32 *pu4_zero_rows,
                             UWORD32 *pu4_coeff_type,
                             WORD16 *pi2_coeff_value)
{
    /* Generating coeffs from coeff-map */
    WORD32 i;
    WORD16 *pi2_sblk_ptr;
    WORD32 subblk_pos_x, subblk_pos_y;
    WORD32 sblk_scan_idx, coeff_raster_idx;
    WORD32 sblk_non_zero_coeff_idx;
    tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
    UWORD8 u1_num_coded_sblks, u1_scan_type;
    UWORD8 *pu1_new_tu_coeff_data;
    WORD32 trans_size;
    WORD32 xs, ys;
    WORD32 trans_skip;
    WORD16 iquant_out;
    WORD32 shift_iq;

    {
        /* Bit depth is fixed at 8 here */
        WORD32 bit_depth;

        bit_depth = 8 + 0;
        shift_iq = bit_depth + log2_trans_size - 5;
    }
    trans_size = (1 << log2_trans_size);

    /* First byte holds the number of coded sub-blocks */
    u1_num_coded_sblks = *pu1_tu_coeff_data++;

    /* Next byte holds the scan type; its bit 0 carries trans_skip */
    u1_scan_type = *pu1_tu_coeff_data++;
    trans_skip = u1_scan_type & 1;
    u1_scan_type >>= 1;

    pi2_sblk_ptr = pi2_tu_coeff;

    /* Initially all columns and all rows are assumed to be zero */
    *pu4_zero_cols = 0xFFFFFFFF;
    *pu4_zero_rows = 0xFFFFFFFF;

    ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);

    /* Transform-skip blocks are always cleared up front, including the */
    /* DC-only case below                                               */
    if(trans_skip)
        memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));

    STATS_INIT_SBLK_AND_COEFF_POS();

    /* DC only case: a single sub-block at position (0,0) whose only */
    /* significant coefficient is scan position 0                    */
    if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
                    && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
                    && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
    {
        *pu4_coeff_type = 1;

        if(!trans_quant_bypass)
        {
            if(4 == trans_size)
            {
                IQUANT_4x4(iquant_out,
                           ps_tu_sblk_coeff_data->ai2_level[0],
                           pi2_dequant_matrix[0]
                                           * g_ihevc_iquant_scales[qp_rem],
                           shift_iq, qp_div);
            }
            else
            {
                IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
                       pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            }
            if(trans_skip)
                iquant_out = (iquant_out + 16) >> 5;
        }
        else
        {
            /* Only column 0 is flagged non-zero below, so clearing that */
            /* column is sufficient                                      */
            for(i = 0; i < trans_size; i++)
                *(pi2_tu_coeff + i * trans_size) = 0;

            iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
        }

        *pi2_coeff_value = iquant_out;
        *pi2_tu_coeff = iquant_out;
        *pu4_zero_cols &= ~0x1U;
        *pu4_zero_rows &= ~0x1U;

        /* Exactly one level was consumed */
        ps_tu_sblk_coeff_data =
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[1];

        STATS_UPDATE_COEFF_COUNT();
        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0);
        STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
        return ((UWORD8 *)ps_tu_sblk_coeff_data);
    }
    else
    {
        *pu4_coeff_type = 0;
        /* In case of trans skip, memset has already happened */
        if(!trans_skip)
            memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
    }

    for(i = 0; i < u1_num_coded_sblks; i++)
    {
        UWORD32 u4_sig_coeff_map;

        subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
        subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;

        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);

        /* Convert sub-block units to coefficient units */
        subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
        subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;

        pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
                        + subblk_pos_x;

        sblk_non_zero_coeff_idx = 0;
        u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;

        /* Walk the set bits of the map from the highest scan position     */
        /* down. The map is 16 bits wide inside a 32-bit word, so the      */
        /* first CLZ brings sblk_scan_idx from 31 down to at most 15.      */
        /* NOTE(review): a coded sub-block is presumably never stored with */
        /* an empty map; this loop does not guard against that - confirm   */
        /* against the parser.                                             */
        sblk_scan_idx = 31;
        do
        {
            WORD32 clz = CLZ(u4_sig_coeff_map);

            sblk_scan_idx -= clz;
            /* Shift in two stages (clz here, 1 at the loop tail): a single */
            /* shift by (clz + 1) could reach the full word width           */
            u4_sig_coeff_map = u4_sig_coeff_map << clz;

            /* Copying coeffs and storing in reverse order */
            {
                STATS_UPDATE_COEFF_COUNT();
                coeff_raster_idx =
                                gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];

                xs = coeff_raster_idx & 0x3;
                ys = coeff_raster_idx >> 2;

                if(!trans_quant_bypass)
                {
                    if(4 == trans_size)
                    {
                        IQUANT_4x4(iquant_out,
                                   ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
                                   pi2_dequant_matrix[(subblk_pos_x + xs)
                                                   + (subblk_pos_y + ys)
                                                   * trans_size]
                                   * g_ihevc_iquant_scales[qp_rem],
                                   shift_iq, qp_div);
                        sblk_non_zero_coeff_idx++;
                    }
                    else
                    {
                        IQUANT(iquant_out,
                               ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
                               pi2_dequant_matrix[(subblk_pos_x + xs)
                                               + (subblk_pos_y + ys)
                                               * trans_size]
                               * g_ihevc_iquant_scales[qp_rem],
                               shift_iq, qp_div);
                        sblk_non_zero_coeff_idx++;
                    }

                    if(trans_skip)
                        iquant_out = (iquant_out + 16) >> 5;
                }
                else
                {
                    iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
                }

                /* Unsigned literal: for a 32x32 TU the column/row index   */
                /* reaches 31, and shifting a signed 1 into bit 31 is      */
                /* undefined behaviour                                     */
                *pu4_zero_cols &= ~(0x1U << (subblk_pos_x + xs));
                *pu4_zero_rows &= ~(0x1U << (subblk_pos_y + ys));
                *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
            }

            sblk_scan_idx--;
            u4_sig_coeff_map <<= 1;

        }while(u4_sig_coeff_map);

        /* Next record starts right after the levels just consumed */
        ps_tu_sblk_coeff_data =
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
    }

    STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));

    pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;
    return pu1_new_tu_coeff_data;
}
/**
 * @brief
 *  Builds the neighbour-availability word consumed by the intra prediction
 *  reference substitution routines for one TU.
 *
 * @par Layout of the returned value (MSB -> LSB)
 *  Top-Left (1 bit) | Top-Right (4) | Top (4) | Left (4) | Bottom-Left (4)
 *  The Left and Bottom-Left nibbles are stored bit-reversed.
 *
 * @param[in] ps_proc                        Process context (CTB position,
 *                                           SPS, picture intra-flag map).
 * @param[in] ps_tu                          TU being predicted.
 * @param[in] pu4_intra_nbr_avail            Per-row availability bit map of
 *                                           the CTB; row 0 is the row above
 *                                           the CTB and the MSB of each row is
 *                                           the column left of the CTB.
 * @param[in] i2_pic_width_in_luma_samples   Used to derive the row stride of
 *                                           the picture intra-flag map.
 * @param[in] i1_constrained_intra_pred_flag Non-zero: a neighbour only counts
 *                                           when the intra-flag map marks it.
 * @param[in] trans_size                     TU size in luma samples.
 * @param[in] ctb_size                       CTB size in luma samples.
 *
 * @return Packed neighbour flags as described above.
 */
WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
                                 tu_t *ps_tu,
                                 UWORD32 *pu4_intra_nbr_avail,
                                 WORD16 i2_pic_width_in_luma_samples,
                                 UWORD8 i1_constrained_intra_pred_flag,
                                 WORD32 trans_size,
                                 WORD32 ctb_size)
{
    sps_t *ps_seq = ps_proc->ps_sps;
    UWORD8 *pu1_intra_map = ps_proc->pu1_pic_intra_flag;
    WORD32 map_stride = (i2_pic_width_in_luma_samples + 63) / 64;

    /* TU position inside the CTB and its extent, both in min-TU units */
    WORD32 tu_col = ps_tu->b4_pos_x;
    WORD32 tu_row = ps_tu->b4_pos_y;
    WORD32 tu_span = trans_size / MIN_TU_SIZE;

    /* Availability rows: above the TU, level with its first row, below it */
    UWORD32 row_above = pu4_intra_nbr_avail[tu_row];
    UWORD32 row_level = pu4_intra_nbr_avail[tu_row + 1];
    UWORD32 row_below = pu4_intra_nbr_avail[tu_row + 1 + tu_span];

    /* Bit positions (from LSB) of the column left of the TU, the TU's own */
    /* first column and the first column to its right                      */
    WORD32 sh_left_col = 31 - tu_col;
    WORD32 sh_own_col = 30 - tu_col;
    WORD32 sh_right_col = 30 - tu_col - tu_span;

    UWORD8 below_left_ok = (row_below >> sh_left_col) & 1;
    UWORD8 left_ok = (row_level >> sh_left_col) & 1;
    UWORD8 above_ok = (row_above >> sh_own_col) & 1;
    UWORD8 above_right_ok = (row_above >> sh_right_col) & 1;
    UWORD8 above_left_ok = (row_above >> sh_left_col) & 1;

    /* Absolute luma position of the TU in the picture */
    WORD32 pix_x = ps_proc->i4_ctb_x * ctb_size + tu_col * MIN_TU_SIZE;
    WORD32 pix_y = ps_proc->i4_ctb_y * ctb_size + tu_row * MIN_TU_SIZE;

    /* Number of 8x8 blocks along one TU edge (at least one) and its mask */
    WORD32 blks8 = (trans_size > 4) ? (trans_size / 8) : 1;
    WORD32 blks8_mask = (1 << blks8) - 1;

    UWORD8 flags_bl = 0;
    UWORD8 flags_l = 0;
    UWORD8 flags_tr = 0;
    UWORD8 flags_t = 0;
    UWORD8 flags_tl = 0;

    WORD32 packed;

    if(!i1_constrained_intra_pred_flag)
    {
        /* Unconstrained: an available neighbour is usable as a whole */
        flags_t = above_ok ? 0xF : 0;
        flags_tr = above_right_ok ? 0xF : 0;
        flags_bl = below_left_ok ? 0xF : 0;
        flags_l = left_ok ? 0xF : 0;
        flags_tl = above_left_ok ? 0x1 : 0;
    }
    else
    {
        /* Constrained intra prediction: consult the picture-level map,  */
        /* which holds one bit per 8x8 block and one byte per 64 columns */
        /* TODO: constrained intra pred not tested */
        WORD32 blk;

        if(below_left_ok)
        {
            WORD32 nx = pix_x - 1;
            WORD32 ny = pix_y + trans_size;
            UWORD8 *pu1_flag = pu1_intra_map + ny / 8 * map_stride + nx / 64;
            WORD32 bit = ((nx / 8) % 8);

            /* One map row per 8x8 block going down the left edge */
            for(blk = 0; blk < blks8; blk++)
                flags_bl |= ((*(pu1_flag + blk * map_stride) >> bit) & 1) << blk;
            flags_bl &= blks8_mask;
        }

        if(left_ok)
        {
            WORD32 nx = pix_x - 1;
            WORD32 ny = pix_y;
            UWORD8 *pu1_flag = pu1_intra_map + ny / 8 * map_stride + nx / 64;
            WORD32 bit = ((nx / 8) % 8);

            for(blk = 0; blk < blks8; blk++)
                flags_l |= ((*(pu1_flag + blk * map_stride) >> bit) & 1) << blk;
            flags_l &= blks8_mask;
        }

        if(above_ok)
        {
            WORD32 nx = pix_x;
            WORD32 ny = pix_y - 1;
            UWORD8 *pu1_flag = pu1_intra_map + ny / 8 * map_stride + nx / 64;
            WORD32 bit = ((nx / 8) % 8);

            /* Neighbouring 8x8 blocks along the top sit in adjacent bits */
            flags_t = (*pu1_flag >> bit);
            flags_t &= blks8_mask;
        }

        if(above_right_ok)
        {
            WORD32 nx = pix_x + trans_size;
            WORD32 ny = pix_y - 1;
            UWORD8 *pu1_flag = pu1_intra_map + ny / 8 * map_stride + nx / 64;
            WORD32 bit = ((nx / 8) % 8);

            flags_tr = (*pu1_flag >> bit);
            flags_tr &= blks8_mask;
        }

        if(above_left_ok)
        {
            WORD32 nx = pix_x - 1;
            WORD32 ny = pix_y - 1;
            UWORD8 *pu1_flag = pu1_intra_map + ny / 8 * map_stride + nx / 64;
            WORD32 bit = ((nx / 8) % 8);

            flags_tl = (*pu1_flag >> bit) & 1;
        }
    }

    /* Handling incomplete CTBs: drop top-right / bottom-left flags that */
    /* would refer to samples beyond the picture boundary                */
    {
        WORD32 log2_ctb = ps_seq->i1_log2_ctb_size;
        WORD32 tu_extent = 1 << (ps_tu->b3_size + 2);
        WORD32 flag_unit = MIN(trans_size, 8);

        /* Valid pixels remaining to the right of / below this TU */
        WORD32 cols_left = ps_seq->i2_pic_width_in_luma_samples
                        - (ps_proc->i4_ctb_x << log2_ctb)
                        - (tu_col * MIN_TU_SIZE)
                        - tu_extent;
        WORD32 rows_left = ps_seq->i2_pic_height_in_luma_samples
                        - (ps_proc->i4_ctb_y << log2_ctb)
                        - (tu_row * MIN_TU_SIZE)
                        - tu_extent;

        WORD32 usable_right = MIN(ctb_size, cols_left);
        WORD32 usable_below = MIN(ctb_size, rows_left);

        WORD32 right_mask = (1 << (usable_right / flag_unit)) - 1;
        WORD32 below_mask = (1 << (usable_below / flag_unit)) - 1;

        flags_tr &= right_mask;
        flags_bl &= below_mask;
    }

    /* Top-Left | Top-Right | Top | Left | Bottom-Left
     *    1          4        4      4         4
     * Left and Bottom-Left go through the bit-reversal table.
     */
    packed = (flags_tl << 16) | (flags_tr << 12) | (flags_t << 8)
                    | (gau4_ihevcd_4_bit_reverse[flags_l] << 4)
                    | gau4_ihevcd_4_bit_reverse[flags_bl];

    return packed;
}
/**
 * @brief
 *  Reconstructs every TU of the current CTB.
 *
 * @par Description
 *  Walks the ps_proc->i4_ctb_tu_cnt TUs starting at ps_proc->ps_tu. For each
 *  TU it either copies raw PCM samples out of the coefficient stream, or, per
 *  component (luma, then interleaved chroma): unpacks the coefficients, runs
 *  intra prediction when the TU lies in an intra-coded block, and applies
 *  recon / inverse transform + recon through the codec's function tables.
 *  A per-CTB availability bit map is maintained so that later TUs know which
 *  neighbouring 4x4 blocks have already been reconstructed.
 *
 * @param[in,out] ps_proc  Process context. pv_tu_coeff_data is read as the
 *                         packed coefficient stream and updated on exit to
 *                         point past the data consumed for this CTB.
 *
 * @return ps_proc->i4_ctb_tu_cnt
 */
WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
{
WORD16 *pi2_scaling_mat;
UWORD8 *pu1_y_dst_ctb;
UWORD8 *pu1_uv_dst_ctb;
WORD32 ctb_size;
codec_t *ps_codec;
slice_header_t *ps_slice_hdr;
tu_t *ps_tu;
WORD16 *pi2_ctb_coeff;
WORD32 tu_cnt;
WORD16 *pi2_tu_coeff;
WORD16 *pi2_tmp;
WORD32 pic_strd;
WORD32 luma_nbr_flags;
/* Neighbour flags of the four luma 4x4 TUs of an 8x8 block, indexed by */
/* (x % 2) + 2 * (y % 2); combined later for the shared 4x4 chroma TU   */
WORD32 luma_nbr_flags_4x4[4] = { 0 };
WORD32 chroma_nbr_flags = 0;
UWORD8 u1_luma_pred_mode_first_tu = 0;
/* Pointers for generating 2d coeffs from coeff-map */
UWORD8 *pu1_tu_coeff_data;
/* nbr avail map for CTB */
/* 1st bit points to neighbor (left/top_left/bot_left) */
/* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE
+ 2 /* Top nbr + bot nbr */]; UWORD32
top_avail_bits;
sps_t *ps_sps;
pps_t *ps_pps;
WORD32 intra_flag;
UWORD8 *pu1_pic_intra_flag;
/*************************************************************************/
/* Contains scaling matrix offset in the following order in a 1D buffer */
/* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */
/* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */
/* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */
/* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */
/* Intra 16x16 Y, 16x16 U, 16x16 V */
/* Inter 16x16 Y, 16x16 U, 16x16 V */
/* Intra 32x32 Y */
/* Inter 32x32 Y */
/*************************************************************************/
/* Only first 20 entries are used. Array is extended to avoid out of bound
reads. Skip CUs (64x64) read this table, but don't really use the value */
static const WORD32 scaling_mat_offset[] =
{ 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
1248, 1504, 1760, 2016, 3040, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
ps_sps = ps_proc->ps_sps;
ps_pps = ps_proc->ps_pps;
ps_slice_hdr = ps_proc->ps_slice_hdr;
ps_codec = ps_proc->ps_codec;
pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
pi2_ctb_coeff = ps_proc->pi2_invscan_out;
ctb_size = (1 << ps_sps->i1_log2_ctb_size);
pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
pic_strd = ps_codec->i4_strd;
pi2_tmp = ps_proc->pi2_itrans_intrmd_buf;
pi2_tu_coeff = pi2_ctb_coeff;
ps_tu = ps_proc->ps_tu;
/* Use the PPS scaling matrices only when scaling lists are enabled and */
/* the PPS carries its own list data; otherwise use the SPS matrices    */
if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
{
pi2_scaling_mat = ps_pps->pi2_scaling_mat;
}
else
{
pi2_scaling_mat = ps_sps->pi2_scaling_mat;
}
{
/* Updating the initial availability map */
WORD32 i;
UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
u1_top_ctb_avail;
u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
/* Initializing the availability array */
memset(au4_intra_nbr_avail, 0,
(MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
/* Initializing the availability array with CTB level availability flags */
{
/* Left-CTB availability goes into the MSB of every row that lies */
/* inside the picture                                             */
WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
{
au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
}
}
au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
>> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
{
WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
/* ctb_size_top gives number of valid pixels remaining in the current row */
/* Since we need pattern of 1's starting from the MSB, an additional shift */
/* is needed */
shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
<< shift;
}
au4_intra_nbr_avail[0] |= (
(u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
/* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
}
/* Applying Inverse transform on all the TU's in CTB */
for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
{
WORD32 transform_skip_flag = 0;
WORD32 transform_skip_flag_v = 0;
WORD32 num_comp, c_idx, func_idx;
WORD32 src_strd, pred_strd, dst_strd;
WORD32 qp_div = 0, qp_rem = 0;
WORD32 qp_div_v = 0, qp_rem_v = 0;
UWORD32 zero_cols = 0, zero_cols_v = 0;
UWORD32 zero_rows = 0, zero_rows_v = 0;
UWORD32 coeff_type = 0, coeff_type_v = 0;
WORD16 i2_coeff_value, i2_coeff_value_v;
WORD32 trans_size = 0;
TRANSFORM_TYPE e_trans_type;
WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
WORD32 log2_trans_size;
WORD32 chroma_qp_idx;
WORD16 *pi2_src = NULL, *pi2_src_v = NULL;
UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL;
UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL;
WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
WORD32 tu_x, tu_y;
WORD32 tu_y_offset, tu_uv_offset;
WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode;
WORD32 offset;
WORD32 pcm_flag;
WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
/* If 420SP_VU is chroma format, pred and dst pointer */
/* will be added +1 to point to U */
WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
/* If 420SP_VU is chroma format, pred and dst pointer */
/* will be added U offset of +1 and subtracted 2 */
/* to point to V */
WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
{
/* Fetch the intra flag of the 8x8 block holding this TU: the picture */
/* level map stores one bit per 8x8 block, one byte per 64 columns.   */
/* intra_flag ends up zero or non-zero (not normalised to 0/1).       */
WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
pu1_pic_intra_flag += (tu_abs_x >> 6);
intra_flag = *pu1_pic_intra_flag;
intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
}
u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
/* A chroma mode index of 7 means only luma is processed for this TU */
if(u1_chroma_pred_mode != 7)
num_comp = 2; /* Y and UV */
else
num_comp = 1; /* Y */
pcm_flag = 0;
/* Intra block without a luma prediction mode: handled as PCM. The raw */
/* samples are stored in the coefficient stream (all luma rows, then U */
/* samples, then V samples) and are copied straight to the output.     */
if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
{
UWORD8 *pu1_buf;
UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
WORD32 i, j;
tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
/* trans_size is used to update availability after reconstruction */
trans_size = cb_size;
pcm_flag = 1;
tu_y_offset = tu_x + tu_y * pic_strd;
pu1_y_dst += tu_x + tu_y * pic_strd;
pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd;
/* First byte points to number of coded blocks */
pu1_tu_coeff_data++;
/* Next byte points to scan type */
pu1_tu_coeff_data++;
ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
{
/* Y: one memcpy per row */
for(i = 0; i < cb_size; i++)
{
//pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
pu1_buf += cb_size;
}
pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
/* U */
/* Chroma is interleaved, hence the step of 2 in the destination */
for(i = 0; i < cb_size / 2; i++)
{
for(j = 0; j < cb_size / 2; j++)
{
pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
}
}
pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
/* V */
for(i = 0; i < cb_size / 2; i++)
{
for(j = 0; j < cb_size / 2; j++)
{
pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
}
}
}
/* Continue parsing right after the PCM samples */
pu1_tu_coeff_data = pu1_buf;
}
for(c_idx = 0; c_idx < num_comp; c_idx++)
{
if(0 == pcm_flag)
{
/* Initializing variables */
pred_strd = pic_strd;
dst_strd = pic_strd;
if(c_idx == 0) /* Y */
{
log2_y_trans_size_minus_2 = ps_tu->b3_size;
trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
log2_trans_size = log2_y_trans_size_minus_2 + 2;
tu_y_offset = tu_x + tu_y * pic_strd;
pi2_src = pi2_tu_coeff;
pu1_pred = pu1_y_dst_ctb + tu_y_offset;
pu1_dst = pu1_y_dst_ctb + tu_y_offset;
/* Calculating scaling matrix offset */
offset = log2_y_trans_size_minus_2 * 6
+ (!intra_flag)
* ((log2_y_trans_size_minus_2
== 3) ? 1 : 3)
+ c_idx;
pi2_dequant_matrix = pi2_scaling_mat
+ scaling_mat_offset[offset];
src_strd = trans_size;
/* 4x4 transform Luma in INTRA mode is DST */
if(log2_y_trans_size_minus_2 == 0 && intra_flag)
{
func_idx = log2_y_trans_size_minus_2;
e_trans_type = DST_4x4;
}
else
{
func_idx = log2_y_trans_size_minus_2 + 1;
e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
}
qp_div = ps_tu->b7_qp / 6;
qp_rem = ps_tu->b7_qp % 6;
u1_cbf = ps_tu->b1_y_cbf;
/* Bit 0 of the second header byte is the transform-skip flag. It */
/* is read before the cbf check but only consulted when cbf is 1. */
transform_skip_flag = pu1_tu_coeff_data[1] & 1;
/* Unpacking coeffs */
if(1 == u1_cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
pi2_src, log2_y_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix,
qp_rem, qp_div, e_trans_type,
ps_tu->b1_transquant_bypass, &zero_cols,
&zero_rows, &coeff_type,
&i2_coeff_value);
}
}
else /* UV interleaved */
{
/* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
if(ps_tu->b3_size == 0)
{
/* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x- 4,luma pos y- 4) */
log2_uv_trans_size_minus_2 = ps_tu->b3_size;
tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd;
}
else
{
log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd;
}
trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
log2_trans_size = log2_uv_trans_size_minus_2 + 2;
/* U coefficients first, V coefficients directly after them */
pi2_src = pi2_tu_coeff;
pi2_src_v = pi2_tu_coeff + trans_size * trans_size;
pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
/*TODO: Add support for choosing different tables for U and V,
* change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
*/
/* Calculating scaling matrix offset */
/* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
* max uv trans size is 16x16
*/
offset = log2_uv_trans_size_minus_2 * 6
+ (!intra_flag) * 3 + c_idx;
pi2_dequant_matrix = pi2_scaling_mat
+ scaling_mat_offset[offset];
pi2_dequant_matrix_v = pi2_scaling_mat
+ scaling_mat_offset[offset + 1];
src_strd = trans_size;
func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
/* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma.
* By limiting func_idx to 7, max of 16x16 chroma is called */
func_idx = MIN(func_idx, 7);
e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
/* QP for U */
i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
u1_cbf = ps_tu->b1_cb_cbf;
chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
+ i1_chroma_slice_qp_offset;
chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
/* QP for V */
i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
u1_cbf_v = ps_tu->b1_cr_cbf;
chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
+ i1_chroma_slice_qp_offset;
chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
/* Unpacking coeffs */
/* U data (if coded) comes first; the V header is read from wherever */
/* the stream pointer stands after the U data has been consumed      */
transform_skip_flag = pu1_tu_coeff_data[1] & 1;
if(1 == u1_cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
pi2_src, log2_uv_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix,
qp_rem, qp_div, e_trans_type,
ps_tu->b1_transquant_bypass, &zero_cols,
&zero_rows, &coeff_type,
&i2_coeff_value);
}
transform_skip_flag_v = pu1_tu_coeff_data[1] & 1;
if(1 == u1_cbf_v)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
pi2_src_v, log2_uv_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix_v,
qp_rem_v, qp_div_v, e_trans_type,
ps_tu->b1_transquant_bypass, &zero_cols_v,
&zero_rows_v, &coeff_type_v, &i2_coeff_value_v);
}
}
/***************************************************************/
/****************** Intra Prediction **************************/
/***************************************************************/
if(intra_flag) /* Intra */
{
/* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
au1_ref_sub_out size is kept as multiple of 8,
so that SIMD functions can load 64 bits */
UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 8] = {0};
UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
WORD32 luma_pred_func_idx, chroma_pred_func_idx;
/* Get the neighbour availability flags */
/* Done for only Y */
if(c_idx == 0)
{
/* Get neighbor availability for Y only */
luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
ps_tu,
au4_intra_nbr_avail,
ps_sps->i2_pic_width_in_luma_samples,
ps_pps->i1_constrained_intra_pred_flag,
trans_size,
ctb_size);
/* Remember the flags of each 4x4 luma TU; the 4x4 chroma TU */
/* that accompanies them combines these later                */
if(trans_size == 4)
luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))
{
chroma_nbr_flags = luma_nbr_flags;
}
/* Initializing nbr pointers */
pu1_top = pu1_pred - pic_strd;
pu1_left = pu1_pred - 1;
pu1_top_left = pu1_pred - pic_strd - 1;
/* call reference array substitution */
/* 0x1ffff means every neighbour flag is set */
if(luma_nbr_flags == 0x1ffff)
ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
pu1_top_left,
pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
else
ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
pu1_top_left,
pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
/* call reference filtering */
ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
au1_ref_sub_out, trans_size,
au1_ref_sub_out,
u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag);
/* use the look up to get the function idx */
luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
/* call the intra prediction function */
ps_codec->apf_intra_pred_luma[luma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode);
}
else
{
/* In case of yuv420sp_vu, prediction happens as usual. */
/* So point the pu1_pred pointer to original prediction pointer */
UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset;
/* Top-Left | Top-Right | Top | Left | Bottom-Left
* 1 4 4 4 4
*
* Generating chroma_nbr_flags depending upon the transform size */
if(ps_tu->b3_size == 0)
{
/* Take TL,T,L flags of First luma 4x4 block */
chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
/* Take TR flags of Second luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
/* Take BL flags of Third luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
}
/* Initializing nbr pointers */
/* Offsets of 2 because U and V samples are interleaved */
pu1_top = pu1_pred_orig - pic_strd;
pu1_left = pu1_pred_orig - 2;
pu1_top_left = pu1_pred_orig - pic_strd - 2;
/* Chroma pred mode derivation from luma pred mode */
{
/* Walk back to the first TU of the CU to fetch its luma mode */
tu_t *ps_tu_tmp = ps_tu;
while(!ps_tu_tmp->b1_first_tu_in_cu)
{
ps_tu_tmp--;
}
u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
}
/* Index 4 reuses the luma mode; any other index is looked up, and */
/* replaced by angular mode 34 when it collides with the luma mode */
if(4 == u1_chroma_pred_mode)
u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
else
{
u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
if(u1_chroma_pred_mode ==
u1_luma_pred_mode_first_tu)
{
u1_chroma_pred_mode = INTRA_ANGULAR(34);
}
}
/* call the chroma reference array substitution */
ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
pu1_top_left,
pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, au1_ref_sub_out, 1);
/* use the look up to get the function idx */
chroma_pred_func_idx =
g_i4_ip_funcs[u1_chroma_pred_mode];
/* call the intra prediction function */
ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode);
}
}
/* Updating number of transform types */
STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
/* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
if(1 == u1_cbf)
{
/* Bypass and transform-skip blocks have no inverse transform: */
/* the unpacked values are added to the prediction directly    */
if(ps_tu->b1_transquant_bypass || transform_skip_flag)
{
/* Recon */
ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst,
src_strd, pred_strd, dst_strd,
zero_cols);
}
else
{
/* Updating coded number of transform types(excluding trans skip and trans quant skip) */
STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
/* iQuant , iTrans and Recon */
if((0 == coeff_type))
{
ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp,
pu1_pred, pu1_dst,
src_strd, pred_strd,
dst_strd, zero_cols,
zero_rows);
}
else /* DC only */
{
STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst,
pred_strd, dst_strd,
log2_trans_size,
i2_coeff_value);
}
}
}
/* IQ, IT and Recon for V */
if(c_idx != 0)
{
if(1 == u1_cbf_v)
{
if(ps_tu->b1_transquant_bypass || transform_skip_flag_v)
{
/* Recon */
ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v,
pu1_dst_v, src_strd,
pred_strd, dst_strd,
zero_cols_v);
}
else
{
/* Updating number of transform types */
STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
/* iQuant , iTrans and Recon */
if((0 == coeff_type_v))
{
ps_codec->apf_itrans_recon[func_idx](pi2_src_v,
pi2_tmp,
pu1_pred_v,
pu1_dst_v,
src_strd,
pred_strd,
dst_strd,
zero_cols_v,
zero_rows_v);
}
else /* DC only */
{
STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v,
pred_strd, dst_strd,
log2_trans_size,
i2_coeff_value_v);
}
}
}
}
}
/* Neighbor availability inside CTB */
/* 1bit per 4x4. Indicates whether that 4x4 block has been reconstructed(available) */
/* Used for neighbor availability in intra pred */
/* Runs for PCM TUs as well, where trans_size was set to the block size */
if(c_idx == 0)
{
WORD32 i;
WORD32 trans_in_min_tu;
UWORD32 cur_tu_in_bits;
UWORD32 cur_tu_avail_flag;
trans_in_min_tu = trans_size / MIN_TU_SIZE;
/* Run of trans_in_min_tu ones, moved to the MSB end and then shifted */
/* right to the TU's column (bit 31 is the column left of the CTB)    */
cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
for(i = 0; i < trans_in_min_tu; i++)
au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
cur_tu_avail_flag;
}
}
}
/* Hand the advanced stream position back to the caller */
ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
return ps_proc->i4_ctb_tu_cnt;
}