You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
794 lines
32 KiB
794 lines
32 KiB
/******************************************************************************
|
|
*
|
|
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
******************************************************************************/
|
|
/**
|
|
*******************************************************************************
|
|
* @file
|
|
* ihevc_deblk.c
|
|
*
|
|
* @brief
|
|
* Contains definition for the ctb level deblk function
|
|
*
|
|
* @author
|
|
* Srinivas T
|
|
*
|
|
* @par List of Functions:
|
|
* - ihevcd_deblk_ctb()
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
#include "ihevc_typedefs.h"
|
|
#include "iv.h"
|
|
#include "ivd.h"
|
|
#include "ihevcd_cxa.h"
|
|
#include "ithread.h"
|
|
|
|
#include "ihevc_defs.h"
|
|
#include "ihevc_debug.h"
|
|
#include "ihevc_defs.h"
|
|
#include "ihevc_structs.h"
|
|
#include "ihevc_macros.h"
|
|
#include "ihevc_platform_macros.h"
|
|
#include "ihevc_cabac_tables.h"
|
|
|
|
#include "ihevc_error.h"
|
|
#include "ihevc_common_tables.h"
|
|
|
|
#include "ihevcd_trace.h"
|
|
#include "ihevcd_defs.h"
|
|
#include "ihevcd_function_selector.h"
|
|
#include "ihevcd_structs.h"
|
|
#include "ihevcd_error.h"
|
|
#include "ihevcd_nal.h"
|
|
#include "ihevcd_bitstream.h"
|
|
#include "ihevcd_job_queue.h"
|
|
#include "ihevcd_utils.h"
|
|
#include "ihevcd_debug.h"
|
|
|
|
#include "ihevc_deblk.h"
|
|
#include "ihevc_deblk_tables.h"
|
|
#include "ihevcd_profile.h"
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* Deblock CTB level function.
|
|
*
|
|
* @par Description:
|
|
* For a given CTB, deblocking on both vertical and
|
|
* horizontal edges is done. Both the luma and chroma
|
|
* blocks are processed
|
|
*
|
|
* @param[in] ps_deblk
|
|
* Pointer to the deblock context
|
|
*
|
|
* @returns
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
|
|
WORD32 i4_is_last_ctb_x,
|
|
WORD32 i4_is_last_ctb_y)
|
|
{
|
|
WORD32 ctb_size;
|
|
WORD32 log2_ctb_size;
|
|
UWORD32 u4_bs;
|
|
WORD32 bs_tz; /*Leading zeros in boundary strength*/
|
|
WORD32 qp_p, qp_q;
|
|
|
|
WORD32 filter_p, filter_q;
|
|
|
|
UWORD8 *pu1_src;
|
|
WORD32 qp_strd;
|
|
UWORD32 *pu4_vert_bs, *pu4_horz_bs;
|
|
UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
|
|
WORD32 bs_strd;
|
|
WORD32 src_strd;
|
|
UWORD8 *pu1_qp;
|
|
UWORD16 *pu2_ctb_no_loop_filter_flag;
|
|
UWORD16 au2_ctb_no_loop_filter_flag[9];
|
|
|
|
WORD32 col, row;
|
|
|
|
/* Flag to indicate if QP is constant in CTB
|
|
* 0 - top_left, 1 - top, 2 - left, 3 - current */
|
|
UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
|
|
WORD32 ctb_indx;
|
|
WORD32 chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
|
|
sps_t *ps_sps;
|
|
pps_t *ps_pps;
|
|
codec_t *ps_codec;
|
|
slice_header_t *ps_slice_hdr;
|
|
|
|
PROFILE_DISABLE_DEBLK();
|
|
|
|
ps_sps = ps_deblk->ps_sps;
|
|
ps_pps = ps_deblk->ps_pps;
|
|
ps_codec = ps_deblk->ps_codec;
|
|
ps_slice_hdr = ps_deblk->ps_slice_hdr;
|
|
|
|
log2_ctb_size = ps_sps->i1_log2_ctb_size;
|
|
ctb_size = (1 << ps_sps->i1_log2_ctb_size);
|
|
|
|
/* strides are in units of number of bytes */
|
|
/* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
|
|
bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
|
|
|
|
pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
|
|
(ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
|
|
ps_deblk->i4_ctb_y * bs_strd);
|
|
pu4_ctb_vert_bs = pu4_vert_bs;
|
|
|
|
pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
|
|
(ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
|
|
ps_deblk->i4_ctb_y * bs_strd);
|
|
pu4_ctb_horz_bs = pu4_horz_bs;
|
|
|
|
qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
|
|
pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
|
|
|
|
pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
|
|
|
|
ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
|
|
if(i4_is_last_ctb_y)
|
|
{
|
|
pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
|
|
pu4_ctb_vert_bs = pu4_vert_bs;
|
|
/* ctb_size/8 is the number of edges per CTB
|
|
* ctb_size/4 is the number of BS values needed per edge
|
|
* divided by 8 for the number of bytes
|
|
* 2 is the number of bits needed for each BS value */
|
|
memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
|
|
|
|
pu1_qp += (qp_strd << (log2_ctb_size - 3));
|
|
pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
|
|
ctb_indx += ps_sps->i2_pic_wd_in_ctb;
|
|
}
|
|
|
|
if(i4_is_last_ctb_x)
|
|
{
|
|
pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
|
|
pu4_ctb_horz_bs = pu4_horz_bs;
|
|
memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
|
|
|
|
pu1_qp += (ctb_size >> 3);
|
|
|
|
for(row = 0; row < (ctb_size >> 3) + 1; row++)
|
|
au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
|
|
pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
|
|
ctb_indx += 1;
|
|
}
|
|
|
|
u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
|
|
|
|
if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
|
|
{
|
|
u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
|
|
}
|
|
|
|
if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
|
|
{
|
|
u4_qp_const_in_ctb[0] =
|
|
ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
|
|
(1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
|
|
}
|
|
|
|
|
|
|
|
if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
|
|
{
|
|
u4_qp_const_in_ctb[1] =
|
|
ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
|
|
(1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
|
|
}
|
|
|
|
src_strd = ps_codec->i4_strd;
|
|
|
|
/* Luma Vertical Edge */
|
|
|
|
if(0 == i4_is_last_ctb_x)
|
|
{
|
|
/* Top CTB's slice header */
|
|
slice_header_t *ps_slice_hdr_top;
|
|
{
|
|
WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
|
|
if(i4_is_last_ctb_y)
|
|
cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
|
|
ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
|
|
}
|
|
|
|
pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
|
|
pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
|
|
|
|
/** Deblocking is done on a shifted CTB -
|
|
* Vertical edge processing is done by shifting the CTB up by four pixels */
|
|
pu1_src -= 4 * src_strd;
|
|
|
|
for(col = 0; col < ctb_size / 8; col++)
|
|
{
|
|
WORD32 shift = 0;
|
|
|
|
/* downshift vert_bs by ctb_size/2 for each column
|
|
* shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
|
|
* which will reduce to the following assuming ctb size is one of 16, 32 and 64
|
|
* and deblocking is done on 8x8 grid
|
|
*/
|
|
if(6 != log2_ctb_size)
|
|
shift = (col & 1) << (log2_ctb_size - 1);
|
|
|
|
/* BS for the column - Last row is excluded and the top row is included*/
|
|
u4_bs = (pu4_vert_bs[0] >> shift) << 2;
|
|
|
|
if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
|
|
{
|
|
/* Picking the last BS of the previous CTB corresponding to the same column */
|
|
UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
|
|
UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
|
|
u4_bs |= u4_top_bs & 3;
|
|
}
|
|
|
|
for(row = 0; row < ctb_size / 4;)
|
|
{
|
|
WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
|
|
WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
|
|
|
|
/* Trailing zeros are computed and the corresponding rows are not processed */
|
|
bs_tz = CTZ(u4_bs) >> 1;
|
|
if(0 != bs_tz)
|
|
{
|
|
u4_bs = u4_bs >> (bs_tz << 1);
|
|
if((row + bs_tz) >= (ctb_size / 4))
|
|
pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
|
|
else
|
|
pu1_src += 4 * bs_tz * src_strd;
|
|
|
|
row += bs_tz;
|
|
continue;
|
|
}
|
|
|
|
if(0 == row)
|
|
{
|
|
i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
|
|
i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
|
|
|
|
if(0 == col)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[0] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
|
|
pu1_qp[-qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[1] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd] :
|
|
pu1_qp[col - 1 - qp_strd];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[1] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd] :
|
|
pu1_qp[col - qp_strd];
|
|
}
|
|
else
|
|
{
|
|
if(0 == col)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[2] ?
|
|
pu1_qp[-ctb_size / 8] :
|
|
pu1_qp[((row - 1) >> 1) * qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[((row - 1) >> 1) * qp_strd + col];
|
|
}
|
|
|
|
filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
|
|
filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
|
|
/* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
|
|
filter_p = !filter_p;
|
|
filter_q = !filter_q;
|
|
|
|
if(filter_p || filter_q)
|
|
{
|
|
DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
|
|
u4_bs & 3, qp_p, qp_q,
|
|
ps_slice_hdr->i1_beta_offset_div2,
|
|
ps_slice_hdr->i1_tc_offset_div2,
|
|
filter_p, filter_q);
|
|
ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
|
|
u4_bs & 3, qp_p, qp_q,
|
|
i1_beta_offset_div2,
|
|
i1_tc_offset_div2,
|
|
filter_p, filter_q);
|
|
}
|
|
|
|
pu1_src += 4 * src_strd;
|
|
u4_bs = u4_bs >> 2;
|
|
row++;
|
|
}
|
|
|
|
if((64 == ctb_size) ||
|
|
((32 == ctb_size) && (col & 1)))
|
|
{
|
|
pu4_vert_bs++;
|
|
}
|
|
pu1_src -= (src_strd << log2_ctb_size);
|
|
pu1_src += 8;
|
|
}
|
|
pu4_vert_bs = pu4_ctb_vert_bs;
|
|
}
|
|
|
|
|
|
/* Luma Horizontal Edge */
|
|
|
|
if(0 == i4_is_last_ctb_y)
|
|
{
|
|
|
|
/* Left CTB's slice header */
|
|
slice_header_t *ps_slice_hdr_left;
|
|
{
|
|
WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
|
|
if(i4_is_last_ctb_x)
|
|
cur_ctb_indx += 1;
|
|
ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
|
|
}
|
|
pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
|
|
pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
|
|
|
|
/** Deblocking is done on a shifted CTB -
|
|
* Horizontal edge processing is done by shifting the CTB left by four pixels */
|
|
pu1_src -= 4;
|
|
for(row = 0; row < ctb_size / 8; row++)
|
|
{
|
|
WORD32 shift = 0;
|
|
|
|
/* downshift vert_bs by ctb_size/2 for each column
|
|
* shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
|
|
* which will reduce to the following assuming ctb size is one of 16, 32 and 64
|
|
* and deblocking is done on 8x8 grid
|
|
*/
|
|
if(6 != log2_ctb_size)
|
|
shift = (row & 1) << (log2_ctb_size - 1);
|
|
|
|
/* BS for the row - Last column is excluded and the left column is included*/
|
|
u4_bs = (pu4_horz_bs[0] >> shift) << 2;
|
|
|
|
if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
|
|
{
|
|
/** Picking the last BS of the previous CTB corresponding to the same row
|
|
* UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
|
|
*/
|
|
UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
|
|
UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
|
|
u4_bs |= u4_left_bs & 3;
|
|
}
|
|
|
|
for(col = 0; col < ctb_size / 4;)
|
|
{
|
|
WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
|
|
WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
|
|
|
|
bs_tz = CTZ(u4_bs) >> 1;
|
|
if(0 != bs_tz)
|
|
{
|
|
u4_bs = u4_bs >> (bs_tz << 1);
|
|
|
|
if((col + bs_tz) >= (ctb_size / 4))
|
|
pu1_src += 4 * (ctb_size / 4 - col);
|
|
else
|
|
pu1_src += 4 * bs_tz;
|
|
|
|
col += bs_tz;
|
|
continue;
|
|
}
|
|
|
|
if(0 == col)
|
|
{
|
|
i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
|
|
i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
|
|
|
|
if(0 == row)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[0] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
|
|
pu1_qp[-qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[2] ?
|
|
pu1_qp[-ctb_size / 8] :
|
|
pu1_qp[(row - 1) * qp_strd - 1];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[2] ?
|
|
pu1_qp[-ctb_size / 8] :
|
|
pu1_qp[row * qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
if(0 == row)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[1] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd] :
|
|
pu1_qp[((col - 1) >> 1) - qp_strd];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[((col - 1) >> 1) + row * qp_strd];
|
|
}
|
|
|
|
filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
|
|
filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
|
|
/* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
|
|
filter_p = !filter_p;
|
|
filter_q = !filter_q;
|
|
|
|
if(filter_p || filter_q)
|
|
{
|
|
DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
|
|
u4_bs & 3, qp_p, qp_q,
|
|
ps_slice_hdr->i1_beta_offset_div2,
|
|
ps_slice_hdr->i1_tc_offset_div2,
|
|
filter_p, filter_q);
|
|
ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
|
|
u4_bs & 3, qp_p, qp_q,
|
|
i1_beta_offset_div2,
|
|
i1_tc_offset_div2, filter_p, filter_q);
|
|
}
|
|
|
|
pu1_src += 4;
|
|
u4_bs = u4_bs >> 2;
|
|
col++;
|
|
}
|
|
|
|
if((64 == ctb_size) ||
|
|
((32 == ctb_size) && (row & 1)))
|
|
{
|
|
pu4_horz_bs++;
|
|
}
|
|
pu1_src -= ctb_size;
|
|
pu1_src += (src_strd << 3);
|
|
}
|
|
pu4_horz_bs = pu4_ctb_horz_bs;
|
|
}
|
|
|
|
|
|
/* Chroma Veritcal Edge */
|
|
|
|
if(0 == i4_is_last_ctb_x)
|
|
{
|
|
|
|
/* Top CTB's slice header */
|
|
slice_header_t *ps_slice_hdr_top;
|
|
{
|
|
WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
|
|
if(i4_is_last_ctb_y)
|
|
cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
|
|
ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
|
|
}
|
|
|
|
pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
|
|
pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
|
|
|
|
/** Deblocking is done on a shifted CTB -
|
|
* Vertical edge processing is done by shifting the CTB up by four pixels */
|
|
pu1_src -= 4 * src_strd;
|
|
|
|
for(col = 0; col < ctb_size / 16; col++)
|
|
{
|
|
|
|
/* BS for the column - Last row is excluded and the top row is included*/
|
|
u4_bs = pu4_vert_bs[0] << 2;
|
|
|
|
if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
|
|
{
|
|
/* Picking the last BS of the previous CTB corresponding to the same column */
|
|
UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
|
|
UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
|
|
u4_bs |= u4_top_bs & 3;
|
|
}
|
|
|
|
/* Every alternate boundary strength value is used for chroma */
|
|
u4_bs &= 0x22222222;
|
|
|
|
for(row = 0; row < ctb_size / 8;)
|
|
{
|
|
WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
|
|
|
|
bs_tz = CTZ(u4_bs) >> 2;
|
|
if(0 != bs_tz)
|
|
{
|
|
if((row + bs_tz) >= (ctb_size / 8))
|
|
pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
|
|
else
|
|
pu1_src += 4 * bs_tz * src_strd;
|
|
row += bs_tz;
|
|
u4_bs = u4_bs >> (bs_tz << 2);
|
|
continue;
|
|
}
|
|
|
|
if(0 == row)
|
|
{
|
|
i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
|
|
|
|
if(0 == col)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[0] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
|
|
pu1_qp[-qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[1] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd] :
|
|
pu1_qp[2 * col - 1 - qp_strd];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[1] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd] :
|
|
pu1_qp[2 * col - qp_strd];
|
|
}
|
|
else
|
|
{
|
|
if(0 == col)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[2] ?
|
|
pu1_qp[-ctb_size / 8] :
|
|
pu1_qp[(row - 1) * qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[(row - 1) * qp_strd + 2 * col];
|
|
}
|
|
|
|
filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
|
|
filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
|
|
/* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
|
|
filter_p = !filter_p;
|
|
filter_q = !filter_q;
|
|
|
|
if(filter_p || filter_q)
|
|
{
|
|
ASSERT(1 == ((u4_bs & 3) >> 1));
|
|
DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
|
|
u4_bs & 3, qp_p, qp_q,
|
|
ps_pps->i1_pic_cb_qp_offset,
|
|
ps_pps->i1_pic_cr_qp_offset,
|
|
ps_slice_hdr->i1_tc_offset_div2,
|
|
filter_p, filter_q);
|
|
if(chroma_yuv420sp_vu)
|
|
{
|
|
ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
|
|
src_strd,
|
|
qp_q,
|
|
qp_p,
|
|
ps_pps->i1_pic_cr_qp_offset,
|
|
ps_pps->i1_pic_cb_qp_offset,
|
|
i1_tc_offset_div2,
|
|
filter_q,
|
|
filter_p);
|
|
}
|
|
else
|
|
{
|
|
ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
|
|
src_strd,
|
|
qp_p,
|
|
qp_q,
|
|
ps_pps->i1_pic_cb_qp_offset,
|
|
ps_pps->i1_pic_cr_qp_offset,
|
|
i1_tc_offset_div2,
|
|
filter_p,
|
|
filter_q);
|
|
}
|
|
}
|
|
|
|
pu1_src += 4 * src_strd;
|
|
u4_bs = u4_bs >> 4;
|
|
row++;
|
|
}
|
|
|
|
pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
|
|
pu1_src -= ((src_strd / 2) << log2_ctb_size);
|
|
pu1_src += 16;
|
|
}
|
|
}
|
|
|
|
/* Chroma Horizontal Edge */
|
|
|
|
if(0 == i4_is_last_ctb_y)
|
|
{
|
|
|
|
/* Left CTB's slice header */
|
|
slice_header_t *ps_slice_hdr_left;
|
|
{
|
|
WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
|
|
if(i4_is_last_ctb_x)
|
|
cur_ctb_indx += 1;
|
|
ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
|
|
}
|
|
|
|
pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
|
|
pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
|
|
|
|
/** Deblocking is done on a shifted CTB -
|
|
* Vertical edge processing is done by shifting the CTB up by four pixels (8 here beacuse UV are interleaved) */
|
|
pu1_src -= 8;
|
|
for(row = 0; row < ctb_size / 16; row++)
|
|
{
|
|
/* BS for the row - Last column is excluded and the left column is included*/
|
|
u4_bs = pu4_horz_bs[0] << 2;
|
|
|
|
if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
|
|
{
|
|
/** Picking the last BS of the previous CTB corresponding to the same row
|
|
* UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
|
|
*/
|
|
UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
|
|
UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
|
|
u4_bs |= u4_left_bs & 3;
|
|
}
|
|
|
|
/* Every alternate boundary strength value is used for chroma */
|
|
u4_bs &= 0x22222222;
|
|
|
|
for(col = 0; col < ctb_size / 8;)
|
|
{
|
|
WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
|
|
|
|
bs_tz = CTZ(u4_bs) >> 2;
|
|
if(0 != bs_tz)
|
|
{
|
|
u4_bs = u4_bs >> (bs_tz << 2);
|
|
|
|
if((col + bs_tz) >= (ctb_size / 8))
|
|
pu1_src += 8 * (ctb_size / 8 - col);
|
|
else
|
|
pu1_src += 8 * bs_tz;
|
|
|
|
col += bs_tz;
|
|
continue;
|
|
}
|
|
|
|
if(0 == col)
|
|
{
|
|
i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
|
|
|
|
if(0 == row)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[0] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
|
|
pu1_qp[-qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[2] ?
|
|
pu1_qp[-ctb_size / 8] :
|
|
pu1_qp[(2 * row - 1) * qp_strd - 1];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[2] ?
|
|
pu1_qp[-ctb_size / 8] :
|
|
pu1_qp[(2 * row) * qp_strd - 1];
|
|
}
|
|
else
|
|
{
|
|
if(0 == row)
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[1] ?
|
|
pu1_qp[-ctb_size / 8 * qp_strd] :
|
|
pu1_qp[col - 1 - qp_strd];
|
|
}
|
|
else
|
|
{
|
|
qp_p = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[(col - 1) + (2 * row - 1) * qp_strd];
|
|
}
|
|
|
|
qp_q = u4_qp_const_in_ctb[3] ?
|
|
pu1_qp[0] :
|
|
pu1_qp[(col - 1) + 2 * row * qp_strd];
|
|
}
|
|
|
|
filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
|
|
filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
|
|
/* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
|
|
filter_p = !filter_p;
|
|
filter_q = !filter_q;
|
|
|
|
if(filter_p || filter_q)
|
|
{
|
|
ASSERT(1 == ((u4_bs & 3) >> 1));
|
|
DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
|
|
u4_bs & 3, qp_p, qp_q,
|
|
ps_pps->i1_pic_cb_qp_offset,
|
|
ps_pps->i1_pic_cr_qp_offset,
|
|
ps_slice_hdr->i1_tc_offset_div2,
|
|
filter_p, filter_q);
|
|
if(chroma_yuv420sp_vu)
|
|
{
|
|
ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
|
|
src_strd,
|
|
qp_q,
|
|
qp_p,
|
|
ps_pps->i1_pic_cr_qp_offset,
|
|
ps_pps->i1_pic_cb_qp_offset,
|
|
i1_tc_offset_div2,
|
|
filter_q,
|
|
filter_p);
|
|
}
|
|
else
|
|
{
|
|
ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
|
|
src_strd,
|
|
qp_p,
|
|
qp_q,
|
|
ps_pps->i1_pic_cb_qp_offset,
|
|
ps_pps->i1_pic_cr_qp_offset,
|
|
i1_tc_offset_div2,
|
|
filter_p,
|
|
filter_q);
|
|
}
|
|
}
|
|
|
|
pu1_src += 8;
|
|
u4_bs = u4_bs >> 4;
|
|
col++;
|
|
}
|
|
|
|
pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
|
|
pu1_src -= ctb_size;
|
|
pu1_src += 8 * src_strd;
|
|
|
|
}
|
|
}
|
|
}
|