You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
212 lines
5.7 KiB
212 lines
5.7 KiB
///*****************************************************************************
|
|
//*
|
|
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
|
|
//*
|
|
//* Licensed under the Apache License, Version 2.0 (the "License");
|
|
//* you may not use this file except in compliance with the License.
|
|
//* You may obtain a copy of the License at:
|
|
//*
|
|
//* http://www.apache.org/licenses/LICENSE-2.0
|
|
//*
|
|
//* Unless required by applicable law or agreed to in writing, software
|
|
//* distributed under the License is distributed on an "AS IS" BASIS,
|
|
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
//* See the License for the specific language governing permissions and
|
|
//* limitations under the License.
|
|
//*
|
|
//*****************************************************************************/
|
|
///**
|
|
///*******************************************************************************
|
|
//* @file
|
|
//* ihevc_deblk_luma_vert.s
|
|
//*
|
|
//* @brief
|
|
//* contains the function definition for deblocking of a vertical chroma edge.
//* the function is hand-written aarch64 (armv8) neon assembly in gnu
//* assembler syntax
|
|
//*
|
|
//* @author
|
|
//* anand s
|
|
//*
|
|
//* @par list of functions:
|
|
//*
|
|
//*
|
|
//* @remarks
|
|
//* none
|
|
//*
|
|
//*******************************************************************************/
|
|
///**
//*******************************************************************************
//* ihevc_deblk_chroma_vert_av8
//*
//* Documented C prototype and the AAPCS64 location of each argument:
//*
//* void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,         x0
//*                              WORD32 src_strd,          w1
//*                              WORD32 quant_param_p,     w2
//*                              WORD32 quant_param_q,     w3
//*                              WORD32 qp_offset_u,       w4
//*                              WORD32 qp_offset_v,       w5
//*                              WORD32 tc_offset_div2,    w6
//*                              WORD32 filter_flag_p,     w7
//*                              WORD32 filter_flag_q)     [sp]
//*
//* What the code does:
//*   - reads 4 rows of 8 bytes starting at pu1_src - 4, stepping by src_strd,
//*     and transposes them so that each byte column becomes a 4-lane group:
//*         v5  = columns 0,1   ("p1")      v16 = columns 2,3   ("p0")
//*         v17 = columns 4,5   ("q0")      v4  = columns 6,7   ("q1")
//*     (low 4 lanes = even column, high 4 lanes = odd column; presumably the
//*      interleaved U and V samples - confirm against the caller)
//*   - for each of the two qp offsets (qp_offset_u, qp_offset_v):
//*         qp  = offset + ((quant_param_p + quant_param_q + 1) >> 1)
//*         qp  = qp < 0 ? qp : (qp > 0x39 ? qp - 6 : gai4_ihevc_qp_table[qp])
//*         idx = clip(qp + 2 * tc_offset_div2 + 2, 0, 0x35)
//*         tc  = gai4_ihevc_tc_table[idx]
//*   - delta = clip(((q0 - p0) * 4 + p1 - q1 + 4) >> 3, -tc, tc)
//*     with the qp_offset_u tc applied to the even columns and the
//*     qp_offset_v tc applied to the odd columns
//*   - p0' = sat_u8(p0 + delta), q0' = sat_u8(q0 - delta)
//*   - writes p0' (2 bytes per row at pu1_src - 2) only if filter_flag_p != 0
//*     and q0' (2 bytes per row at pu1_src) only if filter_flag_q != 0
//*
//* Register roles after the entry sequence:
//*   x0 pu1_src, x1 src_strd, x2/x3 quant_param_p/q (later the v/u qp and tc),
//*   x7 qp_offset_u (later qp table base), x6 qp_offset_v (later tc table base),
//*   x5 tc_offset_div2, x12 filter_flag_p, x4 filter_flag_q,
//*   x8 row pointer, x15/x20 scratch.
//*
//* Clobbers: x1-x8, x12, x15, flags, v0-v6, v16-v18, v29-v31.
//*******************************************************************************/

    .text
    .align 4
    .include "ihevc_neon_macros.s"

    .extern gai4_ihevc_qp_table
    .extern gai4_ihevc_tc_table
    .globl ihevc_deblk_chroma_vert_av8

    .type ihevc_deblk_chroma_vert_av8, %function

ihevc_deblk_chroma_vert_av8:
    // AAPCS64 leaves the upper 32 bits of a register that carries a 32-bit
    // argument unspecified, and everything below operates on the full
    // x registers.  Widen every WORD32 register argument before first use.
    sxtw        x1, w1                      // src_strd (used as 64-bit post-index)
    sxtw        x2, w2                      // quant_param_p
    sxtw        x3, w3                      // quant_param_q
    sxtw        x12, w7                     // x12 = filter_flag_p (before x7 is reused)
    sxtw        x7, w4                      // x7  = qp_offset_u   (before x4 is reused)
    sxtw        x15, w5                     // x15 = qp_offset_v   (temporary)
    sxtw        x5, w6                      // x5  = tc_offset_div2
    mov         x6, x15                     // x6  = qp_offset_v
    ldr         w4, [sp]                    // x4  = filter_flag_q; read before sp moves

    push_v_regs                             // macro from ihevc_neon_macros.s (not visible here)
    stp         x19, x20, [sp, #-16]!       // x20 is scratch below; x19 fills the 16-byte pair

    sub         x8, x0, #4                  // x8 = pu1_src - 4 (first of 8 bytes per row)
    add         x2, x2, x3                  // x2 = quant_param_p + quant_param_q
    ld1         {v5.8b}, [x8], x1           // row 0
    add         x2, x2, #1                  // x2 = quant_param_p + quant_param_q + 1
    ld1         {v17.8b}, [x8], x1          // row 1
    ld1         {v16.8b}, [x8], x1          // row 2
    ld1         {v4.8b}, [x8]               // row 3

    // 8-bit transpose stage: (v5, v17) and (v16, v4)
    trn1        v29.8b, v5.8b, v17.8b
    trn2        v17.8b, v5.8b, v17.8b
    mov         v5.d[0], v29.d[0]
    adds        x3, x7, x2, asr #1          // x3 = qp_offset_u + avg qp; N flag consumed by bmi
    trn1        v29.8b, v16.8b, v4.8b
    trn2        v4.8b, v16.8b, v4.8b
    mov         v16.d[0], v29.d[0]
    adrp        x7, :got:gai4_ihevc_qp_table
    ldr         x7, [x7, #:got_lo12:gai4_ihevc_qp_table]    // x7 = &gai4_ihevc_qp_table

    // None of the NEON/adrp/ldr instructions above touch the flags set by adds.
    bmi         l1.2944                     // negative qp: no table lookup
    cmp         x3, #0x39
    bgt         lbl78                       // qp > 0x39: skip lookup, subtract 6 below
    ldrsw       x3, [x7, x3, lsl #2]        // x3 = gai4_ihevc_qp_table[x3]
lbl78:
    sub         x20, x3, #6
    csel        x3, x20, x3, gt             // flags still from cmp: applies only when qp > 0x39
l1.2944:
    // 16-bit transpose stage: (v5, v16) and (v17, v4)
    trn1        v29.4h, v5.4h, v16.4h
    trn2        v16.4h, v5.4h, v16.4h
    mov         v5.d[0], v29.d[0]
    adds        x2, x6, x2, asr #1          // x2 = qp_offset_v + avg qp; N flag consumed by bmi
    trn1        v29.4h, v17.4h, v4.4h
    trn2        v4.4h, v17.4h, v4.4h
    mov         v17.d[0], v29.d[0]
    bmi         l1.2964                     // negative qp: no table lookup
    cmp         x2, #0x39
    bgt         lbl86                       // qp > 0x39: skip lookup, subtract 6 below
    ldrsw       x2, [x7, x2, lsl #2]        // x2 = gai4_ihevc_qp_table[x2]
lbl86:
    sub         x20, x2, #6
    csel        x2, x20, x2, gt             // flags still from cmp: applies only when qp > 0x39
l1.2964:
    // 32-bit transpose stage: afterwards v5 = p1, v16 = p0, v17 = q0, v4 = q1
    trn1        v29.2s, v5.2s, v17.2s
    trn2        v17.2s, v5.2s, v17.2s
    mov         v5.d[0], v29.d[0]
    add         x3, x3, x5, lsl #1          // x3 = qp(u) + 2 * tc_offset_div2
    trn1        v29.2s, v16.2s, v4.2s
    trn2        v4.2s, v16.2s, v4.2s
    mov         v16.d[0], v29.d[0]
    add         x6, x3, #2                  // candidate tc index for the u path
    uxtl        v18.8h, v17.8b              // v18 = q0 widened to 16 bits
    cmp         x6, #0x35
    mov         x20, #0x35
    csel        x3, x20, x3, gt             // index > 0x35 -> 0x35
    bgt         l1.2996
    adds        x6, x3, #2                  // re-test the sign of the index
    add         x20, x3, #2
    csel        x3, x20, x3, pl             // index >= 0 -> use it
    mov         x20, #0
    csel        x3, x20, x3, mi             // index <  0 -> 0
l1.2996:
    usubl       v0.8h, v17.8b, v16.8b       // q0 - p0
    adrp        x6, :got:gai4_ihevc_tc_table
    ldr         x6, [x6, #:got_lo12:gai4_ihevc_tc_table]    // x6 = &gai4_ihevc_tc_table
    shl         v0.8h, v0.8h, #2            // (q0 - p0) * 4
    add         x2, x2, x5, lsl #1          // x2 = qp(v) + 2 * tc_offset_div2
    add         x5, x2, #2                  // candidate tc index for the v path
    uaddw       v0.8h, v0.8h, v5.8b         // + p1
    cmp         x5, #0x35                   // flags consumed by csel/bgt below
    ldrsw       x3, [x6, x3, lsl #2]        // x3 = tc for the u path
    usubw       v4.8h, v0.8h, v4.8b         // - q1
    mov         x20, #0x35
    csel        x2, x20, x2, gt             // index > 0x35 -> 0x35
    bgt         l1.3036
    adds        x5, x2, #2                  // re-test the sign of the index
    add         x20, x2, #2
    csel        x2, x20, x2, pl             // index >= 0 -> use it
    mov         x20, #0
    csel        x2, x20, x2, mi             // index <  0 -> 0
l1.3036:
    srshr       v6.8h, v4.8h, #3            // delta = (sum + 4) >> 3
    dup         v2.4h, w3                   // +tc (u path), low 4 lanes
    ldrsw       x2, [x6, x2, lsl #2]        // x2 = tc for the v path
    neg         x3, x3                      // -tc (u path)
    cmp         x12, #0                     // filter_flag_p; flags consumed by beq l1.3204
    dup         v3.4h, w2                   // +tc (v path)
    neg         x2, x2                      // -tc (v path)
    dup         v30.4h, w3
    dup         v31.4h, w2

    mov         v30.d[1], v31.d[0]          // v30 = {-tc_u x4, -tc_v x4}
    mov         v2.d[1], v3.d[0]            // v2  = {+tc_u x4, +tc_v x4}

    smin        v4.8h, v6.8h, v2.8h         // min(delta, +tc)
    smax        v2.8h, v30.8h, v4.8h        // v2 = clipped delta

    uxtl        v6.8h, v16.8b               // v6 = p0 widened to 16 bits

    add         v0.8h, v6.8h, v2.8h         // p0 + delta
    sub         v2.8h, v18.8h, v2.8h        // q0 - delta
    sqxtun      v0.8b, v0.8h                // saturate to unsigned 8 bit
    sub         x2, x0, #2                  // x2 = pu1_src - 2 (p0 store address)
    sqxtun      v1.8b, v2.8h

    // Transpose back so that each 16-bit lane holds one row's byte pair:
    // lanes 0-1 of v0/v1 carry p0', lanes 2-3 carry q0'.
    trn1        v29.2s, v0.2s, v1.2s
    trn2        v1.2s, v0.2s, v1.2s
    mov         v0.d[0], v29.d[0]
    trn1        v29.8b, v0.8b, v1.8b
    trn2        v1.8b, v0.8b, v1.8b
    mov         v0.d[0], v29.d[0]
    beq         l1.3204                     // filter_flag_p == 0: leave the p side untouched

    st1         {v0.h}[0], [x2], x1         // row 0
    st1         {v1.h}[0], [x2], x1         // row 1
    st1         {v0.h}[1], [x2], x1         // row 2
    st1         {v1.h}[1], [x2]             // row 3
l1.3204:
    cmp         x4, #0
    beq         l1.3228                     // filter_flag_q == 0: leave the q side untouched
    st1         {v0.h}[2], [x0], x1         // row 0
    st1         {v1.h}[2], [x0], x1         // row 1
    st1         {v0.h}[3], [x0], x1         // row 2
    st1         {v1.h}[3], [x0]             // row 3
l1.3228:
    ldp         x19, x20, [sp], #16
    pop_v_regs                              // macro from ihevc_neon_macros.s (not visible here)
    ret
|
|
|
|
|
|
|