You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
532 lines
14 KiB
532 lines
14 KiB
@/*****************************************************************************
|
|
@*
|
|
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
|
|
@*
|
|
@* Licensed under the Apache License, Version 2.0 (the "License");
|
|
@* you may not use this file except in compliance with the License.
|
|
@* You may obtain a copy of the License at:
|
|
@*
|
|
@* http://www.apache.org/licenses/LICENSE-2.0
|
|
@*
|
|
@* Unless required by applicable law or agreed to in writing, software
|
|
@* distributed under the License is distributed on an "AS IS" BASIS,
|
|
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@* See the License for the specific language governing permissions and
|
|
@* limitations under the License.
|
|
@*
|
|
@*****************************************************************************/
|
|
@/**
|
|
@ *******************************************************************************
|
|
@ * @file
|
|
@ * ihevc_padding_neon.s
|
|
@ *
|
|
@ * @brief
|
|
@ * contains function definitions padding
|
|
@ *
|
|
@ * @author
|
|
@ * naveen sr
|
|
@ *
|
|
@ * @par list of functions:
|
|
@ * - ihevc_pad_left_luma()
|
|
@ * - ihevc_pad_left_chroma()
|
|
@ *
|
|
@ * @remarks
|
|
@ * none
|
|
@ *
|
|
@ *******************************************************************************
|
|
@*/
|
|
|
|
@/**
|
|
@*******************************************************************************
|
|
@*
|
|
@* @brief
|
|
@* padding (luma block) at the left of a 2d array
|
|
@*
|
|
@* @par description:
|
|
@* the left column of a 2d array is replicated for pad_size times at the left
|
|
@*
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* uword8 pointer to the source
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] ht
|
|
@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array
|
|
@*
|
|
@* @param[in] pad_size
|
|
@* integer -padding size of the array
|
|
@*
|
|
@* @param[in] ht
|
|
@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* none
|
|
@*
|
|
@*******************************************************************************
|
|
@*/
|
|
@.if pad_left_luma == c
|
|
@void ihevc_pad_left_luma(uword8 *pu1_src,
|
|
@ word32 src_strd,
|
|
@ word32 ht,
|
|
@ word32 pad_size)
|
|
@**************variables vs registers*************************
|
|
@ r0 => *pu1_src
|
|
@ r1 => src_strd
|
|
@ r2 => ht
|
|
@ r3 => pad_size
|
|
|
|
.text
|
|
.align 4
|
|
|
|
|
|
|
|
|
|
.globl ihevc_pad_left_luma_a9q
|
|
|
|
.type ihevc_pad_left_luma_a9q, %function
|
|
|
|
ihevc_pad_left_luma_a9q:
|
|
|
|
stmfd sp!, {r4-r11,lr} @stack stores the values of the arguments
|
|
|
|
loop_start_luma_left:
|
|
@ pad size is assumed to be pad_left = 80
|
|
sub r4,r0,r3
|
|
|
|
ldrb r8,[r0]
|
|
add r0,r1
|
|
ldrb r9,[r0]
|
|
add r0,r1
|
|
ldrb r10,[r0]
|
|
add r0,r1
|
|
ldrb r11,[r0]
|
|
add r0,r1
|
|
|
|
vdup.u8 q0,r8
|
|
vdup.u8 q1,r9
|
|
vdup.u8 q2,r10
|
|
vdup.u8 q3,r11
|
|
|
|
add r5,r4,r1
|
|
|
|
vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4] @ 16 bytes store
|
|
|
|
add r6,r5,r1
|
|
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store
|
|
|
|
add r7,r6,r1
|
|
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store
|
|
|
|
subs r2,#4
|
|
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
|
|
@ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store
|
|
|
|
bne loop_start_luma_left
|
|
|
|
ldmfd sp!,{r4-r11,pc} @reload the registers from sp
|
|
|
|
|
|
|
|
|
|
|
|
@/**
|
|
@*******************************************************************************
|
|
@*
|
|
@* @brief
|
|
@* padding (chroma block) at the left of a 2d array
|
|
@*
|
|
@* @par description:
|
|
@* the left column of a 2d array is replicated for pad_size times at the left
|
|
@*
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* uword8 pointer to the source
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] ht
|
|
@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array (each colour component)
|
|
@*
|
|
@* @param[in] pad_size
|
|
@* integer -padding size of the array
|
|
@*
|
|
@* @param[in] ht
|
|
@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* none
|
|
@*
|
|
@*******************************************************************************
|
|
@*/
|
|
@.if pad_left_chroma == c
|
|
@void ihevc_pad_left_chroma(uword8 *pu1_src,
|
|
@ word32 src_strd,
|
|
@ word32 ht,
|
|
@ word32 pad_size)
|
|
@{
|
|
@ r0 => *pu1_src
|
|
@ r1 => src_strd
|
|
@ r2 => ht
|
|
@ r3 => pad_size
|
|
|
|
|
|
|
|
.globl ihevc_pad_left_chroma_a9q
|
|
|
|
.type ihevc_pad_left_chroma_a9q, %function
|
|
|
|
ihevc_pad_left_chroma_a9q:
|
|
|
|
stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments
|
|
|
|
loop_start_chroma_left:
|
|
@ pad size is assumed to be pad_left = 80
|
|
sub r4,r0,r3
|
|
|
|
ldrh r8,[r0]
|
|
add r0,r1
|
|
ldrh r9,[r0]
|
|
add r0,r1
|
|
ldrh r10,[r0]
|
|
add r0,r1
|
|
ldrh r11,[r0]
|
|
add r0,r1
|
|
|
|
vdup.u16 q0,r8
|
|
vdup.u16 q1,r9
|
|
vdup.u16 q2,r10
|
|
vdup.u16 q3,r11
|
|
|
|
add r5,r4,r1
|
|
|
|
vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4] @ 16 bytes store
|
|
|
|
add r6,r5,r1
|
|
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store
|
|
|
|
add r7,r6,r1
|
|
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store
|
|
|
|
subs r2,#4
|
|
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
|
|
@ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store
|
|
|
|
bne loop_start_chroma_left
|
|
|
|
ldmfd sp!,{r4-r11,pc} @reload the registers from sp
|
|
|
|
|
|
|
|
|
|
|
|
@/**
|
|
@*******************************************************************************
|
|
@*
|
|
@* @brief
|
|
@* padding (luma block) at the right of a 2d array
|
|
@*
|
|
@* @par description:
|
|
@* the right column of a 2d array is replicated for pad_size times at the right
|
|
@*
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* uword8 pointer to the source
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] ht
|
|
@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array
|
|
@*
|
|
@* @param[in] pad_size
|
|
@* integer -padding size of the array
|
|
@*
|
|
@* @param[in] ht
|
|
@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* none
|
|
@*
|
|
@*******************************************************************************
|
|
@*/
|
|
@.if pad_right_luma == c
|
|
@void ihevc_pad_right_luma(uword8 *pu1_src,
|
|
@ word32 src_strd,
|
|
@ word32 ht,
|
|
@ word32 pad_size)
|
|
@{
|
|
@ word32 row@
|
|
@
|
|
@ for(row = 0@ row < ht@ row++)
|
|
@ {
|
|
@ memset(pu1_src, *(pu1_src -1), pad_size)@
|
|
@
|
|
@ pu1_src += src_strd@
|
|
@ }
|
|
@}
|
|
@
|
|
@ r0 => *pu1_src
|
|
@ r1 => src_strd
|
|
@ r2 => ht
|
|
@ r3 => pad_size
|
|
|
|
|
|
|
|
.globl ihevc_pad_right_luma_a9q
|
|
|
|
.type ihevc_pad_right_luma_a9q, %function
|
|
|
|
ihevc_pad_right_luma_a9q:
|
|
|
|
stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments
|
|
|
|
loop_start_luma_right:
|
|
@ pad size is assumed to be pad_left = 80
|
|
mov r4,r0
|
|
|
|
ldrb r8,[r0, #-1]
|
|
add r0,r1
|
|
ldrb r9,[r0, #-1]
|
|
add r0,r1
|
|
ldrb r10,[r0, #-1]
|
|
add r0,r1
|
|
ldrb r11,[r0, #-1]
|
|
add r0,r1
|
|
|
|
add r5,r4,r1
|
|
add r6,r5,r1
|
|
add r7,r6,r1
|
|
|
|
vdup.u8 q0,r8
|
|
vdup.u8 q1,r9
|
|
vdup.u8 q2,r10
|
|
vdup.u8 q3,r11
|
|
|
|
vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4] @ 16 bytes store
|
|
|
|
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store
|
|
|
|
subs r2,#4
|
|
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store
|
|
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7] @128/8 = 16 bytes store
|
|
|
|
|
|
@ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store
|
|
|
|
|
|
bne loop_start_luma_right
|
|
|
|
ldmfd sp!,{r4-r11,pc} @reload the registers from sp
|
|
|
|
|
|
|
|
|
|
|
|
@/**
|
|
@*******************************************************************************
|
|
@*
|
|
@* @brief
|
|
@@* padding (chroma block) at the right of a 2d array
|
|
@*
|
|
@* @par description:
|
|
@* the right column of a 2d array is replicated for pad_size times at the right
|
|
@*
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@@* uword8 pointer to the source
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] ht
|
|
@@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array (each colour component)
|
|
@*
|
|
@* @param[in] pad_size
|
|
@* integer -padding size of the array
|
|
@*
|
|
@* @param[in] ht
|
|
@@* integer height of the array
|
|
@*
|
|
@* @param[in] wd
|
|
@* integer width of the array
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* none
|
|
@*
|
|
@*******************************************************************************
|
|
@*/
|
|
@.if pad_right_chroma == c
|
|
@void ihevc_pad_right_chroma(uword8 *pu1_src,
|
|
@ word32 src_strd,
|
|
@ word32 ht,
|
|
@ word32 pad_size)
|
|
@ r0 => *pu1_src
|
|
@ r1 => src_strd
|
|
@ r2 => ht
|
|
@ r3 => pad_size
|
|
|
|
|
|
|
|
.globl ihevc_pad_right_chroma_a9q
|
|
|
|
.type ihevc_pad_right_chroma_a9q, %function
|
|
|
|
ihevc_pad_right_chroma_a9q:
|
|
|
|
stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments
|
|
|
|
loop_start_chroma_right:
|
|
@ pad size is assumed to be pad_left = 80
|
|
mov r4,r0
|
|
|
|
ldrh r8,[r0, #-2]
|
|
add r0,r1
|
|
ldrh r9,[r0, #-2]
|
|
add r0,r1
|
|
ldrh r10,[r0, #-2]
|
|
add r0,r1
|
|
ldrh r11,[r0, #-2]
|
|
add r0,r1
|
|
|
|
vdup.u16 q0,r8
|
|
vdup.u16 q1,r9
|
|
vdup.u16 q2,r10
|
|
vdup.u16 q3,r11
|
|
|
|
add r5,r4,r1
|
|
|
|
vst1.8 {d0,d1},[r4]! @128/8 = 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4]! @ 16 bytes store
|
|
vst1.8 {d0,d1},[r4] @ 16 bytes store
|
|
|
|
add r6,r5,r1
|
|
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5]! @128/8 = 16 bytes store
|
|
vst1.8 {d2,d3},[r5] @128/8 = 16 bytes store
|
|
|
|
add r7,r6,r1
|
|
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6]! @128/8 = 16 bytes store
|
|
vst1.8 {d4,d5},[r6] @128/8 = 16 bytes store
|
|
|
|
subs r2,#4
|
|
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7]! @128/8 = 16 bytes store
|
|
vst1.8 {d6,d7},[r7] @128/8 = 16 bytes store
|
|
|
|
@ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store
|
|
|
|
bne loop_start_chroma_right
|
|
|
|
ldmfd sp!,{r4-r11,pc} @reload the registers from sp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|