@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/*
@//----------------------------------------------------------------------------
@// File Name            : impeg2_format_conv.s
@//
@// Description          : This file has the NEON implementations of the
@//                        YUV420P to YUV420SP (UV- and VU-interleaved)
@//                        format conversion routines.
@//
@// Reference Document   :
@//
@// Revision History     :
@//      Date            Author                  Detail Description
@//   ------------    ----------------    ----------------------------------
@//   Jul 07, 2008     Naveen Kumar T                Created
@//
@//-------------------------------------------------------------------------
@*/

@/*
@// ----------------------------------------------------------------------------
@// Include Files
@// ----------------------------------------------------------------------------
@*/
.text
|
|
.p2align 2
|
|
.equ log2_16 , 4
|
|
.equ log2_2 , 1

@/*
@// ----------------------------------------------------------------------------
@// Struct/Union Types and Define
@// ----------------------------------------------------------------------------
@*/

@/*
@// ----------------------------------------------------------------------------
@// Static Global Data section variables
@// ----------------------------------------------------------------------------
@*/
@//--------------------------- NONE --------------------------------------------

@/*
@// ----------------------------------------------------------------------------
@// Static Prototype Functions
@// ----------------------------------------------------------------------------
@*/
@// -------------------------- NONE --------------------------------------------

@/*
@// ----------------------------------------------------------------------------
@// Exported functions
@// ----------------------------------------------------------------------------
@*/

@/*****************************************************************************
|
|
@* *
|
|
@* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q() *
|
|
@* *
|
|
@* Description : This function conversts the image from YUV420P color *
|
|
@* space to 420SP color space(UV interleaved). *
|
|
@* *
|
|
@* Arguments : R0 pu1_y *
|
|
@* R1 pu1_u *
|
|
@* R2 pu1_v *
|
|
@* R3 pu1_dest_y *
|
|
@* [R13 #40] pu1_dest_uv *
|
|
@* [R13 #44] u2_height *
|
|
@* [R13 #48] u2_width *
|
|
@* [R13 #52] u2_stridey *
|
|
@* [R13 #56] u2_strideu *
|
|
@* [R13 #60] u2_stridev *
|
|
@* [R13 #64] u2_dest_stride_y *
|
|
@* [R13 #68] u2_dest_stride_uv *
|
|
@* [R13 #72] convert_uv_only *
|
|
@* *
|
|
@* Values Returned : None *
|
|
@* *
|
|
@* Register Usage : R0 - R8, Q0 *
|
|
@* *
|
|
@* Stack Usage : 24 Bytes *
|
|
@* *
|
|
@* Interruptibility : Interruptible *
|
|
@* *
|
|
@* Known Limitations *
|
|
@* Assumptions: Image Width: Assumed to be multiple of 16 and *
|
|
@* greater than or equal to 16 *
|
|
@* Image Height: Assumed to be even. *
|
|
@* *
|
|
@* Revision History : *
|
|
@* DD MM YYYY Author(s) Changes (Describe the changes made) *
|
|
@* 07 06 2010 Varshita Draft *
|
|
@* 07 06 2010 Naveen Kr T Completed *
|
|
@* *
|
|
@*****************************************************************************/
|
|
.global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q
|
|
impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q:
|
|
|
|
@// push the registers on the stack
|
|
stmfd sp!, {r4-r8, lr}
|
|
|
|
ldr r4, [sp, #56] @// Load convert_uv_only
|
|
|
|
cmp r4, #1
|
|
beq yuv420sp_uv_chroma
|
|
@/* Do the preprocessing before the main loops start */
|
|
@// Load the parameters from stack
|
|
ldr r4, [sp, #28] @// Load u2_height from stack
|
|
|
|
ldr r5, [sp, #32] @// Load u2_width from stack
|
|
|
|
ldr r7, [sp, #36] @// Load u2_stridey from stack
|
|
|
|
ldr r8, [sp, #48] @// Load u2_dest_stride_y from stack
|
|
|
|
sub r7, r7, r5 @// Source increment
|
|
|
|
sub r8, r8, r5 @// Destination increment
|
|
|
|
|
|
yuv420sp_uv_row_loop_y:
|
|
mov r6, r5
|
|
|
|
yuv420sp_uv_col_loop_y:
|
|
pld [r0, #128]
|
|
vld1.8 {q0}, [r0]!
|
|
vst1.8 {q0}, [r3]!
|
|
sub r6, r6, #16
|
|
cmp r6, #15
|
|
bgt yuv420sp_uv_col_loop_y
|
|
|
|
cmp r6, #0
|
|
beq yuv420sp_uv_row_loop_end_y
|
|
@//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
|
|
@//Ex if width is 162, above loop will process 160 pixels. And
|
|
@//Both source and destination will point to 146th pixel and then 16 bytes will be read
|
|
@// and written using VLD1 and VST1
|
|
rsb r6, r6, #16
|
|
sub r0, r0, r6
|
|
sub r3, r3, r6
|
|
|
|
vld1.8 {q0}, [r0]!
|
|
vst1.8 {q0}, [r3]!
|
|
|
|
yuv420sp_uv_row_loop_end_y:
|
|
add r0, r0, r7
|
|
add r3, r3, r8
|
|
subs r4, r4, #1
|
|
bgt yuv420sp_uv_row_loop_y
|
|
|
|
yuv420sp_uv_chroma:
|
|
|
|
ldr r3, [sp, #24] @// Load pu1_dest_uv from stack
|
|
|
|
ldr r4, [sp, #28] @// Load u2_height from stack
|
|
add r4, r4, 1
|
|
|
|
ldr r5, [sp, #32] @// Load u2_width from stack
|
|
add r5, r5, 1
|
|
bic r5, r5, #1
|
|
|
|
ldr r7, [sp, #40] @// Load u2_strideu from stack
|
|
|
|
ldr r8, [sp, #52] @// Load u2_dest_stride_uv from stack
|
|
|
|
sub r7, r7, r5, lsr #1 @// Source increment
|
|
|
|
sub r8, r8, r5 @// Destination increment
|
|
|
|
mov r5, r5, lsr #1
|
|
mov r4, r4, lsr #1
|
|
ldr r3, [sp, #24] @// Load pu1_dest_uv from stack
|
|
yuv420sp_uv_row_loop_uv:
|
|
mov r6, r5
|
|
|
|
|
|
yuv420sp_uv_col_loop_uv:
|
|
pld [r1, #128]
|
|
pld [r2, #128]
|
|
vld1.8 d0, [r1]!
|
|
vld1.8 d1, [r2]!
|
|
vst2.8 {d0, d1}, [r3]!
|
|
sub r6, r6, #8
|
|
cmp r6, #7
|
|
bgt yuv420sp_uv_col_loop_uv
|
|
|
|
cmp r6, #0
|
|
beq yuv420sp_uv_row_loop_end_uv
|
|
@//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
|
|
@//Ex if width is 162, above loop will process 160 pixels. And
|
|
@//Both source and destination will point to 146th pixel and then 16 bytes will be read
|
|
@// and written using VLD1 and VST1
|
|
rsb r6, r6, #8
|
|
sub r1, r1, r6
|
|
sub r2, r2, r6
|
|
sub r3, r3, r6, lsl #1
|
|
|
|
vld1.8 d0, [r1]!
|
|
vld1.8 d1, [r2]!
|
|
vst2.8 {d0, d1}, [r3]!
|
|
|
|
yuv420sp_uv_row_loop_end_uv:
|
|
add r1, r1, r7
|
|
add r2, r2, r7
|
|
add r3, r3, r8
|
|
subs r4, r4, #1
|
|
bgt yuv420sp_uv_row_loop_uv
|
|
@//POP THE REGISTERS
|
|
ldmfd sp!, {r4-r8, pc}
|
|
|
|
|
|
|
|
|
|
|
|
@/*****************************************************************************
|
|
@* *
|
|
@* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q() *
|
|
@* *
|
|
@* Description : This function conversts the image from YUV420P color *
|
|
@* space to 420SP color space(VU interleaved). *
|
|
@* This function is similar to above function *
|
|
@* IMP4D_CXA8_YUV420toYUV420SP_VU with a difference in *
|
|
@* VLD1.8 for chroma - order of registers is different *
|
|
@* *
|
|
@* Arguments : R0 pu1_y *
|
|
@* R1 pu1_u *
|
|
@* R2 pu1_v *
|
|
@* R3 pu1_dest_y *
|
|
@* [R13 #40] pu1_dest_uv *
|
|
@* [R13 #44] u2_height *
|
|
@* [R13 #48] u2_width *
|
|
@* [R13 #52] u2_stridey *
|
|
@* [R13 #56] u2_strideu *
|
|
@* [R13 #60] u2_stridev *
|
|
@* [R13 #64] u2_dest_stride_y *
|
|
@* [R13 #68] u2_dest_stride_uv *
|
|
@* [R13 #72] convert_uv_only *
|
|
@* *
|
|
@* Values Returned : None *
|
|
@* *
|
|
@* Register Usage : R0 - R8, Q0 *
|
|
@* *
|
|
@* Stack Usage : 24 Bytes *
|
|
@* *
|
|
@* Interruptibility : Interruptible *
|
|
@* *
|
|
@* Known Limitations *
|
|
@* Assumptions: Image Width: Assumed to be multiple of 16 and *
|
|
@* greater than or equal to 16 *
|
|
@* Image Height: Assumed to be even. *
|
|
@* *
|
|
@* Revision History : *
|
|
@* DD MM YYYY Author(s) Changes (Describe the changes made) *
|
|
@* 07 06 2010 Varshita Draft *
|
|
@* 07 06 2010 Naveen Kr T Completed *
|
|
@* *
|
|
@*****************************************************************************/
|
|
|
|
.global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q
|
|
impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q:
|
|
|
|
@// push the registers on the stack
|
|
stmfd sp!, {r4-r8, lr}
|
|
|
|
ldr r4, [sp, #56] @// Load convert_uv_only
|
|
|
|
cmp r4, #1
|
|
beq yuv420sp_vu_chroma
|
|
|
|
@/* Do the preprocessing before the main loops start */
|
|
@// Load the parameters from stack
|
|
ldr r4, [sp, #28] @// Load u2_height from stack
|
|
|
|
ldr r5, [sp, #32] @// Load u2_width from stack
|
|
|
|
ldr r7, [sp, #36] @// Load u2_stridey from stack
|
|
|
|
ldr r8, [sp, #48] @// Load u2_dest_stride_y from stack
|
|
|
|
sub r7, r7, r5 @// Source increment
|
|
|
|
sub r8, r8, r5 @// Destination increment
|
|
|
|
|
|
yuv420sp_vu_row_loop_y:
|
|
mov r6, r5
|
|
|
|
yuv420sp_vu_col_loop_y:
|
|
pld [r0, #128]
|
|
vld1.8 {q0}, [r0]!
|
|
vst1.8 {q0}, [r3]!
|
|
sub r6, r6, #16
|
|
cmp r6, #15
|
|
bgt yuv420sp_vu_col_loop_y
|
|
|
|
cmp r6, #0
|
|
beq yuv420sp_vu_row_loop_end_y
|
|
@//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
|
|
@//Ex if width is 162, above loop will process 160 pixels. And
|
|
@//Both source and destination will point to 146th pixel and then 16 bytes will be read
|
|
@// and written using VLD1 and VST1
|
|
rsb r6, r6, #16
|
|
sub r0, r0, r6
|
|
sub r3, r3, r6
|
|
|
|
vld1.8 {q0}, [r0]!
|
|
vst1.8 {q0}, [r3]!
|
|
|
|
yuv420sp_vu_row_loop_end_y:
|
|
add r0, r0, r7
|
|
add r3, r3, r8
|
|
subs r4, r4, #1
|
|
bgt yuv420sp_vu_row_loop_y
|
|
|
|
yuv420sp_vu_chroma:
|
|
|
|
ldr r3, [sp, #24] @// Load pu1_dest_uv from stack
|
|
|
|
ldr r4, [sp, #28] @// Load u2_height from stack
|
|
add r4, r4, 1
|
|
|
|
ldr r5, [sp, #32] @// Load u2_width from stack
|
|
add r5, r5, 1
|
|
bic r5, r5, #1
|
|
|
|
ldr r7, [sp, #40] @// Load u2_strideu from stack
|
|
|
|
ldr r8, [sp, #52] @// Load u2_dest_stride_uv from stack
|
|
|
|
sub r7, r7, r5, lsr #1 @// Source increment
|
|
|
|
sub r8, r8, r5 @// Destination increment
|
|
|
|
mov r5, r5, lsr #1
|
|
mov r4, r4, lsr #1
|
|
ldr r3, [sp, #24] @// Load pu1_dest_uv from stack
|
|
yuv420sp_vu_row_loop_uv:
|
|
mov r6, r5
|
|
|
|
|
|
yuv420sp_vu_col_loop_uv:
|
|
pld [r1, #128]
|
|
pld [r2, #128]
|
|
vld1.8 d1, [r1]!
|
|
vld1.8 d0, [r2]!
|
|
vst2.8 {d0, d1}, [r3]!
|
|
sub r6, r6, #8
|
|
cmp r6, #7
|
|
bgt yuv420sp_vu_col_loop_uv
|
|
|
|
cmp r6, #0
|
|
beq yuv420sp_vu_row_loop_end_uv
|
|
@//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
|
|
@//Ex if width is 162, above loop will process 160 pixels. And
|
|
@//Both source and destination will point to 146th pixel and then 16 bytes will be read
|
|
@// and written using VLD1 and VST1
|
|
rsb r6, r6, #8
|
|
sub r1, r1, r6
|
|
sub r2, r2, r6
|
|
sub r3, r3, r6, lsl #1
|
|
|
|
vld1.8 d1, [r1]!
|
|
vld1.8 d0, [r2]!
|
|
vst2.8 {d0, d1}, [r3]!
|
|
|
|
yuv420sp_vu_row_loop_end_uv:
|
|
add r1, r1, r7
|
|
add r2, r2, r7
|
|
add r3, r3, r8
|
|
subs r4, r4, #1
|
|
bgt yuv420sp_vu_row_loop_uv
|
|
@//POP THE REGISTERS
|
|
ldmfd sp!, {r4-r8, pc}
|
|
|
|
|
|
|
|
|
|
|