You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
237 lines
6.8 KiB
237 lines
6.8 KiB
/******************************************************************************
|
|
*
|
|
* Copyright (C) 2015 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*****************************************************************************
|
|
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
*/
|
|
/**
|
|
*******************************************************************************
|
|
* @file
|
|
* ideint_cac_ssse3.c
|
|
*
|
|
* @brief
|
|
* This file includes the definitions of the combing artifact check function
|
|
* of the de-interlacer and some variant of that.
|
|
*
|
|
* @author
|
|
* Ittiam
|
|
*
|
|
* @par List of Functions:
|
|
* cac_4x8()
|
|
* ideint_cac()
|
|
*
|
|
* @remarks
|
|
* In the de-interlacer workspace, cac is not a separate assembly module as
|
|
* it comes along with the de_int_decision() function. But in C-Model, to
|
|
* keep the things cleaner, it was made to be a separate function during
|
|
* cac experiments long after the assembly was written by Mudit.
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
/*****************************************************************************/
|
|
/* File Includes */
|
|
/*****************************************************************************/
|
|
/* System include files */
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <immintrin.h>
|
|
|
|
/* User include files */
|
|
#include "icv_datatypes.h"
|
|
#include "icv_macros.h"
|
|
#include "icv.h"
|
|
#include "icv_variance.h"
|
|
#include "icv_sad.h"
|
|
#include "ideint.h"
|
|
#include "ideint_defs.h"
|
|
#include "ideint_structs.h"
|
|
#include "ideint_cac.h"
|
|
|
|
/**
|
|
*******************************************************************************
|
|
*
|
|
* @brief
|
|
* Combing artifact check function for 8x8 block
|
|
*
|
|
* @par Description
|
|
* Determines CAC for 8x8 block by calling 8x4 CAC function
|
|
*
|
|
* @param[in] pu1_top
|
|
* Top field
|
|
*
|
|
* @param[in] pu1_bot
|
|
* Bottom field
|
|
*
|
|
* @param[in] top_strd
|
|
* Top field Stride
|
|
*
|
|
* @param[in] bot_strd
|
|
* Bottom field stride
|
|
*
|
|
* @returns
|
|
* combing artifact flag (1 = detected, 0 = not detected)
|
|
*
|
|
* @remarks
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
|
|
UWORD8 *pu1_bot,
|
|
WORD32 top_strd,
|
|
WORD32 bot_strd)
|
|
{
|
|
WORD32 ca; /* combing artifact result */
|
|
WORD32 i;
|
|
WORD32 adj[2] = {0};
|
|
WORD32 alt[2] = {0};
|
|
WORD32 sum_1, sum_2, sum_3, sum_4;
|
|
WORD32 sum_diff, diff_sum;
|
|
|
|
__m128i top[4];
|
|
__m128i bot[4];
|
|
__m128i sum_t[4];
|
|
__m128i sum_b[4];
|
|
__m128i zero;
|
|
|
|
|
|
zero = _mm_setzero_si128();
|
|
|
|
for(i = 0; i < 4; i++)
|
|
{
|
|
/* Load top */
|
|
top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top));
|
|
pu1_top += top_strd;
|
|
|
|
/* Load bottom */
|
|
bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot));
|
|
pu1_bot += bot_strd;
|
|
|
|
/* Unpack */
|
|
top[i] = _mm_unpacklo_epi8(top[i], zero);
|
|
bot[i] = _mm_unpacklo_epi8(bot[i], zero);
|
|
|
|
/* Compute row sums */
|
|
sum_t[i] = _mm_sad_epu8(top[i], zero);
|
|
sum_b[i] = _mm_sad_epu8(bot[i], zero);
|
|
}
|
|
|
|
/* Compute row based alt and adj */
|
|
for(i = 0; i < 4; i += 2)
|
|
{
|
|
sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]);
|
|
sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]);
|
|
sum_diff = ABS_DIF(sum_1, sum_2);
|
|
if(sum_diff >= RSUM_CSUM_THRESH)
|
|
adj[0] += sum_diff;
|
|
|
|
sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]);
|
|
sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]);
|
|
sum_diff = ABS_DIF(sum_3, sum_4);
|
|
if(sum_diff >= RSUM_CSUM_THRESH)
|
|
adj[0] += sum_diff;
|
|
|
|
alt[0] += ABS_DIF(sum_1, sum_3);
|
|
alt[0] += ABS_DIF(sum_2, sum_4);
|
|
|
|
sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8));
|
|
sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8));
|
|
sum_diff = ABS_DIF(sum_1, sum_2);
|
|
if(sum_diff >= RSUM_CSUM_THRESH)
|
|
adj[1] += sum_diff;
|
|
|
|
sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8));
|
|
sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8));
|
|
sum_diff = ABS_DIF(sum_3, sum_4);
|
|
if(sum_diff >= RSUM_CSUM_THRESH)
|
|
adj[1] += sum_diff;
|
|
|
|
alt[1] += ABS_DIF(sum_1, sum_3);
|
|
alt[1] += ABS_DIF(sum_2, sum_4);
|
|
}
|
|
|
|
/* Compute column based adj */
|
|
{
|
|
__m128i avg1, avg2;
|
|
__m128i top_avg, bot_avg;
|
|
__m128i min, max, diff, thresh;
|
|
__m128i mask;
|
|
avg1 = _mm_avg_epu8(top[0], top[1]);
|
|
avg2 = _mm_avg_epu8(top[2], top[3]);
|
|
top_avg = _mm_avg_epu8(avg1, avg2);
|
|
|
|
avg1 = _mm_avg_epu8(bot[0], bot[1]);
|
|
avg2 = _mm_avg_epu8(bot[2], bot[3]);
|
|
bot_avg = _mm_avg_epu8(avg1, avg2);
|
|
|
|
min = _mm_min_epu8(top_avg, bot_avg);
|
|
max = _mm_max_epu8(top_avg, bot_avg);
|
|
|
|
diff = _mm_sub_epi16(max, min);
|
|
thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
|
|
|
|
mask = _mm_cmpgt_epi16(diff, thresh);
|
|
diff = _mm_and_si128(diff, mask);
|
|
|
|
diff_sum = _mm_extract_epi16(diff, 0);
|
|
diff_sum += _mm_extract_epi16(diff, 1);
|
|
diff_sum += _mm_extract_epi16(diff, 2);
|
|
diff_sum += _mm_extract_epi16(diff, 3);
|
|
|
|
adj[0] += diff_sum << 2;
|
|
|
|
diff_sum = _mm_extract_epi16(diff, 4);
|
|
diff_sum += _mm_extract_epi16(diff, 5);
|
|
diff_sum += _mm_extract_epi16(diff, 6);
|
|
diff_sum += _mm_extract_epi16(diff, 7);
|
|
|
|
adj[1] += diff_sum << 2;
|
|
|
|
}
|
|
|
|
/* Compute column based alt */
|
|
{
|
|
__m128i avg1, avg2;
|
|
__m128i even_avg, odd_avg, diff;
|
|
avg1 = _mm_avg_epu8(top[0], bot[0]);
|
|
avg2 = _mm_avg_epu8(top[2], bot[2]);
|
|
even_avg = _mm_avg_epu8(avg1, avg2);
|
|
|
|
avg1 = _mm_avg_epu8(top[1], bot[1]);
|
|
avg2 = _mm_avg_epu8(top[3], bot[3]);
|
|
odd_avg = _mm_avg_epu8(avg1, avg2);
|
|
|
|
diff = _mm_sad_epu8(even_avg, odd_avg);
|
|
|
|
|
|
diff_sum = _mm_cvtsi128_si32(diff);
|
|
alt[0] += diff_sum << 2;
|
|
|
|
diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8));
|
|
alt[1] += diff_sum << 2;
|
|
|
|
}
|
|
alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
|
|
alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
|
|
|
|
ca = (alt[0] < adj[0]);
|
|
ca |= (alt[1] < adj[1]);
|
|
|
|
return ca;
|
|
}
|
|
|