You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1069 lines
45 KiB
1069 lines
45 KiB
/*
|
|
* Copyright (C) 2016 The Android Open Source Project
|
|
* Copyright (C) 2016 Mopria Alliance, Inc.
|
|
* Copyright (C) 2013 Hewlett-Packard Development Company, L.P.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "wprint_scaler.h"
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
|
|
// Clears the two low-order bits of x, i.e. rounds x down to a multiple of 4.
#define ROUND_4_DOWN(x)          ((x) & ~0x3)

// Rounds x up to a multiple of 4 by biasing with 3 and then rounding down.
#define ROUND_4_UP(x)            (ROUND_4_DOWN(3 + (x)))

// Number of fraction bits generated by _scaler_fraction_part() before it
// left-aligns the result into a 32-bit word.
#define PSCALER_FRACT_BITS_COUNT (24)
|
|
|
|
// Selects how _scaler_fraction_part() treats a remainder that is left over
// once all fraction bits have been produced.
typedef enum {
    FRACTION_ROUND_UP = 0,  // bump the fraction by one unit when a remainder is left
    FRACTION_TRUNCATE = 1   // drop the remainder
} pscaler_fraction_t;
|
|
|
|
static uint32
|
|
_scaler_fraction_part(uint32 iNum, uint32 iDen, pscaler_fraction_t mode, bool_t *overflow);
|
|
|
|
static void _hw_scale_image_plane(scaler_config_t *pscaler_config, scaler_mode_t scaleMode);
|
|
|
|
static void _calculate_factors(scaler_config_t *pscaler_config, scaler_mode_t scaleMode);
|
|
|
|
/*
 * Initialises *pscaler_config for scaling a whole image.
 *
 * Stores the source/output dimensions, picks the scale mode from how the two
 * axes compare, computes the scale factors, and then resets the row window to
 * cover the full image with all buffer pointers cleared.
 *
 * Mode selection:
 *   - neither axis grows            -> PSCALER_SCALE_DOWN
 *   - neither axis shrinks          -> PSCALER_SCALE_UP
 *   - width shrinks, height grows   -> PSCALER_SCALE_MIXED_YUP
 *   - width grows, height shrinks   -> PSCALER_SCALE_MIXED_XUP
 * Identical dimensions on both axes satisfy the first test and select
 * PSCALER_SCALE_DOWN.
 */
void scaler_make_image_scaler_tables(uint16 image_input_width, uint16 image_input_buf_width,
        uint16 image_output_width, uint16 image_output_buf_width, uint16 image_input_height,
        uint16 image_output_height, scaler_config_t *pscaler_config) {
    scaler_config_t *const cfg = pscaler_config;
    const int width_not_growing = (image_input_width >= image_output_width);
    const int height_not_growing = (image_input_height >= image_output_height);
    const int width_not_shrinking = (image_input_width <= image_output_width);
    const int height_not_shrinking = (image_input_height <= image_output_height);

    // Record the image geometry first; _calculate_factors() reads it back.
    cfg->iSrcWidth = image_input_width;
    cfg->iSrcHeight = image_input_height;
    cfg->iOutWidth = image_output_width;
    cfg->iOutHeight = image_output_height;

    if (width_not_growing && height_not_growing) {
        cfg->scaleMode = PSCALER_SCALE_DOWN;
    } else if (width_not_shrinking && height_not_shrinking) {
        cfg->scaleMode = PSCALER_SCALE_UP;
    } else if (image_input_width > image_output_width) {
        // width shrinks while height grows: Y axis is scaled up first
        cfg->scaleMode = PSCALER_SCALE_MIXED_YUP;
    } else {
        // width grows while height shrinks: X axis is scaled up first
        cfg->scaleMode = PSCALER_SCALE_MIXED_XUP;
    }

    _calculate_factors(cfg, cfg->scaleMode);

    // Source window: rows 0 .. image height, using the caller's buffer width.
    cfg->fSrcStartRow.decimal = 0;
    cfg->fSrcStartRow.fraction = 0;
    cfg->iSrcStartRow = 0;
    cfg->iSrcEndRow = cfg->iSrcHeight;
    cfg->iSrcBufWidth = image_input_buf_width;

    // Output window: rows 0 .. image height, using the caller's buffer width.
    cfg->iOutStartRow = 0;
    cfg->iOutEndRow = cfg->iOutHeight;
    cfg->iOutBufWidth = image_output_buf_width;

    // No buffers are attached until scaler_scale_image_data() is called.
    cfg->pSrcBuf = NULL;
    cfg->pOutBuf = NULL;
    cfg->pTmpBuf = NULL;
}
|
|
|
|
void scaler_calculate_scaling_rows(uint16 start_output_row_number, uint16 end_output_row_number,
|
|
void *tables_ptr, uint16 *start_input_row_number, uint16 *end_input_row_number,
|
|
uint16 *num_output_rows_generated, uint16 *num_rows_offset_to_start_output_row,
|
|
uint32 *mixed_axis_temp_buffer_size_needed) {
|
|
float64_t fSrcEndRow;
|
|
bool_t overflow;
|
|
scaler_config_t *pscaler_config;
|
|
|
|
pscaler_config = (scaler_config_t *) tables_ptr;
|
|
assert (start_output_row_number < pscaler_config->iOutHeight);
|
|
|
|
// copy the output start and end rows
|
|
// Don't ever attempt to output a single row from the scaler.
|
|
if (end_output_row_number == start_output_row_number) {
|
|
if (start_output_row_number == 0) {
|
|
pscaler_config->iOutStartRow = start_output_row_number;
|
|
pscaler_config->iOutEndRow = end_output_row_number + 1;
|
|
*num_rows_offset_to_start_output_row = 0;
|
|
} else {
|
|
pscaler_config->iOutStartRow = start_output_row_number - 1;
|
|
pscaler_config->iOutEndRow = end_output_row_number;
|
|
*num_rows_offset_to_start_output_row = 1;
|
|
}
|
|
} else {
|
|
pscaler_config->iOutStartRow = start_output_row_number;
|
|
pscaler_config->iOutEndRow = end_output_row_number;
|
|
*num_rows_offset_to_start_output_row = 0;
|
|
}
|
|
|
|
if (pscaler_config->iOutEndRow >= pscaler_config->iOutHeight) { // last stripe
|
|
pscaler_config->iOutEndRow = pscaler_config->iOutHeight - 1;
|
|
}
|
|
|
|
if (pscaler_config->scaleMode == PSCALER_SCALE_UP ||
|
|
pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP) {
|
|
// scale factors are calculated as dim-1/dim-1
|
|
pscaler_config->iSrcHeight--;
|
|
pscaler_config->iOutHeight--;
|
|
}
|
|
|
|
pscaler_config->fSrcStartRow.decimal = (uint32) pscaler_config->iOutStartRow *
|
|
(uint32) pscaler_config->iSrcHeight / (uint32) pscaler_config->iOutHeight;
|
|
|
|
pscaler_config->fSrcStartRow.fraction = _scaler_fraction_part(
|
|
(uint32) pscaler_config->iOutStartRow * (uint32) pscaler_config->iSrcHeight,
|
|
(uint32) pscaler_config->iOutHeight, FRACTION_ROUND_UP, &overflow);
|
|
|
|
if (overflow) {
|
|
pscaler_config->fSrcStartRow.decimal++;
|
|
}
|
|
|
|
pscaler_config->iSrcStartRow = pscaler_config->fSrcStartRow.decimal;
|
|
|
|
if (pscaler_config->scaleMode == PSCALER_SCALE_UP ||
|
|
pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP) {
|
|
fSrcEndRow.decimal = (uint32) pscaler_config->iOutEndRow *
|
|
(uint32) pscaler_config->iSrcHeight / (uint32) pscaler_config->iOutHeight;
|
|
fSrcEndRow.fraction = _scaler_fraction_part(
|
|
(uint32) pscaler_config->iOutEndRow * (uint32) pscaler_config->iSrcHeight,
|
|
(uint32) pscaler_config->iOutHeight, FRACTION_TRUNCATE, &overflow);
|
|
|
|
pscaler_config->iSrcEndRow = (uint16) fSrcEndRow.decimal;
|
|
|
|
if (0 != fSrcEndRow.fraction) {
|
|
// will cause an extra output row to be created...
|
|
pscaler_config->iSrcEndRow++;
|
|
pscaler_config->iOutEndRow++;
|
|
}
|
|
|
|
// restore dimensions
|
|
pscaler_config->iSrcHeight++;
|
|
pscaler_config->iOutHeight++;
|
|
} else {
|
|
fSrcEndRow.decimal = (uint32) (pscaler_config->iOutEndRow + 1) *
|
|
(uint32) pscaler_config->iSrcHeight /
|
|
(uint32) pscaler_config->iOutHeight;
|
|
|
|
fSrcEndRow.fraction = _scaler_fraction_part(
|
|
(uint32) (pscaler_config->iOutEndRow + 1) * (uint32) pscaler_config->iSrcHeight,
|
|
(uint32) pscaler_config->iOutHeight, FRACTION_TRUNCATE, &overflow);
|
|
|
|
pscaler_config->iSrcEndRow = (uint16) fSrcEndRow.decimal;
|
|
|
|
if (0 == fSrcEndRow.fraction) {
|
|
pscaler_config->iSrcEndRow--;
|
|
}
|
|
}
|
|
|
|
// check to be sure we're not going beyond the source image
|
|
if (pscaler_config->iSrcEndRow >= pscaler_config->iSrcHeight) { // last stripe
|
|
pscaler_config->iSrcEndRow = pscaler_config->iSrcHeight - 1;
|
|
}
|
|
|
|
*start_input_row_number = pscaler_config->iSrcStartRow;
|
|
*end_input_row_number = pscaler_config->iSrcEndRow;
|
|
*num_output_rows_generated = (pscaler_config->iOutEndRow - pscaler_config->iOutStartRow + 1);
|
|
|
|
// Calculate the 2nd pass buffer size if mixed scaling is done
|
|
if (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_XUP) {
|
|
*mixed_axis_temp_buffer_size_needed =
|
|
ROUND_4_UP(pscaler_config->iOutWidth + 1) *
|
|
(*end_input_row_number - *start_input_row_number + 1);
|
|
} else if (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP) {
|
|
*mixed_axis_temp_buffer_size_needed =
|
|
ROUND_4_UP(pscaler_config->iSrcWidth) * (*num_output_rows_generated + 1);
|
|
} else {
|
|
*mixed_axis_temp_buffer_size_needed = 0;
|
|
}
|
|
|
|
(*num_output_rows_generated)++;
|
|
}
|
|
|
|
/*
 * Scales the rows currently described by the scaler configuration.
 *
 * input_plane                          source pixels
 * tables_ptr                           scaler_config_t set up by the table/row helpers
 * scaled_output_plane                  destination pixels
 * temp_buffer_for_mixed_axis_scaling   scratch buffer; only used by the mixed modes
 *
 * PSCALER_SCALE_UP / PSCALER_SCALE_DOWN run a single pass.  The mixed modes run
 * two passes through the scratch buffer: an up-scale of one axis with the other
 * axis forced to 1:1, then a down-scale of the remaining axis.  Every dimension
 * and row field that is overridden for a pass is restored afterwards; the scale
 * factors are left as computed for the last pass.
 */
void scaler_scale_image_data(uint8 *input_plane, void *tables_ptr, uint8 *scaled_output_plane,
        uint8 *temp_buffer_for_mixed_axis_scaling) {
    scaler_config_t *cfg = (scaler_config_t *) tables_ptr;
    uint8 *caller_out;
    uint8 *caller_src;

    cfg->pSrcBuf = input_plane;
    cfg->pOutBuf = scaled_output_plane;

    // Single-pass modes need no scratch buffer.
    if ((PSCALER_SCALE_MIXED_XUP != cfg->scaleMode) &&
            (PSCALER_SCALE_MIXED_YUP != cfg->scaleMode)) {
        _hw_scale_image_plane(cfg, cfg->scaleMode);
        return;
    }

    cfg->pTmpBuf = temp_buffer_for_mixed_axis_scaling;

    // ---- pass 1: up-scale into the scratch buffer ----
    caller_out = cfg->pOutBuf;
    cfg->pOutBuf = cfg->pTmpBuf;

    if (PSCALER_SCALE_MIXED_YUP == cfg->scaleMode) {
        // Y is scaled up; X stays 1:1, so output widths borrow the source widths.
        uint16 kept_out_width = cfg->iOutWidth;
        uint16 kept_out_buf_width = cfg->iOutBufWidth;

        cfg->iOutWidth = cfg->iSrcWidth;
        cfg->iOutBufWidth = cfg->iSrcBufWidth;

        _calculate_factors(cfg, PSCALER_SCALE_UP);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_UP);

        cfg->iOutWidth = kept_out_width;
        cfg->iOutBufWidth = kept_out_buf_width;
    } else {
        // X is scaled up; Y stays 1:1, so the output rows mirror the source rows.
        uint16 kept_out_height = cfg->iOutHeight;
        uint16 kept_out_start = cfg->iOutStartRow;
        uint16 kept_out_end = cfg->iOutEndRow;
        float64_t kept_src_start;

        kept_src_start.fraction = cfg->fSrcStartRow.fraction;

        cfg->iOutHeight = cfg->iSrcHeight;
        cfg->iOutStartRow = cfg->iSrcStartRow;
        cfg->iOutEndRow = cfg->iSrcEndRow;
        cfg->fSrcStartRow.fraction = 0;

        _calculate_factors(cfg, PSCALER_SCALE_UP);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_UP);

        cfg->iOutHeight = kept_out_height;
        cfg->iOutStartRow = kept_out_start;
        cfg->iOutEndRow = kept_out_end;
        cfg->fSrcStartRow.fraction = kept_src_start.fraction;
    }

    // ---- pass 2: down-scale from the scratch buffer into the caller's output ----
    cfg->pOutBuf = caller_out;
    caller_src = cfg->pSrcBuf;
    cfg->pSrcBuf = cfg->pTmpBuf;

    if (PSCALER_SCALE_MIXED_YUP == cfg->scaleMode) {
        // Y is already at output size; source rows mirror the output rows.
        uint16 kept_src_height = cfg->iSrcHeight;
        uint16 kept_src_start_row = cfg->iSrcStartRow;
        uint16 kept_src_end_row = cfg->iSrcEndRow;
        float64_t kept_src_start;

        kept_src_start.decimal = cfg->fSrcStartRow.decimal;
        kept_src_start.fraction = cfg->fSrcStartRow.fraction;

        cfg->iSrcHeight = cfg->iOutHeight;
        cfg->iSrcStartRow = cfg->iOutStartRow;
        cfg->iSrcEndRow = cfg->iOutEndRow;
        cfg->fSrcStartRow.decimal = cfg->iOutStartRow;
        cfg->fSrcStartRow.fraction = 0;

        _calculate_factors(cfg, PSCALER_SCALE_DOWN);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_DOWN);

        cfg->iSrcHeight = kept_src_height;
        cfg->iSrcStartRow = kept_src_start_row;
        cfg->iSrcEndRow = kept_src_end_row;
        cfg->fSrcStartRow.decimal = kept_src_start.decimal;
        cfg->fSrcStartRow.fraction = kept_src_start.fraction;
    } else {
        // X is already at output size; source widths borrow the output widths.
        uint16 kept_src_width = cfg->iSrcWidth;
        uint16 kept_src_buf_width = cfg->iSrcBufWidth;

        cfg->iSrcWidth = cfg->iOutWidth;
        cfg->iSrcBufWidth = cfg->iOutBufWidth;

        _calculate_factors(cfg, PSCALER_SCALE_DOWN);
        _hw_scale_image_plane(cfg, PSCALER_SCALE_DOWN);

        cfg->iSrcWidth = kept_src_width;
        cfg->iSrcBufWidth = kept_src_buf_width;
    }

    // Hand the caller's source buffer back and detach the scratch buffer.
    cfg->pSrcBuf = caller_src;
    cfg->pTmpBuf = NULL;
}
|
|
|
|
/*
 * Recomputes the four fixed-point scale factors held in *pscaler_config:
 *   fXfactor    = out width  / src width    (fraction truncated)
 *   fXfactorInv = src width  / out width    (fraction rounded up)
 *   fYfactor    = out height / src height   (fraction truncated)
 *   fYfactorInv = src height / out height   (fraction rounded up)
 *
 * For an axis that is being scaled up the ratio is taken between (dim - 1)
 * values; the dimensions are decremented for the computation and restored
 * before returning.
 *
 * NOTE(review): the scaleMode argument is never read - the axis adjustment is
 * driven by pscaler_config->scaleMode, even when a caller passes a different
 * mode for an intermediate pass.  Confirm this is intended before changing it.
 */
static void _calculate_factors(scaler_config_t *pscaler_config, scaler_mode_t scaleMode) {
    scaler_config_t *const cfg = pscaler_config;
    const int rows_scale_up = (cfg->scaleMode == PSCALER_SCALE_UP) ||
            (cfg->scaleMode == PSCALER_SCALE_MIXED_YUP);
    const int cols_scale_up = (cfg->scaleMode == PSCALER_SCALE_UP) ||
            (cfg->scaleMode == PSCALER_SCALE_MIXED_XUP);
    bool_t carried;
    uint32 src_w, out_w, src_h, out_h;

    // scale-up factors are computed as (dim-1)/(dim-1)
    if (rows_scale_up) {
        cfg->iSrcHeight--;
        cfg->iOutHeight--;
    }
    if (cols_scale_up) {
        cfg->iSrcWidth--;
        cfg->iOutWidth--;
    }

    src_w = (uint32) cfg->iSrcWidth;
    out_w = (uint32) cfg->iOutWidth;
    src_h = (uint32) cfg->iSrcHeight;
    out_h = (uint32) cfg->iOutHeight;

    // horizontal factor; a truncated fraction cannot carry, so the flag is ignored
    cfg->fXfactor.decimal = out_w / src_w;
    cfg->fXfactor.fraction = _scaler_fraction_part(out_w, src_w, FRACTION_TRUNCATE, &carried);

    // horizontal inverse factor, rounded up; a carry bumps the integer part
    cfg->fXfactorInv.decimal = src_w / out_w;
    cfg->fXfactorInv.fraction = _scaler_fraction_part(src_w, out_w, FRACTION_ROUND_UP, &carried);
    if (carried) {
        cfg->fXfactorInv.decimal++;
    }

    // vertical factor
    cfg->fYfactor.decimal = out_h / src_h;
    cfg->fYfactor.fraction = _scaler_fraction_part(out_h, src_h, FRACTION_TRUNCATE, &carried);

    // vertical inverse factor, rounded up
    cfg->fYfactorInv.decimal = src_h / out_h;
    cfg->fYfactorInv.fraction = _scaler_fraction_part(src_h, out_h, FRACTION_ROUND_UP, &carried);
    if (carried) {
        cfg->fYfactorInv.decimal++;
    }

    // put the real dimensions back
    if (rows_scale_up) {
        cfg->iSrcHeight++;
        cfg->iOutHeight++;
    }
    if (cols_scale_up) {
        cfg->iSrcWidth++;
        cfg->iOutWidth++;
    }
}
|
|
|
|
static uint32 _scaler_fraction_part(uint32 iNum, uint32 iDen, pscaler_fraction_t mode,
|
|
bool_t *overflow) {
|
|
uint32 iFract; // fractional part
|
|
uint32 iRem; // remainder part
|
|
int i; // loop counter
|
|
|
|
*overflow = 0;
|
|
iFract = 0;
|
|
iRem = iNum % iDen;
|
|
|
|
if (iRem == 0) {
|
|
return (0);
|
|
}
|
|
|
|
for (i = PSCALER_FRACT_BITS_COUNT - 1; i >= 0; i--) {
|
|
iRem <<= 1;
|
|
|
|
if (iRem == iDen) {
|
|
iFract |= (1 << i);
|
|
break;
|
|
} else if (iRem > iDen) {
|
|
iFract |= (1 << i);
|
|
iRem -= iDen;
|
|
}
|
|
}
|
|
|
|
if (mode == FRACTION_TRUNCATE) {
|
|
return (iFract << 8);
|
|
} else {
|
|
if (iRem == 0) {
|
|
return (iFract << 8);
|
|
} else {
|
|
if (iFract < 0x00ffffff) {
|
|
iFract++;
|
|
return (iFract << 8);
|
|
} else {
|
|
*overflow = 1;
|
|
return (0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Shorthand for the compiler's __restrict__ pointer qualifier; applied to the
// row buffers passed to the _scale_row_down_* helpers.
#define _RESTRICT_ __restrict__
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging nine source rows.
 *
 * in0..in8           the nine source rows; in0 is weighted by top_weight, in8 by
 *                    bot_weight, and in1..in7 by a full 256 (the "<< 8")
 * out                destination row, out_width pixels of 3 bytes each
 * position_x         starting source position: column index in the upper 32
 *                    bits, fraction below (its top 8 bits are used as a weight)
 * x_factor_inv       source distance covered per output pixel, same format
 * weight_reciprocal  multiplier that normalises the accumulated sum; the
 *                    product is rounded and shifted right by 32
 * out_width          number of output pixels to produce
 *
 * For each output pixel the source columns it covers are accumulated with a
 * horizontal weight of at most 256 each until x_factor_inv >> 24 units of
 * weight have been consumed.
 */
static inline void _scale_row_down_9in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
        uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ in7, uint8 *_RESTRICT_ in8, uint8 *_RESTRICT_ out,
        uint64 position_x, uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight,
        uint32 weight_reciprocal, int out_width) {
    int x;
    uint32 in_col;
    sint32 total_weight;

    for (x = 0; x < out_width; x++) {
        uint32 acc_r = 0;
        uint32 acc_g = 0;
        uint32 acc_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 curr_weight = 256 - ((position_x >> 24) & 0xff);
        total_weight = x_factor_inv >> 24;

        in_col = position_x >> 32;

        while (total_weight > 0) {
            acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight;
            acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in6[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in7[(in_col * 3) + 0] * curr_weight << 8;
            acc_r += (uint32) in8[(in_col * 3) + 0] * curr_weight * bot_weight;

            acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight;
            acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in6[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in7[(in_col * 3) + 1] * curr_weight << 8;
            acc_g += (uint32) in8[(in_col * 3) + 1] * curr_weight * bot_weight;

            acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight;
            acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in6[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in7[(in_col * 3) + 2] * curr_weight << 8;
            acc_b += (uint32) in8[(in_col * 3) + 2] * curr_weight * bot_weight;

            in_col++;

            // following columns take a full 256, the last one whatever is left
            total_weight -= curr_weight;
            curr_weight = total_weight > 256 ? 256 : total_weight;
        }

        position_x += x_factor_inv;

        // Each channel is stored in its own byte of the output pixel.
        out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
        out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
        out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32;
    }
}
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging eight source rows.
 *
 * in0 is weighted by top_weight, in7 by bot_weight and in1..in6 by a full 256.
 * position_x holds the starting source column in its upper 32 bits with the
 * fraction below; x_factor_inv is the source distance per output pixel in the
 * same format.  The accumulated sums are normalised with weight_reciprocal
 * (rounded, then shifted right by 32) and written to out for out_width pixels.
 */
static inline void _scale_row_down_8in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
        uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ in7, uint8 *_RESTRICT_ out, uint64 position_x,
        uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal,
        int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int px;

    for (px = 0; px < out_width; px++, position_x += x_factor_inv) {
        uint32 sum_r = 0;
        uint32 sum_g = 0;
        uint32 sum_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        uint32 src_col = position_x >> 32;
        sint32 weight_left = x_factor_inv >> 24;

        while (weight_left > 0) {
            const uint32 r = src_col * 3;
            const uint32 g = r + 1;
            const uint32 b = r + 2;
            uint32 mid;  // sum of the full-weight middle rows for one channel

            mid = (uint32) in1[r] + in2[r] + in3[r] + in4[r] + in5[r] + in6[r];
            sum_r += (uint32) in0[r] * col_weight * top_weight;
            sum_r += (mid * col_weight) << 8;
            sum_r += (uint32) in7[r] * col_weight * bot_weight;

            mid = (uint32) in1[g] + in2[g] + in3[g] + in4[g] + in5[g] + in6[g];
            sum_g += (uint32) in0[g] * col_weight * top_weight;
            sum_g += (mid * col_weight) << 8;
            sum_g += (uint32) in7[g] * col_weight * bot_weight;

            mid = (uint32) in1[b] + in2[b] + in3[b] + in4[b] + in5[b] + in6[b];
            sum_b += (uint32) in0[b] * col_weight * top_weight;
            sum_b += (mid * col_weight) << 8;
            sum_b += (uint32) in7[b] * col_weight * bot_weight;

            src_col++;

            // following columns take a full 256, the last one whatever is left
            weight_left -= col_weight;
            col_weight = weight_left > 256 ? 256 : weight_left;
        }

        out[(px * 3) + 0] = ((uint64) sum_r * weight_reciprocal + half) >> 32;
        out[(px * 3) + 1] = ((uint64) sum_g * weight_reciprocal + half) >> 32;
        out[(px * 3) + 2] = ((uint64) sum_b * weight_reciprocal + half) >> 32;
    }
}
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging seven source rows.
 *
 * in0 is weighted by top_weight, in6 by bot_weight and in1..in5 by a full 256.
 * position_x holds the starting source column in its upper 32 bits with the
 * fraction below; x_factor_inv is the source distance per output pixel in the
 * same format.  The accumulated sums are normalised with weight_reciprocal
 * (rounded, then shifted right by 32) and written to out for out_width pixels.
 */
static inline void _scale_row_down_7in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
        uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv,
        uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int px;

    for (px = 0; px < out_width; px++, position_x += x_factor_inv) {
        uint32 sum_r = 0;
        uint32 sum_g = 0;
        uint32 sum_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        uint32 src_col = position_x >> 32;
        sint32 weight_left = x_factor_inv >> 24;

        while (weight_left > 0) {
            const uint32 r = src_col * 3;
            const uint32 g = r + 1;
            const uint32 b = r + 2;
            uint32 mid;  // sum of the full-weight middle rows for one channel

            mid = (uint32) in1[r] + in2[r] + in3[r] + in4[r] + in5[r];
            sum_r += (uint32) in0[r] * col_weight * top_weight;
            sum_r += (mid * col_weight) << 8;
            sum_r += (uint32) in6[r] * col_weight * bot_weight;

            mid = (uint32) in1[g] + in2[g] + in3[g] + in4[g] + in5[g];
            sum_g += (uint32) in0[g] * col_weight * top_weight;
            sum_g += (mid * col_weight) << 8;
            sum_g += (uint32) in6[g] * col_weight * bot_weight;

            mid = (uint32) in1[b] + in2[b] + in3[b] + in4[b] + in5[b];
            sum_b += (uint32) in0[b] * col_weight * top_weight;
            sum_b += (mid * col_weight) << 8;
            sum_b += (uint32) in6[b] * col_weight * bot_weight;

            src_col++;

            // following columns take a full 256, the last one whatever is left
            weight_left -= col_weight;
            col_weight = weight_left > 256 ? 256 : weight_left;
        }

        out[(px * 3) + 0] = ((uint64) sum_r * weight_reciprocal + half) >> 32;
        out[(px * 3) + 1] = ((uint64) sum_g * weight_reciprocal + half) >> 32;
        out[(px * 3) + 2] = ((uint64) sum_b * weight_reciprocal + half) >> 32;
    }
}
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging six source rows.
 *
 * in0 is weighted by top_weight, in5 by bot_weight and in1..in4 by a full 256.
 * position_x holds the starting source column in its upper 32 bits with the
 * fraction below; x_factor_inv is the source distance per output pixel in the
 * same format.  The accumulated sums are normalised with weight_reciprocal
 * (rounded, then shifted right by 32) and written to out for out_width pixels.
 */
static inline void _scale_row_down_6in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5,
        uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, uint32 top_weight,
        uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int px;

    for (px = 0; px < out_width; px++, position_x += x_factor_inv) {
        uint32 sum_r = 0;
        uint32 sum_g = 0;
        uint32 sum_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        uint32 src_col = position_x >> 32;
        sint32 weight_left = x_factor_inv >> 24;

        while (weight_left > 0) {
            const uint32 r = src_col * 3;
            const uint32 g = r + 1;
            const uint32 b = r + 2;
            uint32 mid;  // sum of the full-weight middle rows for one channel

            mid = (uint32) in1[r] + in2[r] + in3[r] + in4[r];
            sum_r += (uint32) in0[r] * col_weight * top_weight;
            sum_r += (mid * col_weight) << 8;
            sum_r += (uint32) in5[r] * col_weight * bot_weight;

            mid = (uint32) in1[g] + in2[g] + in3[g] + in4[g];
            sum_g += (uint32) in0[g] * col_weight * top_weight;
            sum_g += (mid * col_weight) << 8;
            sum_g += (uint32) in5[g] * col_weight * bot_weight;

            mid = (uint32) in1[b] + in2[b] + in3[b] + in4[b];
            sum_b += (uint32) in0[b] * col_weight * top_weight;
            sum_b += (mid * col_weight) << 8;
            sum_b += (uint32) in5[b] * col_weight * bot_weight;

            src_col++;

            // following columns take a full 256, the last one whatever is left
            weight_left -= col_weight;
            col_weight = weight_left > 256 ? 256 : weight_left;
        }

        out[(px * 3) + 0] = ((uint64) sum_r * weight_reciprocal + half) >> 32;
        out[(px * 3) + 1] = ((uint64) sum_g * weight_reciprocal + half) >> 32;
        out[(px * 3) + 2] = ((uint64) sum_b * weight_reciprocal + half) >> 32;
    }
}
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging five source rows.
 *
 * in0 is weighted by top_weight, in4 by bot_weight and in1..in3 by a full 256.
 * position_x holds the starting source column in its upper 32 bits with the
 * fraction below; x_factor_inv is the source distance per output pixel in the
 * same format.  The accumulated sums are normalised with weight_reciprocal
 * (rounded, then shifted right by 32) and written to out for out_width pixels.
 */
static inline void _scale_row_down_5in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ out,
        uint64 position_x, uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight,
        uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int px;

    for (px = 0; px < out_width; px++, position_x += x_factor_inv) {
        uint32 sum_r = 0;
        uint32 sum_g = 0;
        uint32 sum_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        uint32 src_col = position_x >> 32;
        sint32 weight_left = x_factor_inv >> 24;

        while (weight_left > 0) {
            const uint32 r = src_col * 3;
            const uint32 g = r + 1;
            const uint32 b = r + 2;
            uint32 mid;  // sum of the full-weight middle rows for one channel

            mid = (uint32) in1[r] + in2[r] + in3[r];
            sum_r += (uint32) in0[r] * col_weight * top_weight;
            sum_r += (mid * col_weight) << 8;
            sum_r += (uint32) in4[r] * col_weight * bot_weight;

            mid = (uint32) in1[g] + in2[g] + in3[g];
            sum_g += (uint32) in0[g] * col_weight * top_weight;
            sum_g += (mid * col_weight) << 8;
            sum_g += (uint32) in4[g] * col_weight * bot_weight;

            mid = (uint32) in1[b] + in2[b] + in3[b];
            sum_b += (uint32) in0[b] * col_weight * top_weight;
            sum_b += (mid * col_weight) << 8;
            sum_b += (uint32) in4[b] * col_weight * bot_weight;

            src_col++;

            // following columns take a full 256, the last one whatever is left
            weight_left -= col_weight;
            col_weight = weight_left > 256 ? 256 : weight_left;
        }

        out[(px * 3) + 0] = ((uint64) sum_r * weight_reciprocal + half) >> 32;
        out[(px * 3) + 1] = ((uint64) sum_g * weight_reciprocal + half) >> 32;
        out[(px * 3) + 2] = ((uint64) sum_b * weight_reciprocal + half) >> 32;
    }
}
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging four source rows.
 *
 * in0 is weighted by top_weight, in3 by bot_weight and in1..in2 by a full 256.
 * position_x holds the starting source column in its upper 32 bits with the
 * fraction below; x_factor_inv is the source distance per output pixel in the
 * same format.  The accumulated sums are normalised with weight_reciprocal
 * (rounded, then shifted right by 32) and written to out for out_width pixels.
 */
static inline void _scale_row_down_4in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ out, uint64 position_x,
        uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal,
        int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int px;

    for (px = 0; px < out_width; px++, position_x += x_factor_inv) {
        uint32 sum_r = 0;
        uint32 sum_g = 0;
        uint32 sum_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        uint32 src_col = position_x >> 32;
        sint32 weight_left = x_factor_inv >> 24;

        while (weight_left > 0) {
            const uint32 r = src_col * 3;
            const uint32 g = r + 1;
            const uint32 b = r + 2;
            uint32 mid;  // sum of the full-weight middle rows for one channel

            mid = (uint32) in1[r] + in2[r];
            sum_r += (uint32) in0[r] * col_weight * top_weight;
            sum_r += (mid * col_weight) << 8;
            sum_r += (uint32) in3[r] * col_weight * bot_weight;

            mid = (uint32) in1[g] + in2[g];
            sum_g += (uint32) in0[g] * col_weight * top_weight;
            sum_g += (mid * col_weight) << 8;
            sum_g += (uint32) in3[g] * col_weight * bot_weight;

            mid = (uint32) in1[b] + in2[b];
            sum_b += (uint32) in0[b] * col_weight * top_weight;
            sum_b += (mid * col_weight) << 8;
            sum_b += (uint32) in3[b] * col_weight * bot_weight;

            src_col++;

            // following columns take a full 256, the last one whatever is left
            weight_left -= col_weight;
            col_weight = weight_left > 256 ? 256 : weight_left;
        }

        out[(px * 3) + 0] = ((uint64) sum_r * weight_reciprocal + half) >> 32;
        out[(px * 3) + 1] = ((uint64) sum_g * weight_reciprocal + half) >> 32;
        out[(px * 3) + 2] = ((uint64) sum_b * weight_reciprocal + half) >> 32;
    }
}
|
|
|
|
/*
 * Produces one output row of 3-byte pixels by averaging three source rows.
 *
 * in0 is weighted by top_weight, in2 by bot_weight and in1 by a full 256.
 * position_x holds the starting source column in its upper 32 bits with the
 * fraction below; x_factor_inv is the source distance per output pixel in the
 * same format.  The accumulated sums are normalised with weight_reciprocal
 * (rounded, then shifted right by 32) and written to out for out_width pixels.
 */
static inline void _scale_row_down_3in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv,
        uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 half = (uint64) 1 << 31;  // rounding term for the final >> 32
    int px;

    for (px = 0; px < out_width; px++, position_x += x_factor_inv) {
        uint32 sum_r = 0;
        uint32 sum_g = 0;
        uint32 sum_b = 0;
        // the first covered column only contributes the part right of position_x
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        uint32 src_col = position_x >> 32;
        sint32 weight_left = x_factor_inv >> 24;

        while (weight_left > 0) {
            const uint32 r = src_col * 3;
            const uint32 g = r + 1;
            const uint32 b = r + 2;

            sum_r += (uint32) in0[r] * col_weight * top_weight;
            sum_r += ((uint32) in1[r] * col_weight) << 8;
            sum_r += (uint32) in2[r] * col_weight * bot_weight;

            sum_g += (uint32) in0[g] * col_weight * top_weight;
            sum_g += ((uint32) in1[g] * col_weight) << 8;
            sum_g += (uint32) in2[g] * col_weight * bot_weight;

            sum_b += (uint32) in0[b] * col_weight * top_weight;
            sum_b += ((uint32) in1[b] * col_weight) << 8;
            sum_b += (uint32) in2[b] * col_weight * bot_weight;

            src_col++;

            // following columns take a full 256, the last one whatever is left
            weight_left -= col_weight;
            col_weight = weight_left > 256 ? 256 : weight_left;
        }

        out[(px * 3) + 0] = ((uint64) sum_r * weight_reciprocal + half) >> 32;
        out[(px * 3) + 1] = ((uint64) sum_g * weight_reciprocal + half) >> 32;
        out[(px * 3) + 2] = ((uint64) sum_b * weight_reciprocal + half) >> 32;
    }
}
|
|
|
|
/*
 * Produces one downscaled output row by box-filtering two vertically adjacent input rows
 * of packed 3-byte (r, g, b) pixels.
 *
 * in0, in1          - top and bottom input rows
 * out               - destination row; out_width pixels (3 bytes each) are written
 * position_x        - starting source x position; bits 32 and up select the source column,
 *                     bits 24-31 are used as an 8-bit fraction of that column
 * x_factor_inv      - added to position_x after every output pixel; (x_factor_inv >> 24) is
 *                     the horizontal weight consumed per output pixel
 * top_weight        - vertical weight applied to in0
 * bot_weight        - vertical weight applied to in1
 * weight_reciprocal - multiplier that normalizes the weighted sums; the product is rounded
 *                     and shifted right by 32 bits to yield the output byte
 * out_width         - number of output pixels to produce
 */
static inline void _scale_row_down_2in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1,
        uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, uint32 top_weight,
        uint32 bot_weight, uint32 weight_reciprocal, int out_width) {
    const uint64 rounding = (uint64) 1 << 31;
    int out_col;

    for (out_col = 0; out_col < out_width; out_col++) {
        uint32 sum[3] = {0, 0, 0};
        uint32 src_col = position_x >> 32;
        // The first source column only contributes its not-yet-consumed fraction
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        int ch;

        while (remaining > 0) {
            const uint32 base = src_col * 3;

            for (ch = 0; ch < 3; ch++) {
                sum[ch] += (uint32) in0[base + ch] * col_weight * top_weight;
                sum[ch] += (uint32) in1[base + ch] * col_weight * bot_weight;
            }

            src_col++;

            // Later columns take a full 256, except the last which takes what is left
            remaining -= col_weight;
            col_weight = remaining > 256 ? 256 : remaining;
        }

        position_x += x_factor_inv;

        for (ch = 0; ch < 3; ch++) {
            out[(out_col * 3) + ch] = ((uint64) sum[ch] * weight_reciprocal + rounding) >> 32;
        }
    }
}
|
|
|
|
/*
 * Produces one downscaled output row of packed 3-byte (r, g, b) pixels.
 *
 * The vertical weight (y_factor_inv >> 24) is split into a weight for the first input row,
 * a weight for the last input row, and full-weight (256) rows in between. Row counts of
 * 2 through 9 are handed to the dedicated _scale_row_down_Nin() routines; any other count is
 * handled by the generic loop at the end of this function.
 *
 * in                - first input row contributing to this output row
 * out               - destination row; out_width pixels are written
 * in_row_ofs        - offset from one input row to the next, in elements of in
 * position_x        - starting source x position (column in bits 32 and up, 8-bit fraction in
 *                     bits 24-31)
 * position_y        - source y position; only bits 24-31 (the fraction) are used here
 * x_factor_inv      - horizontal step added to position_x per output pixel
 * y_factor_inv      - vertical step; (y_factor_inv >> 24) is the vertical weight to distribute
 * weight_reciprocal - multiplier that normalizes the weighted sums (result is rounded and
 *                     shifted right by 32 bits)
 * out_width         - number of output pixels to produce
 */
static inline void _scale_row_down(uint8 *in, uint8 *_RESTRICT_ out, uint32 in_row_ofs,
        uint64 position_x, uint64 position_y, uint64 x_factor_inv, uint64 y_factor_inv,
        uint32 weight_reciprocal, int out_width) {
    const uint64 rounding = (uint64) 1 << 31;
    uint32 top_weight, bot_weight, in_rows, row;
    sint32 total_weight = y_factor_inv >> 24;
    int x;

    // The first row contributes only its not-yet-consumed fraction, capped at the total
    top_weight = (uint32) 256 - ((position_y >> 24) & 0xff);
    if (total_weight < (sint32) top_weight) {
        top_weight = total_weight;
    }
    total_weight -= top_weight;

    // The last row takes the leftover fraction, or a full row when the remainder is a
    // non-zero multiple of 256
    if (total_weight & 0xff) {
        bot_weight = total_weight & 0xff;
    } else {
        bot_weight = (total_weight > 255) ? 256 : 0;
    }
    total_weight -= bot_weight;

    // Whatever is left must be a whole number of full-weight middle rows
    assert(total_weight >= 0);
    assert((total_weight & 0xff) == 0);

    in_rows = 2 + (total_weight >> 8);

    switch (in_rows) {
        case 2:
            _scale_row_down_2in(in, in + in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 3:
            _scale_row_down_3in(in, in + in_row_ofs, in + 2 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 4:
            _scale_row_down_4in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 5:
            _scale_row_down_5in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 6:
            _scale_row_down_6in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 7:
            _scale_row_down_7in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 8:
            _scale_row_down_8in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs,
                    in + 7 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        case 9:
            _scale_row_down_9in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs,
                    in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs,
                    in + 7 * in_row_ofs, in + 8 * in_row_ofs,
                    out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal,
                    out_width);
            return;
        default:
            break;
    }

    // Generic path: no dedicated routine exists for this row count
    for (x = 0; x < out_width; x++) {
        uint32 sum[3] = {0, 0, 0};
        uint32 src_col = position_x >> 32;
        uint32 col_weight = 256 - ((position_x >> 24) & 0xff);
        sint32 remaining = x_factor_inv >> 24;
        int ch;

        while (remaining > 0) {
            const uint32 base = src_col * 3;

            // First row, partial vertical weight
            for (ch = 0; ch < 3; ch++) {
                sum[ch] += (uint32) in[base + ch] * col_weight * top_weight;
            }

            // Middle rows, full vertical weight
            for (row = 1; row < in_rows - 1; row++) {
                for (ch = 0; ch < 3; ch++) {
                    sum[ch] += (uint32) in[row * in_row_ofs + (base + ch)] * col_weight * 256;
                }
            }

            // Last row (row is left pointing at it by the loop above), partial weight
            for (ch = 0; ch < 3; ch++) {
                sum[ch] += (uint32) in[row * in_row_ofs + (base + ch)] * col_weight * bot_weight;
            }

            src_col++;
            remaining -= col_weight;
            col_weight = remaining > 256 ? 256 : remaining;
        }

        position_x += x_factor_inv;

        for (ch = 0; ch < 3; ch++) {
            out[(x * 3) + ch] = ((uint64) sum[ch] * weight_reciprocal + rounding) >> 32;
        }
    }
}
|
|
|
|
static void _scale_row_up(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, uint8 *_RESTRICT_ out,
|
|
sint32 weight_y, uint64 position_x, uint64 increment_x, int out_width) {
|
|
int x;
|
|
for (x = 0; x < out_width; x++) {
|
|
sint32 top_val_r, bot_val_r;
|
|
sint32 top_val_g, bot_val_g;
|
|
sint32 top_val_b, bot_val_b;
|
|
|
|
// Position is tracked with 32 bits of precision, but interpolation is
|
|
// only guided by 10. REVISIT - Check ASM and make sure the compiler
|
|
// handled the second part here optimally.
|
|
uint32 pix_x = position_x >> 32;
|
|
|
|
sint32 weight_x = (position_x & 0xffffffff) >> 22;
|
|
|
|
// top_val and bot_val become 18-bit values here
|
|
top_val_r = (in0[(pix_x * 3) + 0] << 10) +
|
|
weight_x * ((sint32) in0[((pix_x + 1) * 3) + 0] - in0[(pix_x * 3) + 0]);
|
|
bot_val_r = (in1[(pix_x * 3) + 0] << 10) +
|
|
weight_x * ((sint32) in1[((pix_x + 1) * 3) + 0] - in1[(pix_x * 3) + 0]);
|
|
|
|
top_val_g = (in0[(pix_x * 3) + 1] << 10) +
|
|
weight_x * ((sint32) in0[((pix_x + 1) * 3) + 1] - in0[(pix_x * 3) + 1]);
|
|
bot_val_g = (in1[(pix_x * 3) + 1] << 10) +
|
|
weight_x * ((sint32) in1[((pix_x + 1) * 3) + 1] - in1[(pix_x * 3) + 1]);
|
|
|
|
top_val_b = (in0[(pix_x * 3) + 2] << 10) +
|
|
weight_x * ((sint32) in0[((pix_x + 1) * 3) + 2] - in0[(pix_x * 3) + 2]);
|
|
bot_val_b = (in1[(pix_x * 3) + 2] << 10) +
|
|
weight_x * ((sint32) in1[((pix_x + 1) * 3) + 2] - in1[(pix_x * 3) + 2]);
|
|
|
|
// out is an 8-bit value. We do not need to range-check, as overflow
|
|
// is mathematically impossible.
|
|
out[(x * 3) + 0] = ((top_val_r << 10) + weight_y * (bot_val_r - top_val_r)) >> 20;
|
|
out[(x * 3) + 1] = ((top_val_g << 10) + weight_y * (bot_val_g - top_val_g)) >> 20;
|
|
out[(x * 3) + 2] = ((top_val_b << 10) + weight_y * (bot_val_b - top_val_b)) >> 20;
|
|
|
|
position_x += increment_x;
|
|
}
|
|
}
|
|
|
|
/*
 * Software stand-in for the scaler hardware: scales the rows described by pscaler_config
 * from pSrcBuf into pOutBuf, one output row at a time.
 *
 * pscaler_config - supplies the source and output buffers, their row offsets (iSrcBufWidth /
 *                  iOutBufWidth), the output width, the output row range
 *                  (iOutStartRow..iOutEndRow), the inverse scale factors and the starting
 *                  source row
 * scaleMode      - PSCALER_SCALE_UP interpolates between two source rows per output row;
 *                  any other mode box-filters the source rows via _scale_row_down()
 *
 * The column offsets in the configuration (iOutStartColumn / fSrcStartColumn) are ignored:
 * scaling always starts at the left edge of both buffers.
 *
 * When downscaling, if the inverse factors yield a zero total weight the configuration is
 * treated as invalid and the function returns without writing any output.
 */
static void _hw_scale_image_plane(scaler_config_t *pscaler_config, scaler_mode_t scaleMode) {
    // These duplicate h/w regs
    uint64 x_factor_inv, y_factor_inv;
    uint32 x_output_width, y_output_width;
    uint32 input_pixel_ptr_offset, output_pixel_ptr_offset;
    uint64 first_y_src;
    uint64 weight_reciprocal = 0;

    // PC REVISIT - The HW has config registers for a starting output column and a starting
    // source column, but they aren't being used by lib_photo_scaler so I don't want to use
    // them, either. Maybe we're always running the scaler from the left edge of the source
    // so they're implicitly zero? Until that is settled both are treated as zero.
    const uint64 first_x_src = 0;

    // These are internal state
    uint32 r;
    uint8 *outp;

    x_output_width = pscaler_config->iOutWidth;
    y_output_width = pscaler_config->iOutEndRow - pscaler_config->iOutStartRow + 1;

    input_pixel_ptr_offset = pscaler_config->iSrcBufWidth;
    output_pixel_ptr_offset = pscaler_config->iOutBufWidth;

    // Assemble the fixed-point values: whole part in the upper 32 bits, fraction in the lower
    x_factor_inv = (uint64) pscaler_config->fXfactorInv.decimal << 32;
    x_factor_inv |= pscaler_config->fXfactorInv.fraction;

    y_factor_inv = (uint64) pscaler_config->fYfactorInv.decimal << 32;
    y_factor_inv |= pscaler_config->fYfactorInv.fraction;

    first_y_src = (uint64) pscaler_config->fSrcStartRow.decimal << 32;
    first_y_src |= pscaler_config->fSrcStartRow.fraction;

    // The normalization reciprocal is only consumed by the downscale path. Computing it
    // unconditionally would divide by zero whenever an inverse factor is below 1/256
    // (for example when upscaling by more than 256x).
    if (scaleMode != PSCALER_SCALE_UP) {
        uint64 weight_total = (x_factor_inv >> 24) * (y_factor_inv >> 24);

        if (weight_total == 0) {
            // Degenerate factors: there is nothing meaningful to accumulate
            return;
        }
        weight_reciprocal = ((uint64) 1 << 32) / weight_total;
    }

    outp = pscaler_config->pOutBuf;

    // PC - Assume pSrcBuf is already aligned to "true" base of input,
    // so ignore whole-number part of first_y_src.
    first_y_src = first_y_src & 0xffffffff;

    for (r = 0; r < y_output_width; r++) {
        uint8 *inp = (pscaler_config->pSrcBuf) +
                (first_y_src >> 32) * input_pixel_ptr_offset;

        if (scaleMode == PSCALER_SCALE_UP) {
            // Vertical weight is the top 10 bits of the fractional source row position
            _scale_row_up(inp, inp + input_pixel_ptr_offset, outp,
                    (first_y_src & 0xffffffff) >> 22, first_x_src,
                    x_factor_inv, x_output_width);
        } else {
            _scale_row_down(inp, outp, input_pixel_ptr_offset,
                    first_x_src, first_y_src, x_factor_inv, y_factor_inv,
                    weight_reciprocal, x_output_width);
        }

        first_y_src += y_factor_inv;
        outp += output_pixel_ptr_offset;
    }
}