You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
331 lines
10 KiB
331 lines
10 KiB
// Copyright 2014 PDFium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
|
|
// Original code is licensed as follows:
|
|
/*
|
|
* Copyright 2006-2007 Jeremias Maerki.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <limits>
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#include "core/fxcrt/fx_extension.h"
|
|
#include "fxbarcode/common/BC_CommonBitMatrix.h"
|
|
#include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
|
|
#include "fxbarcode/datamatrix/BC_Base256Encoder.h"
|
|
#include "fxbarcode/datamatrix/BC_C40Encoder.h"
|
|
#include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
|
|
#include "fxbarcode/datamatrix/BC_Encoder.h"
|
|
#include "fxbarcode/datamatrix/BC_EncoderContext.h"
|
|
#include "fxbarcode/datamatrix/BC_SymbolInfo.h"
|
|
#include "fxbarcode/datamatrix/BC_TextEncoder.h"
|
|
#include "fxbarcode/datamatrix/BC_X12Encoder.h"
|
|
#include "third_party/base/ptr_util.h"
|
|
|
|
namespace {
|
|
|
|
// Codeword appended after the encoded data when the symbol has spare capacity.
constexpr wchar_t kPad = 129;

// Codewords written in place of a recognized macro 05 / macro 06 header.
constexpr wchar_t kMacro05 = 236;
constexpr wchar_t kMacro06 = 237;

// Six-character message prefixes that select the macro codewords above. The
// literal is split after the octal escape so the escape cannot be misread as
// running into the digits that follow it.
constexpr wchar_t kMacro05Header[] =
    L"[)>\036"
    L"05";
constexpr wchar_t kMacro06Header[] =
    L"[)>\036"
    L"06";

// Last character a message must have for a macro header to be honoured.
constexpr wchar_t kMacroTrailer = 0x0004;
|
|
|
|
constexpr size_t kEncoderCount =
|
|
static_cast<size_t>(CBC_HighLevelEncoder::Encoding::LAST) + 1;
|
|
static_assert(kEncoderCount == 6, "Bad encoder count");
|
|
|
|
// Scrambles |ch| with a position-dependent offset in the range [1, 253] and
// wraps the result back by 254 when it exceeds 254.
wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) {
  const int32_t offset = 1 + ((149 * codewordPosition) % 253);
  int32_t scrambled = ch + offset;
  if (scrambled > 254)
    scrambled -= 254;
  return static_cast<wchar_t>(scrambled);
}
|
|
|
|
int32_t FindMinimums(const std::array<float, kEncoderCount>& charCounts,
|
|
std::array<int32_t, kEncoderCount>* intCharCounts,
|
|
std::array<uint8_t, kEncoderCount>* mins) {
|
|
int32_t min = std::numeric_limits<int32_t>::max();
|
|
for (size_t i = 0; i < kEncoderCount; ++i) {
|
|
int32_t current = static_cast<int32_t>(ceil(charCounts[i]));
|
|
(*intCharCounts)[i] = current;
|
|
if (min > current) {
|
|
min = current;
|
|
for (auto& m : *mins)
|
|
m = 0;
|
|
}
|
|
if (min == current)
|
|
(*mins)[i]++;
|
|
}
|
|
return min;
|
|
}
|
|
|
|
int32_t GetMinimumCount(const std::array<uint8_t, kEncoderCount>& mins) {
|
|
int32_t count = 0;
|
|
for (const auto& m : mins)
|
|
count += m;
|
|
return count;
|
|
}
|
|
|
|
// Returns true for a space, a decimal digit, or an upper-case letter A-Z.
bool IsNativeC40(wchar_t ch) {
  if (ch == ' ')
    return true;
  if (ch >= '0' && ch <= '9')
    return true;
  return ch >= 'A' && ch <= 'Z';
}
|
|
|
|
// Returns true for a space, a decimal digit, or a lower-case letter a-z.
bool IsNativeText(wchar_t ch) {
  if (ch == ' ')
    return true;
  if (ch >= '0' && ch <= '9')
    return true;
  return ch >= 'a' && ch <= 'z';
}
|
|
|
|
// Returns true for carriage return, '*' or '>'.
bool IsX12TermSep(wchar_t ch) {
  switch (ch) {
    case '\r':
    case '*':
    case '>':
      return true;
    default:
      return false;
  }
}
|
|
|
|
bool IsNativeX12(wchar_t ch) {
|
|
return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
|
|
(ch >= 'A' && ch <= 'Z');
|
|
}
|
|
|
|
// Returns true when |ch| lies in the inclusive range from ' ' to '^'.
bool IsNativeEDIFACT(wchar_t ch) {
  return !(ch < ' ' || ch > '^');
}
|
|
|
|
// Converts |encoding| into an array index. Encoding::UNKNOWN is not a valid
// argument and trips the assertion.
size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) {
  ASSERT(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN);
  const auto index = static_cast<size_t>(encoding);
  return index;
}
|
|
|
|
} // namespace
|
|
|
|
// static
|
|
WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) {
|
|
// Per spec. Alpha numeric input is even shorter.
|
|
static constexpr size_t kMaxNumericInputLength = 3116;
|
|
|
|
// Exit early if the input is too long. It will fail no matter what.
|
|
if (msg.GetLength() > kMaxNumericInputLength)
|
|
return WideString();
|
|
|
|
CBC_EncoderContext context(msg);
|
|
if (context.HasCharactersOutsideISO88591Encoding())
|
|
return WideString();
|
|
|
|
if (msg.Back() == kMacroTrailer) {
|
|
WideString left = msg.First(6);
|
|
if (left == kMacro05Header) {
|
|
context.writeCodeword(kMacro05);
|
|
context.setSkipAtEnd(2);
|
|
context.m_pos += 6;
|
|
} else if (left == kMacro06Header) {
|
|
context.writeCodeword(kMacro06);
|
|
context.setSkipAtEnd(2);
|
|
context.m_pos += 6;
|
|
}
|
|
}
|
|
|
|
std::vector<std::unique_ptr<CBC_Encoder>> encoders;
|
|
encoders.push_back(pdfium::MakeUnique<CBC_ASCIIEncoder>());
|
|
encoders.push_back(pdfium::MakeUnique<CBC_C40Encoder>());
|
|
encoders.push_back(pdfium::MakeUnique<CBC_TextEncoder>());
|
|
encoders.push_back(pdfium::MakeUnique<CBC_X12Encoder>());
|
|
encoders.push_back(pdfium::MakeUnique<CBC_EdifactEncoder>());
|
|
encoders.push_back(pdfium::MakeUnique<CBC_Base256Encoder>());
|
|
Encoding encodingMode = Encoding::ASCII;
|
|
while (context.hasMoreCharacters()) {
|
|
if (!encoders[EncoderIndex(encodingMode)]->Encode(&context))
|
|
return WideString();
|
|
|
|
if (context.m_newEncoding != Encoding::UNKNOWN) {
|
|
encodingMode = context.m_newEncoding;
|
|
context.ResetEncoderSignal();
|
|
}
|
|
}
|
|
size_t len = context.m_codewords.GetLength();
|
|
if (!context.UpdateSymbolInfo())
|
|
return WideString();
|
|
|
|
size_t capacity = context.m_symbolInfo->dataCapacity();
|
|
if (len < capacity) {
|
|
if (encodingMode != Encoding::ASCII && encodingMode != Encoding::BASE256)
|
|
context.writeCodeword(0x00fe);
|
|
}
|
|
WideString codewords = context.m_codewords;
|
|
if (codewords.GetLength() < capacity)
|
|
codewords += kPad;
|
|
|
|
while (codewords.GetLength() < capacity)
|
|
codewords += Randomize253State(kPad, codewords.GetLength() + 1);
|
|
|
|
ASSERT(!codewords.IsEmpty());
|
|
return codewords;
|
|
}
|
|
|
|
// static
|
|
CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest(
|
|
const WideString& msg,
|
|
size_t startpos,
|
|
CBC_HighLevelEncoder::Encoding currentMode) {
|
|
if (startpos >= msg.GetLength())
|
|
return currentMode;
|
|
|
|
std::array<float, kEncoderCount> charCounts;
|
|
if (currentMode == Encoding::ASCII) {
|
|
charCounts = {0, 1, 1, 1, 1, 1.25f};
|
|
} else {
|
|
charCounts = {1, 2, 2, 2, 2, 2.25f};
|
|
charCounts[EncoderIndex(currentMode)] = 0;
|
|
}
|
|
|
|
size_t charsProcessed = 0;
|
|
while (true) {
|
|
if ((startpos + charsProcessed) == msg.GetLength()) {
|
|
std::array<int32_t, kEncoderCount> intCharCounts;
|
|
std::array<uint8_t, kEncoderCount> mins;
|
|
int32_t min = FindMinimums(charCounts, &intCharCounts, &mins);
|
|
if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min)
|
|
return Encoding::ASCII;
|
|
const int32_t minCount = GetMinimumCount(mins);
|
|
if (minCount == 1) {
|
|
if (mins[EncoderIndex(Encoding::BASE256)] > 0)
|
|
return Encoding::BASE256;
|
|
if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
|
|
return Encoding::EDIFACT;
|
|
if (mins[EncoderIndex(Encoding::TEXT)] > 0)
|
|
return Encoding::TEXT;
|
|
if (mins[EncoderIndex(Encoding::X12)] > 0)
|
|
return Encoding::X12;
|
|
}
|
|
return Encoding::C40;
|
|
}
|
|
|
|
wchar_t c = msg[startpos + charsProcessed];
|
|
charsProcessed++;
|
|
{
|
|
auto& count = charCounts[EncoderIndex(Encoding::ASCII)];
|
|
if (FXSYS_IsDecimalDigit(c))
|
|
count += 0.5;
|
|
else if (IsExtendedASCII(c))
|
|
count = ceilf(count) + 2;
|
|
else
|
|
count = ceilf(count) + 1;
|
|
}
|
|
|
|
{
|
|
auto& count = charCounts[EncoderIndex(Encoding::C40)];
|
|
if (IsNativeC40(c))
|
|
count += 2.0f / 3.0f;
|
|
else if (IsExtendedASCII(c))
|
|
count += 8.0f / 3.0f;
|
|
else
|
|
count += 4.0f / 3.0f;
|
|
}
|
|
|
|
{
|
|
auto& count = charCounts[EncoderIndex(Encoding::TEXT)];
|
|
if (IsNativeText(c))
|
|
count += 2.0f / 3.0f;
|
|
else if (IsExtendedASCII(c))
|
|
count += 8.0f / 3.0f;
|
|
else
|
|
count += 4.0f / 3.0f;
|
|
}
|
|
|
|
{
|
|
auto& count = charCounts[EncoderIndex(Encoding::X12)];
|
|
if (IsNativeX12(c))
|
|
count += 2.0f / 3.0f;
|
|
else if (IsExtendedASCII(c))
|
|
count += 13.0f / 3.0f;
|
|
else
|
|
count += 10.0f / 3.0f;
|
|
}
|
|
|
|
{
|
|
auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)];
|
|
if (IsNativeEDIFACT(c))
|
|
count += 3.0f / 4.0f;
|
|
else if (IsExtendedASCII(c))
|
|
count += 17.0f / 4.0f;
|
|
else
|
|
count += 13.0f / 4.0f;
|
|
}
|
|
|
|
charCounts[EncoderIndex(Encoding::BASE256)]++;
|
|
if (charsProcessed < 4)
|
|
continue;
|
|
|
|
std::array<int32_t, kEncoderCount> intCharCounts;
|
|
std::array<uint8_t, kEncoderCount> mins;
|
|
FindMinimums(charCounts, &intCharCounts, &mins);
|
|
int32_t minCount = GetMinimumCount(mins);
|
|
int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)];
|
|
int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)];
|
|
int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)];
|
|
int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)];
|
|
int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)];
|
|
int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)];
|
|
int32_t bet_min = std::min({base256_count, editfact_count, text_count});
|
|
if (ascii_count < bet_min && ascii_count < c40_count &&
|
|
ascii_count < x12_count) {
|
|
return Encoding::ASCII;
|
|
}
|
|
if (base256_count < ascii_count ||
|
|
(mins[EncoderIndex(Encoding::C40)] +
|
|
mins[EncoderIndex(Encoding::TEXT)] +
|
|
mins[EncoderIndex(Encoding::X12)] +
|
|
mins[EncoderIndex(Encoding::EDIFACT)]) == 0) {
|
|
return Encoding::BASE256;
|
|
}
|
|
if (minCount == 1) {
|
|
if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
|
|
return Encoding::EDIFACT;
|
|
if (mins[EncoderIndex(Encoding::TEXT)] > 0)
|
|
return Encoding::TEXT;
|
|
if (mins[EncoderIndex(Encoding::X12)] > 0)
|
|
return Encoding::X12;
|
|
}
|
|
if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) {
|
|
if (c40_count < x12_count)
|
|
return Encoding::C40;
|
|
if (c40_count == x12_count) {
|
|
size_t p = startpos + charsProcessed + 1;
|
|
while (p < msg.GetLength()) {
|
|
wchar_t tc = msg[p];
|
|
if (IsX12TermSep(tc))
|
|
return Encoding::X12;
|
|
if (!IsNativeX12(tc))
|
|
break;
|
|
p++;
|
|
}
|
|
return Encoding::C40;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// static
|
|
bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) {
|
|
return ch >= 128 && ch <= 255;
|
|
}
|