You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
336 lines
11 KiB
336 lines
11 KiB
#include "xmpmeta/xmp_parser.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <sstream>
#include <stack>

#include "android-base/logging.h"
#include "strings/case.h"
#include "strings/numbers.h"
#include "xmpmeta/base64.h"
#include "xmpmeta/jpeg_io.h"
#include "xmpmeta/xml/const.h"
#include "xmpmeta/xml/deserializer_impl.h"
#include "xmpmeta/xml/search.h"
#include "xmpmeta/xml/utils.h"
#include "xmpmeta/xmp_const.h"
|
|
|
|
using ::dynamic_depth::xmpmeta::xml::DepthFirstSearch;
|
|
using ::dynamic_depth::xmpmeta::xml::DeserializerImpl;
|
|
using ::dynamic_depth::xmpmeta::xml::FromXmlChar;
|
|
using ::dynamic_depth::xmpmeta::xml::GetFirstDescriptionElement;
|
|
|
|
namespace dynamic_depth {
|
|
namespace xmpmeta {
|
|
namespace {
|
|
|
|
// Lower-case file-name suffixes that ReadXmpHeader(filename, ...) accepts.
constexpr char kJpgExtension[] = "jpg";
constexpr char kJpegExtension[] = "jpeg";
|
|
|
|
bool BoolStringToBool(const string& bool_str, bool* value) {
|
|
if (dynamic_depth::StringCaseEqual(bool_str, "true")) {
|
|
*value = true;
|
|
return true;
|
|
}
|
|
if (dynamic_depth::StringCaseEqual(bool_str, "false")) {
|
|
*value = false;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Forward declaration. Converts string_property to the type T and stores the
// result in *value; the return value reports whether the conversion worked.
template <class T>
bool ConvertStringPropertyToType(const string& string_property, T* value);
|
|
|
|
// Gets the end of the XMP meta content. If there is no packet wrapper, returns
|
|
// data.length, otherwise returns 1 + the position of last '>' without '?'
|
|
// before it. Usually the packet wrapper end is "<?xpacket end="w"?>.
|
|
size_t GetXmpContentEnd(const string& data) {
|
|
if (data.empty()) {
|
|
return 0;
|
|
}
|
|
for (size_t i = data.size() - 1; i >= 1; --i) {
|
|
if (data[i] == '>') {
|
|
if (data[i - 1] != '?') {
|
|
return i + 1;
|
|
}
|
|
}
|
|
}
|
|
// It should not reach here for a valid XMP meta.
|
|
LOG(WARNING) << "Failed to find the end of the XMP meta content.";
|
|
return data.size();
|
|
}
|
|
|
|
// Returns true when `x` is a prefix of `s`. An empty `x` always matches.
bool StartsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  return s.compare(0, x.size(), x) == 0;
}
|
|
// Returns true when `x` is a suffix of `s`. An empty `x` always matches.
bool EndsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  const size_t suffix_start = s.size() - x.size();
  return s.compare(suffix_start, x.size(), x) == 0;
}
|
|
|
|
// Parses the first valid XMP section. Any other valid XMP section will be
|
|
// ignored.
|
|
bool ParseFirstValidXMPSection(const std::vector<Section>& sections,
|
|
XmpData* xmp) {
|
|
for (const Section& section : sections) {
|
|
if (StartsWith(section.data, XmpConst::Header())) {
|
|
const size_t end = GetXmpContentEnd(section.data);
|
|
// Increment header length by 1 for the null termination.
|
|
const size_t header_length = strlen(XmpConst::Header()) + 1;
|
|
// Check for integer underflow before subtracting.
|
|
if (header_length >= end) {
|
|
LOG(ERROR) << "Invalid content length: "
|
|
<< static_cast<int>(end - header_length);
|
|
return false;
|
|
}
|
|
const size_t content_length = end - header_length;
|
|
// header_length is guaranteed to be <= data.size due to the if condition
|
|
// above. If this contract changes we must add an additonal check.
|
|
const char* content_start = §ion.data[header_length];
|
|
// xmlReadMemory requires an int. Before casting size_t to int we must
|
|
// check for integer overflow.
|
|
if (content_length > INT_MAX) {
|
|
LOG(ERROR) << "First XMP section too large, size: " << content_length;
|
|
return false;
|
|
}
|
|
*xmp->MutableStandardSection() = xmlReadMemory(
|
|
content_start, static_cast<int>(content_length), nullptr, nullptr, 0);
|
|
if (xmp->StandardSection() == nullptr) {
|
|
LOG(WARNING) << "Failed to parse standard section.";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Collects the extended XMP sections with the given name into a string. Other
|
|
// sections will be ignored.
|
|
string GetExtendedXmpSections(const std::vector<Section>& sections,
|
|
const string& section_name) {
|
|
string extended_header = XmpConst::ExtensionHeader();
|
|
extended_header += '\0' + section_name;
|
|
// section_name is dynamically extracted from the xml file and can have an
|
|
// arbitrary size. Check for integer overflow before addition.
|
|
if (extended_header.size() > SIZE_MAX - XmpConst::ExtensionHeaderOffset()) {
|
|
return "";
|
|
}
|
|
const size_t section_start_offset =
|
|
extended_header.size() + XmpConst::ExtensionHeaderOffset();
|
|
|
|
// Compute the size of the buffer to parse the extended sections.
|
|
std::vector<const Section*> xmp_sections;
|
|
std::vector<size_t> xmp_end_offsets;
|
|
size_t buffer_size = 0;
|
|
for (const Section& section : sections) {
|
|
if (extended_header.empty() || StartsWith(section.data, extended_header)) {
|
|
const size_t end_offset = section.data.size();
|
|
const size_t section_size = end_offset - section_start_offset;
|
|
if (end_offset < section_start_offset ||
|
|
section_size > SIZE_MAX - buffer_size) {
|
|
return "";
|
|
}
|
|
buffer_size += section_size;
|
|
xmp_sections.push_back(§ion);
|
|
xmp_end_offsets.push_back(end_offset);
|
|
}
|
|
}
|
|
|
|
// Copy all the relevant sections' data into a buffer.
|
|
string buffer(buffer_size, '\0');
|
|
if (buffer.size() != buffer_size) {
|
|
return "";
|
|
}
|
|
size_t offset = 0;
|
|
for (int i = 0; i < xmp_sections.size(); ++i) {
|
|
const Section* section = xmp_sections[i];
|
|
const size_t length = xmp_end_offsets[i] - section_start_offset;
|
|
std::copy_n(§ion->data[section_start_offset], length, &buffer[offset]);
|
|
offset += length;
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
// Parses the extended XMP sections with the given name. All other sections
|
|
// will be ignored.
|
|
bool ParseExtendedXmpSections(const std::vector<Section>& sections,
|
|
const string& section_name, XmpData* xmp_data) {
|
|
const string extended_sections =
|
|
GetExtendedXmpSections(sections, section_name);
|
|
// xmlReadMemory requires an int. Before casting size_t to int we must check
|
|
// for integer overflow.
|
|
if (extended_sections.size() > INT_MAX) {
|
|
LOG(WARNING) << "Extended sections too large, size: "
|
|
<< extended_sections.size();
|
|
return false;
|
|
}
|
|
*xmp_data->MutableExtendedSection() = xmlReadMemory(
|
|
extended_sections.data(), static_cast<int>(extended_sections.size()),
|
|
nullptr, nullptr, XML_PARSE_HUGE);
|
|
if (xmp_data->ExtendedSection() == nullptr) {
|
|
LOG(WARNING) << "Failed to parse extended sections.";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Extracts a XmpData from a JPEG image stream.
|
|
bool ExtractXmpMeta(const bool skip_extended, std::istream* file,
|
|
XmpData* xmp_data) {
|
|
// We cannot use CHECK because this is ported to AOSP.
|
|
assert(xmp_data != nullptr); // NOLINT
|
|
xmp_data->Reset();
|
|
|
|
ParseOptions parse_options;
|
|
parse_options.read_meta_only = true;
|
|
if (skip_extended) {
|
|
parse_options.section_header = XmpConst::Header();
|
|
parse_options.section_header_return_first = true;
|
|
}
|
|
const std::vector<Section> sections = Parse(parse_options, file);
|
|
if (sections.empty()) {
|
|
LOG(WARNING) << "No sections found.";
|
|
return false;
|
|
}
|
|
|
|
if (!ParseFirstValidXMPSection(sections, xmp_data)) {
|
|
LOG(WARNING) << "Could not parse first section.";
|
|
return false;
|
|
}
|
|
if (skip_extended) {
|
|
return true;
|
|
}
|
|
string extension_name;
|
|
DeserializerImpl deserializer(
|
|
GetFirstDescriptionElement(xmp_data->StandardSection()));
|
|
if (!deserializer.ParseString(XmpConst::HasExtensionPrefix(),
|
|
XmpConst::HasExtension(), &extension_name)) {
|
|
// No extended sections present, so nothing to parse.
|
|
return true;
|
|
}
|
|
if (!ParseExtendedXmpSections(sections, extension_name, xmp_data)) {
|
|
LOG(WARNING) << "Extended sections present, but could not be parsed.";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Extracts the specified string attribute.
|
|
bool GetStringProperty(const xmlNodePtr node, const char* prefix,
|
|
const char* property, string* value) {
|
|
const xmlDocPtr doc = node->doc;
|
|
for (const _xmlAttr* attribute = node->properties; attribute != nullptr;
|
|
attribute = attribute->next) {
|
|
if (attribute->ns &&
|
|
strcmp(FromXmlChar(attribute->ns->prefix), prefix) == 0 &&
|
|
strcmp(FromXmlChar(attribute->name), property) == 0) {
|
|
xmlChar* attribute_string =
|
|
xmlNodeListGetString(doc, attribute->children, 1);
|
|
*value = FromXmlChar(attribute_string);
|
|
xmlFree(attribute_string);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Reads the contents of a node.
|
|
// E.g. <prefix:node_name>Contents Here</prefix:node_name>
|
|
bool ReadNodeContent(const xmlNodePtr node, const char* prefix,
|
|
const char* node_name, string* value) {
|
|
auto* element = DepthFirstSearch(node, node_name);
|
|
if (element == nullptr) {
|
|
return false;
|
|
}
|
|
if (prefix != nullptr &&
|
|
(element->ns == nullptr || element->ns->prefix == nullptr ||
|
|
strcmp(FromXmlChar(element->ns->prefix), prefix) != 0)) {
|
|
return false;
|
|
}
|
|
xmlChar* node_content = xmlNodeGetContent(element);
|
|
*value = FromXmlChar(node_content);
|
|
free(node_content);
|
|
return true;
|
|
}
|
|
|
|
// Primary template, used for any T without an explicit specialization.
// It performs no conversion: QCHECK(value) fires when `value` is null, and
// otherwise the function returns false without writing to *value.
// NOTE(review): the message suggests this overload is never meant to be
// called, yet the check only triggers on a null pointer - confirm intent.
template <class T>
bool ConvertStringPropertyToType(const string& string_property, T* value) {
  QCHECK(value) << "Cannot call this method on a generic type";
  return false;
}
|
|
|
|
template <>
|
|
bool ConvertStringPropertyToType<bool>(const string& string_property,
|
|
bool* value) {
|
|
return BoolStringToBool(string_property, value);
|
|
}
|
|
|
|
template <>
|
|
bool ConvertStringPropertyToType<double>(const string& string_property,
|
|
double* value) {
|
|
*value = std::stod(string_property);
|
|
return true;
|
|
}
|
|
|
|
template <>
|
|
bool ConvertStringPropertyToType<int>(const string& string_property,
|
|
int* value) {
|
|
*value = 0;
|
|
for (int i = 0; i < string_property.size(); ++i) {
|
|
if (!isdigit(string_property[i])) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
*value = std::atoi(string_property.c_str()); // NOLINT
|
|
return true;
|
|
}
|
|
|
|
template <>
|
|
bool ConvertStringPropertyToType<int64>(const string& string_property,
|
|
int64* value) {
|
|
*value = std::stol(string_property);
|
|
return true;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
bool ReadXmpHeader(const string& filename, const bool skip_extended,
|
|
XmpData* xmp_data) {
|
|
string filename_lower = filename;
|
|
std::transform(filename_lower.begin(), filename_lower.end(),
|
|
filename_lower.begin(), ::tolower);
|
|
if (!EndsWith(filename_lower, kJpgExtension) &&
|
|
!EndsWith(filename_lower, kJpegExtension)) {
|
|
LOG(WARNING) << "XMP parse: only JPEG file is supported";
|
|
return false;
|
|
}
|
|
|
|
std::ifstream file(filename.c_str(), std::ios::binary);
|
|
if (!file.is_open()) {
|
|
LOG(WARNING) << " Could not read file: " << filename;
|
|
return false;
|
|
}
|
|
return ExtractXmpMeta(skip_extended, &file, xmp_data);
|
|
}
|
|
|
|
bool ReadXmpFromMemory(const string& jpeg_contents, const bool skip_extended,
|
|
XmpData* xmp_data) {
|
|
std::istringstream stream(jpeg_contents);
|
|
return ExtractXmpMeta(skip_extended, &stream, xmp_data);
|
|
}
|
|
|
|
bool ReadXmpHeader(std::istream* input_stream, bool skip_extended,
|
|
XmpData* xmp_data) {
|
|
return ExtractXmpMeta(skip_extended, input_stream, xmp_data);
|
|
}
|
|
|
|
} // namespace xmpmeta
|
|
} // namespace dynamic_depth
|