You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
394 lines
14 KiB
394 lines
14 KiB
// Copyright 2015 PDFium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "core/fxcrt/bytestring.h"
|
|
#include "core/fxcrt/widestring.h"
|
|
#include "public/fpdfview.h"
|
|
#include "testing/embedder_test.h"
|
|
#include "testing/gtest/include/gtest/gtest.h"
|
|
#include "testing/range_set.h"
|
|
#include "testing/utils/file_util.h"
|
|
#include "testing/utils/path_service.h"
|
|
|
|
namespace {
|
|
|
|
class MockDownloadHints final : public FX_DOWNLOADHINTS {
|
|
public:
|
|
static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
|
|
}
|
|
|
|
MockDownloadHints() {
|
|
FX_DOWNLOADHINTS::version = 1;
|
|
FX_DOWNLOADHINTS::AddSegment = SAddSegment;
|
|
}
|
|
|
|
~MockDownloadHints() = default;
|
|
};
|
|
|
|
class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
|
|
public:
|
|
explicit TestAsyncLoader(const std::string& file_name) {
|
|
std::string file_path;
|
|
if (!PathService::GetTestFilePath(file_name, &file_path))
|
|
return;
|
|
file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
|
|
if (!file_contents_)
|
|
return;
|
|
|
|
file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
|
|
file_access_.m_GetBlock = SGetBlock;
|
|
file_access_.m_Param = this;
|
|
|
|
FX_DOWNLOADHINTS::version = 1;
|
|
FX_DOWNLOADHINTS::AddSegment = SAddSegment;
|
|
|
|
FX_FILEAVAIL::version = 1;
|
|
FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
|
|
}
|
|
|
|
bool IsOpened() const { return !!file_contents_; }
|
|
|
|
FPDF_FILEACCESS* file_access() { return &file_access_; }
|
|
FX_DOWNLOADHINTS* hints() { return this; }
|
|
FX_FILEAVAIL* file_avail() { return this; }
|
|
|
|
const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
|
|
return requested_segments_;
|
|
}
|
|
|
|
size_t max_requested_bound() const { return max_requested_bound_; }
|
|
|
|
void ClearRequestedSegments() {
|
|
requested_segments_.clear();
|
|
max_requested_bound_ = 0;
|
|
}
|
|
|
|
bool is_new_data_available() const { return is_new_data_available_; }
|
|
void set_is_new_data_available(bool is_new_data_available) {
|
|
is_new_data_available_ = is_new_data_available;
|
|
}
|
|
|
|
size_t max_already_available_bound() const {
|
|
return available_ranges_.IsEmpty()
|
|
? 0
|
|
: available_ranges_.ranges().rbegin()->second;
|
|
}
|
|
|
|
void FlushRequestedData() {
|
|
for (const auto& it : requested_segments_) {
|
|
SetDataAvailable(it.first, it.second);
|
|
}
|
|
ClearRequestedSegments();
|
|
}
|
|
|
|
char* file_contents() { return file_contents_.get(); }
|
|
size_t file_length() const { return file_length_; }
|
|
|
|
private:
|
|
void SetDataAvailable(size_t start, size_t size) {
|
|
available_ranges_.Union(RangeSet::Range(start, start + size));
|
|
}
|
|
|
|
bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
|
|
return available_ranges_.Contains(RangeSet::Range(start, start + size));
|
|
}
|
|
|
|
int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
|
|
if (!IsDataAvailImpl(pos, size))
|
|
return 0;
|
|
const unsigned long end =
|
|
std::min(static_cast<unsigned long>(file_length_), pos + size);
|
|
if (end <= pos)
|
|
return 0;
|
|
memcpy(pBuf, file_contents_.get() + pos, end - pos);
|
|
SetDataAvailable(pos, end - pos);
|
|
return static_cast<int>(end - pos);
|
|
}
|
|
|
|
void AddSegmentImpl(size_t offset, size_t size) {
|
|
requested_segments_.push_back(std::make_pair(offset, size));
|
|
max_requested_bound_ = std::max(max_requested_bound_, offset + size);
|
|
}
|
|
|
|
bool IsDataAvailImpl(size_t offset, size_t size) {
|
|
if (offset + size > file_length_)
|
|
return false;
|
|
if (is_new_data_available_) {
|
|
SetDataAvailable(offset, size);
|
|
return true;
|
|
}
|
|
return CheckDataAlreadyAvailable(offset, size);
|
|
}
|
|
|
|
static int SGetBlock(void* param,
|
|
unsigned long pos,
|
|
unsigned char* pBuf,
|
|
unsigned long size) {
|
|
return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
|
|
}
|
|
|
|
static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
|
|
return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
|
|
}
|
|
|
|
static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
|
|
size_t offset,
|
|
size_t size) {
|
|
return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
|
|
}
|
|
|
|
FPDF_FILEACCESS file_access_;
|
|
|
|
std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
|
|
size_t file_length_ = 0;
|
|
std::vector<std::pair<size_t, size_t>> requested_segments_;
|
|
size_t max_requested_bound_ = 0;
|
|
bool is_new_data_available_ = true;
|
|
|
|
RangeSet available_ranges_;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
class FPDFDataAvailEmbedderTest : public EmbedderTest {};
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
|
|
// Document must load without crashing but is too malformed to be available.
|
|
EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));
|
|
MockDownloadHints hints;
|
|
EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
|
|
// Document must load without crashing but is too malformed to be available.
|
|
EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
|
|
MockDownloadHints hints;
|
|
EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
|
|
TestAsyncLoader loader("feature_linearized_loading.pdf");
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
|
|
|
|
// No new data available, to prevent load "Pages" node.
|
|
loader.set_is_new_data_available(false);
|
|
ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
|
|
EXPECT_TRUE(page);
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
|
|
TestAsyncLoader loader("feature_linearized_loading.pdf");
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
|
|
// Prevent access to non-requested data to coerce the parser to send new
|
|
// request for non available (non-requested before) data.
|
|
loader.set_is_new_data_available(false);
|
|
loader.ClearRequestedSegments();
|
|
|
|
int status = PDF_FORM_NOTAVAIL;
|
|
while (status == PDF_FORM_NOTAVAIL) {
|
|
loader.FlushRequestedData();
|
|
status = FPDFAvail_IsFormAvail(avail_, loader.hints());
|
|
}
|
|
EXPECT_NE(PDF_FORM_ERROR, status);
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest,
|
|
DoNotLoadMainCrossRefForFirstPageIfLinearized) {
|
|
TestAsyncLoader loader("feature_linearized_loading.pdf");
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
const int first_page_num = FPDFAvail_GetFirstPageNum(document_);
|
|
|
|
// The main cross ref table should not be processed.
|
|
// (It is always at file end)
|
|
EXPECT_GT(loader.file_access()->m_FileLen,
|
|
loader.max_already_available_bound());
|
|
|
|
// Prevent access to non-requested data to coerce the parser to send new
|
|
// request for non available (non-requested before) data.
|
|
loader.set_is_new_data_available(false);
|
|
FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());
|
|
|
|
// The main cross ref table should not be requested.
|
|
// (It is always at file end)
|
|
EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());
|
|
|
|
// Allow parse page.
|
|
loader.set_is_new_data_available(true);
|
|
ASSERT_EQ(PDF_DATA_AVAIL,
|
|
FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));
|
|
|
|
// The main cross ref table should not be processed.
|
|
// (It is always at file end)
|
|
EXPECT_GT(loader.file_access()->m_FileLen,
|
|
loader.max_already_available_bound());
|
|
|
|
// Prevent loading data, while page loading.
|
|
loader.set_is_new_data_available(false);
|
|
ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
|
|
EXPECT_TRUE(page);
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
|
|
TestAsyncLoader loader("feature_linearized_loading.pdf");
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
|
|
static constexpr uint32_t kSecondPageNum = 1;
|
|
|
|
// Prevent access to non-requested data to coerce the parser to send new
|
|
// request for non available (non-requested before) data.
|
|
loader.set_is_new_data_available(false);
|
|
loader.ClearRequestedSegments();
|
|
|
|
int status = PDF_DATA_NOTAVAIL;
|
|
while (status == PDF_DATA_NOTAVAIL) {
|
|
loader.FlushRequestedData();
|
|
status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints());
|
|
}
|
|
EXPECT_EQ(PDF_DATA_AVAIL, status);
|
|
|
|
// Prevent loading data, while page loading.
|
|
loader.set_is_new_data_available(false);
|
|
ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
|
|
EXPECT_TRUE(page);
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
|
|
TestAsyncLoader loader("linearized.pdf");
|
|
loader.set_is_new_data_available(false);
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
|
|
loader.FlushRequestedData();
|
|
}
|
|
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
|
|
// The "info" dictionary should still be unavailable.
|
|
EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
|
|
|
|
// Simulate receiving whole file.
|
|
loader.set_is_new_data_available(true);
|
|
// Load second page, to parse additional crossref sections.
|
|
EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
|
|
|
|
EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
|
|
TestAsyncLoader loader("linearized.pdf");
|
|
// Map "Info" to an object within the first section without breaking
|
|
// linearization.
|
|
ByteString data(loader.file_contents(), loader.file_length());
|
|
Optional<size_t> index = data.Find("/Info 27 0 R");
|
|
ASSERT_TRUE(index);
|
|
memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12);
|
|
|
|
loader.set_is_new_data_available(false);
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
|
|
loader.FlushRequestedData();
|
|
}
|
|
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
|
|
// The "Info" dictionary should be available for the linearized document, if
|
|
// it is located in the first page section.
|
|
// Info was remapped to a dictionary with Type "Catalog"
|
|
unsigned short buffer[100] = {0};
|
|
EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
|
|
constexpr wchar_t kExpectedValue[] = L"Catalog";
|
|
EXPECT_EQ(WideString(kExpectedValue),
|
|
WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
|
|
TestAsyncLoader loader("linearized.pdf");
|
|
// Map "Info" to an invalid object without breaking linearization.
|
|
ByteString data(loader.file_contents(), loader.file_length());
|
|
Optional<size_t> index = data.Find("/Info 27 0 R");
|
|
ASSERT_TRUE(index);
|
|
memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12);
|
|
|
|
loader.set_is_new_data_available(false);
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
|
|
loader.FlushRequestedData();
|
|
}
|
|
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
|
|
// Set all data available.
|
|
loader.set_is_new_data_available(true);
|
|
// Check second page, to load additional crossrefs.
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
|
|
|
|
// Test that api is robust enough to handle the bad case.
|
|
EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
|
|
TestAsyncLoader loader("linearized.pdf");
|
|
// Break the "Info" parameter without breaking linearization.
|
|
ByteString data(loader.file_contents(), loader.file_length());
|
|
Optional<size_t> index = data.Find("/Info 27 0 R");
|
|
ASSERT_TRUE(index);
|
|
memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12);
|
|
|
|
loader.set_is_new_data_available(false);
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
|
|
loader.FlushRequestedData();
|
|
}
|
|
|
|
document_ = FPDFAvail_GetDocument(avail_, nullptr);
|
|
ASSERT_TRUE(document_);
|
|
|
|
// Set all data available.
|
|
loader.set_is_new_data_available(true);
|
|
// Check second page, to load additional crossrefs.
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
|
|
|
|
// Test that api is robust enough to handle the bad case.
|
|
EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
|
|
EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr));
|
|
EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr));
|
|
EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));
|
|
EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr));
|
|
EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr));
|
|
EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr));
|
|
}
|
|
|
|
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
|
|
TestAsyncLoader loader("linearized.pdf");
|
|
avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
|
|
ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
|
|
EXPECT_EQ(PDF_DATA_NOTAVAIL,
|
|
FPDFAvail_IsPageAvail(avail_, -1, loader.hints()));
|
|
}
|