// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "third_party/zlib/google/zip_reader.h" #include #include "base/bind.h" #include "base/files/file.h" #include "base/logging.h" #include "base/macros.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/threading/sequenced_task_runner_handle.h" #include "build/build_config.h" #include "third_party/zlib/google/zip_internal.h" #if defined(USE_SYSTEM_MINIZIP) #include #else #include "third_party/zlib/contrib/minizip/unzip.h" #if defined(OS_WIN) #include "third_party/zlib/contrib/minizip/iowin32.h" #endif // defined(OS_WIN) #endif // defined(USE_SYSTEM_MINIZIP) namespace zip { namespace { // StringWriterDelegate -------------------------------------------------------- // A writer delegate that writes no more than |max_read_bytes| to a given // std::string. class StringWriterDelegate : public WriterDelegate { public: StringWriterDelegate(size_t max_read_bytes, std::string* output); ~StringWriterDelegate() override; // WriterDelegate methods: // Returns true. bool PrepareOutput() override; // Appends |num_bytes| bytes from |data| to the output string. Returns false // if |num_bytes| will cause the string to exceed |max_read_bytes|. bool WriteBytes(const char* data, int num_bytes) override; void SetTimeModified(const base::Time& time) override; private: size_t max_read_bytes_; std::string* output_; DISALLOW_COPY_AND_ASSIGN(StringWriterDelegate); }; StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes, std::string* output) : max_read_bytes_(max_read_bytes), output_(output) { } StringWriterDelegate::~StringWriterDelegate() { } bool StringWriterDelegate::PrepareOutput() { return true; } bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) { if (output_->size() + num_bytes > max_read_bytes_) return false; output_->append(data, num_bytes); return true; } void StringWriterDelegate::SetTimeModified(const base::Time& time) { // Do nothing. } } // namespace // TODO(satorux): The implementation assumes that file names in zip files // are encoded in UTF-8. This is true for zip files created by Zip() // function in zip.h, but not true for user-supplied random zip files. ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip, const unz_file_info& raw_file_info) : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)), is_directory_(false), is_unsafe_(false), is_encrypted_(false) { original_size_ = raw_file_info.uncompressed_size; // Directory entries in zip files end with "/". is_directory_ = base::EndsWith(file_name_in_zip, "/", base::CompareCase::INSENSITIVE_ASCII); // Check the file name here for directory traversal issues. is_unsafe_ = file_path_.ReferencesParent(); // We also consider that the file name is unsafe, if it's invalid UTF-8. std::u16string file_name_utf16; if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(), &file_name_utf16)) { is_unsafe_ = true; } // We also consider that the file name is unsafe, if it's absolute. // On Windows, IsAbsolute() returns false for paths starting with "/". if (file_path_.IsAbsolute() || base::StartsWith(file_name_in_zip, "/", base::CompareCase::INSENSITIVE_ASCII)) is_unsafe_ = true; // Whether the file is encrypted is bit 0 of the flag. is_encrypted_ = raw_file_info.flag & 1; // Construct the last modified time. The timezone info is not present in // zip files, so we construct the time as local time. base::Time::Exploded exploded_time = {}; // Zero-clear. exploded_time.year = raw_file_info.tmu_date.tm_year; // The month in zip file is 0-based, whereas ours is 1-based. exploded_time.month = raw_file_info.tmu_date.tm_mon + 1; exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday; exploded_time.hour = raw_file_info.tmu_date.tm_hour; exploded_time.minute = raw_file_info.tmu_date.tm_min; exploded_time.second = raw_file_info.tmu_date.tm_sec; exploded_time.millisecond = 0; if (!base::Time::FromLocalExploded(exploded_time, &last_modified_)) last_modified_ = base::Time::UnixEpoch(); } ZipReader::ZipReader() { Reset(); } ZipReader::~ZipReader() { Close(); } bool ZipReader::Open(const base::FilePath& zip_file_path) { DCHECK(!zip_file_); // Use of "Unsafe" function does not look good, but there is no way to do // this safely on Linux. See file_util.h for details. zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe()); if (!zip_file_) { return false; } return OpenInternal(); } bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) { DCHECK(!zip_file_); #if defined(OS_POSIX) zip_file_ = internal::OpenFdForUnzipping(zip_fd); #elif defined(OS_WIN) zip_file_ = internal::OpenHandleForUnzipping(zip_fd); #endif if (!zip_file_) { return false; } return OpenInternal(); } bool ZipReader::OpenFromString(const std::string& data) { zip_file_ = internal::PrepareMemoryForUnzipping(data); if (!zip_file_) return false; return OpenInternal(); } void ZipReader::Close() { if (zip_file_) { unzClose(zip_file_); } Reset(); } bool ZipReader::HasMore() { return !reached_end_; } bool ZipReader::AdvanceToNextEntry() { DCHECK(zip_file_); // Should not go further if we already reached the end. if (reached_end_) return false; unz_file_pos position = {}; if (unzGetFilePos(zip_file_, &position) != UNZ_OK) return false; const int current_entry_index = position.num_of_file; // If we are currently at the last entry, then the next position is the // end of the zip file, so mark that we reached the end. if (current_entry_index + 1 == num_entries_) { reached_end_ = true; } else { DCHECK_LT(current_entry_index + 1, num_entries_); if (unzGoToNextFile(zip_file_) != UNZ_OK) { return false; } } current_entry_info_.reset(); return true; } bool ZipReader::OpenCurrentEntryInZip() { DCHECK(zip_file_); unz_file_info raw_file_info = {}; char raw_file_name_in_zip[internal::kZipMaxPath] = {}; const int result = unzGetCurrentFileInfo(zip_file_, &raw_file_info, raw_file_name_in_zip, sizeof(raw_file_name_in_zip) - 1, NULL, // extraField. 0, // extraFieldBufferSize. NULL, // szComment. 0); // commentBufferSize. if (result != UNZ_OK) return false; if (raw_file_name_in_zip[0] == '\0') return false; current_entry_info_.reset( new EntryInfo(raw_file_name_in_zip, raw_file_info)); return true; } bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate, uint64_t num_bytes_to_extract) const { DCHECK(zip_file_); const int open_result = unzOpenCurrentFile(zip_file_); if (open_result != UNZ_OK) return false; if (!delegate->PrepareOutput()) return false; std::unique_ptr buf(new char[internal::kZipBufSize]); uint64_t remaining_capacity = num_bytes_to_extract; bool entire_file_extracted = false; while (remaining_capacity > 0) { const int num_bytes_read = unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize); if (num_bytes_read == 0) { entire_file_extracted = true; break; } else if (num_bytes_read < 0) { // If num_bytes_read < 0, then it's a specific UNZ_* error code. break; } else if (num_bytes_read > 0) { uint64_t num_bytes_to_write = std::min( remaining_capacity, base::checked_cast(num_bytes_read)); if (!delegate->WriteBytes(buf.get(), num_bytes_to_write)) break; if (remaining_capacity == base::checked_cast(num_bytes_read)) { // Ensures function returns true if the entire file has been read. entire_file_extracted = (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0); } CHECK_GE(remaining_capacity, num_bytes_to_write); remaining_capacity -= num_bytes_to_write; } } unzCloseCurrentFile(zip_file_); if (entire_file_extracted && current_entry_info()->last_modified() != base::Time::UnixEpoch()) { delegate->SetTimeModified(current_entry_info()->last_modified()); } return entire_file_extracted; } void ZipReader::ExtractCurrentEntryToFilePathAsync( const base::FilePath& output_file_path, SuccessCallback success_callback, FailureCallback failure_callback, const ProgressCallback& progress_callback) { DCHECK(zip_file_); DCHECK(current_entry_info_.get()); // If this is a directory, just create it and return. if (current_entry_info()->is_directory()) { if (base::CreateDirectory(output_file_path)) { base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, std::move(success_callback)); } else { DVLOG(1) << "Unzip failed: unable to create directory."; base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, std::move(failure_callback)); } return; } if (unzOpenCurrentFile(zip_file_) != UNZ_OK) { DVLOG(1) << "Unzip failed: unable to open current zip entry."; base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, std::move(failure_callback)); return; } base::FilePath output_dir_path = output_file_path.DirName(); if (!base::CreateDirectory(output_dir_path)) { DVLOG(1) << "Unzip failed: unable to create containing directory."; base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, std::move(failure_callback)); return; } const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE; base::File output_file(output_file_path, flags); if (!output_file.IsValid()) { DVLOG(1) << "Unzip failed: unable to create platform file at " << output_file_path.value(); base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, std::move(failure_callback)); return; } base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(), std::move(output_file), std::move(success_callback), std::move(failure_callback), progress_callback, 0 /* initial offset */)); } bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes, std::string* output) const { DCHECK(output); DCHECK(zip_file_); if (max_read_bytes == 0) { output->clear(); return true; } if (current_entry_info()->is_directory()) { output->clear(); return true; } // The original_size() is the best hint for the real size, so it saves // doing reallocations for the common case when the uncompressed size is // correct. However, we need to assume that the uncompressed size could be // incorrect therefore this function needs to read as much data as possible. std::string contents; contents.reserve( static_cast(std::min(base::checked_cast(max_read_bytes), current_entry_info()->original_size()))); StringWriterDelegate writer(max_read_bytes, &contents); if (!ExtractCurrentEntry(&writer, max_read_bytes)) { if (contents.length() < max_read_bytes) { // There was an error in extracting entry. If ExtractCurrentEntry() // returns false, the entire file was not read - in which case // contents.length() should equal |max_read_bytes| unless an error // occurred which caused extraction to be aborted. output->clear(); } else { // |num_bytes| is less than the length of current entry. output->swap(contents); } return false; } output->swap(contents); return true; } bool ZipReader::OpenInternal() { DCHECK(zip_file_); unz_global_info zip_info = {}; // Zero-clear. if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) { return false; } num_entries_ = zip_info.number_entry; if (num_entries_ < 0) return false; // We are already at the end if the zip file is empty. reached_end_ = (num_entries_ == 0); return true; } void ZipReader::Reset() { zip_file_ = NULL; num_entries_ = 0; reached_end_ = false; current_entry_info_.reset(); } void ZipReader::ExtractChunk(base::File output_file, SuccessCallback success_callback, FailureCallback failure_callback, const ProgressCallback& progress_callback, const int64_t offset) { char buffer[internal::kZipBufSize]; const int num_bytes_read = unzReadCurrentFile(zip_file_, buffer, internal::kZipBufSize); if (num_bytes_read == 0) { unzCloseCurrentFile(zip_file_); std::move(success_callback).Run(); } else if (num_bytes_read < 0) { DVLOG(1) << "Unzip failed: error while reading zipfile " << "(" << num_bytes_read << ")"; std::move(failure_callback).Run(); } else { if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) { DVLOG(1) << "Unzip failed: unable to write all bytes to target."; std::move(failure_callback).Run(); return; } int64_t current_progress = offset + num_bytes_read; progress_callback.Run(current_progress); base::SequencedTaskRunnerHandle::Get()->PostTask( FROM_HERE, base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(), std::move(output_file), std::move(success_callback), std::move(failure_callback), progress_callback, current_progress)); } } // FileWriterDelegate ---------------------------------------------------------- FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {} FileWriterDelegate::FileWriterDelegate(std::unique_ptr file) : file_(file.get()), owned_file_(std::move(file)) {} FileWriterDelegate::~FileWriterDelegate() { if (!file_->SetLength(file_length_)) { DVPLOG(1) << "Failed updating length of written file"; } } bool FileWriterDelegate::PrepareOutput() { return file_->Seek(base::File::FROM_BEGIN, 0) >= 0; } bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) { int bytes_written = file_->WriteAtCurrentPos(data, num_bytes); if (bytes_written > 0) file_length_ += bytes_written; return bytes_written == num_bytes; } void FileWriterDelegate::SetTimeModified(const base::Time& time) { file_->SetTimes(base::Time::Now(), time); } // FilePathWriterDelegate ------------------------------------------------------ FilePathWriterDelegate::FilePathWriterDelegate( const base::FilePath& output_file_path) : output_file_path_(output_file_path) {} FilePathWriterDelegate::~FilePathWriterDelegate() {} bool FilePathWriterDelegate::PrepareOutput() { // We can't rely on parent directory entries being specified in the // zip, so we make sure they are created. if (!base::CreateDirectory(output_file_path_.DirName())) return false; file_.Initialize(output_file_path_, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE); return file_.IsValid(); } bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) { return num_bytes == file_.WriteAtCurrentPos(data, num_bytes); } void FilePathWriterDelegate::SetTimeModified(const base::Time& time) { file_.Close(); base::TouchFile(output_file_path_, base::Time::Now(), time); } } // namespace zip