You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
134 lines
3.6 KiB
134 lines
3.6 KiB
# Copyright (C) 2018 The Android Open Source Project
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Functions to process archive files."""
|
|
|
|
import os
|
|
import tempfile
|
|
import tarfile
|
|
import urllib.parse
|
|
import zipfile
|
|
|
|
|
|
class ZipFileWithPermission(zipfile.ZipFile):
    """ZipFile variant that re-applies stored file permissions on extraction.

    See https://bugs.python.org/issue15795
    """

    def _extract_member(self, member, targetpath, pwd):
        """Extracts one member, then applies its recorded mode.

        Args:
            member: A ZipInfo object or the name of a member in the archive.
            targetpath: Path the member is extracted to.
            pwd: Password passed through to the base implementation.

        Returns:
            The value returned by the base class `_extract_member`.
        """
        extracted_path = super()._extract_member(member, targetpath, pwd)

        # Accept either a ZipInfo or a member name, as the base class does.
        info = (member if isinstance(member, zipfile.ZipInfo)
                else self.getinfo(member))

        # The high 16 bits of external_attr are used as the mode for chmod;
        # zero means nothing was recorded, so the extracted file is left as is.
        mode = info.external_attr >> 16
        if mode:
            os.chmod(extracted_path, mode)
        return extracted_path
|
|
|
|
|
|
def unzip(archive_path, target_path):
    """Unpacks a zip archive into a directory.

    Extraction goes through ZipFileWithPermission rather than the plain
    zipfile.ZipFile class.

    Args:
        archive_path: Path to the zip file.
        target_path: Path to extract files to.
    """
    archive = ZipFileWithPermission(archive_path)
    with archive:
        archive.extractall(path=target_path)
|
|
|
|
|
|
def untar(archive_path, target_path):
    """Unpacks a tar archive into a directory.

    Args:
        archive_path: Path to the tar file.
        target_path: Path to extract files to.
    """
    archive = tarfile.open(archive_path, mode='r')
    with archive:
        archive.extractall(path=target_path)
|
|
|
|
|
|
# Maps a file-name suffix to the function that extracts that kind of archive.
# Every tar suffix is handled by `untar`; insertion order is '.zip' first,
# then the tar suffixes in the order listed.
ARCHIVE_TYPES = {
    '.zip': unzip,
    **dict.fromkeys(('.tar.gz', '.tar.bz2', '.tar.xz'), untar),
}
|
|
|
|
|
|
def is_supported_archive(url):
    """Tells whether an extract function is known for the given url.

    Args:
        url: The url to the archive file.

    Returns:
        True if get_extract_func() finds an extract function for the url,
        False otherwise.
    """
    extractor = get_extract_func(url)
    return extractor is not None
|
|
|
|
|
|
def get_extract_func(url):
    """Gets the function to extract an archive.

    The file name taken from the url path is matched against the suffixes in
    ARCHIVE_TYPES. Urls that contain a crates.io download path are treated as
    tar archives even though they carry none of those suffixes.

    Args:
        url: The url to the archive file.

    Returns:
        A function to extract the archive. None if not found.
    """
    parsed_url = urllib.parse.urlparse(url)
    filename = os.path.basename(parsed_url.path)
    for ext, func in ARCHIVE_TYPES.items():
        if filename.endswith(ext):
            return func

    # crates.io download url does not have file suffix
    # e.g., https://crates.io/api/v1/crates/syn/1.0.16/download
    # Use substring membership, not the truthiness of str.find(): find()
    # returns -1 (which is truthy) when the substring is absent, which would
    # classify nearly every url as a tar archive.
    if '/crates.io/api/' in url or '/static.crates.io/crates/' in url:
        return untar
    return None
|
|
|
|
|
|
def download_and_extract(url):
    """Downloads and extracts an archive file to a temporary directory.

    Args:
        url: Url to download.

    Returns:
        Path to the temporary directory.

    Raises:
        ValueError: If no extract function is known for the url.
    """
    # Imported locally: the visible module-level imports only include
    # urllib.parse, which does not guarantee that the urllib.request
    # submodule is loaded.
    import urllib.request

    # Resolve the extractor before downloading so an unsupported url fails
    # early with a clear error instead of calling None after the download.
    extract_func = get_extract_func(url)
    if extract_func is None:
        raise ValueError('Unsupported archive url: {}'.format(url))

    print('Downloading {}'.format(url))
    archive_file, _headers = urllib.request.urlretrieve(url)

    temporary_dir = tempfile.mkdtemp()
    print('Extracting {} to {}'.format(archive_file, temporary_dir))
    extract_func(archive_file, temporary_dir)

    return temporary_dir
|
|
|
|
|
|
def find_archive_root(path):
    """Finds the real root of an extracted archive.

    Sometimes archives have additional layers of directories. This function
    walks the tree top-down and picks the first directory that contains at
    least one file or more than one sub-directory.

    Args:
        path: Path to the extracted archive.

    Returns:
        The root path we found, or `path` itself when no directory in the
        walk qualifies.
    """
    candidates = (
        current
        for current, subdirs, filenames in os.walk(path)
        if filenames or len(subdirs) > 1
    )
    # First qualifying directory wins; fall back to the starting path.
    return next(candidates, path)
|