@@ -0,0 +1,922 @@
+from __future__ import absolute_import
+
+import cgi
+import email.utils
+import getpass
+import json
+import logging
+import mimetypes
+import os
+import platform
+import re
+import shutil
+import sys
+
+from pip._vendor import requests, six, urllib3
+from pip._vendor.cachecontrol import CacheControlAdapter
+from pip._vendor.cachecontrol.caches import FileCache
+from pip._vendor.lockfile import LockError
+from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
+from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
+from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
+from pip._vendor.requests.structures import CaseInsensitiveDict
+from pip._vendor.requests.utils import get_netrc_auth
+# NOTE: XMLRPC Client is not annotated in typeshed as of 2017-07-17, which is
+# why we ignore the type on this import
+from pip._vendor.six.moves import xmlrpc_client  # type: ignore
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+from pip._vendor.six.moves.urllib import request as urllib_request
+from pip._vendor.six.moves.urllib.parse import unquote as urllib_unquote
+from pip._vendor.urllib3.util import IS_PYOPENSSL
+
+import pip
+from pip._internal.compat import WINDOWS
+from pip._internal.exceptions import HashMismatch, InstallationError
+from pip._internal.locations import write_delete_marker_file
+from pip._internal.models import PyPI
+from pip._internal.utils.encoding import auto_decode
+from pip._internal.utils.filesystem import check_path_owner
+from pip._internal.utils.glibc import libc_ver
+from pip._internal.utils.logging import indent_log
+from pip._internal.utils.misc import (
+    ARCHIVE_EXTENSIONS, ask_path_exists, backup_dir, call_subprocess, consume,
+    display_path, format_size, get_installed_version, rmtree, splitext,
+    unpack_file,
+)
+from pip._internal.utils.setuptools_build import SETUPTOOLS_SHIM
+from pip._internal.utils.temp_dir import TempDirectory
+from pip._internal.utils.ui import DownloadProgressProvider
+from pip._internal.vcs import vcs
+
+try:
+    import ssl  # noqa
+except ImportError:
+    ssl = None
+
+HAS_TLS = (ssl is not None) or IS_PYOPENSSL
+
+__all__ = ['get_file_content',
+           'is_url', 'url_to_path', 'path_to_url',
+           'is_archive_file', 'unpack_vcs_link',
+           'unpack_file_url', 'is_vcs_url', 'is_file_url',
+           'unpack_http_url', 'unpack_url']
+
+
+logger = logging.getLogger(__name__)
+
+
+def user_agent():
+    """
+    Return a string representing the user agent.
+    """
+    data = {
+        "installer": {"name": "pip", "version": pip.__version__},
+        "python": platform.python_version(),
+        "implementation": {
+            "name": platform.python_implementation(),
+        },
+    }
+
+    if data["implementation"]["name"] == 'CPython':
+        data["implementation"]["version"] = platform.python_version()
+    elif data["implementation"]["name"] == 'PyPy':
+        if sys.pypy_version_info.releaselevel == 'final':
+            pypy_version_info = sys.pypy_version_info[:3]
+        else:
+            pypy_version_info = sys.pypy_version_info
+        data["implementation"]["version"] = ".".join(
+            [str(x) for x in pypy_version_info]
+        )
+    elif data["implementation"]["name"] == 'Jython':
+        # Complete Guess
+        data["implementation"]["version"] = platform.python_version()
+    elif data["implementation"]["name"] == 'IronPython':
+        # Complete Guess
+        data["implementation"]["version"] = platform.python_version()
+
+    if sys.platform.startswith("linux"):
+        from pip._vendor import distro
+        distro_infos = dict(filter(
+            lambda x: x[1],
+            zip(["name", "version", "id"], distro.linux_distribution()),
+        ))
+        libc = dict(filter(
+            lambda x: x[1],
+            zip(["lib", "version"], libc_ver()),
+        ))
+        if libc:
+            distro_infos["libc"] = libc
+        if distro_infos:
+            data["distro"] = distro_infos
+
+    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
+        data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
+
+    if platform.system():
+        data.setdefault("system", {})["name"] = platform.system()
+
+    if platform.release():
+        data.setdefault("system", {})["release"] = platform.release()
+
+    if platform.machine():
+        data["cpu"] = platform.machine()
+
+    if HAS_TLS:
+        data["openssl_version"] = ssl.OPENSSL_VERSION
+
+    setuptools_version = get_installed_version("setuptools")
+    if setuptools_version is not None:
+        data["setuptools_version"] = setuptools_version
+
+    return "{data[installer][name]}/{data[installer][version]} {json}".format(
+        data=data,
+        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
+    )
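+
+# For orientation, the string built above has the shape
+# "pip/<version> <JSON metadata>"; for example (values are illustrative only,
+# not from a real environment):
+#
+#     pip/10.0.0 {"cpu":"x86_64","implementation":{"name":"CPython",
+#     "version":"3.6.5"},"installer":{"name":"pip","version":"10.0.0"},
+#     "python":"3.6.5","system":{"name":"Linux","release":"4.15.0"}}
+#
+# json.dumps() uses sort_keys=True and compact separators, so the JSON part
+# is stable for a given environment.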
+
+
+class MultiDomainBasicAuth(AuthBase):
+
+    def __init__(self, prompting=True):
+        self.prompting = prompting
+        self.passwords = {}
+
+    def __call__(self, req):
+        parsed = urllib_parse.urlparse(req.url)
+
+        # Get the netloc without any embedded credentials
+        netloc = parsed.netloc.rsplit("@", 1)[-1]
+
+        # Set the url of the request to the url without any credentials
+        req.url = urllib_parse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])
+
+        # Use any stored credentials that we have for this netloc
+        username, password = self.passwords.get(netloc, (None, None))
+
+        # Extract credentials embedded in the url if we have none stored
+        if username is None:
+            username, password = self.parse_credentials(parsed.netloc)
+
+        # Get creds from netrc if we still don't have them
+        if username is None and password is None:
+            netrc_auth = get_netrc_auth(req.url)
+            username, password = netrc_auth if netrc_auth else (None, None)
+
+        if username or password:
+            # Store the username and password
+            self.passwords[netloc] = (username, password)
+
+            # Send the basic auth with this request
+            req = HTTPBasicAuth(username or "", password or "")(req)
+
+        # Attach a hook to handle 401 responses
+        req.register_hook("response", self.handle_401)
+
+        return req
+
+    def handle_401(self, resp, **kwargs):
+        # We only care about 401 responses, anything else we want to just
+        # pass through the actual response
+        if resp.status_code != 401:
+            return resp
+
+        # We are not able to prompt the user so simply return the response
+        if not self.prompting:
+            return resp
+
+        parsed = urllib_parse.urlparse(resp.url)
+
+        # Prompt the user for a new username and password
+        username = six.moves.input("User for %s: " % parsed.netloc)
+        password = getpass.getpass("Password: ")
+
+        # Store the new username and password to use for future requests
+        if username or password:
+            self.passwords[parsed.netloc] = (username, password)
+
+        # Consume content and release the original connection to allow our new
+        # request to reuse the same one.
+        resp.content
+        resp.raw.release_conn()
+
+        # Add our new username and password to the request
+        req = HTTPBasicAuth(username or "", password or "")(resp.request)
+
+        # Send our new request
+        new_resp = resp.connection.send(req, **kwargs)
+        new_resp.history.append(resp)
+
+        return new_resp
+
+    def parse_credentials(self, netloc):
+        if "@" in netloc:
+            userinfo = netloc.rsplit("@", 1)[0]
+            if ":" in userinfo:
+                user, pwd = userinfo.split(":", 1)
+                return (urllib_unquote(user), urllib_unquote(pwd))
+            return urllib_unquote(userinfo), None
+        return None, None
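+
+# Illustration of parse_credentials() above (the netlocs are hypothetical):
+# "user%40example.com:s3cret@pypi.example.org" yields the unquoted pair
+# ("user@example.com", "s3cret"), "token@pypi.example.org" yields
+# ("token", None), and a netloc with no "@" yields (None, None).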
+
+
+class LocalFSAdapter(BaseAdapter):
+
+    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
+             proxies=None):
+        pathname = url_to_path(request.url)
+
+        resp = Response()
+        resp.status_code = 200
+        resp.url = request.url
+
+        try:
+            stats = os.stat(pathname)
+        except OSError as exc:
+            resp.status_code = 404
+            resp.raw = exc
+        else:
+            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+            content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
+            resp.headers = CaseInsensitiveDict({
+                "Content-Type": content_type,
+                "Content-Length": stats.st_size,
+                "Last-Modified": modified,
+            })
+
+            resp.raw = open(pathname, "rb")
+            resp.close = resp.raw.close
+
+        return resp
+
+    def close(self):
+        pass
+
+
+class SafeFileCache(FileCache):
+    """
+    A file based cache which is safe to use even when the target directory may
+    not be accessible or writable.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(SafeFileCache, self).__init__(*args, **kwargs)
+
+        # Check to ensure that the directory containing our cache directory
+        # is owned by the user currently executing pip. If it does not exist
+        # we will check the parent directory until we find one that does
+        # exist. If it is not owned by the user executing pip then we will
+        # disable the cache and log a warning.
+        if not check_path_owner(self.directory):
+            logger.warning(
+                "The directory '%s' or its parent directory is not owned by "
+                "the current user and the cache has been disabled. Please "
+                "check the permissions and owner of that directory. If "
+                "executing pip with sudo, you may want sudo's -H flag.",
+                self.directory,
+            )
+
+            # Set our directory to None to disable the Cache
+            self.directory = None
+
+    def get(self, *args, **kwargs):
+        # If we don't have a directory, then the cache should be a no-op.
+        if self.directory is None:
+            return
+
+        try:
+            return super(SafeFileCache, self).get(*args, **kwargs)
+        except (LockError, OSError, IOError):
+            # We intentionally silence this error: if we can't access the
+            # cache then we can just skip caching and process the request as
+            # if caching wasn't enabled.
+            pass
+
+    def set(self, *args, **kwargs):
+        # If we don't have a directory, then the cache should be a no-op.
+        if self.directory is None:
+            return
+
+        try:
+            return super(SafeFileCache, self).set(*args, **kwargs)
+        except (LockError, OSError, IOError):
+            # We intentionally silence this error: if we can't access the
+            # cache then we can just skip caching and process the request as
+            # if caching wasn't enabled.
+            pass
+
+    def delete(self, *args, **kwargs):
+        # If we don't have a directory, then the cache should be a no-op.
+        if self.directory is None:
+            return
+
+        try:
+            return super(SafeFileCache, self).delete(*args, **kwargs)
+        except (LockError, OSError, IOError):
+            # We intentionally silence this error: if we can't access the
+            # cache then we can just skip caching and process the request as
+            # if caching wasn't enabled.
+            pass
+
+
+class InsecureHTTPAdapter(HTTPAdapter):
+
+    def cert_verify(self, conn, url, verify, cert):
+        conn.cert_reqs = 'CERT_NONE'
+        conn.ca_certs = None
+
+
+class PipSession(requests.Session):
+
+    timeout = None
+
+    def __init__(self, *args, **kwargs):
+        retries = kwargs.pop("retries", 0)
+        cache = kwargs.pop("cache", None)
+        insecure_hosts = kwargs.pop("insecure_hosts", [])
+
+        super(PipSession, self).__init__(*args, **kwargs)
+
+        # Attach our User Agent to the request
+        self.headers["User-Agent"] = user_agent()
+
+        # Attach our Authentication handler to the session
+        self.auth = MultiDomainBasicAuth()
+
+        # Create our urllib3.Retry instance which will allow us to customize
+        # how we handle retries.
+        retries = urllib3.Retry(
+            # Set the total number of retries that a particular request can
+            # have.
+            total=retries,
+
+            # A 503 error from PyPI typically means that the Fastly -> Origin
+            # connection got interrupted in some way. A 503 error in general
+            # is typically considered a transient error so we'll go ahead and
+            # retry it.
+            # A 500 may indicate a transient error from Amazon S3.
+            # A 520 or 527 may indicate a transient error from CloudFlare.
+            status_forcelist=[500, 503, 520, 527],
+
+            # Add a small amount of back off between failed requests in
+            # order to prevent hammering the service.
+            backoff_factor=0.25,
+        )
+
+        # We want to _only_ cache responses on securely fetched origins. We do
+        # this because we can't validate the response of an insecurely fetched
+        # origin, and we don't want someone to be able to poison the cache and
+        # require manual eviction from the cache to fix it.
+        if cache:
+            secure_adapter = CacheControlAdapter(
+                cache=SafeFileCache(cache, use_dir_lock=True),
+                max_retries=retries,
+            )
+        else:
+            secure_adapter = HTTPAdapter(max_retries=retries)
+
+        # Our Insecure HTTPAdapter disables HTTPS validation. It does not
+        # support caching (see above) so we'll use it for all http:// URLs as
+        # well as any https:// host that we've marked as ignoring TLS errors
+        # for.
+        insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
+
+        self.mount("https://", secure_adapter)
+        self.mount("http://", insecure_adapter)
+
+        # Enable file:// urls
+        self.mount("file://", LocalFSAdapter())
+
+        # We want to use a non-validating adapter for any requests which are
+        # deemed insecure.
+        for host in insecure_hosts:
+            self.mount("https://{}/".format(host), insecure_adapter)
+
+    def request(self, method, url, *args, **kwargs):
+        # Allow setting a default timeout on a session
+        kwargs.setdefault("timeout", self.timeout)
+
+        # Dispatch the actual request
+        return super(PipSession, self).request(method, url, *args, **kwargs)
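+
+# Rough construction sketch (the path and host below are hypothetical; the
+# real values typically come from pip's command-line options):
+#
+#     session = PipSession(
+#         cache="~/.cache/pip/http",         # enables the caching adapter
+#         retries=3,                         # total for the urllib3.Retry
+#         insecure_hosts=["pypi.internal"],  # TLS errors ignored for these
+#     )
+#     session.timeout = 15                   # default per-request timeout
+#
+# Every request made through the session then carries the pip User-Agent and
+# the multi-domain basic-auth handler installed in __init__ above.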
+
+
+def get_file_content(url, comes_from=None, session=None):
+    """Gets the content of a file; it may be a filename, file: URL, or
+    http: URL. Returns (location, content). Content is unicode.
+
+    :param url: File path or url.
+    :param comes_from: Origin description of requirements.
+    :param session: Instance of pip.download.PipSession.
+    """
+    if session is None:
+        raise TypeError(
+            "get_file_content() missing 1 required keyword argument: 'session'"
+        )
+
+    match = _scheme_re.search(url)
+    if match:
+        scheme = match.group(1).lower()
+        if (scheme == 'file' and comes_from and
+                comes_from.startswith('http')):
+            raise InstallationError(
+                'Requirements file %s references URL %s, which is local'
+                % (comes_from, url))
+        if scheme == 'file':
+            path = url.split(':', 1)[1]
+            path = path.replace('\\', '/')
+            match = _url_slash_drive_re.match(path)
+            if match:
+                path = match.group(1) + ':' + path.split('|', 1)[1]
+            path = urllib_parse.unquote(path)
+            if path.startswith('/'):
+                path = '/' + path.lstrip('/')
+            url = path
+        else:
+            # FIXME: catch some errors
+            resp = session.get(url)
+            resp.raise_for_status()
+            return resp.url, resp.text
+    try:
+        with open(url, 'rb') as f:
+            content = auto_decode(f.read())
+    except IOError as exc:
+        raise InstallationError(
+            'Could not open requirements file: %s' % str(exc)
+        )
+    return url, content
+
+
+_scheme_re = re.compile(r'^(http|https|file):', re.I)
+_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
+
+
+def is_url(name):
+    """Returns true if the name looks like a URL"""
+    if ':' not in name:
+        return False
+    scheme = name.split(':', 1)[0].lower()
+    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
+
+
+def url_to_path(url):
+    """
+    Convert a file: URL to a path.
+    """
+    assert url.startswith('file:'), (
+        "You can only turn file: urls into filenames (not %r)" % url)
+
+    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
+
+    # if we have a UNC path, prepend UNC share notation
+    if netloc:
+        netloc = '\\\\' + netloc
+
+    path = urllib_request.url2pathname(netloc + path)
+    return path
+
+
+def path_to_url(path):
+    """
+    Convert a path to a file: URL. The path will be made absolute and have
+    quoted path parts.
+    """
+    path = os.path.normpath(os.path.abspath(path))
+    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
+    return url
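+
+# Round-trip sketch (POSIX-style paths, purely illustrative):
+#
+#     path_to_url("/tmp/some dir/pkg-1.0.tar.gz")
+#         -> "file:///tmp/some%20dir/pkg-1.0.tar.gz"
+#     url_to_path("file:///tmp/some%20dir/pkg-1.0.tar.gz")
+#         -> "/tmp/some dir/pkg-1.0.tar.gz"
+#
+# On Windows, the UNC branch in url_to_path() is meant to turn
+# "file://server/share/file.txt" into r"\\server\share\file.txt".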
+
+
+def is_archive_file(name):
+    """Return True if `name` is considered an archive file."""
+    ext = splitext(name)[1].lower()
+    if ext in ARCHIVE_EXTENSIONS:
+        return True
+    return False
+
+
+def unpack_vcs_link(link, location):
+    vcs_backend = _get_used_vcs_backend(link)
+    vcs_backend.unpack(location)
+
+
+def _get_used_vcs_backend(link):
+    for backend in vcs.backends:
+        if link.scheme in backend.schemes:
+            vcs_backend = backend(link.url)
+            return vcs_backend
+
+
+def is_vcs_url(link):
+    return bool(_get_used_vcs_backend(link))
+
+
+def is_file_url(link):
+    return link.url.lower().startswith('file:')
+
+
+def is_dir_url(link):
+    """Return whether a file:// Link points to a directory.
+
+    ``link`` must not have any other scheme but file://. Call is_file_url()
+    first.
+
+    """
+    link_path = url_to_path(link.url_without_fragment)
+    return os.path.isdir(link_path)
+
+
+def _progress_indicator(iterable, *args, **kwargs):
+    return iterable
+
+
+def _download_url(resp, link, content_file, hashes, progress_bar):
+    try:
+        total_length = int(resp.headers['content-length'])
+    except (ValueError, KeyError, TypeError):
+        total_length = 0
+
+    cached_resp = getattr(resp, "from_cache", False)
+    if logger.getEffectiveLevel() > logging.INFO:
+        show_progress = False
+    elif cached_resp:
+        show_progress = False
+    elif total_length > (40 * 1000):
+        show_progress = True
+    elif not total_length:
+        show_progress = True
+    else:
+        show_progress = False
+
+    show_url = link.show_url
+
+    def resp_read(chunk_size):
+        try:
+            # Special case for urllib3.
+            for chunk in resp.raw.stream(
+                    chunk_size,
+                    # We use decode_content=False here because we don't
+                    # want urllib3 to mess with the raw bytes we get
+                    # from the server. If we decompress inside of
+                    # urllib3 then we cannot verify the checksum
+                    # because the checksum will be of the compressed
+                    # file. This breakage will only occur if the
+                    # server adds a Content-Encoding header, which
+                    # depends on how the server was configured:
+                    # - Some servers will notice that the file isn't a
+                    #   compressible file and will leave the file alone
+                    #   and with an empty Content-Encoding
+                    # - Some servers will notice that the file is
+                    #   already compressed and will leave the file
+                    #   alone and will add a Content-Encoding: gzip
+                    #   header
+                    # - Some servers won't notice anything at all and
+                    #   will take a file that's already been compressed
+                    #   and compress it again and set the
+                    #   Content-Encoding: gzip header
+                    #
+                    # By setting this not to decode automatically we
+                    # hope to eliminate problems with the second case.
+                    decode_content=False):
+                yield chunk
+        except AttributeError:
+            # Standard file-like object.
+            while True:
+                chunk = resp.raw.read(chunk_size)
+                if not chunk:
+                    break
+                yield chunk
+
+    def written_chunks(chunks):
+        for chunk in chunks:
+            content_file.write(chunk)
+            yield chunk
+
+    progress_indicator = _progress_indicator
+
+    if link.netloc == PyPI.netloc:
+        url = show_url
+    else:
+        url = link.url_without_fragment
+
+    if show_progress:  # We don't show progress on cached responses
+        progress_indicator = DownloadProgressProvider(progress_bar,
+                                                      max=total_length)
+        if total_length:
+            logger.info("Downloading %s (%s)", url, format_size(total_length))
+        else:
+            logger.info("Downloading %s", url)
+    elif cached_resp:
+        logger.info("Using cached %s", url)
+    else:
+        logger.info("Downloading %s", url)
+
+    logger.debug('Downloading from URL %s', link)
+
+    downloaded_chunks = written_chunks(
+        progress_indicator(
+            resp_read(CONTENT_CHUNK_SIZE),
+            CONTENT_CHUNK_SIZE
+        )
+    )
+    if hashes:
+        hashes.check_against_chunks(downloaded_chunks)
+    else:
+        consume(downloaded_chunks)
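+
+# Note on the pipeline above: resp_read() yields raw (undecoded) chunks,
+# written_chunks() writes each chunk to content_file as a side effect, and
+# the progress indicator merely wraps the iterator. Whoever consumes the
+# final generator therefore drives the whole download: either
+# hashes.check_against_chunks(), which raises HashMismatch if no expected
+# digest matches, or consume(), which simply exhausts it.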
+
+
+def _copy_file(filename, location, link):
+    copy = True
+    download_location = os.path.join(location, link.filename)
+    if os.path.exists(download_location):
+        response = ask_path_exists(
+            'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort' %
+            display_path(download_location), ('i', 'w', 'b', 'a'))
+        if response == 'i':
+            copy = False
+        elif response == 'w':
+            logger.warning('Deleting %s', display_path(download_location))
+            os.remove(download_location)
+        elif response == 'b':
+            dest_file = backup_dir(download_location)
+            logger.warning(
+                'Backing up %s to %s',
+                display_path(download_location),
+                display_path(dest_file),
+            )
+            shutil.move(download_location, dest_file)
+        elif response == 'a':
+            sys.exit(-1)
+    if copy:
+        shutil.copy(filename, download_location)
+        logger.info('Saved %s', display_path(download_location))
+
+
+def unpack_http_url(link, location, download_dir=None,
+                    session=None, hashes=None, progress_bar="on"):
+    if session is None:
+        raise TypeError(
+            "unpack_http_url() missing 1 required keyword argument: 'session'"
+        )
+
+    with TempDirectory(kind="unpack") as temp_dir:
+        # If a download dir is specified, is the file already downloaded there?
+        already_downloaded_path = None
+        if download_dir:
+            already_downloaded_path = _check_download_dir(link,
+                                                          download_dir,
+                                                          hashes)
+
+        if already_downloaded_path:
+            from_path = already_downloaded_path
+            content_type = mimetypes.guess_type(from_path)[0]
+        else:
+            # let's download to a tmp dir
+            from_path, content_type = _download_http_url(link,
+                                                         session,
+                                                         temp_dir.path,
+                                                         hashes,
+                                                         progress_bar)
+
+        # unpack the archive to the build dir location. even when only
+        # downloading archives, they have to be unpacked to parse dependencies
+        unpack_file(from_path, location, content_type, link)
+
+        # a download dir is specified; let's copy the archive there
+        if download_dir and not already_downloaded_path:
+            _copy_file(from_path, download_dir, link)
+
+        if not already_downloaded_path:
+            os.unlink(from_path)
+
+
+def unpack_file_url(link, location, download_dir=None, hashes=None):
+    """Unpack link into location.
+
+    If download_dir is provided and link points to a file, make a copy
+    of the link file inside download_dir.
+    """
+    link_path = url_to_path(link.url_without_fragment)
+
+    # If it's a url to a local directory
+    if is_dir_url(link):
+        if os.path.isdir(location):
+            rmtree(location)
+        shutil.copytree(link_path, location, symlinks=True)
+        if download_dir:
+            logger.info('Link is a directory, ignoring download_dir')
+        return
+
+    # If --require-hashes is off, `hashes` is either empty, the
+    # link's embedded hash, or MissingHashes; it is required to
+    # match. If --require-hashes is on, we are satisfied by any
+    # hash in `hashes` matching: a URL-based or an option-based
+    # one; no internet-sourced hash will be in `hashes`.
+    if hashes:
+        hashes.check_against_path(link_path)
+
+    # If a download dir is specified, is the file already there and valid?
+    already_downloaded_path = None
+    if download_dir:
+        already_downloaded_path = _check_download_dir(link,
+                                                      download_dir,
+                                                      hashes)
+
+    if already_downloaded_path:
+        from_path = already_downloaded_path
+    else:
+        from_path = link_path
+
+    content_type = mimetypes.guess_type(from_path)[0]
+
+    # unpack the archive to the build dir location. even when only downloading
+    # archives, they have to be unpacked to parse dependencies
+    unpack_file(from_path, location, content_type, link)
+
+    # a download dir is specified and not already downloaded
+    if download_dir and not already_downloaded_path:
+        _copy_file(from_path, download_dir, link)
+
+
+def _copy_dist_from_dir(link_path, location):
+    """Copy distribution files in `link_path` to `location`.
+
+    Invoked when user requests to install a local directory. E.g.:
+
+        pip install .
+        pip install ~/dev/git-repos/python-prompt-toolkit
+
+    """
+
+    # Note: This previously copied everything with `shutil.copytree`, which
+    # was VERY SLOW for directories with a lot of data. It now builds an
+    # sdist in `location` and unpacks that instead.
+    # See https://github.com/pypa/pip/issues/2195
+
+    if os.path.isdir(location):
+        rmtree(location)
+
+    # build an sdist
+    setup_py = 'setup.py'
+    sdist_args = [sys.executable]
+    sdist_args.append('-c')
+    sdist_args.append(SETUPTOOLS_SHIM % setup_py)
+    sdist_args.append('sdist')
+    sdist_args += ['--dist-dir', location]
+    logger.info('Running setup.py sdist for %s', link_path)
+
+    with indent_log():
+        call_subprocess(sdist_args, cwd=link_path, show_stdout=False)
+
+    # unpack sdist into `location`
+    sdist = os.path.join(location, os.listdir(location)[0])
+    logger.info('Unpacking sdist %s into %s', sdist, location)
+    unpack_file(sdist, location, content_type=None, link=None)
+
+
+class PipXmlrpcTransport(xmlrpc_client.Transport):
+    """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
+    object.
+    """
+
+    def __init__(self, index_url, session, use_datetime=False):
+        xmlrpc_client.Transport.__init__(self, use_datetime)
+        index_parts = urllib_parse.urlparse(index_url)
+        self._scheme = index_parts.scheme
+        self._session = session
+
+    def request(self, host, handler, request_body, verbose=False):
+        parts = (self._scheme, host, handler, None, None, None)
+        url = urllib_parse.urlunparse(parts)
+        try:
+            headers = {'Content-Type': 'text/xml'}
+            response = self._session.post(url, data=request_body,
+                                          headers=headers, stream=True)
+            response.raise_for_status()
+            self.verbose = verbose
+            return self.parse_response(response.raw)
+        except requests.HTTPError as exc:
+            logger.critical(
+                "HTTP error %s while getting %s",
+                exc.response.status_code, url,
+            )
+            raise
+
+
+def unpack_url(link, location, download_dir=None,
+               only_download=False, session=None, hashes=None,
+               progress_bar="on"):
+    """Unpack link.
+    If link is a VCS link:
+      if only_download, export into download_dir and ignore location
+      else unpack into location
+    for other types of link:
+      - unpack into location
+      - if download_dir, copy the file into download_dir
+      - if only_download, mark location for deletion
+
+    :param hashes: A Hashes object, one of whose embedded hashes must match,
+        or HashMismatch will be raised. If the Hashes is empty, no matches are
+        required, and unhashable types of requirements (like VCS ones, which
+        would ordinarily raise HashUnsupported) are allowed.
+    """
+    # non-editable vcs urls
+    if is_vcs_url(link):
+        unpack_vcs_link(link, location)
+
+    # file urls
+    elif is_file_url(link):
+        unpack_file_url(link, location, download_dir, hashes=hashes)
+
+    # http urls
+    else:
+        if session is None:
+            session = PipSession()
+
+        unpack_http_url(
+            link,
+            location,
+            download_dir,
+            session,
+            hashes=hashes,
+            progress_bar=progress_bar
+        )
+    if only_download:
+        write_delete_marker_file(location)
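+
+# Dispatch summary for unpack_url(): VCS links go through unpack_vcs_link(),
+# file: links through unpack_file_url(), and everything else through
+# unpack_http_url() with a PipSession. A minimal call might look like this
+# (the link, paths and session are hypothetical):
+#
+#     unpack_url(link, location="/tmp/build/foo",
+#                download_dir=None, session=session, hashes=hashes)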
+
+
+def _download_http_url(link, session, temp_dir, hashes, progress_bar):
+    """Download link url into temp_dir using provided session"""
+    target_url = link.url.split('#', 1)[0]
+    try:
+        resp = session.get(
+            target_url,
+            # We use Accept-Encoding: identity here because requests
+            # defaults to accepting compressed responses. This breaks in
+            # a variety of ways depending on how the server is configured.
+            # - Some servers will notice that the file isn't a compressible
+            #   file and will leave the file alone and with an empty
+            #   Content-Encoding
+            # - Some servers will notice that the file is already
+            #   compressed and will leave the file alone and will add a
+            #   Content-Encoding: gzip header
+            # - Some servers won't notice anything at all and will take
+            #   a file that's already been compressed and compress it again
+            #   and set the Content-Encoding: gzip header
+            # By setting this to request only the identity encoding we're
+            # hoping to eliminate the third case. Hopefully there does not
+            # exist a server which when given a file will notice it is
+            # already compressed and that you're not asking for a
+            # compressed file and will then decompress it before sending
+            # because if that's the case I don't think it'll ever be
+            # possible to make this work.
+            headers={"Accept-Encoding": "identity"},
+            stream=True,
+        )
+        resp.raise_for_status()
+    except requests.HTTPError as exc:
+        logger.critical(
+            "HTTP error %s while getting %s", exc.response.status_code, link,
+        )
+        raise
+
+    content_type = resp.headers.get('content-type', '')
+    filename = link.filename  # fallback
+    # Have a look at the Content-Disposition header for a better guess
+    content_disposition = resp.headers.get('content-disposition')
+    if content_disposition:
+        type, params = cgi.parse_header(content_disposition)
+        # We use ``or`` here because we don't want to use an "empty" value
+        # from the filename param.
+        filename = params.get('filename') or filename
+    ext = splitext(filename)[1]
+    if not ext:
+        ext = mimetypes.guess_extension(content_type)
+        if ext:
+            filename += ext
+    if not ext and link.url != resp.url:
+        ext = os.path.splitext(resp.url)[1]
+        if ext:
+            filename += ext
+    file_path = os.path.join(temp_dir, filename)
+    with open(file_path, 'wb') as content_file:
+        _download_url(resp, link, content_file, hashes, progress_bar)
+    return file_path, content_type
+
+
+def _check_download_dir(link, download_dir, hashes):
+    """Check download_dir for a previously downloaded file with the correct
+    hash. If a correct file is found, return its path; otherwise return None.
+    """
+    download_path = os.path.join(download_dir, link.filename)
+    if os.path.exists(download_path):
+        # If already downloaded, does its hash match?
+        logger.info('File was already downloaded %s', download_path)
+        if hashes:
+            try:
+                hashes.check_against_path(download_path)
+            except HashMismatch:
+                logger.warning(
+                    'Previously-downloaded file %s has bad hash. '
+                    'Re-downloading.',
+                    download_path
+                )
+                os.unlink(download_path)
+                return None
+        return download_path
+    return None
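+
+# _check_download_dir() is what lets a previously fetched archive in a
+# user-supplied download directory be reused: if the file exists and (when
+# hashes are required) its hash still matches, its path is returned and the
+# download is skipped; a stale or corrupted file is deleted so the caller
+# fetches it again.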