You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
2.4 KiB
75 lines
2.4 KiB
"""Compare the speed of downloading URLs sequentially vs. using futures."""
|
|
|
|
import functools
|
|
import time
|
|
import timeit
|
|
import sys
|
|
|
|
try:
|
|
from urllib2 import urlopen
|
|
except ImportError:
|
|
from urllib.request import urlopen
|
|
|
|
from concurrent.futures import (as_completed, ThreadPoolExecutor,
|
|
ProcessPoolExecutor)
|
|
|
|
# Benchmark targets: a mix of popular sites plus one host that presumably
# does not resolve, so the failure path is exercised as well.
URLS = ['http://www.google.com/',
        'http://www.apple.com/',
        'http://www.ibm.com',
        'http://www.thisurlprobablydoesnotexist.com',
        'http://www.slashdot.org/',
        'http://www.python.org/',
        'http://www.bing.com/',
        'http://www.facebook.com/',
        'http://www.yahoo.com/',
        'http://www.youtube.com/',
        'http://www.blogger.com/']
|
|
|
|
def load_url(url, timeout):
    """Download *url* and return its body as bytes.

    Args:
        url: the URL to fetch.
        timeout: socket timeout in seconds, passed to ``urlopen``.

    Returns:
        The raw response body (bytes).

    Raises:
        Whatever ``urlopen`` raises (URLError, HTTPError, socket errors).
    """
    # The old sys.version_info >= (2, 6) guard only mattered for Python < 2.6,
    # where urlopen had no `timeout` parameter; every supported interpreter
    # accepts it, so pass it unconditionally.
    response = urlopen(url, timeout=timeout)
    try:
        return response.read()
    finally:
        # Fix: the original leaked the response object / underlying socket.
        response.close()
|
|
|
|
def download_urls_sequential(urls, timeout=60):
    """Download each URL in *urls* one at a time.

    Downloads are best-effort: a URL that fails is silently skipped, so
    the result may contain fewer entries than *urls*.

    Args:
        urls: iterable of URL strings.
        timeout: per-request timeout in seconds.

    Returns:
        dict mapping each successfully fetched URL to its body (bytes).
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit. Skipping failed URLs is
            # intentional (best-effort benchmark), so we still pass here.
            pass
    return url_to_content
|
|
|
|
def download_urls_with_executor(urls, executor, timeout=60):
    """Download *urls* concurrently using *executor*.

    Submits one ``load_url`` task per URL, collects results as they
    complete, and always shuts the executor down afterwards. Like the
    sequential variant, failed downloads are silently skipped.

    Args:
        urls: iterable of URL strings.
        executor: a concurrent.futures Executor (thread or process pool);
            this function takes ownership and shuts it down.
        timeout: per-request timeout in seconds, forwarded to load_url.

    Returns:
        dict mapping each successfully fetched URL to its body (bytes).
    """
    try:
        url_to_content = {}
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)
        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit. Failed downloads are
                # deliberately dropped from the result.
                pass
        return url_to_content
    finally:
        # Release worker threads/processes even if submission itself fails.
        executor.shutdown()
|
|
|
|
def main():
    """Time each download strategy against URLS and print a summary line."""
    benchmarks = [
        ('sequential',
         functools.partial(download_urls_sequential, URLS)),
        ('processes',
         functools.partial(download_urls_with_executor,
                           URLS, ProcessPoolExecutor(10))),
        ('threads',
         functools.partial(download_urls_with_executor,
                           URLS, ThreadPoolExecutor(10))),
    ]
    for label, download in benchmarks:
        sys.stdout.write('%s: ' % label.ljust(12))
        started = time.time()
        fetched = download()
        elapsed = time.time() - started
        sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %
                         (elapsed, len(fetched), len(URLS)))


if __name__ == '__main__':
    main()
|