You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.4 KiB

"""Compare the speed of downloading URLs sequentially vs. using futures."""
import functools
import time
import timeit
import sys
try:
from urllib2 import urlopen
except ImportError:
from urllib.request import urlopen
from concurrent.futures import (as_completed, ThreadPoolExecutor,
ProcessPoolExecutor)
# Targets for the download benchmark.  One host is intentionally
# unresolvable so the failure-handling path gets exercised.
URLS = [
    'http://www.google.com/',
    'http://www.apple.com/',
    'http://www.ibm.com',
    'http://www.thisurlprobablydoesnotexist.com',
    'http://www.slashdot.org/',
    'http://www.python.org/',
    'http://www.bing.com/',
    'http://www.facebook.com/',
    'http://www.yahoo.com/',
    'http://www.youtube.com/',
    'http://www.blogger.com/',
]
def load_url(url, timeout):
    """Download *url* and return its body as bytes.

    The response object is closed explicitly so sockets are not leaked
    when many URLs are fetched in a row (the original left it to the
    garbage collector).
    """
    # Python 2.5's urlopen has no ``timeout`` argument; pass it only
    # where supported.  Check retained for the file's py2/py3 compat.
    kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
    response = urlopen(url, **kwargs)
    try:
        return response.read()
    finally:
        response.close()
def download_urls_sequential(urls, timeout=60):
    """Fetch *urls* one at a time.

    Returns a dict mapping each successfully downloaded URL to its
    body; URLs that fail are silently omitted.  Per-URL failures are
    expected in this benchmark, so they are deliberately ignored.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit still propagate and can abort the benchmark.
            pass
    return url_to_content
def download_urls_with_executor(urls, executor, timeout=60):
    """Fetch *urls* concurrently via *executor*.

    Returns a dict of url -> body for the downloads that succeeded.
    The executor is always shut down before returning, even on error.
    """
    try:
        url_to_content = {}
        # Map each future back to the URL it is downloading.
        # (dict(genexp) rather than a dict comprehension: the file's
        # version check targets Python 2.6, which lacks dict comps.)
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)
        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                # Narrowed from a bare ``except:``; failed downloads are
                # simply omitted from the result, as in the sequential path.
                pass
        return url_to_content
    finally:
        executor.shutdown()
def main():
    """Benchmark sequential vs. process-pool vs. thread-pool downloads.

    Prints one line per strategy: elapsed wall-clock time and how many
    of the URLS were downloaded successfully.
    """
    benchmarks = [
        ('sequential',
         functools.partial(download_urls_sequential, URLS)),
        ('processes',
         functools.partial(download_urls_with_executor, URLS,
                           ProcessPoolExecutor(10))),
        ('threads',
         functools.partial(download_urls_with_executor, URLS,
                           ThreadPoolExecutor(10))),
    ]
    for name, fn in benchmarks:
        sys.stdout.write('%s: ' % name.ljust(12))
        start = time.time()
        url_map = fn()
        elapsed = time.time() - start
        sys.stdout.write('%.2f seconds (%d of %d downloaded)\n'
                         % (elapsed, len(url_map), len(URLS)))
# Run the benchmark only when executed as a script, not on import.
if __name__ == '__main__':
    main()