IO密集型 - 并发:IO任务慢 -> asyncio;IO任务不太慢 -> concurrent.futures.ThreadPoolExecutor(放弃threading,拥抱futures)。CPU密集型 - 并行:-> multiprocessing(推荐),或 -> concurrent.futures.ProcessPoolExecutor(底层同样基于multiprocessing,也是真正的多进程并行,只需注意chunksize等参数的设置)
IO密集型 – asyncio举例:
import asyncio
import time

import aiohttp


async def download_one(url, session=None):
    """Request *url* and print a line once the request has succeeded.

    Args:
        url: Address to fetch.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            temporary session is opened just for this one request.
    """
    if session is None:
        async with aiohttp.ClientSession() as own_session:
            await download_one(url, own_session)
        return
    # The response body is not read here; only the visit itself is reported.
    async with session.get(url):
        print(f'Visiting {url}')


async def download_all(sites):
    """Fetch every URL in *sites* concurrently and report the ones that fail.

    Args:
        sites: Iterable of URLs to request.
    """
    # Materialize once so the URLs can be paired with their results below,
    # even if the caller passed a one-shot iterator.
    sites = list(sites)

    # One session for the whole batch instead of one session per URL.
    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.create_task(download_one(site, session))
            for site in sites
        ]
        # return_exceptions=True makes gather() hand failures back as result
        # values instead of raising, so one bad site cannot abort the others.
        # (*tasks unpacks a list into positional arguments; **mapping would
        # unpack a dict into keyword arguments.)
        results = await asyncio.gather(*tasks, return_exceptions=True)

    # The collected exceptions must be inspected; otherwise every failure
    # disappears silently and the run looks fully successful.
    for site, result in zip(sites, results):
        if isinstance(result, BaseException):
            print(f'Failed to visit {site}: {result!r}')


def main():
    """Visit a fixed list of news sites and print the elapsed wall time."""
    sites = [
        'http://www.people.com.cn/',
        'http://www.xinhuanet.com/',
        'https://www.cctv.com/',
        'http://cn.chinadaily.com.cn/',
        'https://www.gmw.cn/',
        'http://www.youth.cn/'
    ]
    start_time = time.perf_counter()
    # asyncio.run is the root call that starts the event loop.
    asyncio.run(download_all(sites))
    end_time = time.perf_counter()
    print(f'Downloaded {len(sites)} sites in {end_time - start_time} seconds')


if __name__ == '__main__':
    main()

# Sample output:
# Visiting https://www.gmw.cn/
# Visiting https://www.cctv.com/
# Visiting http://www.people.com.cn/
# Visiting http://cn.chinadaily.com.cn/
# Visiting http://www.xinhuanet.com/
# Visiting http://www.youth.cn/
# Downloaded 6 sites in 0.1884312 seconds
IO密集型 – futures多线程举例
import concurrent.futures
import time

import requests


def download_one(url, timeout=10):
    """Download *url* and print the size of its body.

    Request failures are reported on stdout instead of being raised, so one
    bad site does not stop the others.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, a request to an unresponsive server can block forever.
    """
    try:
        response = requests.get(url, timeout=timeout)
        print(f'Read {len(response.content)} from {url}')
    except requests.exceptions.RequestException as e:
        print(f'请求失败,异常为{e}')


def download_all(sites, max_workers=6):
    """Call download_one for every URL in *sites* on a pool of threads.

    Args:
        sites: Iterable of URLs to download.
        max_workers: Number of threads in the pool.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # map() only re-raises a worker's exception when its result is
        # iterated, so drain the iterator; otherwise anything download_one
        # does not catch itself would be dropped silently.
        # No timeout is passed to map(), so it cannot raise
        # concurrent.futures.TimeoutError; slow requests are bounded by the
        # per-request timeout inside download_one instead.
        for _ in executor.map(download_one, sites):
            pass


def main():
    """Download a fixed list of news sites and print the elapsed wall time."""
    sites = [
        'http://www.people.com.cn/',
        'http://www.xinhuanet.com/',
        'https://www.cctv.com/',
        'http://cn.chinadaily.com.cn/',
        'https://www.gmw.cn/',
        'http://www.youth.cn/'
    ]
    start_time = time.perf_counter()
    download_all(sites)
    end_time = time.perf_counter()
    print(f'Downloaded {len(sites)} sites in {end_time - start_time} seconds')


if __name__ == '__main__':
    main()

# Sample output:
# Read 99053 from http://www.people.com.cn/
# Read 155876 from https://www.gmw.cn/
# Read 115113 from http://www.xinhuanet.com/
# Read 355932 from https://www.cctv.com/
# Read 138999 from http://cn.chinadaily.com.cn/
# Read 105108 from http://www.youth.cn/
# Downloaded 6 sites in 0.1663658 seconds
CPU密集型 – futures多进程举例 (不推荐)
import concurrent.futures as cf
import time


def cpu_bound(number):
    """Print *number* next to the sum of squares of 0 .. number - 1."""
    print(number, ' ', sum(i * i for i in range(number)))


def calculate_number(numbers):
    """Run cpu_bound on every item of *numbers* in a pool of processes.

    Args:
        numbers: Iterable of integers; each one is handed to cpu_bound.
    """
    with cf.ProcessPoolExecutor() as executor:
        # Keep the default chunksize of 1. The earlier chunksize=8 grouped
        # the 10 inputs into just 2 tasks, so at most 2 worker processes
        # ever received work, however many cores were available.
        # Draining the iterator re-raises any exception a worker hit;
        # without it such errors are silently discarded.
        for _ in executor.map(cpu_bound, numbers):
            pass


def main():
    """Time calculate_number over ten large inputs and print the duration."""
    start_time = time.perf_counter()
    numbers = [10000000 + x for x in range(10)]
    calculate_number(numbers)
    end_time = time.perf_counter()
    print(f'Calculation takes {end_time - start_time} seconds')


if __name__ == '__main__':
    main()

# Sample output (timing recorded with the earlier chunksize=8 call):
# 10000000 333333283333335000000
# 10000008 333334083333895000140
# 10000001 333333383333335000000
# 10000009 333334183334055000204
# 10000002 333333483333355000001
# 10000003 333333583333395000005
# 10000004 333333683333455000014
# 10000005 333333783333535000030
# 10000006 333333883333635000055
# 10000007 333333983333755000091
# Calculation takes 4.3930147 seconds
CPU密集型 – multiprocessing多进程举例(推荐)
import time
import multiprocessing
def cpu_bound(number):
    """Print *number* next to the sum of squares of 0 .. number - 1."""
    print(number, ' ', sum(i * i for i in range(number)))
def calculate_number(numbers):
    """Run cpu_bound on every item of *numbers* in a pool of processes.

    The pool is created with its default worker count, and ``pool.map``
    does not return until every input has been processed.

    Args:
        numbers: Iterable of integers; each one is handed to cpu_bound.
    """
    with multiprocessing.Pool() as pool:
        pool.map(cpu_bound, numbers)
def main():
    """Time calculate_number over ten large inputs and print the duration."""
    start_time = time.perf_counter()
    numbers = [10000000 + x for x in range(10)]
    calculate_number(numbers)
    end_time = time.perf_counter()
    print(f'Calculation takes {end_time - start_time} seconds')
# Start the benchmark only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()
# 10000000 333333283333335000000
# 10000002 333333483333355000001
# 10000006 333333883333635000055
# 10000004 333333683333455000014
# 10000003 333333583333395000005
# 10000001 333333383333335000000
# 10000005 333333783333535000030
# 10000007 333333983333755000091
# 10000009 333334183334055000204
# 10000008 333334083333895000140
# Calculation takes 2.0442163 seconds