IO密集型 - 并发
    IO任务慢:
        -> asyncio
    IO任务不太慢:
        -> concurrent.futures.ThreadPoolExecutor (不再直接使用 threading 模块,改用 concurrent.futures)

CPU密集型 - 并行
    -> multiprocessing (推荐)
    或 -> concurrent.futures.ProcessPoolExecutor (底层同样是多进程,也是真正的并行;注意 map 的 chunksize 过大时任务只会分给少数几个进程)

IO密集型 – asyncio举例:

import asyncio
import aiohttp
import time


async def download_one(url):
    """Issue a GET request for ``url`` and print a line once it is answered.

    A fresh ``aiohttp.ClientSession`` is opened for this single request and
    closed again on exit. The response object is not inspected and its body
    is not read here.

    Args:
        url: Address to request.
    """
    # One combined ``async with`` enters the session first, then the request,
    # and leaves them in reverse order.
    async with aiohttp.ClientSession() as client, client.get(url) as _reply:
        print(f'Visiting {url}')


async def download_all(sites):
    """Download every URL in ``sites`` concurrently and report failures.

    One task per site is created up front so all requests are in flight at
    the same time; the coroutine then waits for all of them to finish.

    Args:
        sites: Iterable of URL strings passed one by one to ``download_one``.

    Returns:
        None. A line is printed for each download that ended in an exception.
    """
    tasks = [asyncio.create_task(download_one(site)) for site in sites]
    # return_exceptions=True makes gather() hand exceptions back as ordinary
    # results instead of raising them, so no try/except is needed here.
    # (*tasks unpacks a list into positional arguments; **mapping would
    # unpack a dict into keyword arguments.)
    results = await asyncio.gather(*tasks, return_exceptions=True)
    # Without this loop a failed download would disappear without a trace.
    for result in results:
        if isinstance(result, BaseException):
            print(f'请求失败,异常为{result}')


def main():
    """Run the asyncio download demo and print how long it took."""
    sites = [
        'http://www.people.com.cn/',
        'http://www.xinhuanet.com/',
        'https://www.cctv.com/',
        'http://cn.chinadaily.com.cn/',
        'https://www.gmw.cn/',
        'http://www.youth.cn/'
    ]
    started = time.perf_counter()
    # asyncio.run() is the top-level entry point that drives the coroutine
    # to completion.
    asyncio.run(download_all(sites))
    elapsed = time.perf_counter() - started
    site_count = len(sites)
    print(f'Downloaded {site_count} sites in {elapsed} seconds')


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

# Visiting https://www.gmw.cn/
# Visiting https://www.cctv.com/
# Visiting http://www.people.com.cn/
# Visiting http://cn.chinadaily.com.cn/
# Visiting http://www.xinhuanet.com/
# Visiting http://www.youth.cn/
# Downloaded 6 sites in 0.1884312 seconds

IO密集型 – futures多线程举例

import requests
import time
import concurrent.futures


def download_one(url, timeout=10):
    """Fetch ``url`` with requests and print the size of the response body.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get``. Without a timeout a
            stalled server could block the calling worker thread forever.

    Returns:
        None. Request failures (including timeouts, which are a kind of
        ``RequestException``) are reported with a printed message instead of
        being raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        print(f'Read {len(response.content)} from {url}')
    except requests.exceptions.RequestException as e:
        print(f'请求失败,异常为{e}')


def download_all(sites, timeout=None):
    """Download all ``sites`` concurrently on a pool of six threads.

    ``download_one`` is called once per element of ``sites``.

    Args:
        sites: Iterable of URL strings.
        timeout: Optional overall limit in seconds for collecting the
            results; ``None`` (the default) waits without limit.

    Returns:
        None. If the limit is exceeded a message is printed; leaving the
        ``with`` block still waits for the worker threads to finish.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
        try:
            # Executor.map() raises TimeoutError, or re-raises an exception
            # from a worker, only while its result iterator is consumed, so
            # the iterator has to be drained for the handler below to be
            # reachable.
            for _ in executor.map(download_one, sites, timeout=timeout):
                pass
        except concurrent.futures.TimeoutError as e:
            print(f'请求失败,异常为{e}')


def main():
    """Run the thread-pool download demo and print how long it took."""
    sites = [
        'http://www.people.com.cn/',
        'http://www.xinhuanet.com/',
        'https://www.cctv.com/',
        'http://cn.chinadaily.com.cn/',
        'https://www.gmw.cn/',
        'http://www.youth.cn/'
    ]
    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started
    site_count = len(sites)
    print(f'Downloaded {site_count} sites in {elapsed} seconds')


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

# Read 99053 from http://www.people.com.cn/
# Read 155876 from https://www.gmw.cn/
# Read 115113 from http://www.xinhuanet.com/
# Read 355932 from https://www.cctv.com/
# Read 138999 from http://cn.chinadaily.com.cn/
# Read 105108 from http://www.youth.cn/
# Downloaded 6 sites in 0.1663658 seconds

CPU密集型 – futures多进程举例 (不推荐)

import time
import concurrent.futures as cf


def cpu_bound(number):
    """Print ``number`` followed by the sum of squares of 0..number-1.

    Args:
        number: Exclusive upper bound of the integers whose squares are
            summed.
    """
    square_sum = sum(k ** 2 for k in range(number))
    print(number, ' ', square_sum)


def calculate_number(numbers):
    """Run ``cpu_bound`` for every item of ``numbers`` in a process pool.

    Args:
        numbers: Iterable of integers, each handed to ``cpu_bound`` in a
            worker process.

    Returns:
        None. An exception raised inside a worker is re-raised here.
    """
    with cf.ProcessPoolExecutor() as executor:
        # The default chunksize of 1 submits every number as its own task.
        # A large chunksize on a short input (e.g. 8 for 10 items) produces
        # only a couple of chunks, leaving most worker processes idle.
        # list() drains the result iterator so worker exceptions surface.
        list(executor.map(cpu_bound, numbers))


def main():
    """Time the process-pool computation over ten consecutive inputs."""
    started = time.perf_counter()
    numbers = list(range(10000000, 10000010))
    calculate_number(numbers)
    elapsed = time.perf_counter() - started
    print(f'Calculation takes {elapsed} seconds')


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

# 10000000   333333283333335000000
# 10000008   333334083333895000140
# 10000001   333333383333335000000
# 10000009   333334183334055000204
# 10000002   333333483333355000001
# 10000003   333333583333395000005
# 10000004   333333683333455000014
# 10000005   333333783333535000030
# 10000006   333333883333635000055
# 10000007   333333983333755000091
# Calculation takes 4.3930147 seconds

CPU密集型 – multiprocessing多进程举例(推荐)

import time
import multiprocessing


def cpu_bound(number):
    """Print ``number`` followed by the sum of squares of 0..number-1.

    Args:
        number: Exclusive upper bound of the integers whose squares are
            summed.
    """
    print(number, ' ', sum(i * i for i in range(number)))


def calculate_number(numbers):
    """Run ``cpu_bound`` for every item of ``numbers`` in a process pool.

    Args:
        numbers: Iterable of integers, each handed to ``cpu_bound`` in a
            worker process.
    """
    # Pool() with no argument sizes the pool from the machine's CPU count;
    # pool.map() blocks until every input has been processed.
    with multiprocessing.Pool() as pool:
        pool.map(cpu_bound, numbers)


def main():
    """Time the multiprocessing computation over ten consecutive inputs."""
    start_time = time.perf_counter()
    numbers = [10000000 + x for x in range(10)]
    calculate_number(numbers)
    end_time = time.perf_counter()
    print(f'Calculation takes {end_time - start_time} seconds')


# Run the demo only when this file is executed as a script. With
# multiprocessing the guard also keeps worker processes that re-import this
# module (spawn start method) from running main() again.
if __name__ == '__main__':
    main()

# 10000000 333333283333335000000
# 10000002 333333483333355000001
# 10000006 333333883333635000055
# 10000004 333333683333455000014
# 10000003 333333583333395000005
# 10000001 333333383333335000000
# 10000005 333333783333535000030
# 10000007 333333983333755000091
# 10000009 333334183334055000204
# 10000008 333334083333895000140
# Calculation takes 2.0442163 seconds

类似文章