Home > Article > Backend Development > How to use aiohttp in Python
aiohttp is an asynchronous HTTP network module based on asyncio, which provides both the server and the client
import aiohttp import asyncio async def fetch(session, url): # 声明一个支持异步的上下文管理器 async with session.get(url) as response: # response.text()是coroutine对象 需要加await return await response.text(), response.status async def main(): # 声明一个支持异步的上下文管理器 async with aiohttp.ClientSession() as session: html, status = await fetch(session, 'https://cuiqingcai.com') print(f'html: {html[:100]}...') print(f'status: {status}') if __name__ == '__main__': # Python 3.7 及以后,不需要显式声明事件循环,可以使用 asyncio.run(main())来代替最后的启动操作 asyncio.get_event_loop().run_until_complete(main())
session.post('http://httpbin.org/post', data=b'data') session.put('http://httpbin.org/put', data=b'data') session.delete('http://httpbin.org/delete') session.head('http://httpbin.org/get') session.options('http://httpbin.org/get') session.patch('http://httpbin.org/patch', data=b'data')
print('status:', response.status) # 状态码 print('headers:', response.headers)# 响应头 print('body:', await response.text())# 响应体 print('bytes:', await response.read())# 响应体二进制内容 print('json:', await response.json())# 响应体json数据
import aiohttp import asyncio async def main(): #设置 1 秒的超时 timeout = aiohttp.ClientTimeout(total=1) async with aiohttp.ClientSession(timeout=timeout) as session: async with session.get('https://httpbin.org/get') as response: print('status:', response.status) if __name__ == '__main__': asyncio.get_event_loop().run_until_complete(main())
import asyncio import aiohttp # 声明最大并发量为5 CONCURRENCY = 5 semaphore = asyncio.Semaphore(CONCURRENCY) URL = 'https://www.baidu.com' session = None async def scrape_api(): async with semaphore: print('scraping', URL) async with session.get(URL) as response: await asyncio.sleep(1) return await response.text() async def main(): global session session = aiohttp.ClientSession() scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)] await asyncio.gather(*scrape_index_tasks) if __name__ == '__main__': asyncio.get_event_loop().run_until_complete(main())
import asyncio import aiohttp import logging import json logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s') INDEX_URL = 'https://dynamic5.scrape.center/api/book/?limit=18&offset={offset}' DETAIL_URL = 'https://dynamic5.scrape.center/api/book/{id}' PAGE_SIZE = 18 PAGE_NUMBER = 100 CONCURRENCY = 5 semaphore = asyncio.Semaphore(CONCURRENCY) session = None async def scrape_api(url): async with semaphore: try: logging.info('scraping %s', url) async with session.get(url) as response: return await response.json() except aiohttp.ClientError: logging.error('error occurred while scraping %s', url, exc_info=True) async def scrape_index(page): url = INDEX_URL.format(offset=PAGE_SIZE * (page - 1)) return await scrape_api(url) async def main(): global session session = aiohttp.ClientSession() scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)] results = await asyncio.gather(*scrape_index_tasks) logging.info('results %s', json.dumps(results, ensure_ascii=False, indent=2)) if __name__ == '__main__': asyncio.get_event_loop().run_until_complete(main())
The above is the detailed content of How to use aiohttp in Python. For more information, please follow other related articles on the PHP Chinese website!