1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| import requests import re
#1-爬取数据<!--more--> url = 'https://movie.douban.com/chart' headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'} response = requests.get(url,headers = headers) #print(response.text) html_str = response.text
#2-解析数据 patten = re.compile('< a.*?nbg.*?title="(.*?)">',re.S) items = re.findall(patten,html_str) print(items)
#3-存储数据 with open('douban.txt',"w",encoding='utf-8') as f: + for item in items: - f.write(item+'\n')
|