"""Scrape the Douban Top 250 movie list: title, director/cast line and rating.

Uses the requests, re (regular expressions) and csv modules. Results are
written as plain text lines to ``peaTop250.csv`` in the working directory.
"""
import csv  # kept from the original script; output below is written as plain lines
import re

import requests

url = "https://movie.douban.com/top250"  # Douban Top 250 landing page

# Pretend to be a desktop browser. The original value started with a stray
# ": " (copy-paste artefact), which produced a malformed User-Agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36"
}

# Pre-compiled pattern; one match per <li> entry with three named groups:
# name (first title span), actors (the <p class=""> body) and score.
# re.S lets "." span newlines, since each entry covers many lines of HTML.
obj = re.compile(
    r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?'
    r'<p class="">(?P<actors>.*?)</p>.*?'
    r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>.*?',
    re.S,
)

REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get may block forever


def _clean_credits(raw):
    """Strip spaces and newlines, then turn non-breaking spaces into spaces.

    The original third replace had lost its "&nbsp;" literal and was a no-op
    (all plain spaces are already removed by the first replace). Both the
    HTML entity and the decoded U+00A0 character are handled.
    """
    compact = raw.replace(" ", "").replace("\n", "")
    return compact.replace("&nbsp;", " ").replace("\xa0", " ")


with open("peaTop250.csv", mode="w", encoding="utf-8") as f:
    # The list is paginated via ?start=N in steps of 25, from 0 up to 225.
    for num in range(0, 250, 25):
        page_url = url + "?start={}&filter=".format(num)
        print(page_url)

        response = requests.get(url=page_url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of silently writing nothing.
        response.raise_for_status()

        for match in obj.finditer(response.text):
            # NOTE(review): the label below says "author" but the value is the
            # movie title; kept byte-identical to preserve the output format.
            f.write("作者:" + match.group("name") + "\n")
            f.write(_clean_credits(match.group("actors")) + "\n")
            f.write("评分:" + match.group("score") + "\n")
# The with-block has already closed the file; no explicit f.close() is needed.

print("over")