import os
import re
from threading import Thread
from urllib.parse import urljoin

import requests
from lxml import etree

# Directory (relative to the working directory) that downloaded images go to.
IMAGE_DIR = "imgs"

# Seconds to wait for a server before giving up; without a timeout a single
# stalled connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 10

# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with characters that are invalid in file names replaced."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name).strip().rstrip(". ")


class Spider(object):
    """Download the thumbnail images from the paginated listing pages."""

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.header = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"}

    def catch(self, page_num):
        """Fetch listing page *page_num* and save each entry's image.

        Every ``<li>`` under ``/html/body/div[4]/ul`` contributes one image
        (``./a/img/@src``) which is stored as ``imgs/<title>.jpg``, where the
        title is the text of ``./a/p``.  Entries without an image are skipped,
        as are images whose download fails.

        Args:
            page_num: Page index substituted into the listing URL.

        Raises:
            requests.RequestException: If the listing page cannot be fetched.
        """
        self.url = f"https://www.169tp.com/xingganmeinv/list_1_{page_num}.html"
        res = requests.get(self.url, headers=self.header, timeout=REQUEST_TIMEOUT)
        # The response is decoded as GBK explicitly rather than trusting the
        # encoding requests guesses from the headers.
        res.encoding = "gbk"
        tree = etree.HTML(res.text)
        if tree is None:
            # NOTE(review): lxml may hand back None for an empty document;
            # treat that as "no entries" instead of failing on .xpath().
            return

        # The output directory must exist before the first open() call.
        os.makedirs(IMAGE_DIR, exist_ok=True)

        for index, item in enumerate(tree.xpath("/html/body/div[4]/ul/li")):
            sources = item.xpath("./a/img/@src")
            if not sources:
                continue
            # Resolve relative / protocol-relative links against the page URL.
            addr = urljoin(self.url, sources[0])

            # xpath() returns a list of text nodes; use the first one as the
            # title instead of formatting the list itself into the file name.
            texts = item.xpath("./a/p/text()")
            title = _safe_filename(texts[0]) if texts else ""
            if not title:
                title = f"page{page_num}_{index}"

            try:
                response = requests.get(addr, headers=self.header, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as err:
                print(f"skipped {addr}: {err}")
                continue
            if not response.ok:
                # Do not store an error page body under a .jpg name.
                print(f"skipped {addr}: HTTP {response.status_code}")
                continue

            with open(os.path.join(IMAGE_DIR, f"{title}.jpg"), mode="wb") as f:
                f.write(response.content)
            print(f" ------------------- {title}.jpg 完成 -----------------------")

    def start(self, first_page=1, last_page=499):
        """Crawl listing pages *first_page* through *last_page* inclusive.

        The defaults reproduce the original fixed range of pages 1 to 499.
        """
        for num in range(first_page, last_page + 1):
            self.catch(num)
            print(f"-------------------------- 第 {num} 页完成-----------------------------------")


if __name__ == '__main__':
    spider = Spider()
    spider.start()