python爬虫一键爬下整页美女图片

2020-06-28 13:52:28 来源：易采站长站　作者：易采站长站整理

r = html.xpath('//*[@id="showimg"]/a/img/@src')
# 获取下一页url
next_url = html.xpath('//*[@id="showimg"]/a[@title="下一张"]/@href')
#获取标题
dir=url.split('/')[-2] dir+='/'+str(html.xpath('//*[@id="showimages"]/div[2]/h2/text()')[0])
if url.split('_')[0]==next_url[0].split('_')[0]:
detail_url_list.put(next_url[0])

savePic(r[0],dir)

print("{id}下载完成".format(id=dir))

# 生成图片详情网址
def get_detail_list(list_url):
    """Fetch a list page and crawl every photo set it links to.

    Requests ``list_url`` using the ``headers`` defined elsewhere in this
    file. When the response status is 200, every photo-set link found on
    the page is rewritten from ``*.html`` to ``*_1.html`` and handed to
    ``get_detail_queue`` on its own thread. The call blocks until all of
    the started threads have finished. Any other status code is ignored
    and nothing is crawled.

    Args:
        list_url: URL of the list page to scan for photo-set links.
    """
    index_rq = requests.get(url=list_url, headers=headers)
    # Only a successful response is parsed; anything else is skipped.
    if index_rq.status_code != 200:
        return

    html = et.HTML(index_rq.text)
    # Addresses of all photo sets shown on this list page.
    detail_list = html.xpath(
        '//*[@id="body"]/main/div[4]/div/div[@class="card-box"]/div[1]/a/@href'
    )

    html_thread = []
    for url in detail_list:
        # Presumably "<name>_1.html" is the first page of a paginated set
        # (TODO confirm against the site's URL scheme).
        url = url.replace('.html', '_1.html')
        print(url)
        thread = threading.Thread(target=get_detail_queue, args=(url,))
        thread.start()
        html_thread.append(thread)

    # Wait for every crawl thread to finish before returning.
    for thread in html_thread:
        thread.join()