Crawling Baidu Images with Python

不念不忘少年蓝 · 2022-06-05 09:25

Baidu Images packet-capture data: [screenshot]
Parameter details: [screenshot]
Data parsing: [screenshots]
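Since the parameter screenshot is not reproduced here: paging is driven by three fields that also appear in the code below — pn (the result offset), rn (results per page, 30 here), and gsm (the offset in hexadecimal without the 0x prefix). A minimal sketch of how they are derived; the helper name is just for illustration:

    # Sketch: the paging fields sent to the acjson endpoint.
    # pn  = result offset (0, 30, 60, ...)
    # rn  = results per page
    # gsm = the offset in hex, without the leading "0x"
    def paging_params(start, length):
        return {
            "pn": str(start),
            "rn": str(length),
            "gsm": hex(start)[2:],
        }

    print(paging_params(60, 30))  # {'pn': '60', 'rn': '30', 'gsm': '3c'}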

    from urllib import request, parse
    from http import cookiejar
    import re
    import time

    # 1. Fetch one page of search results and download every thumbnail it contains
    def main(text, start, length):
        # gsm is the page offset in hex, without the leading "0x"
        s = hex(start)[2:]
        reqMessage = {
            "tn": "resultjson_com",
            "ipn": "rj",
            "ct": "201326592",
            "is": "",
            "fp": "result",
            "queryWord": text,
            "cl": "2",
            "lm": "-1",
            "ie": "utf-8",
            "oe": "utf-8",
            "adpicid": "",
            "st": "",
            "z": "",
            "ic": "",
            "word": text,
            "s": "",
            "se": "",
            "tab": "",
            "width": "",
            "height": "",
            "face": "",
            "istype": "",
            "qc": "",
            "nc": "",
            "fr": "",
            "cg": "head",
            "pn": str(start),      # start offset of this page
            "rn": str(length),     # number of results per page
            "gsm": s,
            "1511330964840": ""
        }
        # Carry cookies across requests
        cookie = cookiejar.CookieJar()
        cookie_support = request.HTTPCookieProcessor(cookie)
        opener = request.build_opener(cookie_support, request.HTTPHandler)
        request.install_opener(opener)
        reqData = parse.urlencode(reqMessage)
        req = request.Request("http://image.baidu.com/search/acjson?" + reqData, headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"})
        data = request.urlopen(req).read()
        # Pull every thumbURL field out of the response text
        rm = re.compile(r'"thumbURL":"[\w/\\:.,;=&]*"')
        urls = re.findall(rm, data.decode())
        index = start + 1
        result = False
        for thumbURL in urls:
            # Strip the leading '"thumbURL":"' (12 chars) and the trailing quote
            url = thumbURL[12:len(thumbURL) - 1]
            downImg(url, "F:/file/baidu/" + str(index) + ".jpg")
            index += 1
            result = True
        return result

    # Download a single image
    def downImg(url, path):
        print(url)
        # Both User-Agent and Referer are required, otherwise Baidu refuses the request
        req = request.Request(url, headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
            "Referer": "http://image.baidu.com/search/acjson"})
        data = request.urlopen(req).read()
        with open(path, "wb") as file:
            file.write(data)

    # Keep requesting pages of 30 until a page comes back empty
    a = 0
    while a != -1:
        result = main("美女图片", a * 30, 30)   # search keyword
        print("Pausing...")
        a += 1
        if not result:
            a = -1
        time.sleep(10)   # throttle requests to avoid being blocked
    print("Done")
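The regex above scrapes thumbURL values straight out of the response text. Since the endpoint requests JSON output (tn=resultjson_com), the same URLs could in principle be extracted with the json module instead. A minimal sketch, assuming the body parses as valid JSON and keeps the thumbnails under a top-level "data" list — that key name is an assumption, and the payload may not always be strictly valid JSON, hence the fallback:

    import json

    # Hedged alternative to the regex: parse the response body as JSON.
    # Assumes a top-level "data" list whose items carry a "thumbURL" field;
    # returns an empty list if the body does not parse, so the caller can
    # fall back to the regex approach.
    def extract_thumb_urls(raw_bytes):
        try:
            payload = json.loads(raw_bytes.decode("utf-8", errors="ignore"))
        except ValueError:
            return []
        return [item["thumbURL"] for item in payload.get("data", []) if item.get("thumbURL")]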

There is not much more to it. The points to watch: when downloading an image, the request headers must include both User-Agent and Referer, otherwise Baidu refuses the request. Also, each Baidu image link can only be accessed once — after the first access the link immediately becomes invalid. Finally, Baidu limits both the amount of data and the request rate, so only a limited number of images can be crawled in one run.
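Because each link dies after its first access and Baidu may reject requests outright, any individual download can fail mid-run. A small sketch of a more defensive version of downImg — the retry count and delay are arbitrary choices for illustration, not values from the original code:

    from urllib import request, error
    import time

    HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        "Referer": "http://image.baidu.com/search/acjson",
    }

    # Sketch: download with basic error handling so an expired or refused
    # link is logged and skipped instead of crashing the whole loop.
    def down_img_safe(url, path, retries=2, delay=3):
        for attempt in range(retries + 1):
            try:
                req = request.Request(url, headers=HEADERS)
                data = request.urlopen(req, timeout=10).read()
                with open(path, "wb") as f:
                    f.write(data)
                return True
            except (error.HTTPError, error.URLError) as e:
                print("download failed, attempt %d: %s" % (attempt + 1, e))
                time.sleep(delay)
        return False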
