Python 爬取百度贴吧图片并下载到本地
#coding:utf-8
import requests
import re
import urllib
import os, sys
from bs4 import BeautifulSoup as bs
'''
作用:获取百度贴吧图片
'''
def get_content(url):
    """Fetch *url* and return the response body exactly as read.

    Args:
        url: Address to open; passed unchanged to ``urlopen``.

    Returns:
        Whatever ``read()`` yields for the response: undecoded bytes
        (``str`` on Python 2, ``bytes`` on Python 3).

    Any error raised by ``urlopen`` or ``read`` propagates to the caller.
    """
    # Imported locally so the function works on both interpreter lines
    # without touching the file-level imports.
    from contextlib import closing
    try:  # Python 3
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlopen

    # closing() guarantees the response is released even if read() raises.
    with closing(urlopen(url)) as response:
        return response.read()
# Thread to scrape and the folder the images are saved into.
THREAD_URL = "http://tieba.baidu.com/p/5347937418"
IMAGE_DIR = "d://image"

# Post images are matched by their class="BDE_Image" attribute followed by a
# src="....jpg" attribute. Sample tag fragment from the page (abridged;
# reconstructed from the original post -- TODO confirm against the live HTML):
#   <img class="BDE_Image" ... src="http://imgsrc.baidu.com/forum/w%3D580/sign=.../96f8ce1b9d16fdfa3d6b31e5bf8f8c5494ee7b49.jpg" size="230432" changedsize="true">
IMAGE_PATTERN = re.compile(r'class="BDE_Image".+?src="(.+?\.jpg)"')


def extract_image_urls(html):
    """Return the ``.jpg`` URLs captured by ``IMAGE_PATTERN`` in *html*.

    Args:
        html: Page markup, either text or undecoded bytes. Bytes are decoded
            as UTF-8 (undecodable bytes replaced) so the text pattern applies.

    Returns:
        A list of URL strings in the order they appear; empty if none match.
    """
    if isinstance(html, bytes):
        html = html.decode("utf-8", "replace")
    return IMAGE_PATTERN.findall(html)


def download_images(urls, directory=IMAGE_DIR):
    """Save each URL in *urls* as ``<index>.jpg`` inside *directory*.

    The directory is created if it does not exist yet, so re-running the
    script no longer fails on the already-existing folder. The index of each
    finished download is printed as simple progress output.

    Args:
        urls: Iterable of image URLs.
        directory: Destination folder; defaults to ``IMAGE_DIR``.
    """
    # Local import keeps the block usable on both Python 2 and Python 3.
    try:  # Python 3
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve

    if not os.path.isdir(directory):
        os.makedirs(directory)
    for index, image_url in enumerate(urls):
        urlretrieve(image_url, os.path.join(directory, "%s.jpg" % index))
        print(index)


def main():
    """Download every post image of ``THREAD_URL`` into ``IMAGE_DIR``."""
    html = get_content(THREAD_URL)
    download_images(extract_image_urls(html))


if __name__ == "__main__":
    main()
步骤:
- 打开网页获取html
- 分析图片所在的标签特点
- 使用正则筛选图片链接
- 遍历链接列表,使用 urllib.urlretrieve 将图片逐个下载到本地
还没有评论,来说两句吧...