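# Scrape images from a web page.
# Downloads every image found on a given page of the Baidu Tieba "表情包" (sticker/meme) forum.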
#coding:utf-8
import urllib.request
import urllib.parse
import urllib
from bs4 import BeautifulSoup
def gethtml(url, timeout=30):
    """Download the page at ``url`` and return its source as text.

    The request is sent with a desktop-browser ``User-Agent`` header. The
    body is decoded with the charset declared in the response's
    ``Content-Type`` header; UTF-8 is used when no charset is declared or
    when the declared name is not a codec Python knows.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on blocking network operations.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    request = urllib.request.Request(url, headers={
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
            "(KHTML, like Gecko)Chrome/43.0.2357.130 Safari/537.36"
        ),
    })
    # The context manager closes the response (and its socket) even if
    # reading fails part-way through.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset)
    except LookupError:
        # The server advertised a charset label Python has no codec for.
        return raw.decode("utf-8")
def getjpg(html_doc):
    """Download every ``<img class="BDE_Image">`` referenced in ``html_doc``.

    Images are fetched in document order and written to the current working
    directory as ``1.jpg``, ``2.jpg``, ...  Tags that have no ``src``
    attribute (or an empty one) are skipped and do not consume a number.
    Prints ``finished`` once all downloads are done.

    Args:
        html_doc: HTML source of the page, as text.
    """
    soup = BeautifulSoup(html_doc, "html.parser")
    # Image markup differs from site to site; adjust this selector when
    # pointing the script at a different page.
    image_tags = soup.find_all('img', class_="BDE_Image")
    # Tag.get() returns None when the attribute is absent, and passing None
    # to urlretrieve() would abort the whole run, so filter those out first.
    sources = [src for src in (tag.get('src') for tag in image_tags) if src]
    for index, src in enumerate(sources, start=1):
        urllib.request.urlretrieve(src, '%s.jpg' % index)
    print('finished')
# Tieba thread whose images are saved.
url = "https://tieba.baidu.com/p/4962109239"
# The page is fetched first; the progress message is printed only after the
# HTML has been retrieved, immediately before the image downloads start.
html_doc = gethtml(url)
print("downloading.......")
getjpg(html_doc)
# (Residue from the source web page's comment widget: "No comments yet - be the first to say something...")