python从网络抓取照片并保存到本地实例
下面的实例是使用python从网络抓取图片并保存到本地的代码,粘贴在此,供大家交流学习。
程序思路如下:
(1)从excel中依次读取事项id,
(2)然后拼接成图片的url,
(3)据此url请求图片所在网页html,
(4)在其中查找图片tag并进行处理,
(5)请求图片并保存到本地,
(6)将图片名称写到对应事项id所在行的第5列(列索引为4)。
用到的重要类(函数)包括:requests.get,xpath,urllib.request.urlretrieve等
import os
import xlrd
import xlwt
import requests
import urllib
from lxml import html
from xlutils.copy import copy
def get_item_info_url(itemid, oldWebId):
    """Build the URL of the detail page for one service item.

    Args:
        itemid: Item code; placed in the ``code`` query parameter.
        oldWebId: Legacy site id; placed in the ``oldwebid`` query parameter.

    Returns:
        The page URL as a string. The ``webId`` and ``id`` query parameters
        are fixed to ``53`` and ``1``.
    """
    # Use the parameter itself. The body used to read ``itemId`` (different
    # casing), which only resolved when a global of that name happened to
    # exist and raised NameError otherwise.
    return ('http://www.zjzwfw.gov.cn/zjzw/service/list/showItem.do?code='
            + itemid + '&webId=53&id=1&oldwebid=' + oldWebId)
def get_img_src(url):
    """Fetch a page and pick the image to download from its ``<img>`` tags.

    Selection rule, based on how many ``<img src>`` values the page has:
      * exactly one: use it, unless its extension is ``gif``;
      * exactly two: use the second one;
      * none, or three and more: no image is selected.

    Args:
        url: Address of the HTML page to inspect.

    Returns:
        ``[image_src, file_name]`` where ``file_name`` is the last
        ``/``-separated segment of ``image_src``, or ``[]`` when no image
        is selected.
    """
    page = requests.get(url)
    tree = html.fromstring(page.text.encode('utf-8'))
    sources = tree.xpath('//img/@src')

    if len(sources) == 1:
        # A lone GIF is skipped -- presumably a placeholder/icon rather than
        # the real picture (TODO confirm against the target site).
        if sources[0].split('.')[-1] == 'gif':
            return []
        img_src = sources[0]
    elif len(sources) == 2:
        img_src = sources[1]
    else:
        # Three or more images used to fall through with an empty source and
        # yield ['', ''], which is non-empty and therefore looked like a real
        # result to the caller. Report "nothing found" explicitly instead.
        return []

    return [img_src, img_src.split('/')[-1]]
def save_picture_from_url(img_l, base_dir=None):
    """Download an image into ``<base_dir>/itemsImg/img/``.

    Args:
        img_l: ``[image_url, file_name]``, or an empty list when there is
            nothing to download.
        base_dir: Directory that contains the ``itemsImg`` folder. Defaults
            to the current working directory.

    Returns:
        The file name the image was saved under, or ``''`` when nothing was
        downloaded (empty list, empty URL or empty file name).
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    # Nothing to fetch: an empty list, or an entry whose URL / file name is
    # blank (urlretrieve would raise on those).
    if not img_l or not img_l[0] or not img_l[1]:
        return ''

    if base_dir is None:
        base_dir = os.getcwd()
    target_dir = os.path.join(base_dir, 'itemsImg', 'img')
    # urlretrieve does not create missing directories.
    os.makedirs(target_dir, exist_ok=True)

    urllib.request.urlretrieve(img_l[0], os.path.join(target_dir, img_l[1]))
    return img_l[1]
if __name__ == '__main__':
    # Read item ids from <cwd>/itemsImg/items.xls, download one picture per
    # item and record the saved file name in column index 4 of the same row.
    #
    # NOTE: ``ws`` and ``itemId`` intentionally stay module-level names with
    # their original spelling; other functions in this file may read them.
    ws = os.getcwd()
    xlsPath = ws + '/itemsImg/items.xls'
    workbook = xlrd.open_workbook(xlsPath)
    sheet1 = workbook.sheet_by_index(0)
    rowcount = sheet1.nrows

    # The sheet obtained through get_sheet() on the copy has a write() method.
    wb = copy(workbook)
    wsheet = wb.get_sheet(0)

    for r in range(rowcount):
        itemId = sheet1.cell(r, 0).value
        oldWebId = sheet1.cell(r, 1).value
        # Compare by value: ``is not ''`` tests object identity, which is
        # not a reliable emptiness check for strings.
        if oldWebId != '':
            url = get_item_info_url(itemId, oldWebId)
            src = get_img_src(url)
            filename = save_picture_from_url(src)
            if filename != '':
                wsheet.write(r, 4, filename)
        # str() keeps the progress line working when the cell holds a number.
        print('current item id:' + str(itemId))

    wb.save(xlsPath)
还没有评论,来说两句吧...