Python 下载图片到本地服务器
最近在做图片下载的任务,写了下面的脚本:
# -*- coding: utf-8 -*-
import hashlib
import uuid
import os
import urllib.request
from pymongo import MongoClient
import datetime
def get_unique_name():
    """Return a random 32-character hexadecimal name for a downloaded image.

    The name is the MD5 hex digest of a freshly generated UUID4 string, so
    each call produces a new value that does not depend on the image itself.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    uuid_bytes = str(uuid.uuid4()).encode("utf-8")
    return hashlib.md5(uuid_bytes).hexdigest()
def get_old_image_md5(addr):
    """Return the MD5 hex digest of an image address.

    The digest serves as the ``_id`` of the record that maps a source image
    URL to its downloaded file, so the same address always yields the same
    ``_id``.

    Args:
        addr: Source image address; converted with ``str()`` before hashing.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest of the UTF-8
        encoded address.
    """
    return hashlib.md5(str(addr).encode("utf-8")).hexdigest()
# Source database: one document per image that has to be downloaded.
conn = MongoClient('172.26.70.219', 27017)
spiders = conn.spiders
sourceAwards_url = spiders.sourceAwards_url
print("连接原始图片库成功")
# Cursor.count() is deprecated since PyMongo 3.7 and removed in 4.0;
# count_documents({}) is the supported way to count a collection.
print("本次需要下载的图片个数为:%s" % sourceAwards_url.count_documents({}))
# Target database: stores the mapping from source URL to downloaded file path.
cli = MongoClient('172.26.70.224', 27017)
vgs_source = cli.vgs_source
sourceDownPics = vgs_source.sourceDownPics
print("连接保存图片库成功")
# Root directory under which downloaded images are written.
base_file_path = '/data/images/images/'
def getPictures():
    """Download the poster image of every source record and store the mapping.

    For each document in ``sourceAwards_url`` the target directory
    ``base_file_path/<source lower-cased>/<type>/`` is created if needed.
    When the document has a ``poster`` URL, the image is downloaded into that
    directory under a random name and a mapping document (source URL ->
    local path) is upserted into ``sourceDownPics``.

    Each record is handled independently: a failure is printed and recorded,
    and processing continues with the next record instead of aborting the
    whole run.

    Returns:
        A ``(io_errors, other_errors)`` tuple. ``io_errors`` lists the poster
        URLs whose processing raised ``OSError`` (download or file-system
        failures); ``other_errors`` lists those that raised any other
        exception.
    """
    from urllib.parse import urlparse

    io_error_list = []
    exception_error_list = []
    for record in sourceAwards_url.find():
        try:
            # Images are grouped by originating site, then by record type.
            source = record.get("source")
            pic_type = record.get("type")
            file_path = base_file_path + source.lower() + "/" + pic_type + "/"
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(file_path, exist_ok=True)

            image_url = record.get("poster")
            if not image_url:
                continue

            # Take the extension from the URL path only, so a query string or
            # fragment ("a.jpg?w=200") does not end up in the file name.
            file_suffix = os.path.splitext(urlparse(image_url).path)[1]
            filename = file_path + get_unique_name() + file_suffix
            urllib.request.urlretrieve(image_url, filename=filename)

            mapping = {
                "_id": get_old_image_md5(image_url),
                "sourcePicUrl": image_url,
                "downPicPath": filename,
                "createTime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            }
            # Collection.save() was removed in PyMongo 4; an upserting
            # replace_one() keyed on _id has the same insert-or-overwrite effect.
            sourceDownPics.replace_one({"_id": mapping["_id"]}, mapping, upsert=True)
        except OSError as e:
            io_error_list.append(record.get("poster"))
            print(1, e)
        except Exception as e:
            exception_error_list.append(record.get("poster"))
            print(2, e)
    return io_error_list, exception_error_list
if __name__ == '__main__':
    getPictures()
    # Cursor.count() is removed in PyMongo 4; count_documents({}) replaces it.
    # The target collection is counted once so both figures below are based
    # on the same value.
    # NOTE(review): this counts every document in sourceDownPics, not only
    # the ones written by this run - confirm that is the intended figure.
    downloaded_count = sourceDownPics.count_documents({})
    print("本次一共下载图片个数%s" % downloaded_count)
    print("下载失败个数为%s" % (sourceAwards_url.count_documents({}) - downloaded_count))
还没有评论,来说两句吧...