Python 下载图片到本地服务器

电玩女神 2021-11-11 07:46 609阅读 0赞

最近在做图片下载的任务，写了下面的脚本：

  1. # -*- coding: utf-8 -*-
  2. import hashlib
  3. import uuid
  4. import os
  5. import urllib.request
  6. from pymongo import MongoClient
  7. import datetime
  # Generate a unique name for a newly downloaded picture.
  def get_unique_name():
      """Return a 32-character hex string: the MD5 digest of a fresh random UUID.

      The UUID is rendered as text and encoded as UTF-8 before hashing, so
      each call yields a new name.
      """
      return hashlib.md5(str(uuid.uuid4()).encode("utf-8")).hexdigest()
  # Value used as "_id" when recording the URL-to-file mapping.
  def get_old_image_md5(addr):
      """Return the MD5 hex digest of ``str(addr)`` encoded as UTF-8.

      The same input always yields the same digest, so it can serve as a
      stable key for a given source address.
      """
      encoded = str(addr).encode("utf-8")
      return hashlib.md5(encoded).hexdigest()
  # Source database: the collection whose documents are iterated for downloads.
  # NOTE(review): MongoClient presumably connects lazily, so the "success"
  # messages below do not by themselves prove the server is reachable - confirm.
  conn = MongoClient('172.26.70.219', 27017)
  spiders = conn.spiders
  sourceAwards_url = spiders.sourceAwards_url
  print("连接原始图片库成功")
  # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
  # count_documents({}) is the supported way to count a whole collection.
  print("本次需要下载的图片个数为:%s" % sourceAwards_url.count_documents({}))

  # Target database: the collection that records where each picture was saved.
  cli = MongoClient('172.26.70.224', 27017)
  vgs_source = cli.vgs_source
  sourceDownPics = vgs_source.sourceDownPics
  print("连接保存图片库成功")

  # Root directory under which downloaded pictures are written.
  base_file_path = '/data/images/images/'
  # Download pictures.
  def getPictures():
      """Download the "poster" image of every document in ``sourceAwards_url``.

      For each document the target directory is
      ``base_file_path + source.lower() + "/" + type + "/"`` (created when
      missing). Documents with an empty or missing "poster" are skipped after
      the directory is ensured. Each downloaded file gets a name from
      ``get_unique_name()`` plus the suffix taken from the URL path, and a
      mapping record keyed by ``get_old_image_md5(image_url)`` is upserted
      into ``sourceDownPics``.

      Failures are handled per document, so one bad URL or malformed document
      does not stop the remaining downloads.

      Returns:
          tuple[list, list]: ``(io_error_list, exception_error_list)`` - the
          "poster" values whose processing raised ``IOError`` and any other
          ``Exception`` respectively. (Previously the function returned
          ``None`` and discarded these lists.)
      """
      # Function-scope import keeps this block self-contained.
      from urllib.parse import urlsplit

      io_error_list = []
      exception_error_list = []
      for x in sourceAwards_url.find():
          image_url = x.get("poster")
          try:
              # Which website the document belongs to.
              source = x.get("source")
              file_path = base_file_path + source.lower() + "/"
              # Which category the document belongs to ("type" key); the local
              # is named `kind` so the builtin `type` is not shadowed.
              kind = x.get("type")
              file_path = file_path + kind + "/"
              # exist_ok avoids the check-then-create race of os.path.exists().
              os.makedirs(file_path, exist_ok=True)
              if not image_url:
                  continue
              # Take the suffix from the URL *path* only, so a query string or
              # fragment (e.g. "a.jpg?w=100") does not end up in the file name.
              file_suffix = os.path.splitext(urlsplit(image_url).path)[1]
              filename = file_path + get_unique_name() + file_suffix
              urllib.request.urlretrieve(image_url, filename=filename)
              # Collection.save() was removed in PyMongo 4; replace_one with
              # upsert=True gives the same insert-or-overwrite-by-_id result.
              sourceDownPics.replace_one(
                  {"_id": get_old_image_md5(image_url)},
                  {
                      "sourcePicUrl": image_url,
                      "downPicPath": filename,
                      "createTime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                  },
                  upsert=True,
              )
          except IOError as e:
              io_error_list.append(image_url)
              print(1, e)
          except Exception as e:
              # Deliberate best-effort boundary: record the failure and move on.
              exception_error_list.append(image_url)
              print(2, e)
      return io_error_list, exception_error_list
  if __name__ == '__main__':
      getPictures()
      # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
      # count_documents({}) counts every document in the collection.
      saved_total = sourceDownPics.count_documents({})
      print("本次一共下载图片个数%s" % saved_total)
      # NOTE(review): this is only the difference between the two collection
      # sizes. Source documents without a "poster" count as failures here, and
      # records already present in sourceDownPics (presumably from earlier
      # runs) would skew it - confirm whether an exact figure is needed.
      print("下载失败个数为%s" % (sourceAwards_url.count_documents({}) - saved_total))

发表评论

表情:
评论列表 (有 0 条评论,609人围观)

还没有评论,来说两句吧...

相关阅读