Python爬取百度图片

向右看齐 2023-07-01 07:30 139阅读 0赞

话不多说,上代码!

  1. import re
  2. import requests
  3. from urllib import error
  4. from bs4 import BeautifulSoup
  5. import os
  # Module-level state shared by the functions and the main section below.
  num = 0          # images saved so far; incremented by dowmloadPicture
  numPicture = 0   # number of images requested; set from user input in the main section
  file = ''        # destination folder name; set from user input in the main section
  List = []        # one list of image URLs per result page; filled by Find
  def Find(url):
      """Count the image URLs reachable by paging through ``url``.

      The page offset (0, 60, 120, ... below 10000) is appended to ``url`` and
      each page is fetched with a 7 second timeout. Every ``"objURL"`` value
      found on a page is counted, and the page's URL list is appended to the
      module-level ``List``. Scanning stops at the first page that contains no
      image URL; a page whose request fails is skipped.

      Args:
          url: Search URL prefix ending right before the page offset value.

      Returns:
          The total number of image URLs found across the scanned pages.
      """
      print('正在检测图片总数,请稍等.....')
      total = 0
      for offset in range(0, 10000, 60):
          try:
              response = requests.get(url + str(offset), timeout=7)
          except requests.RequestException:
              # Only network-level failures are skipped; KeyboardInterrupt and
              # SystemExit still propagate so the scan can be aborted.
              continue
          page_urls = re.findall('"objURL":"(.*?)",', response.text, re.S)
          if not page_urls:
              break
          total += len(page_urls)
          List.append(page_urls)
      return total
  def recommend(url):
      """Return the related-search suggestions shown on a result page.

      Fetches ``url``, parses the response as UTF-8 HTML and collects the text
      of every ``<a>`` inside the ``<div id="topRS">`` element.

      Args:
          url: Address of the search result page to inspect.

      Returns:
          A list of suggestion strings. The list is empty when the request
          fails or the page has no ``topRS`` block, so callers can always
          iterate over the result.
      """
      try:
          # A timeout keeps an unresponsive server from hanging the script.
          response = requests.get(url, timeout=7)
      except requests.RequestException:
          # requests raises its own exception hierarchy, not urllib's HTTPError.
          return []
      response.encoding = 'utf-8'
      soup = BeautifulSoup(response.text, 'html.parser')
      container = soup.find('div', id='topRS')
      if container is None:
          return []
      return [link.get_text() for link in container.find_all('a')]
  def dowmloadPicture(html, keyword):
      """Download the images referenced in one result page.

      Every ``"objURL"`` value found in ``html`` is fetched with a 7 second
      timeout and written to ``<file>/<keyword>_<num>.jpg``, where ``file``,
      ``num`` and ``numPicture`` are module-level variables. ``num`` is
      incremented after each saved image, and the function returns as soon as
      ``num`` reaches ``numPicture``. Images that cannot be fetched are
      reported and skipped.

      Args:
          html: Text of a search result page.
          keyword: Search keyword, used as the file name prefix.
      """
      global num
      image_urls = re.findall('"objURL":"(.*?)",', html, re.S)
      print('找到关键词:' + keyword + '的图片,即将开始下载图片...')
      for image_url in image_urls:
          print('正在下载第' + str(num + 1) + '张图片,图片地址:' + str(image_url))
          try:
              response = requests.get(image_url, timeout=7)
              # Treat HTTP error statuses as failures so that error pages are
              # not saved to disk as .jpg files.
              response.raise_for_status()
          except requests.RequestException:
              print('错误,当前图片无法下载')
              continue
          # os.path.join yields a valid path on every platform; a hard-coded
          # backslash separator only works on Windows.
          target = os.path.join(file, keyword + '_' + str(num) + '.jpg')
          with open(target, 'wb') as fp:
              fp.write(response.content)
          num += 1
          if num >= numPicture:
              return
  if __name__ == '__main__':  # script entry point
      from urllib.parse import quote

      word = input("请输入搜索关键词(可以是人名,地名等): ")
      # Example of a complete request URL:
      # http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E5%BC%A0%E5%A4%A9%E7%88%B1&pn=120
      # Percent-encode the keyword so characters such as spaces, '&' or '#'
      # cannot corrupt the query string.
      url = ('http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word='
             + quote(word) + '&pn=')
      tot = Find(url)
      # Related-search suggestions; fall back to an empty list when none are
      # returned so the final loop is always safe to run.
      Recommend = recommend(url) or []
      print('经过检测%s类图片共有%d张' % (word, tot))
      numPicture = int(input('请输入想要下载的图片数量 '))
      file = input('请建立一个存储图片的文件夹,输入文件夹名称即可')
      # Keep asking until the name is unused; otherwise os.mkdir would raise
      # FileExistsError on a second colliding name.
      while os.path.exists(file):
          print('该文件已存在,请重新输入')
          file = input('请建立一个存储图片的文件夹,输入文件夹名称即可')
      os.mkdir(file)

      # Result pages are addressed by an offset that advances 60 at a time.
      for offset in range(0, numPicture, 60):
          page_url = url + str(offset)
          try:
              result = requests.get(page_url, timeout=10)
          except requests.RequestException:
              print('网络错误,请调整网络后重试')
              continue
          print(page_url)
          dowmloadPicture(result.text, word)

      print('当前搜索结束,感谢使用')
      print('猜你喜欢')
      # The loop variable must not be called "re": that would rebind the
      # imported regex module at module level.
      for suggestion in Recommend:
          print(suggestion, end=' ')

运行效果图

在这里插入图片描述
都是志玲姐姐
在这里插入图片描述

发表评论

表情:
评论列表 (有 0 条评论,139人围观)

还没有评论,来说两句吧...

相关阅读