JAVA抓取网页图片并下载到本地

ゞ 浴缸里的玫瑰 2022-08-09 02:24 89阅读 0赞
  1. package com.yong.util;
  2. import java.io.File;
  3. import java.io.FileOutputStream;
  4. import java.io.InputStream;
  5. import java.net.URL;
  6. import java.net.URLConnection;
  7. import java.util.ArrayList;
  8. import java.util.List;
  9. import java.util.regex.Matcher;
  10. import java.util.regex.Pattern;
  /**
   * Scrapes a web page for {@code <img>} tags and downloads every image whose
   * {@code src} is an absolute http/https URL into the current working directory.
   *
   * @author swinglife
   */
  public class CatchImage {
      /** Page that {@link #main(String[])} scrapes. */
      private static final String PAGE_URL = "http://www.baidu.com";
      /** Charset used to decode the fetched page. */
      private static final String ENCODING = "UTF-8";
      /** Size of the copy buffer used for network reads. */
      private static final int BUFFER_SIZE = 1024;
      /**
       * Matches one complete {@code <img ...>} tag that carries a {@code src} attribute.
       * {@code [^>]} keeps a match inside a single tag, so several images on one line
       * are reported separately instead of being merged by a greedy {@code .*}.
       */
      private static final Pattern IMG_TAG_PATTERN = Pattern.compile(
              "<img\\b[^>]*?(?<![\\w-])src\\s*=[^>]*>", Pattern.CASE_INSENSITIVE);
      /**
       * Captures (group 1) the absolute http/https URL of a {@code src} attribute,
       * whether the value is double-quoted, single-quoted or unquoted. The lookbehind
       * rejects look-alike attributes such as {@code data-src}.
       */
      private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
              "(?<![\\w-])src\\s*=\\s*[\"']?(https?://[^\"'\\s>]+)", Pattern.CASE_INSENSITIVE);

      /**
       * Fetches {@link #PAGE_URL}, extracts its image URLs and downloads them.
       *
       * @param args unused
       * @throws Exception if the page itself cannot be fetched or decoded
       */
      public static void main(String[] args) throws Exception {
          CatchImage cm = new CatchImage();
          // Fetch the HTML text of the page.
          String html = cm.getHTML(PAGE_URL);
          // Collect the <img> tags.
          List<String> imgTags = cm.getImageUrl(html);
          // Extract the src address of every tag.
          List<String> imgSrc = cm.getImageSrc(imgTags);
          // Download the images.
          cm.Download(imgSrc);
      }

      /**
       * Downloads the document at {@code url} and decodes it as UTF-8.
       *
       * @param url absolute URL of the page
       * @return the decoded response body
       * @throws Exception if the URL is malformed or the transfer fails
       */
      private String getHTML(String url) throws Exception {
          URLConnection connection = new URL(url).openConnection();
          // Collect the raw bytes first and decode once at the end: decoding chunk by
          // chunk would corrupt multi-byte characters that straddle two reads, and
          // only the bytes actually read may be used from the buffer.
          java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
          try (InputStream in = connection.getInputStream()) {
              byte[] buf = new byte[BUFFER_SIZE];
              int read;
              while ((read = in.read(buf)) != -1) {
                  body.write(buf, 0, read);
              }
          }
          return body.toString(ENCODING);
      }

      /**
       * Finds every {@code <img>} tag that has a {@code src} attribute.
       *
       * @param HTML markup to search
       * @return the matched tags in document order; empty if there are none
       */
      private List<String> getImageUrl(String HTML) {
          List<String> tags = new ArrayList<>();
          Matcher matcher = IMG_TAG_PATTERN.matcher(HTML);
          while (matcher.find()) {
              tags.add(matcher.group());
          }
          return tags;
      }

      /**
       * Extracts the absolute http/https {@code src} URL from each tag.
       *
       * @param listImageUrl {@code <img>} tags as returned by {@link #getImageUrl(String)}
       * @return the image URLs in input order; tags whose {@code src} is not an
       *         absolute http/https URL contribute nothing
       */
      private List<String> getImageSrc(List<String> listImageUrl) {
          List<String> sources = new ArrayList<>();
          for (String tag : listImageUrl) {
              Matcher matcher = IMG_SRC_PATTERN.matcher(tag);
              while (matcher.find()) {
                  sources.add(matcher.group(1));
              }
          }
          return sources;
      }

      /**
       * Downloads every URL into the current working directory, naming each file
       * after the last path segment of its URL. A failure is reported on standard
       * output and does not stop the remaining downloads.
       *
       * @param listImgSrc absolute image URLs
       */
      private void Download(List<String> listImgSrc) {
          for (String url : listImgSrc) {
              String imageName = fileNameOf(url);
              if (imageName.isEmpty()) {
                  // e.g. "http://host/dir/" - there is nothing to name the file after.
                  System.out.println("下载失败:" + url);
                  continue;
              }
              System.out.println("开始下载:" + url);
              try {
                  save(url, imageName);
                  System.out.println(imageName + "下载完成");
              } catch (java.io.IOException e) {
                  System.out.println("下载失败:" + url + " (" + e + ")");
              }
          }
      }

      /** Copies the content behind {@code url} into {@code fileName}, closing both streams. */
      private static void save(String url, String fileName) throws java.io.IOException {
          try (InputStream in = new URL(url).openStream();
                  FileOutputStream out = new FileOutputStream(new File(fileName))) {
              byte[] buf = new byte[BUFFER_SIZE];
              int read;
              while ((read = in.read(buf)) != -1) {
                  out.write(buf, 0, read);
              }
          }
      }

      /** Returns the last path segment of {@code url}, ignoring any query string or fragment. */
      private static String fileNameOf(String url) {
          int end = url.length();
          int query = url.indexOf('?');
          if (query >= 0) {
              end = query;
          }
          int fragment = url.indexOf('#');
          if (fragment >= 0 && fragment < end) {
              end = fragment;
          }
          String path = url.substring(0, end);
          return path.substring(path.lastIndexOf('/') + 1);
      }
  }

发表评论

表情:
评论列表 (有 0 条评论,89人围观)

还没有评论,来说两句吧...

相关阅读

    相关 PHP抓取远程图片本地保存

    最近在工作中开发了一个用户素材功能,需要将网上的各种图片素材进行本地化存储。于是在网上找了一些相关资料,并根据自身开发需要,整理了一下主要的逻辑代码。 /