Java网络爬虫模拟登录案例教学2

亦凉 2022-05-25 14:36 461阅读 0赞

本文为原创博客,仅供技术学习使用。未经允许,禁止将其复制下来上传到百度文库等平台。

模拟登录的网站

我们需要登录的网站为:autonews,模拟登录的地址为:
https://home.autonews.com/clickshare/cspLogin.do

这里写图片描述

网络抓包分析

在请求表单中,输入用户名及密码。

这里写图片描述

可以看到,我们要向后台提交的数据有哪些。

CSParamsToPassNames:CSParamsToPassNames
userLogin.userName:你的用户名
userLogin.password:你的密码
CSDropAuthCookieSpecified:1
CSDropAuthCookie:1
submit:Sign In

实战代码

  1. package crawlerTest;
  2. /* * 合肥工业大学 管理学院 qianyang 1563178220@qq.com */
  3. import java.io.BufferedOutputStream;
  4. import java.io.BufferedWriter;
  5. import java.io.File;
  6. import java.io.FileOutputStream;
  7. import java.io.IOException;
  8. import java.io.InputStream;
  9. import java.io.OutputStreamWriter;
  10. import java.util.ArrayList;
  11. import java.util.List;
  12. import org.apache.http.Header;
  13. import org.apache.http.HttpEntity;
  14. import org.apache.http.HttpResponse;
  15. import org.apache.http.NameValuePair;
  16. import org.apache.http.client.ClientProtocolException;
  17. import org.apache.http.client.ResponseHandler;
  18. import org.apache.http.client.entity.UrlEncodedFormEntity;
  19. import org.apache.http.client.methods.HttpGet;
  20. import org.apache.http.client.methods.HttpPost;
  21. import org.apache.http.impl.client.BasicResponseHandler;
  22. import org.apache.http.impl.client.DefaultHttpClient;
  23. import org.apache.http.message.BasicNameValuePair;
  24. import org.apache.http.protocol.HTTP;
  25. import org.apache.http.util.EntityUtils;
  26. @SuppressWarnings("deprecation")
  27. public class AutonewsLogin {
  28. // The configuration items
  29. //输入用户名及密码
  30. private static String userName = "";
  31. private static String password = "";
  32. private static String redirectURL = "https://home.autonews.com/clickshare/myhome.do";
  33. // Don't change the following URL
  34. private static String renRenLoginURL = "https://home.autonews.com/clickshare/cspLogin.do";
  35. // The HttpClient is used in one session
  36. private HttpResponse response;
  37. private DefaultHttpClient httpclient = new DefaultHttpClient();
  38. private boolean login() {
  39. //open the LoginURL
  40. HttpPost httpost = new HttpPost(renRenLoginURL);
  41. // All the parameters post to the web site
  42. //建立一个NameValuePair数组,用于存储欲传送的参数
  43. List<NameValuePair> nvps = new ArrayList<NameValuePair>();
  44. // nvps.add(new BasicNameValuePair("CSAuthReq", "1"));
  45. // nvps.add(new BasicNameValuePair("CSTargetURL", "http%3A%2F%2Fwww.autonews.com%2F"));
  46. // nvps.add(new BasicNameValuePair("CSResumeURL", "/clickshare/forceLogin.do"));
  47. // nvps.add(new BasicNameValuePair("CSParamsToPassNames", "CSAuthReq|CSTargetURL|CSResumeURL|CSParamsToPassNames"));
  48. nvps.add(new BasicNameValuePair("userLogin.userName", userName));
  49. nvps.add(new BasicNameValuePair("userLogin.password", password));
  50. // nvps.add(new BasicNameValuePair("CSDropAuthCookieSpecified", "1"));
  51. // nvps.add(new BasicNameValuePair("CSDropAuthCookie", "1"));
  52. nvps.add(new BasicNameValuePair("submit", "Sign In"));
  53. try {
  54. httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
  55. response = httpclient.execute(httpost);
  56. int StatusCode = response.getStatusLine().getStatusCode();
  57. System.out.println(StatusCode);
  58. } catch (Exception e) {
  59. e.printStackTrace();
  60. return false;
  61. } finally {
  62. httpost.abort();
  63. }
  64. return true;
  65. }
  66. private String getRedirectLocation() {
  67. Header locationHeader = response.getFirstHeader("Location");
  68. if (locationHeader == null) {
  69. return null;
  70. }
  71. return locationHeader.getValue();
  72. }
  73. private String getText(String redirectLocation) {
  74. HttpGet httpget = new HttpGet(redirectLocation);
  75. // Create a response handler
  76. ResponseHandler<String> responseHandler = new BasicResponseHandler();
  77. String responseBody = "";
  78. try {
  79. responseBody = httpclient.execute(httpget, responseHandler);
  80. } catch (Exception e) {
  81. e.printStackTrace();
  82. responseBody = null;
  83. } finally {
  84. httpget.abort();
  85. httpclient.getConnectionManager().shutdown();
  86. }
  87. return responseBody;
  88. }
  89. public String printText() {
  90. String html="";
  91. if (login()) {
  92. String redirectLocation = getRedirectLocation();
  93. if (redirectLocation != null) {
  94. html=getText(redirectURL);
  95. }
  96. }
  97. return html;
  98. }
  99. public static void main(String[] args) throws IOException {
  100. AutonewsLogin AutonewsLogin = new AutonewsLogin();
  101. BufferedWriter writer = new BufferedWriter( new OutputStreamWriter( new FileOutputStream( new File("D:\\d.txt")),"gbk"));
  102. String html=AutonewsLogin.printText();
  103. writer.write(html);
  104. writer.close();
  105. AutonewsLogin.downloadFile("http://www.autonews.com/assets/PDF/CA11537753.PDF","E:\\zipFile\\","1");
  106. }
  107. //下载该网页的pdf文件
  108. public void downloadFile(String fileURL, String saveDir,String fileName)
  109. throws IOException {
  110. File fileDir=new File(saveDir);
  111. if(!fileDir.exists()){
  112. fileDir.mkdirs();
  113. }
  114. //图片或zip下载保存地址
  115. String filename=saveDir+fileName+".pdf";
  116. File file=new File(filename);
  117. if(file.exists()){
  118. file.delete();
  119. }
  120. BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream(filename));
  121. HttpGet httpGet=new HttpGet(fileURL);
  122. HttpResponse httpResponse = httpclient.execute(httpGet);
  123. try {
  124. HttpEntity entity=httpResponse.getEntity();
  125. int i=-1;
  126. byte[] byt= EntityUtils.toByteArray(entity);
  127. bw.write(byt);
  128. System.out.println("文件下载成功!");
  129. } catch (ClientProtocolException e) {
  130. e.printStackTrace();
  131. } catch (IOException e) {
  132. e.printStackTrace();
  133. }
  134. bw.close();
  135. }
  136. }

程序结果

这里写图片描述

这里写图片描述

发表评论

表情:
评论列表 (有 0 条评论,461人围观)

还没有评论,来说两句吧...

相关阅读