爬虫验证码:破解【点击旋转验证码】

蔚落 2022-02-13 11:13 1202阅读 0赞

这里以破解某某动漫的点击旋转验证码为例(原文此处为验证码示意图):
分析页面的源码发现,这些翻转的图片,我们可以下载下来,既然可以获取到这些图片,那么这个的破解思路为:

  1. 获取1000张样本集合: 用selenium访问,通过截屏,以及切片,一次获取四张图片,然后点击”换一组”,依次循环1000次

  2. 对获取的图片进行去重: 通过图片的rgb总值来去重,rgb总值相差在3000以内的,基本认为是同一张图片,只保留一张。

  3. 人为手动给去重的图片进行调整。

  4. 通过selenium来模拟登录,输入账号和密码

  5. 截屏,截取验证码图片

  6. 翻转每张验证码,记录翻转次数,并将翻转后的图片与样本集中的每张图片逐像素比较rgb:如果每个像素的rgb差值都小于100,那么这个翻转后的图片就是正确的位置,返回翻转次数。

  7. 依次翻转,记录各个图片需要翻转的次数

  8. 点击对应图片的元素,进行翻转

  9. 点击提交按钮,进行登录

  10. 判断是否登录成功

获取1000张样本集的代码:

  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. from PIL import Image
  6. import random
  7. from io import BytesIO
  8. import time
  9. class Crack(object):
  10. def __init__(self, start_number, count):
  11. self.login_url = "http://www.1kkk.com/"
  12. self.start_number = start_number
  13. self.count = count
  14. self.chrome_options = webdriver.ChromeOptions()
  15. self.chrome_options.add_argument("--healess")
  16. self.browser = webdriver.Chrome()
  17. self.browser.maximize_window()
  18. self.wait = WebDriverWait(self.browser, 30)
  19. def login(self):
  20. """
  21. 输入账号,密码
  22. :return:None
  23. """
  24. self.browser.get(self.login_url)
  25. self.browser.find_element_by_class_name("header-avatar").click()
  26. # 获取所有图片
  27. for num in range(self.start_number, self.start_number+self.count):
  28. self.image_png(num)
  29. self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "rotate-refresh"))).click()
  30. time.sleep(0.5)
  31. def save_screen_png(self):
  32. """
  33. 获取网页截图
  34. :return: 截图对象
  35. """
  36. self.wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "rotate-background")))
  37. screen_image = self.browser.get_screenshot_as_png()
  38. screenshot = Image.open(BytesIO(screen_image))
  39. screenshot.save("screenshot{}.png".format(random.randint(1, 5)))
  40. return screenshot
  41. def image_png(self, num):
  42. """
  43. 通过获取网页截图,然后进行切片,返回四张图片
  44. :return:
  45. """
  46. screenshot = self.save_screen_png()
  47. images = []
  48. for num_2 in range(1, 5):
  49. # 依次获取5张图片,存入iamges列表中
  50. images.append(self.get_image_position(screenshot, num, num_2))
  51. # 获取整体四张图片的坐标
  52. # 进行切片
  53. def get_image_position(self, screenshot, number, number_2):
  54. """
  55. 获取四张图片的下标
  56. :return: left, top, right, bottom
  57. """
  58. image = self.wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='rotate-background'][{}]".format(number_2))))
  59. location = image.location
  60. size = image.size
  61. top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size[
  62. 'width']
  63. image = screenshot.crop((left, top, right, bottom))
  64. image.save("./static/total_images/image{}_{}.png".format(number, number_2))
  65. return image
  66. def __del__(self):
  67. self.browser.quit()
  68. def download(start_number, count):
  69. """
  70. 初始化登录类,下载图片
  71. :param start_number:开启位置
  72. :param count: 数量
  73. :return:
  74. """
  75. c = Crack(start_number, count)
  76. c.login()
  77. del c
  78. def main():
  79. download(1, 1000)
  80. if __name__ == '__main__':
  81. main()

图片去重的代码:

  1. from PIL import Image
  2. import os
  3. import gevent
  4. from gevent import monkey
  5. monkey.patch_all()
  6. # Number of unique images kept so far (incremented by start_check)
  7. gCount = 0
  8. # RGB sums of the images already kept; start_check compares against these
  9. rgb_dif_list = []
  10. # Sequence number used in the file name of the most recently saved image
  11. gNumber = 0
  12. def sum_rgb(image):
  13. """
  14. 计算rgb的值
  15. :param images: 图片
  16. :return: rgb的值
  17. """
  18. num = 0
  19. for i in range(image.size[0]):
  20. for y in range(image.size[1]):
  21. pixel = image.load()[i, y]
  22. num = num + image.load()[i, y][0] + image.load()[i, y][1] + image.load()[i, y][2]
  23. return num
  24. def check_have_in(num):
  25. """
  26. 通过rgb的总值,来判断是否已经存在列表
  27. :param num: Ture or False
  28. :return:
  29. """
  30. global rgb_dif_list
  31. if num in rgb_dif_list:
  32. # 如果存在,就得删除
  33. return True
  34. else:
  35. # 否则就将rgb存入列表中,更改名字,并返回False
  36. return False
  37. def delete(image_url):
  38. """
  39. 删除图片
  40. :param image_url: 图片的url
  41. :return:
  42. """
  43. print("删除图片:", image_url)
  44. os.remove(image_url)
  45. def start_check(start_number, count):
  46. global rgb_dif_list
  47. global gCount
  48. global gNumber
  49. images_url = "./static/total_images/{}"
  50. save_url = "./static/images/{}"
  51. for number_1 in range(start_number, start_number + count):
  52. for number_2 in range(1, 5):
  53. image_url = images_url.format("image{}_{}.png".format(number_1, number_2))
  54. if os.path.isfile(image_url):
  55. image = Image.open(image_url)
  56. # 通过元素的rgb三个值相加的总数,通过列表保存,如果在列表中存在就添加,否则就删除
  57. rgb_num = sum_rgb(image)
  58. print("image{}_{}.png".format(number_1, number_2), rgb_num)
  59. # 判断该图片的rgb是否已经存在列表中
  60. if rgb_num > 4000000:
  61. continue
  62. for num in range(rgb_num-3000, rgb_num+3000):
  63. check_result = check_have_in(num)
  64. # 判断结果,做响应处理
  65. if check_result:
  66. # 存在情况,退出
  67. break
  68. else:
  69. rgb_dif_list.append(rgb_num)
  70. gCount += 1
  71. # 不存在情况,更改名字
  72. gNumber += 1
  73. image.save(save_url.format("images{}.png".format(gNumber)))
  74. if start_number+count == 501:
  75. print("剩余图片总数为", gCount)
  76. def main():
  77. gevent.joinall([
  78. gevent.spawn(start_check, 1, 100),
  79. gevent.spawn(start_check, 101, 100),
  80. gevent.spawn(start_check, 201, 100),
  81. gevent.spawn(start_check, 301, 100),
  82. gevent.spawn(start_check, 401, 100),
  83. ])
  84. # start_check(1, 10)
  85. if __name__ == "__main__":
  86. main()

验证码破解:

  1. from selenium import webdriver
  2. from selenium.webdriver.common.by import By
  3. from selenium.webdriver.support.ui import WebDriverWait
  4. from selenium.webdriver.support import expected_conditions as EC
  5. from selenium.common.exceptions import TimeoutException
  6. import time
  7. import os
  8. from PIL import Image
  9. from io import BytesIO
  10. class Crack(object):
  11. def __init__(self):
  12. self.login_url = "http://www.1kkk.com/"
  13. # self.chrome_options = webdriver.ChromeOptions()
  14. # self.chrome_options.add_argument("--healess")
  15. self.browser = webdriver.Chrome()
  16. self.browser.maximize_window()
  17. self.wait = WebDriverWait(self.browser, 10)
  18. self.browser.get(self.login_url)
  19. time.sleep(2)
  20. def login(self):
  21. """
  22. 输入账号,密码
  23. :return:None
  24. """
  25. try:
  26. self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, "header-avatar"))).click()
  27. except TimeoutException:
  28. self.browser.refresh()
  29. self.login()
  30. return
  31. # self.browser.find_element_by_class_name("header-avatar").click()
  32. name_page = self.browser.find_element_by_name("txt_name")
  33. name_page.send_keys("18218299414")
  34. password_page = self.browser.find_element_by_name("txt_password")
  35. password_page.send_keys("shao0812")
  36. true_or_false = True
  37. while true_or_false:
  38. true_or_false = False
  39. # 获取四张需要旋转的图片
  40. images = self.image_png()
  41. # 获取整体四张图片的几次
  42. turn_num_list = []
  43. for image in images:
  44. turn_num_list.append(self.image_turn_num(image))
  45. # print(turn_num_list)
  46. for i in turn_num_list:
  47. if i == 5:
  48. self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'rotate-refresh'))).click()
  49. time.sleep(3)
  50. true_or_false = True
  51. # 根据上面得到的旋转次数点击图片
  52. self.click_image(turn_num_list)
  53. # 结果正确,点击登录按钮
  54. self.click_submit()
  55. # todo: 如果旋转出问题,就得重新.来
  56. # try:
  57. if self.browser.find_element_by_css_selector(".tip.color-main").text == "请点击下方图片,旋转至正确方向~":
  58. # 如果登录不成功,将重新刷新页面登录
  59. self.browser.refresh()
  60. self.login()
  61. time.sleep(5)
  62. def click_image(self, turn_num_list):
  63. """
  64. 通过算出来的点击次数,来点击图片
  65. :param turn_num_list: 四张图需要点击的次数
  66. :return: None
  67. """
  68. for i in range(0, len(turn_num_list)):
  69. if turn_num_list[i] == 0:
  70. continue
  71. image = self.wait.until(
  72. EC.presence_of_element_located((By.XPATH, "//div[@class='rotate-background'][{}]".format(i+1))))
  73. for _ in range(turn_num_list[i]):
  74. image.click()
  75. time.sleep(1)
  76. def save_screen_png(self):
  77. """
  78. 获取网页截图
  79. :return: 截图对象
  80. """
  81. screen_image = self.browser.get_screenshot_as_png()
  82. screenshot = Image.open(BytesIO(screen_image))
  83. # screenshot.save("screenshot.png")
  84. return screenshot
  85. def image_png(self):
  86. """
  87. 通过获取网页截图,然后进行切片,返回四张图片
  88. :return:
  89. """
  90. screenshot = self.save_screen_png()
  91. images = []
  92. for num in range(1, 5):
  93. # 依次获取4张图片,存入iamges列表中
  94. images.append(self.get_image(screenshot, num))
  95. return images
  96. def get_image(self, screenshot, number):
  97. """
  98. 获取四张图片的下标
  99. :return: left, top, right, bottom
  100. """
  101. image = self.wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='rotate-background'][{}]".format(number))))
  102. location = image.location
  103. size = image.size
  104. top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size[
  105. 'width']
  106. image = screenshot.crop((left, top, right, bottom))
  107. # image.save("image{}.png".format(number))
  108. return image
  109. def image_turn_num(self, image):
  110. """
  111. 用获取的图片跟图片库的图片比较,
  112. :param image: 原图
  113. :return:
  114. """
  115. for i in range(0, 4):
  116. # 原图最多转三次
  117. dir_path = "./static/images/"
  118. change_image = image.rotate(-90*i)
  119. # change_image.save("change{}.png".format(i))
  120. for or_path in os.listdir(dir_path):
  121. or_image = Image.open(os.path.join(dir_path, or_path))
  122. result = self.examine_pixel(or_image, change_image)
  123. if result:
  124. return i
  125. return 5
  126. def examine_pixel(self, image1, image2):
  127. """
  128. 判断来个图片是否相等
  129. :param image1: 图片1
  130. :param image2: 图片2
  131. :return:
  132. """
  133. thredhold = 100
  134. for x in range(image1.size[0]):
  135. for y in range(image1.size[1]):
  136. pixel1 = image1.load()[x, y]
  137. pixel2 = image2.load()[x, y]
  138. if not (abs(pixel1[0] - pixel2[0]) < thredhold and abs(pixel1[1] - pixel2[1]) < thredhold and abs(pixel1[2] - pixel2[2]) < thredhold):
  139. return False
  140. return True
  141. def click_submit(self):
  142. """
  143. 点击登录按钮
  144. :return: None
  145. """
  146. submit = self.wait.until(EC.element_to_be_clickable((By.ID, "btnLogin")))
  147. submit.click()
  148. def __del__(self):
  149. self.browser.quit()
  150. def main():
  151. """pass"""
  152. c = Crack()
  153. c.login()
  154. if __name__ == "__main__":
  155. main()

发表评论

表情:
评论列表 (有 1 条评论,1202人围观)
蒲公英云3657FA
蒲公英云3657FAV铁粉 2022-12-12 16:49
这个地方免费的羊毛可以薅几百次,http://halo-dev.org/archives/ra

相关阅读

    相关 java爬虫破解滑块验证

    > 使用技术:java+Selenium > > 废话: > > 有爬虫,自然就有反爬虫,就像病毒和杀毒软件一样,有攻就有防,两者彼此推进发展。而目前最流行的反爬技术验证码

    相关 破解滑动验证

    一、介绍 一些网站会在正常的账号密码认证之外加一些验证码,以此来明确地区分人/机行为,从一定程度上达到反爬的效果,对于简单的校验码Tesserocr就可以搞定,如下 !

    相关 springboot文字图片验证

    说明:因为刷新的图标和点击图片文字的时候需要在图片上生成图标,为了美观,所以我引入Font Awesome图标库,如果你需要的话需引入该图标库方可使用 先上验证码效果图: