[Baidu Maps] Fetching real-time traffic map tiles
Step 1: Download the map tiles
Several anti-anti-crawling measures are added:
- Rotate the IP and header every 50 requests
- On each request, draw a random integer from 1 to 100; when it is 66, pause for a random duration within 0-20 s
- After finishing all tiles for one time slice, pause for a random duration within 100-300 s

When a request raises an error, switch to a new IP and header and retry.
s1_getTrafficData.py

```python
# -*- coding: utf-8 -*-
import time, random, os
import urllib.request

# Example tile URL:
# http://its.map.baidu.com:8002/traffic/TrafficTileService?time=1604900544967&v=016&level=19&x=98760&y=19742
# Baidu map tiles are indexed from a bottom-left origin: in (x, y), x is the column and y is the row.
# Study area, in level-19 tile indices: bottom-left (98697, 19700) to top-right (98787, 19766).
# This script requests level 17, where the indices divide by 2^(19-17) = 4,
# i.e. roughly x 24674-24696, y 4925-4941.

def getUserAgent():
    agent_list = [
        "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
    ]
    agent = random.choice(agent_list)
    header = ('User-Agent', agent)
    return header

def getProxy():
    # Pool of free HTTP proxies; these expire quickly, so replace them with live ones
    ip_list = [
        "http://117.93.118.88:3000",
        "http://111.177.192.57:3256",
        "http://112.195.242.60:3256",
        "http://124.205.153.81:80",
        "http://49.85.2.17:3000",
        "http://117.88.208.32:3000",
        "http://114.99.9.117:1133",
        "http://125.72.106.132:3256",
        "http://49.85.188.37:8014",
        "http://124.206.34.66:80",
        "http://117.68.192.93:1133",
        "http://114.233.170.151:8056",
        "http://60.168.207.147:1133",
        "http://114.233.170.48:8056",
        "http://1.70.67.20:9999",
        "http://121.226.215.247:9999",
        "http://114.233.194.202:8086",
        "http://114.112.127.78:80",
        "http://117.66.233.26:9999",
        "http://180.122.38.235:8090",
        "http://117.95.192.119:9999",
        "http://49.85.188.21:8058",
        "http://114.233.168.211:8088",
        "http://180.120.209.130:8888",
    ]
    ip = random.choice(ip_list)
    proxy = urllib.request.ProxyHandler({'http': ip})
    return proxy

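
# Free proxies such as the ones above go stale quickly. The helper below is a
# sketch, not part of the original script: it prunes dead entries before the
# crawl starts. The test URL and the 5 s timeout are assumptions.
def filterAliveProxies(ip_list, test_url="http://www.baidu.com", timeout=5):
    alive = []
    for ip in ip_list:
        handler = urllib.request.ProxyHandler({'http': ip})
        opener = urllib.request.build_opener(handler)
        try:
            # Keep only proxies that complete one request within the timeout
            opener.open(test_url, timeout=timeout)
            alive.append(ip)
        except Exception:
            pass  # dead or too slow: drop it
    return alive
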
def requestImg(proxy, header, url, file_name):
    # Skip tiles already on disk, so an interrupted crawl can resume
    if os.path.exists(file_name):
        return proxy, header
    try:
        # Route the request through the current proxy with the current User-Agent
        opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
        opener.addheaders = [header]
        req = urllib.request.Request(url)
        res = opener.open(req, timeout=30)
        with open(file_name, "wb") as f:
            f.write(res.read())
        res.close()
    except Exception:
        # On any error: switch IP and header, then retry the same tile
        print("exception...")
        proxy = getProxy()
        header = getUserAgent()
        proxy, header = requestImg(proxy, header, url, file_name)
    return proxy, header

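
# The recursion in requestImg has no depth limit: if every proxy in the pool
# is dead, it recurses until Python's recursion limit. Below is a bounded,
# iterative variant as a sketch; the retry limit of 5 is an assumption.
def requestImgBounded(proxy, header, url, file_name, num_retries=5):
    if os.path.exists(file_name):
        return proxy, header
    for _ in range(num_retries):
        try:
            opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
            opener.addheaders = [header]
            res = opener.open(url, timeout=30)
            with open(file_name, "wb") as f:
                f.write(res.read())
            res.close()
            return proxy, header
        except Exception:
            # Rotate IP and header, then try again
            proxy = getProxy()
            header = getUserAgent()
    return proxy, header  # give up on this tile after num_retries failures
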
if __name__ == "__main__":
    # 2021-03-15 through 2021-03-21, 7:00-20:00, one snapshot every 30 minutes
    day_index = 7
    hour_index = 27
    for i in range(1, day_index):
        for j in range(hour_index):
            # Millisecond timestamp: base time (2021-03-15 07:00, UTC+8) + i days + j half-hours
            t = 1615762800000 + i * 86400000 + j * 1800000
            dir_path = "./tiles/images_" + str(i) + "_" + str(j)
            if not os.path.exists(dir_path):
                os.makedirs(dir_path)
            base_url = "http://its.map.baidu.com:8002/traffic/TrafficTileService?level=17&v=016&time="
            reset_clock = 50
            proxy = getProxy()
            header = getUserAgent()
            for x in range(24670, 24710):
                for y in range(4920, 4950):
                    url = base_url + str(t) + "&x=" + str(x) + "&y=" + str(y)
                    file_name = dir_path + "/" + str(x) + "-" + str(y) + ".png"
                    # Rotate IP and header every 50 requests
                    if reset_clock == 0:
                        proxy = getProxy()
                        header = getUserAgent()
                        reset_clock = 50
                    proxy, header = requestImg(proxy, header, url, file_name)
                    reset_clock -= 1
                    print(i, j, x, y)
                    time.sleep(random.random())
                    # If the 1-100 draw hits 66, pause for up to 20 s
                    if random.randint(1, 100) == 66:
                        print("sleeping......")
                        time.sleep(20 * random.random())
            # All tiles for this time slice are done: pause longer
            time.sleep(random.uniform(100, 300))
    print("------ok------")
```
Step 2: Replace empty no-data tiles

Tiles for which the server has no traffic data come back as empty, unreadable files. Overwrite them with blank transparent 256 × 256 images so the merge in step 3 does not crash.
```python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# s2_replaceEmpty.py
import glob
from PIL import Image

d = 7   # days
t = 27  # time slices per day
for i in range(d):
    for j in range(t):
        p = './tiles/images_' + str(i) + '_' + str(j) + '/*.png'
        files = glob.glob(p)
        for x in files:
            try:
                # Check that the tile can be opened as an image
                img = Image.open(x)
                img.close()
            except Exception:
                # Unreadable (empty) tile: overwrite with a blank transparent tile
                img = Image.new(mode='RGBA', size=(256, 256))
                img.save(x)
```
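
Note that Image.open only parses the file header, so a truncated download can still slip through. If that matters, Pillow's verify() performs a deeper integrity check (e.g. PNG checksums); a sketch of the stricter test:

```python
from PIL import Image

def tile_is_valid(path):
    # verify() must be the first operation on a freshly opened image
    try:
        with Image.open(path) as img:
            img.verify()
        return True
    except Exception:
        return False
```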
Step 3: Merge the tiles
```python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# s3_mergeImages.py
import glob, os, re
from PIL import Image

d = 7   # days
t = 27  # time slices per day
if not os.path.exists('./merge'):
    os.makedirs('./merge')
for i in range(d):
    for j in range(t):
        s = './tiles/images_' + str(i) + '_' + str(j) + '/*.png'
        # Sort file names numerically by (x, y); digit groups 2 and 3 in the
        # path are the tile column and row
        files = glob.glob(s)
        files.sort(key=lambda f: tuple(int(n) for n in re.findall(r'\d+', f)[2:4]))
        # Group file names by column (same x)
        imagefiles = {}
        for item in files:
            pre = re.findall(r'\d+', item)[2]
            if not imagefiles.get(pre):
                imagefiles[pre] = []
            imagefiles[pre].append(item)
        # Dict -> list of (x, [paths]) sorted by column; the x values are all
        # five digits here, so string order matches numeric order
        imagefiles = sorted(zip(imagefiles.keys(), imagefiles.values()))
        # Pre-allocate an empty canvas of the merged size
        total_width = len(imagefiles) * 256
        total_height = len(imagefiles[0][1]) * 256
        new_image = Image.new("RGBA", (total_width, total_height))
        # Paste column by column; tile y grows northward while image y grows
        # downward, so start each column at the bottom of the canvas and move up
        x_offset = 0
        for item in imagefiles:
            y_offset = total_height - 256
            images = list(map(Image.open, item[1]))
            for subitem in images:
                new_image.paste(subitem, (x_offset, y_offset))
                y_offset -= subitem.size[1]  # size[1] is the tile height
            x_offset += images[0].size[0]
        f_name = "./merge/merge_" + str(i) + "_" + str(j) + ".png"
        new_image.save(f_name)
```
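
With the ranges used in step 1 (x 24670-24709, y 4920-4949), every merged image should come out at 40 × 256 = 10240 px wide and 30 × 256 = 7680 px tall. A quick sanity check over the output directory, as a sketch (assuming those ranges were left unchanged):

```python
import glob
from PIL import Image

for path in glob.glob('./merge/merge_*.png'):
    with Image.open(path) as img:
        # 40 columns x 30 rows of 256 px tiles
        assert img.size == (40 * 256, 30 * 256), (path, img.size)
print("all merged images have the expected size")
```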