1
0
mirror of synced 2024-07-07 20:45:13 +08:00
GitHub520/fetch_ips.py

183 lines
5.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Author : XueWeiHan
# E-mail : 595666367@qq.com
# Date : 2020-05-19 15:27
# Desc : 获取最新的 GitHub 相关域名对应 IP
import os
import re
import json
import traceback
from datetime import datetime, timezone, timedelta
from collections import Counter
import requests
2021-05-31 13:59:34 +08:00
from retry import retry
# Domains whose IP addresses are looked up and written to the hosts file.
# Despite the name, the entries are bare host names, not full URLs.
RAW_URL = [
    "alive.github.com",
    "live.github.com",
    "github.githubassets.com",
    "central.github.com",
    "desktop.githubusercontent.com",
    "assets-cdn.github.com",
    "camo.githubusercontent.com",
    "github.map.fastly.net",
    "github.global.ssl.fastly.net",
    "gist.github.com",
    "github.io",
    "github.com",
    "github.blog",
    "api.github.com",
    "raw.githubusercontent.com",
    "user-images.githubusercontent.com",
    "favicons.githubusercontent.com",
    "avatars5.githubusercontent.com",
    "avatars4.githubusercontent.com",
    "avatars3.githubusercontent.com",
    "avatars2.githubusercontent.com",
    "avatars1.githubusercontent.com",
    "avatars0.githubusercontent.com",
    "avatars.githubusercontent.com",
    "codeload.github.com",
    "github-cloud.s3.amazonaws.com",
    "github-com.s3.amazonaws.com",
    "github-production-release-asset-2e65be.s3.amazonaws.com",
    "github-production-user-asset-6210df.s3.amazonaws.com",
    "github-production-repository-file-5c1aeb.s3.amazonaws.com",
    "githubstatus.com",
    "github.community",
    "github.dev",
    "media.githubusercontent.com",
]
# Appended to a domain to form its ipaddress.com lookup host. Note that,
# despite the name, make_ipaddress_url uses it as a suffix.
IPADDRESS_PREFIX = ".ipaddress.com"

# Layout of the generated hosts section. Filled with str.format using
# `content` (the "ip hostname" lines) and `update_time`.
HOSTS_TEMPLATE = """# GitHub520 Host Start
{content}
# Update time: {update_time}
# Update url: https://raw.hellogithub.com/hosts
# Star me: https://github.com/521xueweihan/GitHub520
# GitHub520 Host End\n"""
2021-03-05 11:52:05 +08:00
def write_file(hosts_content: str, update_time: str) -> bool:
    """Write the hosts file and regenerate README.md if the hosts changed.

    The ``hosts`` file is always rewritten. README.md is only regenerated
    from README_template.md when the host entries differ from the ones in
    the first ```bash fenced block of the current README.

    :param hosts_content: rendered hosts section, including its
        "# Update time:" trailer
    :param update_time: timestamp string substituted into the README template
    :return: True if README.md was rewritten, False if hosts were unchanged
    """
    base_dir = os.path.dirname(__file__)
    output_doc_file_path = os.path.join(base_dir, "README.md")
    template_path = os.path.join(base_dir, "README_template.md")

    write_host_file(hosts_content)

    # Compare only the part before the update-time trailer on BOTH sides.
    # Otherwise the always-changing timestamp makes every run look changed.
    time_marker = "# Update time:"
    new_hosts = hosts_content.split(time_marker)[0].strip()

    # A missing README, or one without a ```bash block, counts as "changed"
    # so that it gets (re)generated instead of crashing.
    if os.path.exists(output_doc_file_path):
        # Explicit encoding keeps reads/writes independent of the locale.
        with open(output_doc_file_path, "r", encoding="utf-8") as old_readme_fb:
            old_content = old_readme_fb.read()
        fenced_parts = old_content.split("```bash")
        if len(fenced_parts) > 1:
            old_hosts = fenced_parts[1].split("```")[0]
            old_hosts = old_hosts.split(time_marker)[0].strip()
            if old_hosts == new_hosts:
                print("host not change")
                return False

    with open(template_path, "r", encoding="utf-8") as temp_fb:
        template_str = temp_fb.read()
    readme_content = template_str.format(hosts_str=hosts_content,
                                         update_time=update_time)
    with open(output_doc_file_path, "w", encoding="utf-8") as output_fb:
        output_fb.write(readme_content)
    return True
2021-05-31 13:59:34 +08:00
def write_host_file(hosts_content: str):
    """Overwrite the ``hosts`` file next to this script with *hosts_content*.

    :param hosts_content: full text to store in the hosts file
    """
    script_dir = os.path.dirname(__file__)
    target_path = os.path.join(script_dir, 'hosts')
    with open(target_path, "w") as hosts_fb:
        hosts_fb.write(hosts_content)
2021-07-11 12:20:50 +08:00
def write_json_file(hosts_list: list):
    """Serialize *hosts_list* as JSON into ``hosts.json`` next to this script.

    :param hosts_list: JSON-serializable list to store (main passes
        ``(ip, host_name)`` pairs)
    """
    script_dir = os.path.dirname(__file__)
    target_path = os.path.join(script_dir, 'hosts.json')
    serialized = json.dumps(hosts_list)
    with open(target_path, "w") as json_fb:
        json_fb.write(serialized)
def make_ipaddress_url(raw_url: str):
    """
    Build the ipaddress.com lookup URL for a domain.

    A name with at most one dot maps to ``https://<name>.ipaddress.com``.
    A name with more dots maps to ``https://<last two labels>.ipaddress.com/<name>``.

    :param raw_url: domain name to look up
    :return: ipaddress.com URL for that domain
    """
    labels = raw_url.split(".")
    if len(labels) <= 2:
        return "https://" + raw_url + IPADDRESS_PREFIX
    # Sub-domains are listed under the page of their last two labels.
    parent_domain = ".".join(labels[-2:])
    return "https://{}{}/{}".format(parent_domain, IPADDRESS_PREFIX, raw_url)
@retry(tries=3)
def get_ip(session: requests.Session, raw_url: str):
    """Look up the IP address of *raw_url* via its ipaddress.com page.

    The page is fetched and scanned for IPv4-looking strings; the one that
    occurs most often is taken as the domain's address.

    :param session: requests session used for the HTTP GET
    :param raw_url: domain name to resolve
    :return: tuple ``(raw_url, ip)``
    :raises Exception: when the request fails or no IP-like string is found;
        the ``retry`` decorator re-invokes the function on failure
        (``tries=3``)
    """
    url = make_ipaddress_url(raw_url)
    try:
        rs = session.get(url, timeout=5)
        pattern = r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"
        ip_list = re.findall(pattern, rs.text)
        ip_counter_obj = Counter(ip_list).most_common(1)
        if ip_counter_obj:
            return raw_url, ip_counter_obj[0][0]
        # Raised inside the try on purpose so it is logged like any failure.
        raise Exception("ip address empty")
    except Exception as ex:
        print("get: {}, error: {}".format(url, ex))
        # Keep the message and the original cause instead of raising a
        # blank Exception, so retry logs and tracebacks stay informative.
        raise Exception(
            "get ip for {} failed: {}".format(raw_url, ex)) from ex
@retry(tries=3)
def update_gitee_gist(session: requests.Session, host_content):
    """Push *host_content* to a Gitee gist through the Gitee v5 API.

    Configuration is read from the environment variables ``gitee_token``,
    ``gitee_gist_id`` and ``gitee_gist_file_name``.

    :param session: requests session used for the HTTP PATCH
    :param host_content: text stored as the gist file's content
    :raises Exception: when the request itself fails; the ``retry``
        decorator re-invokes the function on failure (``tries=3``).
        A non-200 response is only reported, not raised.
    """
    gitee_token = os.getenv("gitee_token")
    gitee_gist_id = os.getenv("gitee_gist_id")
    gist_file_name = os.getenv("gitee_gist_file_name")
    url = "https://gitee.com/api/v5/gists/{}".format(gitee_gist_id)
    headers = {
        "Content-Type": "application/json"}
    data = {
        "access_token": gitee_token,
        "files": {gist_file_name: {"content": host_content}},
        "public": "true"}
    json_data = json.dumps(data)
    try:
        response = session.patch(url, data=json_data, headers=headers,
                                 timeout=20)
        if response.status_code == 200:
            print("update gitee gist success")
        else:
            print("update gitee gist fail: {} {}".format(response.status_code,
                                                         response.content))
    except Exception as e:
        # print_exc() takes no exception argument (its first parameter is
        # `limit`); passing `e` raised TypeError and hid the real error.
        traceback.print_exc()
        raise Exception(e) from e
def main():
    """Resolve every domain in RAW_URL and write the hosts/README/JSON files.

    Domains whose lookup fails are skipped. If nothing could be resolved,
    the function returns without writing anything. ``hosts.json`` is only
    rewritten when write_file reports that the hosts changed.
    """
    session = requests.session()
    host_lines = []
    content_list = []
    for raw_url in RAW_URL:
        try:
            host_name, ip = get_ip(session, raw_url)
        except Exception:
            # Best effort: get_ip already printed the failure; skip the domain.
            continue
        host_lines.append(ip.ljust(30) + host_name + "\n")
        content_list.append((ip, host_name,))
    content = "".join(host_lines)
    if not content:
        return
    # Take the current time directly in UTC+8. datetime.utcnow() is naive,
    # and astimezone() would interpret it as local time, giving a wrong
    # timestamp on any machine whose local zone is not UTC.
    update_time = datetime.now(
        timezone(timedelta(hours=8))).replace(microsecond=0).isoformat()
    hosts_content = HOSTS_TEMPLATE.format(content=content,
                                          update_time=update_time)
    has_change = write_file(hosts_content, update_time)
    if has_change:
        write_json_file(content_list)
    # NOTE(review): assumed to print unconditionally, not only on change.
    print(hosts_content)


if __name__ == '__main__':
    main()