2021-01-04 09:45:04 +08:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding:utf-8 -*-
|
|
|
|
#
|
|
|
|
# Author : XueWeiHan
|
|
|
|
# E-mail : 595666367@qq.com
|
|
|
|
# Date : 2020-05-19 15:27
|
|
|
|
# Desc : 获取最新的 GitHub 相关域名对应 IP
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import json
|
|
|
|
import traceback
|
|
|
|
|
|
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
import requests
|
2021-05-31 13:59:34 +08:00
|
|
|
from retry import retry
|
2021-01-04 09:45:04 +08:00
|
|
|
|
|
|
|
# Domains whose IP addresses are looked up, in the order they are written
# to the generated hosts file.
RAW_URL = [
    "alive.github.com",
    "live.github.com",
    "github.githubassets.com",
    "central.github.com",
    "desktop.githubusercontent.com",
    "assets-cdn.github.com",
    "camo.githubusercontent.com",
    "github.map.fastly.net",
    "github.global.ssl.fastly.net",
    "gist.github.com",
    "github.io",
    "github.com",
    "github.blog",
    "api.github.com",
    "raw.githubusercontent.com",
    "user-images.githubusercontent.com",
    "favicons.githubusercontent.com",
    "avatars5.githubusercontent.com",
    "avatars4.githubusercontent.com",
    "avatars3.githubusercontent.com",
    "avatars2.githubusercontent.com",
    "avatars1.githubusercontent.com",
    "avatars0.githubusercontent.com",
    "avatars.githubusercontent.com",
    "codeload.github.com",
    "github-cloud.s3.amazonaws.com",
    "github-com.s3.amazonaws.com",
    "github-production-release-asset-2e65be.s3.amazonaws.com",
    "github-production-user-asset-6210df.s3.amazonaws.com",
    "github-production-repository-file-5c1aeb.s3.amazonaws.com",
    "githubstatus.com",
    "github.community",
    "github.dev",
    "collector.github.com",
    "pipelines.actions.githubusercontent.com",
    "media.githubusercontent.com",
    "cloud.githubusercontent.com",
    "objects.githubusercontent.com",
    "vscode.dev",
]
|
2021-01-04 09:45:04 +08:00
|
|
|
|
|
|
|
# Domain suffix string for ipaddress.com.
# NOTE(review): not referenced by any code visible in this file
# (make_ipaddress_url hard-codes its own URL) -- presumably left over from
# an older URL scheme; confirm before removing.
IPADDRESS_PREFIX = ".ipaddress.com"
|
|
|
|
|
|
|
|
# Layout of the generated hosts text. ``{content}`` receives the
# "ip  hostname" lines and ``{update_time}`` the generation timestamp.
# write_file() relies on the "# Update time:" marker to compare only the
# host entries, so keep that marker text in sync with it.
HOSTS_TEMPLATE = (
    "# GitHub520 Host Start\n"
    "{content}\n"
    "\n"
    "# Update time: {update_time}\n"
    "# Update url: https://raw.hellogithub.com/hosts\n"
    "# Star me: https://github.com/521xueweihan/GitHub520\n"
    "# GitHub520 Host End\n"
)
|
|
|
|
|
|
|
|
|
2021-03-05 11:52:05 +08:00
|
|
|
def write_file(hosts_content: str, update_time: str):
    """
    Write the hosts file and rebuild README.md when the host entries changed.

    The plain ``hosts`` file is always rewritten. README.md is regenerated
    from README_template.md only if the host entries (everything before the
    "# Update time:" marker) differ from the ones found in the first
    ```bash block of the existing README.

    :param hosts_content: rendered hosts text
    :param update_time: timestamp substituted into the README template
    :return: True if README.md was (re)written, False if the hosts are unchanged
    """
    base_dir = os.path.dirname(__file__)
    output_doc_file_path = os.path.join(base_dir, "README.md")
    template_path = os.path.join(base_dir, "README_template.md")
    write_host_file(hosts_content)

    if os.path.exists(output_doc_file_path):
        # Explicit UTF-8: the README is not plain ASCII, so the platform's
        # default encoding must not be relied upon.
        with open(output_doc_file_path, "r", encoding="utf-8") as old_readme_fb:
            old_content = old_readme_fb.read()
        # partition() instead of split(...)[1]: a README without a ```bash
        # fence is treated as "changed" rather than raising IndexError.
        _, fence, after_fence = old_content.partition("```bash")
        if fence:
            old_hosts = after_fence.split("```")[0].strip()
            old_hosts = old_hosts.split("# Update time:")[0].strip()
            new_hosts = hosts_content.split("# Update time:")[0].strip()
            if old_hosts == new_hosts:
                print("host not change")
                return False

    with open(template_path, "r", encoding="utf-8") as temp_fb:
        template_str = temp_fb.read()
    readme_content = template_str.format(hosts_str=hosts_content,
                                         update_time=update_time)
    with open(output_doc_file_path, "w", encoding="utf-8") as output_fb:
        output_fb.write(readme_content)
    return True
|
|
|
|
|
|
|
|
|
2021-05-31 13:59:34 +08:00
|
|
|
def write_host_file(hosts_content: str):
    """
    Save the rendered hosts text as the ``hosts`` file next to this script.

    :param hosts_content: text to write; any existing file is overwritten
    """
    script_dir = os.path.dirname(__file__)
    hosts_path = os.path.join(script_dir, 'hosts')
    with open(hosts_path, "w") as hosts_fb:
        hosts_fb.write(hosts_content)
|
|
|
|
|
|
|
|
|
2021-07-11 12:20:50 +08:00
|
|
|
def write_json_file(hosts_list: list):
    """
    Dump the collected host entries as JSON into ``hosts.json`` next to this script.

    :param hosts_list: JSON-serializable list of host entries
    """
    script_dir = os.path.dirname(__file__)
    json_path = os.path.join(script_dir, 'hosts.json')
    with open(json_path, "w") as json_fb:
        json.dump(hosts_list, json_fb)
|
2021-01-04 09:45:04 +08:00
|
|
|
|
|
|
|
|
|
|
|
def make_ipaddress_url(raw_url: str):
    """
    Build the ipaddress.com lookup URL for a domain.

    :param raw_url: original url (the domain name to look up)
    :return: URL of the matching ipaddress.com page
    """
    return 'https://www.ipaddress.com/site/{}'.format(raw_url)
|
2021-01-04 09:45:04 +08:00
|
|
|
|
|
|
|
|
|
|
|
@retry(tries=3)
def get_ip(session: requests.session, raw_url: str):
    """
    Look up the IP address of a domain by scraping its ipaddress.com page.

    The page is fetched with a browser-like User-Agent, every IPv4-looking
    string in the response body is collected, and the one that occurs most
    often is returned. Any failure is printed and re-raised so the ``retry``
    decorator (``tries=3``) can attempt the lookup again.

    :param session: requests session used to issue the GET request
    :param raw_url: domain name to resolve
    :return: tuple ``(raw_url, ip)``
    :raises Exception: on request failure, a non-success HTTP status, or
        when no IPv4 address is found in the page
    """
    url = make_ipaddress_url(raw_url)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)'
                      ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/1'
                      '06.0.0.0 Safari/537.36'}
    pattern = r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"
    try:
        rs = session.get(url, headers=headers, timeout=5)
        # An error/block page may itself contain IP-like text; never scrape it.
        rs.raise_for_status()
        # The regex also matches impossible addresses such as 999.1.1.1,
        # so keep only candidates whose octets are all within 0-255.
        ip_list = [
            candidate for candidate in re.findall(pattern, rs.text)
            if all(int(octet) <= 255 for octet in candidate.split("."))
        ]
        most_common = Counter(ip_list).most_common(1)
        if not most_common:
            raise Exception("ip address empty")
        return raw_url, most_common[0][0]
    except Exception as ex:
        print("get: {}, error: {}".format(url, ex))
        # Re-raise the original error (keeps message and traceback) instead
        # of replacing it with an empty Exception.
        raise
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Resolve every domain in RAW_URL and publish the resulting hosts data.

    Domains whose lookup fails are reported and skipped. If at least one
    lookup succeeded, the hosts text is rendered from HOSTS_TEMPLATE and
    handed to write_file(); hosts.json is rewritten only when write_file()
    reports a change. The rendered hosts text is printed at the end.
    """
    content_list = []
    # Context manager so the session's pooled connections are released.
    with requests.session() as session:
        for raw_url in RAW_URL:
            try:
                host_name, ip = get_ip(session, raw_url)
            except Exception:
                # print_exc() takes no exception argument: its first
                # positional parameter is ``limit``, and passing the
                # exception there raises TypeError inside this handler.
                traceback.print_exc()
                continue
            content_list.append((ip, host_name,))

    if not content_list:
        return

    content = "".join(
        ip.ljust(30) + host_name + "\n" for ip, host_name in content_list)
    # Build an aware UTC+8 timestamp directly; utcnow() is naive and
    # astimezone() would interpret it as local time on non-UTC hosts.
    update_time = datetime.now(
        timezone(timedelta(hours=8))).replace(microsecond=0).isoformat()
    hosts_content = HOSTS_TEMPLATE.format(content=content, update_time=update_time)
    has_change = write_file(hosts_content, update_time)
    if has_change:
        write_json_file(content_list)
    print(hosts_content)
|
|
|
|
|
|
|
|
|
|
|
|
# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|