From 5b986829fefb09d27eff4b78be0dc8dc33d2518e Mon Sep 17 00:00:00 2001 From: sqzhang <3257179914@qq.com> Date: Fri, 18 Mar 2022 21:52:29 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dpa.per=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E5=87=BA=E9=94=99=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 5.爬取5K分辨率超清唯美壁纸/README.MD | 50 +++---- .../crawler_5K_wallpaper.py | 113 -------------- 5.爬取5K分辨率超清唯美壁纸/main.py | 138 ++++++++++++++++++ 5.爬取5K分辨率超清唯美壁纸/requirement.txt | 2 + README.MD | 29 ++-- 5 files changed, 178 insertions(+), 154 deletions(-) delete mode 100644 5.爬取5K分辨率超清唯美壁纸/crawler_5K_wallpaper.py create mode 100644 5.爬取5K分辨率超清唯美壁纸/main.py create mode 100644 5.爬取5K分辨率超清唯美壁纸/requirement.txt diff --git a/5.爬取5K分辨率超清唯美壁纸/README.MD b/5.爬取5K分辨率超清唯美壁纸/README.MD index 2a70c51..41aacf0 100644 --- a/5.爬取5K分辨率超清唯美壁纸/README.MD +++ b/5.爬取5K分辨率超清唯美壁纸/README.MD @@ -6,17 +6,34 @@
-
- 这里有一款Mac下的小清新壁纸神器[Pap.er][3],可能是Mac下最好的壁纸软件,**自带5K超清分辨率壁纸**,富有多种类型壁纸,当我们想在Windows或者Linux下使用的时候,就可以考虑将**5K超清分辨率壁纸**爬取下来。 +## 功能截图 + +![](example1.png) + +![](example2.gif) + + +## 如何运行 + +```bash +# 跳转到当前目录 +cd 目录名 +# 先卸载依赖库 +pip uninstall -y -r requirement.txt +# 再重新安装依赖库 +pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple +# 开始运行 +python main.py +``` + + ## 编写思路 -首先,打开Charles软件,进行抓包。打开[Pap.er][3],开始抓包。(由于是Mac系统下的APP,所以非Mac系统的朋友可以直接看抓包结果) - -抓包分析结果如下: +首先,打开Charles软件,进行抓包。打开[Pap.er][3],开始抓包。抓包分析结果如下: | 类型 | 地址 | | ---- | ------------------------------------------------------------ | @@ -141,27 +158,6 @@ if __name__ == '__main__': -## 使用教程 - -1. 确保以下库均已安装: - -```python -# 如果没有安装,请使用pip install module安装 -import requests -import filetype -import os -import json -from contextlib import closing -``` - - - -## 演示图片 - -![](example1.png) - -![](example2.gif) - ## 完整源代码 项目源代码在[GitHub仓库][1] @@ -170,7 +166,7 @@ from contextlib import closing -# License +## License [The MIT License (MIT)][2] diff --git a/5.爬取5K分辨率超清唯美壁纸/crawler_5K_wallpaper.py b/5.爬取5K分辨率超清唯美壁纸/crawler_5K_wallpaper.py deleted file mode 100644 index d2aece8..0000000 --- a/5.爬取5K分辨率超清唯美壁纸/crawler_5K_wallpaper.py +++ /dev/null @@ -1,113 +0,0 @@ -# -*- coding:utf-8 -*- - -from requests import get -from filetype import guess -from os import rename -from os import makedirs -from os.path import exists -from json import loads -from contextlib import closing - - -# 文件下载器 -def Down_load(file_url, file_full_name, now_photo_count, all_photo_count): - headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"} - - # 开始下载图片 - with closing(get(file_url, headers=headers, stream=True)) as response: - chunk_size = 1024 # 单次请求最大值 - content_size = int(response.headers['content-length']) # 文件总大小 - data_count = 0 # 当前已传输的大小 - with open(file_full_name, "wb") as file: - for data in response.iter_content(chunk_size=chunk_size): - file.write(data) - done_block = int((data_count / content_size) * 50) - data_count = data_count + len(data) - now_jd = (data_count / content_size) * 100 - print("\r %s:[%s%s] %d%% %d/%d" % (file_full_name, done_block * '█', ' ' * (50 - 1 - done_block), now_jd, now_photo_count, all_photo_count), end=" ") - - # 下载完图片后获取图片扩展名,并为其增加扩展名 - file_type = guess(file_full_name) - rename(file_full_name, file_full_name + '.' + file_type.extension) - - - -# 爬取不同类型图片 -def crawler_photo(type_id, photo_count): - - # 最新 1, 最热 2, 女生 3, 星空 4 - if(type_id == 1): - url = 'https://service.paper.meiyuan.in/api/v2/columns/flow/5c68ffb9463b7fbfe72b0db0?page=1&per_page=' + str(photo_count) - elif(type_id == 2): - url = 'https://service.paper.meiyuan.in/api/v2/columns/flow/5c69251c9b1c011c41bb97be?page=1&per_page=' + str(photo_count) - elif(type_id == 3): - url = 'https://service.paper.meiyuan.in/api/v2/columns/flow/5c81087e6aee28c541eefc26?page=1&per_page=' + str(photo_count) - elif(type_id == 4): - url = 'https://service.paper.meiyuan.in/api/v2/columns/flow/5c81f64c96fad8fe211f5367?page=1&per_page=' + str(photo_count) - - # 获取图片列表数据 - headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"} - respond = get(url, headers=headers) - photo_data = loads(respond.content) - - # 已经下载的图片张数 - now_photo_count = 1 - - # 所有图片张数 - all_photo_count = len(photo_data) - - # 开始下载并保存5K分辨率壁纸 - for photo in photo_data: - - # 创建一个文件夹存放我们下载的图片 - if not exists('./' + str(type_id)): - makedirs('./' + str(type_id)) - - # 准备下载的图片链接 - file_url = photo['urls']['raw'] - - # 准备下载的图片名称,不包含扩展名 - file_name_only = file_url.split('/') - file_name_only = file_name_only[len(file_name_only) -1] - - # 准备保存到本地的完整路径 - file_full_name = './' + str(type_id) + '/' + file_name_only - - # 开始下载图片 - Down_load(file_url, file_full_name, now_photo_count, all_photo_count) - now_photo_count = now_photo_count + 1 - - - -if __name__ == '__main__': - - # 最新 1, 最热 2, 女生 3, 星空 4 - # 爬取类型为3的图片(女生),一共准备爬取20000张 - wall_paper_id = 1 - wall_paper_count = 10 - while(True): - - # 换行符 - print('\n\n') - - # 选择壁纸类型 - wall_paper_id = input("壁纸类型:最新壁纸 1, 最热壁纸 2, 女生壁纸 3, 星空壁纸 4\n请输入编号以便选择5K超清壁纸类型:") - # 判断输入是否正确 - while(wall_paper_id != str(1) and wall_paper_id != str(2) and wall_paper_id != str(3) and wall_paper_id != str(4)): - wall_paper_id = input("壁纸类型:最新壁纸 1, 最热壁纸 2, 女生壁纸 3, 星空壁纸 4\n请输入编号以便选择5K超清壁纸类型:") - - - # 选择要下载的壁纸数量 - wall_paper_count = input("请输入要下载的5K超清壁纸的数量:") - # 判断输入是否正确 - while(int(wall_paper_count) <= 0): - wall_paper_count = input("请输入要下载的5K超清壁纸的数量:") - - - # 开始爬取5K高清壁纸 - print("正在下载5K超清壁纸,请稍等……") - crawler_photo(int(wall_paper_id), int(wall_paper_count)) - print('\n下载5K高清壁纸成功!') - - - diff --git a/5.爬取5K分辨率超清唯美壁纸/main.py b/5.爬取5K分辨率超清唯美壁纸/main.py new file mode 100644 index 0000000..cca48a9 --- /dev/null +++ b/5.爬取5K分辨率超清唯美壁纸/main.py @@ -0,0 +1,138 @@ +# -*- coding:utf-8 -*- + +import requests +from filetype import guess +from os import rename +from os import makedirs +from os.path import exists +import json +from contextlib import closing + +headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"} + +def get_photo_type(): + """ + 获取可以爬取的壁纸类型 + :return: + """ + url = "https://service.paper.meiyuan.in/api/v2/columns" + res = requests.get(url=url, headers=headers, verify=False) + res_json = json.loads(res.text) + return res_json + + + +def down_load(file_url, file_full_name, now_photo_count, all_photo_count): + """ + 文件下载器 + :param file_url: + :param file_full_name: + :param now_photo_count: + :param all_photo_count: + :return: + """ + + # 开始下载图片 + with closing(requests.get(file_url, headers=headers, stream=True)) as response: + chunk_size = 1024 # 单次请求最大值 + content_size = int(response.headers['content-length']) # 文件总大小 + data_count = 0 # 当前已传输的大小 + with open(file_full_name, "wb") as file: + for data in response.iter_content(chunk_size=chunk_size): + file.write(data) + done_block = int((data_count / content_size) * 50) + data_count = data_count + len(data) + now_jd = (data_count / content_size) * 100 + print("\r %s:[%s%s] %d%% %d/%d" % (file_full_name, done_block * '█', ' ' * (50 - 1 - done_block), now_jd, now_photo_count, all_photo_count), end=" ") + + # 下载完图片后获取图片扩展名,并为其增加扩展名 + file_type = guess(file_full_name) + rename(file_full_name, file_full_name + '.' + file_type.extension) + +def crawler_photo(type_id, photo_count): + """ + 爬取不同类型图片 + :param type_id: + :param photo_count: + :return: + """ + + url = 'https://service.paper.meiyuan.in/api/v2/columns/flow/{}?page=1&per_page={}'.format(type_id, photo_count) + + # 获取图片列表数据 + respond = requests.get(url, headers=headers, verify=False) + photo_data = json.loads(respond.content) + + # 已经下载的图片张数 + now_photo_count = 1 + + # 所有图片张数 + all_photo_count = len(photo_data) + + # 开始下载并保存5K分辨率壁纸 + for photo in photo_data: + + # 创建一个文件夹存放下载的图片 + if not exists('./' + str(type_id)): + makedirs('./' + str(type_id)) + + # 准备下载的图片链接 + file_url = photo['urls']['raw'] + + # 准备下载的图片名称,不包含扩展名 + file_name_only = file_url.split('/') + file_name_only = file_name_only[len(file_name_only) - 1] + + # 准备保存到本地的完整路径 + file_full_name = './' + str(type_id) + '/' + file_name_only + + # 开始下载图片 + down_load(file_url, file_full_name, now_photo_count, all_photo_count) + now_photo_count = now_photo_count + 1 + + + +if __name__ == '__main__': + + # 获取可以爬取的壁纸类型 + res_json = get_photo_type() + + # 壁纸类型 + wall_paper_id = 0 + + # 壁纸数量 + wall_paper_count = 10 + + info_str = "壁纸类型:" + for index, p_type in enumerate(res_json): + info_str = info_str + "{} {}".format(index, p_type['langs']['zh-Hans-CN']) + if index != len(res_json) - 1: + info_str = info_str + ", " + + # 选择壁纸类型,并判断输入是否正确 + while True: + wall_paper_id = input(info_str + "\n请输入编号以便选择5K超清壁纸类型:") + wall_paper_id = wall_paper_id.strip() + wall_paper_id = int(wall_paper_id) + if wall_paper_id >= len(res_json) or wall_paper_id < 0: + continue + else: + break + + # 选择壁纸数量,并判断输入是否正确 + while True: + wall_paper_count = input("请输入要下载的5K超清壁纸的数量:") + wall_paper_count = wall_paper_count.strip() + wall_paper_count = int(wall_paper_count) + if wall_paper_count <= 0: + continue + else: + break + + # 开始爬取5K高清壁纸 + print("正在下载5K超清壁纸,请稍等……") + crawler_photo(res_json[wall_paper_id]['_id'], wall_paper_count) + print('\n下载5K高清壁纸完毕,壁纸位于当前的{}目录。'.format(res_json[wall_paper_id]['_id'])) + + + diff --git a/5.爬取5K分辨率超清唯美壁纸/requirement.txt b/5.爬取5K分辨率超清唯美壁纸/requirement.txt new file mode 100644 index 0000000..e153a6a --- /dev/null +++ b/5.爬取5K分辨率超清唯美壁纸/requirement.txt @@ -0,0 +1,2 @@ +requests +filetype \ No newline at end of file diff --git a/README.MD b/README.MD index a7e4762..4e5ecca 100644 --- a/README.MD +++ b/README.MD @@ -190,25 +190,26 @@ weibo_password = "改成你的微博密码" 这里有一款Mac下的小清新壁纸神器[Pap.er][8],可能是Mac下最好的壁纸软件,**自带5K超清分辨率壁纸**,富有多种类型壁纸,当我们想在Windows或者Linux下使用的时候,就可以考虑将**5K超清分辨率壁纸**爬取下来。 -### 使用教程 - -1. 确保以下库均已安装: - -```python -# 如果没有安装,请使用pip install module安装 -import requests -import filetype -import os -import json -from contextlib import closing -``` - -### 演示图片 +### 功能截图 ![](5.爬取5K分辨率超清唯美壁纸/example1.png) ![](5.爬取5K分辨率超清唯美壁纸/example2.gif) +### 如何运行 + +```bash +# 跳转到当前目录 +cd 目录名 +# 先卸载依赖库 +pip uninstall -y -r requirement.txt +# 再重新安装依赖库 +pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple +# 开始运行 +python main.py +``` + +