# -*- coding: utf-8 -*-
# @Time : 2020/1/28 21:12
# @Author : SunriseCai
# @File : YXLMSpider.py
# @Software: PyCharm
import os
import json
import time
import requests

"""League of Legends skin spider."""

class YingXLMSpider(object):
    """Download League of Legends skin images from game.gtimg.cn.

    The spider reads the hero list, then each hero's detail document, and
    saves every skin's main image into a folder named after the hero.
    """

    # Seconds to wait for a server response. Without a timeout a stalled
    # connection would block the whole crawl indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # JSON document that lists every hero.
        self.onePageUrl = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
        # Per-hero JSON document; ``{}`` is replaced with the hero id.
        self.skinUrl = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'
        self.headers = {
            'User-Agent': 'Mozilla/5.0'
        }

    def get_heroList(self):
        """Fetch the hero list and download the skins of every hero.

        For each entry under the ``hero`` key, its ``heroId`` and ``title``
        are passed to :meth:`get_skin_url`. A failure message is printed
        when the server does not answer with HTTP 200.
        """
        res = requests.get(url=self.onePageUrl, headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        if res.status_code != 200:
            print('your code is fail')
            return
        data = json.loads(res.text)
        for item in data['hero']:
            self.get_skin_url(item['heroId'], item['title'])

    def get_skin_url(self, Id, folder):
        """Fetch one hero's skin list and download every skin image.

        :param Id: hero id, substituted into ``self.skinUrl``
        :param folder: folder (named after the hero) the images are saved to
        :return: None
        """
        res = requests.get(self.skinUrl.format(Id), headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        if res.status_code != 200:
            print('your code is fail')
            return
        data = json.loads(res.text)
        for item in data['skins']:
            # '/' would be read as a path separator when the skin name is
            # used as a file name, so it is removed.
            name = item['name'].replace('/', '')
            self.download_picture(item['mainImg'], name, folder)

    def download_picture(self, url, name, folder):
        """Save one skin image as ``<folder>/<name>.jpg``.

        The folder is created when missing. Nothing is downloaded when
        ``url`` is empty or ``None``, or when the target file already exists
        (this lets an interrupted crawl resume where it stopped).

        :param url: address of the skin image
        :param name: skin name, used as the file name
        :param folder: destination folder
        :return: None
        """
        os.makedirs(folder, exist_ok=True)
        path = os.path.join(folder, '%s.jpg' % name)
        if not url or os.path.exists(path):
            return
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)
        res = requests.get(url, headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        if res.status_code != 200:
            # Do not write an error body to disk: the file would look like a
            # finished download and never be retried.
            print('your code is fail')
            return
        with open(path, 'wb') as f:
            f.write(res.content)
        print('%s.jpg' % name, '下载成功')

    def main(self):
        """Run the full crawl, starting from the hero list."""
        self.get_heroList()
1 z! I) K) q7 _& g6 T! \; P- - a1 ?) |7 S- P5 F
# Script entry point: build the spider and start the crawl.
if __name__ == '__main__':
    YingXLMSpider().main()
- ' h: _/ W7 u5 e$ K$ U
复制代码 |
|