<code>import re
import scrapy
from bs4 import BeautifulSoup
from scrapy.http import Request
from ..items import ZhibobaItem
import json
import lxml.html
import requests
import json
class Myspider(scrapy.Spider):
    """Spider that scrapes football fixtures from the zhibo8 index page.

    The crawl starts at ``bash_url``. Every element on that page whose
    ``label`` attribute matches "足球" is turned into a ``ZhibobaItem``, and
    for each one a request for the JSON feed at ``json_url`` is scheduled
    with the two team names attached in ``meta``.
    """

    name = 'zhiboba'
    # The JSON feed is served from a different registered domain than the
    # index page. It has to be listed here, otherwise Scrapy's offsite
    # filtering drops the request and ``get_score`` is never called.
    allowed_domains = [
        'zhibo8.cc',
        'qiumibao.com',
    ]
    json_url = 'https://bifen4pc.qiumibao.com/json/list.htm?85591'
    bash_url = 'https://www.zhibo8.cc/'

    def start_requests(self):
        """Yield the single seed request for the index page."""
        yield Request(self.bash_url, callback=self.parse_index)

    def parse_index(self, response):
        """Extract one item per football entry and schedule a feed lookup.

        Args:
            response: The downloaded index page.

        Yields:
            A populated ``ZhibobaItem`` for each usable entry, followed by a
            ``Request`` for ``json_url`` whose ``meta`` carries
            ``home_team`` and ``visit_team``.
        """
        self.logger.debug("enter the parse_index")
        self.logger.debug("%s", self.bash_url)

        soup = BeautifulSoup(response.text, 'lxml')
        for single_p in soup.find_all(label=re.compile("足球")):
            anchor = single_p.find('a', href=True)
            # Split the entry text once; the team names are read by position.
            tokens = single_p.get_text().split()
            if anchor is None or len(tokens) < 5:
                # An entry without a link or with too few words would raise
                # TypeError/IndexError below and abort the whole page.
                self.logger.warning(
                    "skipping entry with unexpected layout: %r", tokens
                )
                continue

            home_team = tokens[2]
            visit_team = tokens[4]

            # A fresh item per entry: reusing one instance would make every
            # yielded item alias the same (last-written) data.
            item = ZhibobaItem()
            item['label'] = single_p.get('label')
            item['sdate'] = single_p.get('data-time')
            # urljoin resolves relative, absolute and protocol-relative hrefs;
            # plain concatenation produced "//" or malformed URLs.
            item['linkurl'] = response.urljoin(anchor['href'])
            item['home_team'] = home_team
            item['visit_team'] = visit_team
            yield item

            self.logger.debug("%s", self.json_url)
            yield Request(
                self.json_url,
                callback=self.get_score,
                meta={
                    'home_team': home_team,
                    'visit_team': visit_team,
                },
                # Every entry requests the same URL; without this flag the
                # duplicate filter lets only the first request through.
                dont_filter=True,
            )

        self.logger.debug("quit the parse_index")

    def get_score(self, response):
        """Decode the JSON feed fetched for one entry and log its payload.

        Args:
            response: The downloaded ``json_url`` response; ``response.meta``
                holds the ``home_team`` and ``visit_team`` set by
                ``parse_index``.

        Returns:
            None. Nothing is emitted to the item pipeline from here.
        """
        self.logger.debug("enter the get_score")

        # The body is already downloaded by Scrapy; the response object has
        # no ``get(url)`` method (that is the ``requests`` API).
        wbdata = response.text
        try:
            data = json.loads(wbdata)
        except ValueError:
            self.logger.error(
                "feed at %s did not return valid JSON", response.url
            )
            return

        news = data.get('list') if isinstance(data, dict) else None
        if not isinstance(news, list):
            self.logger.warning(
                "feed at %s has no 'list' array", response.url
            )
            return

        self.logger.debug(
            "feed lists %d entries (requested for %s vs %s)",
            len(news),
            response.meta.get('home_team'),
            response.meta.get('visit_team'),
        )
        self.logger.debug("%s", wbdata)
        self.logger.debug("quit the get_score")