搜尋

首頁  >  問答  >  主體

python - Scrapy的使用,如何請求新的URL,並回呼指定的函數?

關於Python3下Scrapy的使用問題

import re
import scrapy
from bs4 import BeautifulSoup
from scrapy.http import Request
from ..items import ZhibobaItem
import json
import lxml.html
import requests
import json


class Myspider(scrapy.Spider):
    name = 'zhiboba'
    allowed_domains = ['zhibo8.cc']
    json_url = 'https://bifen4pc.qiumibao.com/json/list.htm?85591'
    bash_url = 'https://www.zhibo8.cc/'

    def start_requests(self):
        yield Request(self.bash_url, self.parse_index)

    def parse_index(self, response):
        print("enter the parse_index")
        print(self.bash_url)
        ps = BeautifulSoup(response.text, 'lxml').find_all(label=re.compile("足球"))
        item = ZhibobaItem()
        for single_p in ps:
            item['label'] = single_p.get('label')
            item['sdate'] = single_p.get('data-time')
            item['linkurl'] = self.bash_url + single_p.find('a')['href']
            home_team = single_p.get_text().split()[2]
            item['home_team'] = home_team
            visit_team = single_p.get_text().split()[4]
            item['visit_team'] = visit_team
            print("quit the parse_index")
            print(self.json_url)
            yield Request(self.json_url, callback=self.get_score, meta={'home_team': home_team,
                                                                        'visit_team': visit_team
                                                                        })
    def get_score(self, response):
        print("enter the get_score")
        json_url = self.json_url
        wbdata = response.get(json_url).text
        data = json.loads(wbdata)
        news = data['list']
        print(wbdata)
        print("quit the get_score")

當我執行上述程式碼時,無法成功的呼叫json_url以及對應的回應函數get_score,哪裡不對?

仅有的幸福仅有的幸福2710 天前923

全部回覆(1)我來回復

  • 迷茫

    迷茫2017-06-28 09:27:02

    試著修改allow_domains = []

    回覆
    0
  • 取消回覆