python - Scrapy的使用,如何请求新的URL,并回调指定的函数?
问题描述
关于Python3下Scrapy的使用问题
import reimport scrapyfrom bs4 import BeautifulSoupfrom scrapy.http import Requestfrom ..items import ZhibobaItemimport jsonimport lxml.htmlimport requestsimport jsonclass Myspider(scrapy.Spider): name = ’zhiboba’ allowed_domains = [’zhibo8.cc’] json_url = ’https://bifen4pc.qiumibao.com/json/list.htm?85591’ bash_url = ’https://www.zhibo8.cc/’ def start_requests(self):yield Request(self.bash_url, self.parse_index) def parse_index(self, response):print('enter the parse_index')print(self.bash_url)ps = BeautifulSoup(response.text, ’lxml’).find_all(label=re.compile('足球'))item = ZhibobaItem()for single_p in ps: item[’label’] = single_p.get(’label’) item[’sdate’] = single_p.get(’data-time’) item[’linkurl’] = self.bash_url + single_p.find(’a’)[’href’] home_team = single_p.get_text().split()[2] item[’home_team’] = home_team visit_team = single_p.get_text().split()[4] item[’visit_team’] = visit_team print('quit the parse_index') print(self.json_url) yield Request(self.json_url, callback=self.get_score, meta={’home_team’: home_team, ’visit_team’: visit_team }) def get_score(self, response):print('enter the get_score')json_url = self.json_urlwbdata = response.get(json_url).textdata = json.loads(wbdata)news = data[’list’]print(wbdata)print('quit the get_score')
当我执行上述代码时,无法成功的调用json_url以及相应的响应函数get_score,哪里不对?
问题解答
回答1:试着修改allow_domains = []。