search

Home  >  Q&A  >  body text

python - scrapy crawl 为什么无法跳转到下一个链接?

  1. 代码来自《Learning Scrapy》。我还在学习当中,这段代码基本是照书抄的,想运行一下看看效果,但没有达到书中所描述的效果:爬虫并没有跳转到下一页继续爬取。请帮忙看一下。

  2. 代码:

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

<code>import datetime

import urlparse

import socket

 

from scrapy.loader.processors import MapCompose, Join

from scrapy.linkextractors import LinkExtractor

from scrapy.spiders import CrawlSpider, Rule

from scrapy.loader import ItemLoader

 

from ..items import PropertiesItem

 

 

class EasySpider(CrawlSpider):
    """Crawl the paginated property index and scrape each property page.

    Starting from ``start_urls``, one rule follows the "next" pagination
    link of every index page and the other follows each listing link and
    passes the response to ``parse_item``.
    """

    name = 'easy'

    # allowed_domains must hold bare host names only (no scheme, port or
    # path). The offsite filter compares these entries against a request's
    # hostname; a full URL such as 'http://192.168.99.100:32768/' never
    # matches, so every link extracted by the rules was discarded as
    # off-site and the crawl stopped after the start page.
    allowed_domains = ['192.168.99.100']

    start_urls = ['http://192.168.99.100:32768/properties/index_00000.html']

    # Rules for horizontal and vertical crawling
    rules = (
        # Horizontal: pagination link. No callback is given, so the link is
        # only followed and the rules are applied again to the next page.
        Rule(LinkExtractor(restrict_xpaths='//*[contains(@class,"next")]')),
        # Vertical: each listing link is fetched and parsed by parse_item.
        Rule(LinkExtractor(restrict_xpaths='//*[@itemprop="url"]'),
             callback='parse_item'),
    )

    def parse_item(self, response):
        """ This function parses a property page.

        The lines starting with ``@`` below are Scrapy contracts and are
        read by ``scrapy check``; keep them intact.

        @url http://192.168.99.100:32768/properties/property_000000.html
        @returns items 1
        @scrapes title price description address image_urls
        @scrapes url project spider server date

        :param response: the downloaded property page.
        :returns: the populated ``PropertiesItem``.
        """

        # Create the loader using the response
        loader = ItemLoader(item=PropertiesItem(), response=response)

        # Load fields using XPath expressions
        loader.add_xpath('title', '//*[@itemprop="name"][1]/text()',
                         MapCompose(unicode.strip, unicode.title))
        # The regex keeps only the numeric part; thousands separators are
        # removed before converting to float.
        loader.add_xpath('price', './/*[@itemprop="price"][1]/text()',
                         MapCompose(lambda i: i.replace(',', ''), float),
                         re='[,.0-9]+')
        loader.add_xpath('description',
                         '//*[@itemprop="description"][1]/text()',
                         MapCompose(unicode.strip), Join())
        loader.add_xpath('address',
                         '//*[@itemtype="http://schema.org/Place"][1]/text()',
                         MapCompose(unicode.strip))
        # Image sources are relative, so resolve them against the page URL.
        loader.add_xpath('image_urls', '//*[@itemprop="image"][1]/@src',
                         MapCompose(
                             lambda i: urlparse.urljoin(response.url, i)))

        # Housekeeping fields
        loader.add_value('url', response.url)
        loader.add_value('project', self.settings.get('BOT_NAME'))
        loader.add_value('spider', self.name)
        loader.add_value('server', socket.gethostname())
        loader.add_value('date', datetime.datetime.now())

        return loader.load_item()

</code>

  3. 上面爬虫访问的地址是 Docker 容器中的服务。
    网页源代码为:

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

<code class="HTML">

 

<!DOCTYPE html>

<html>

<head>

<meta charset="UTF-8">

<title>Scrapy Book Tutorial Example</title>

<style type="text/css">

a {text-decoration:none;}

.listing-description { font-size: 7px; display:block; }

</style>

</head>

<body>

<h1>Page 4</h1>

<ul>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000120.html">

  <img itemprop="image" src="../images/i01.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">station split fee i triple</span>, price:

  <span itemprop="price">£881.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Wimbledon, London</span></span>

  <span class="listing-description" itemprop="description">while value bathrooms famous ucl towards lounge both

but buses croydon heating square stoke floors maisonette local</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000121.html">

  <img itemprop="image" src="../images/i16.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">european station area royal food</span>, price:

  <span itemprop="price">£1151.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Battersea, London</span></span>

  <span class="listing-description" itemprop="description">home sw questions today

de look hampstead drawer way have bayswater

team tower hr hyde</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000122.html">

  <img itemprop="image" src="../images/i16.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">ontario maintained rm bbc modern</span>, price:

  <span itemprop="price">£1026.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Heathrow, London</span></span>

  <span class="listing-description" itemprop="description">quick views waterloo

ensuite elephant moments loads stylish pounds

hoxton notting min cosy boasting dinning

opportunity integrated mid want london</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000123.html">

  <img itemprop="image" src="../images/i12.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">jacuzzi marble newbury also wanted</span>, price:

  <span itemprop="price">£1096.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>South Kensington, London</span></span>

  <span class="listing-description" itemprop="description">your bit email located park westfield</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000124.html">

  <img itemprop="image" src="../images/i01.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">woolwich be students gants smart</span>, price:

  <span itemprop="price">£626.34pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Seven Sisters, London</span></span>

  <span class="listing-description" itemprop="description">sainsbury self canada newington students

quality possible built corner</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000125.html">

  <img itemprop="image" src="../images/i03.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">soon tottenham highgate excel decorated</span>, price:

  <span itemprop="price">£1161.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Bayswater, London</span></span>

  <span class="listing-description" itemprop="description">charges suit live post diner doors dedicated http

british leyton property pleased

link included month sky number mod fulham breakfast carpeted</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000126.html">

  <img itemprop="image" src="../images/i09.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">throughout holiday roehampton location ref</span>, price:

  <span itemprop="price">£833.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Battersea, London</span></span>

  <span class="listing-description" itemprop="description">stop very offer really vale tube video

pretty benefits brixton others walthamstow including pw car keep

brixton questions anytime days house theatre

fulham victorian maida</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000127.html">

  <img itemprop="image" src="../images/i14.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">delightful islington storage fulham basement</span>, price:

  <span itemprop="price">£771.88pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Seven Sisters, London</span></span>

  <span class="listing-description" itemprop="description">self beautifully newington</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000128.html">

  <img itemprop="image" src="../images/i04.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">clapton wifi island stay prime</span>, price:

  <span itemprop="price">£1192.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Islington, London</span></span>

  <span class="listing-description" itemprop="description">now wi unlimited stores minimum nearby international center superb

benefit when inclusive manned court</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000129.html">

  <img itemprop="image" src="../images/i13.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">young ladies space few barnet bright best smart</span>, price:

  <span itemprop="price">£1076.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Hammersmith, London</span></span>

  <span class="listing-description" itemprop="description">quick paddington mobile square imperial pleased west

looking shop street

its benefits development direct minutes enjoy creative boasts

access london direct east</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000130.html">

  <img itemprop="image" src="../images/i04.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">prime k required commercial hammersmith be hayes john</span>, price:

  <span itemprop="price">£486.85pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Shepherds Bush, London</span></span>

  <span class="listing-description" itemprop="description">soon stoke safe open wardrobes agent apartments french letting</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000131.html">

  <img itemprop="image" src="../images/i12.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">upton than outstanding colindale kensal shared junction stepney</span>, price:

  <span itemprop="price">£1029.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>West Hampstead, London</span></span>

  <span class="listing-description" itemprop="description">pw price beautifully massive

wharf available approx style than</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000132.html">

  <img itemprop="image" src="../images/i16.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">royal non transport buses palmers last quick suite</span>, price:

  <span itemprop="price">£884.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Stratford, London</span></span>

  <span class="listing-description" itemprop="description">is popular student

paddington station tv hr

popular library church</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000133.html">

  <img itemprop="image" src="../images/i04.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">immaculate edgware guy twin warehouse angel views georgian</span>, price:

  <span itemprop="price">£797.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Greenwich, London</span></span>

  <span class="listing-description" itemprop="description">mod eat at have able

send whitechapel dss

open spaces la own french following

whole cosy meeting comprises pets piccadilly town</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000134.html">

  <img itemprop="image" src="../images/i16.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">leyton distance from suite clerkenwell uni jubilee tulse</span>, price:

  <span itemprop="price">£735.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Angel, London</span></span>

  <span class="listing-description" itemprop="description">enquiries showers travel suit fast reference from</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000135.html">

  <img itemprop="image" src="../images/i04.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">centre camden chelsea angel bridge stoke cheap required</span>, price:

  <span itemprop="price">£1151.4pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Chelsea, London</span></span>

  <span class="listing-description" itemprop="description">services include close own these ceilings

tiled throw fridge interior</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000136.html">

  <img itemprop="image" src="../images/i15.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">th to meg upton leytonstone kingsbury check chelsea</span>, price:

  <span itemprop="price">£785.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Camden Town, London</span></span>

  <span class="listing-description" itemprop="description">within suit tenant convenient gov four friends uk

these double richmond utility main either walthamstow

stratford square stoke my us ride</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000137.html">

  <img itemprop="image" src="../images/i16.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">rooms northern massive business kingsbury spec beautiful camden</span>, price:

  <span itemprop="price">£1017.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Hyde Park, London</span></span>

  <span class="listing-description" itemprop="description">bit housemates built

business professionals to

http together rd booking imperial day

zone move metropolitan bathroom friendly come monthly at bath</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000138.html">

  <img itemprop="image" src="../images/i16.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">se newbury clapton beauty superb within royal lots</span>, price:

  <span itemprop="price">£1052.87pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Whitechapel, London</span></span>

  <span class="listing-description" itemprop="description">then as o follow nw only system prices fast</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000139.html">

  <img itemprop="image" src="../images/i11.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">apartment mitcham available its soon accomodation wanstead extremely</span>, price:

  <span itemprop="price">£1248.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Shoreditch, London</span></span>

  <span class="listing-description" itemprop="description">piccadilly work cafes fee

laundry benefit contract house kensington them</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000140.html">

  <img itemprop="image" src="../images/i14.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">star charming x wick</span>, price:

  <span itemprop="price">£1290.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Bethnal Green, London</span></span>

  <span class="listing-description" itemprop="description">system rates hospital visit

current ride well walk british

place you tenancy</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000141.html">

  <img itemprop="image" src="../images/i01.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">deal looking circus superb</span>, price:

  <span itemprop="price">£897.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Islington, London</span></span>

  <span class="listing-description" itemprop="description">uk for tesco square keep marble victorian could tenants

kings south takes from request boasting going when connected

through o electricity

lift tube sainsbury quote full broadway offered value</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000142.html">

  <img itemprop="image" src="../images/i14.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">hotel ec first notting</span>, price:

  <span itemprop="price">£787.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Camden Town, London</span></span>

  <span class="listing-description" itemprop="description">fashion second walking screen quay unlimited within</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000143.html">

  <img itemprop="image" src="../images/i15.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">tulse n mews summer</span>, price:

  <span itemprop="price">£1177.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Camden, London</span></span>

  <span class="listing-description" itemprop="description">french finchley thames safe weekly

opposite we house viewings providing week open ad</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000144.html">

  <img itemprop="image" src="../images/i15.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">rise furn promotion heating</span>, price:

  <span itemprop="price">£738.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Shoreditch, London</span></span>

  <span class="listing-description" itemprop="description">home who miles

liverpool require area westfield

which nearest available our restaurant boasts much en early</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000145.html">

  <img itemprop="image" src="../images/i08.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">mansions caledonian the quite</span>, price:

  <span itemprop="price">£703.8pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Croydon, London</span></span>

  <span class="listing-description" itemprop="description">wardrobe leading seconds covent garden wood sought ground

smoker notice boasting needed email ll period

guy sorry bbq thank

refurbished different licence contemporary converted during block itself</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000146.html">

  <img itemprop="image" src="../images/i05.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">caledonian service spectacular convenient</span>, price:

  <span itemprop="price">£1184.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Tower Hamlets, London</span></span>

  <span class="listing-description" itemprop="description">during opposite bill pay letting bethnal</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000147.html">

  <img itemprop="image" src="../images/i02.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">waterloo condition knightsbridge rentals</span>, price:

  <span itemprop="price">£877.85pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Tower Bridge, London</span></span>

  <span class="listing-description" itemprop="description">current free nd professional

overlooking support most there link</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000148.html">

  <img itemprop="image" src="../images/i01.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">university mile contract room</span>, price:

  <span itemprop="price">£892.0pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Brixton, London</span></span>

  <span class="listing-description" itemprop="description">professional king not room plus coming many so d

following rent professionals seconds washer tax check french guys

hyde offered conditioning interested</span>

  </a>

</li>

 

<li class="listing-maxi" itemscope itemtype="http://schema.org/Product">

  <a class="listing-link" itemprop="url" href="property_000149.html">

  <img itemprop="image" src="../images/i00.jpg" width="15" height="15" alt="thumb" />

  <span class="listing-title" itemprop="name">accommodation st russell vale</span>, price:

  <span itemprop="price">£1246.39pw</span>

  location: <span itemscope itemtype="http://schema.org/Place">

  <span>Highbury, London</span></span>

  <span class="listing-description" itemprop="description">anytime workstation museum highly wardrobe both speak

attractions residential based avoid stoke most be

buses charges team away

min moments moving move apartments pay right lines trains</span>

  </a>

</li>

</ul>

<ul><li class="next"><a href="index_00005.html" rel="nofollow">next</a></li></ul>

</body></html></code>

ringa_leeringa_lee2899 days ago870

reply all(1)I'll reply

  • PHP中文网

    PHP中文网2017-04-17 17:38:45

    Try exchanging the order of the two rules

    reply
    0
  • Cancelreply