想使用 Python 爬取京东的快递信息，现在需要解决的首要问题是使用 Python 模拟浏览器登录，遇到了下面的问题。
_t _ntNBMNX
({"username":"\u8bf7\u60a8\u518d\u6b21\u767b\u5f55","_t":"_ntaLJJS"})
所有代码如下。
import urllib
import urllib2
import cookielib
import re
import socket
from bs4 import BeautifulSoup
def Navigate(url, data=None):
    """Fetch ``url`` and return the response body, retrying on network errors.

    Args:
        url: Address to request.
        data: Optional mapping of form fields. When non-empty it is
            URL-encoded and attached to the request as its body; when
            omitted or empty the request carries no body.

    Returns:
        The value of ``read()`` on the opened response, or ``None`` when
        every attempt failed.

    Raises:
        urllib2.HTTPError: propagated unchanged, as in the original code;
            an HTTP status error is not treated as a transient failure.
    """
    # The original allowed the counter to run from 0 through 20 inclusive.
    max_attempts = 21

    # ``data=None`` replaces the mutable ``{}`` default; an explicit empty
    # dict still selects the body-less request, as before.
    if data:
        request = urllib2.Request(url, urllib.urlencode(data))
    else:
        request = urllib2.Request(url)

    for _ in range(max_attempts):
        try:
            return urllib2.urlopen(request).read()
        except urllib2.HTTPError:
            # HTTPError derives from URLError; re-raise so the broader
            # handler below does not swallow it.
            raise
        except (socket.error, urllib2.URLError):
            # Every failed attempt now consumes one iteration. Previously
            # the counter only advanced after a success, so a persistent
            # failure looped forever.
            print('connection failure')

    print('try many time ..')
    return None
def func():
    """Attempt a JD passport login and print the server's reply.

    Steps:
      1. Install a process-wide urllib2 opener that keeps cookies in a
         CookieJar and sends a browser-like User-Agent header.
      2. Download the login page and read the hidden fields of its first
         form: the ``value`` of input 0 (used as ``uuid``) and the
         ``name``/``value`` of input 6 (a page-specific extra field).
      3. Post those fields together with the credentials to the login
         service and print the response body.

    Raises:
        IndexError: if the page has no ``<form>`` or the form has fewer
            than seven ``<input>`` elements.
        KeyError: if a selected input lacks the attribute being read.
    """
    cookie_jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    )]
    # Navigate() calls urllib2.urlopen, so the opener has to be global.
    urllib2.install_opener(opener)

    login_page = Navigate("https://passport.jd.com/uc/login")
    login_soup = BeautifulSoup(login_page, "html.parser")

    # Look the form inputs up once instead of re-querying for every field.
    form_inputs = login_soup.find_all("form")[0].find_all("input")
    uuid = form_inputs[0]['value']
    # NOTE(review): index 6 is positional and breaks if the page layout
    # changes - confirm against the current login page markup.
    token_name = form_inputs[6]['name']
    token_value = form_inputs[6]['value']
    print('%s %s' % (token_name, token_value))

    # Post over HTTPS: the original used http://, which sends the password
    # in plaintext.
    # NOTE(review): cookies marked Secure on the https login page would not
    # be sent to an http URL, which may explain the "please log in again"
    # reply - verify against the live service.
    login_service_url = "https://passport.jd.com/uc/loginService"
    post_data = {
        'loginname': 'my-username',
        'nloginpwd': 'my-password',
        'loginpwd': 'my-password',
        str(token_name): str(token_value),
        'uuid': uuid,
        'authcode': '',
    }
    passport = Navigate(login_service_url, post_data)
    print(passport)
希望各位高手能够帮帮我,先谢了!