我使用的是python3.4,在用requests库爬虫时,向百度post了一个参数,但是返回来的内容却没有python关键词的搜索列表,这是什么原因呢?我的代码如下:
import requests
import urllib
from bs4 import BeautifulSoup
url = 'https://www.baidu.com/s?'
key_word={'key':'python'}
headers = {'User_Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
def get_data(url):
try:
html=requests.post(url,data=key_word,headers=headers)
if html.status_code == 200:
soup=BeautifulSoup(html.text,'lxml')
return soup
else:
print('error')
except urllib.error.HTTPError as e:
print(url, e, str(time.time()))
print(get_data(url))
以下是我返回的内容:
<!DOCTYPE html>
<!--STATUS OK--><html>
<head>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<meta content="text/html;charset=utf-8" http-equiv="content-type"/>
<meta content="always" name="referrer"/>
<title>页é¢ä¸åå¨_ç¾åº¦æç´¢</title>
<style data-for="result">
body {color: #333; background: #fff; padding: 0; margin: 0; position: relative; min-width: 700px; font-family: arial; font-size: 12px }
p, form, ol, ul, li, dl, dt, dd, h3 {margin: 0; padding: 0; list-style: none }
input {padding-top: 0; padding-bottom: 0; -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box } img {border: none; }
.logo {width: 117px; height: 38px; cursor: pointer }
#wrapper {_zoom: 1 }
#head {padding-left: 35px; margin-bottom: 20px; width: 900px }
.fm {clear: both; position: relative; z-index: 297 }
.btn, #more {font-size: 14px }
.s_btn {width: 95px; height: 32px; padding-top: 2px\9; font-size: 14px; padding: 0; background-color: #ddd; background-position: 0 -48px; border: 0; cursor: pointer }
.s_btn_h {background-position: -240px -48px }
.s_btn_wr {width: 97px; height: 34px; display: inline-block; background-position: -120px -48px; *position: relative; z-index: 0; vertical-align: top }
#foot {}
#foot span {color: #666 }
.s_ipt_wr {height: 32px }
.s_form:after, .s_tab:after {content: "."; display: block; height: 0; clear: both; visibility: hidden }
.s_form {zoom: 1; height: 55px; padding: 0 0 0 10px }
#result_logo {float: left; margin: 7px 0 0 }
#result_logo img {width: 101px }
#head {padding: 0; margin: 0; width: 100%; position: absolute; z-index: 301; min-width: 1000px; background: #fff; border-bottom: 1px solid #ebebeb; position: fixed; _position: absolute; -webkit-transform: translateZ(0) }
#head .head_wrapper {_width: 1000px }
#head.s_down {box-shadow: 0 0 5px #888 }
.fm {clear: none; float: left; margin: 11px 0 0 10px }
#s_tab {background: #f8f8f8; line-height: 36px; height: 38px; padding: 55px 0 0 121px; float: none; zoom: 1 }
#s_tab a, #s_tab b {width: 54px; display: inline-block; text-decoration: none; text-align: center; color: #666; font-size: 14px }
#s_tab b {border-bottom: 2px solid #38f; font-weight: bold; color: #323232 }
#s_tab a:hover {color: #323232 }
#content_left {width: 540px; padding-left: 121px; padding-top: 5px }
.to_tieba, .to_zhidao_bottom {margin: 10px 0 0 121px }
#help {background: #f5f6f5; zoom: 1; padding: 0 0 0 50px; float: right }
#help a {color: #777; padding: 0 15px; text-decoration: none }
#help a:hover {color: #333 }
#foot {position: fixed; bottom:0; width: 100%; background: #f5f6f5; border-top: 1px solid #ebebeb; text-align: left; height: 42px; line-height: 42px; margin-top: 40px; *margin-top: 0; _position:absolute; _bottom:auto; _top:expression(eval(document.documentElement.scrollTop+document.documentElement.clientHeight-this.offsetHeight-(parseInt(this.currentStyle.marginTop,10)||0)-(parseInt(this.currentStyle.marginBottom,10)||0))); }
.content_none {padding: 45px 0 25px 121px } .s_ipt_wr.bg,
.s_btn_wr.bg, #su.bg {background-image: none }
.s_ipt_wr.bg {background: 0 }
.s_btn_wr {width: auto; height: auto; border-bottom: 1px solid transparent; *border-bottom: 0 }
.s_btn {width: 100px; height: 34px; color: white; letter-spacing: 1px; background: #3385ff; border-bottom: 1px solid #2d78f4; outline: medium; *border-bottom: 0; -webkit-appearance: none; -webkit-border-radius: 0 }
.s_btn:hover {background: #317ef3; border-bottom: 1px solid #2868c8; *border-bottom: 0; box-shadow: 1px 1px 1px #ccc }
.s_btn:active {background: #3075dc; box-shadow: inset 1px 1px 3px #2964bb; -webkit-box-shadow: inset 1px 1px 3px #2964bb; -moz-box-shadow: inset 1px 1px 3px #2964bb; -o-box-shadow: inset 1px 1px 3px #2964bb }
#lg {display: none }
#head .headBlock {margin: -5px 0 6px 121px }
#content_left .leftBlock {margin-bottom: 14px; padding-bottom: 5px; border-bottom: 1px solid #f3f3f3 }
.s_ipt_wr {border: 1px solid #b6b6b6; border-color: #7b7b7b #b6b6b6 #b6b6b6 #7b7b7b; background: #fff; display: inline-block; vertical-align: top; width: 539px; margin-right: 0; border-right-width: 0; border-color: #b8b8b8 transparent #ccc #b8b8b8; overflow: hidden }
.s_ipt_wr.ip_short {width: 439px; }
.s_ipt_wr:hover, .s_ipt_wr.ipthover {border-color: #999 transparent #b3b3b3 #999 }
.s_ipt_wr.iptfocus {border-color: #4791ff transparent #4791ff #4791ff }
.s_ipt_tip {color: #aaa; position: absolute; z-index: -10; font: 16px/22px arial; height: 32px; line-height: 32px; padding-left: 7px; overflow: hidden; width: 526px }
.s_ipt {width: 526px; height: 22px; font: 16px/18px arial; line-height: 22px\9; margin: 6px 0 0 7px; padding: 0; background: transparent; border: 0; outline: 0; -webkit-appearance: none }
#kw {position: relative;display: inline-block;}
input::-ms-clear {display: none }
/*Error page css*/
.norsSuggest {display: inline-block; color: #333; font-family: arial; font-size: 13px; position: relative; }
.norsTitle {font-size: 22px; font-family: Microsoft Yahei; font-weight: normal; color: #333; margin: 35px 0 25px 0; }
.norsTitle2 {font-family: arial; font-size: 13px; color: #666; }
.norsSuggest ol {margin-left: 47px; }
.norsSuggest li {margin: 13px 0; }
</style>
</head>
<body link="#0000cc">
<p class="wrapper_l" id="wrapper">
<p id="head">
<p class="head_wrapper">
<p class="s_form">
<p class="s_form_wrapper">
<a href="/" id="result_logo"><img alt="å°ç¾åº¦é¦é¡µ" src="//www.baidu.com/img/baidu_jgylogo3.gif" title="å°ç¾åº¦é¦é¡µ"/></a>
<form action="/s" class="fm" id="form" name="f">
<input name="ie" type="hidden" value="utf-8"/>
<input name="f" type="hidden" value="8"/>
<input name="rsv_bp" type="hidden" value="1"/>
<input name="ch" type="hidden" value=""/>
<input name="tn" type="hidden" value="baiduerr"/>
<input name="bar" type="hidden" value=""/>
<span class="bg s_ipt_wr iptfocus">
<input autocomplete="off" autofocus="" class="s_ipt" id="kw" maxlength="255" name="wd" value=""/>
</span><span class="bg s_btn_wr">
<input class="bg s_btn" id="su" type="submit" value="ç¾åº¦ä¸ä¸"/>
</span>
</form>
</p>
</p>
</p>
</p>
<p class="s_tab" id="s_tab"><b>ç½é¡µ</b><a href="http://news.baidu.com/ns?cl=2&rn=20&tn=news&word=" wdfield="word">æ°é»</a><a href="http://tieba.baidu.com/f?kw=&fr=wwwt" wdfield="kw">è´´å§</a><a href="http://zhidao.baidu.com/q?ct=17&pn=0&tn=ikaslist&rn=10&word=&fr=wwwt" wdfield="word">ç¥é</a><a href="http://music.baidu.com/search?fr=ps&ie=utf-8&key=" wdfield="key">é³ä¹</a><a href="http://image.baidu.com/i?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=" wdfield="word">å¾ç</a><a href="http://v.baidu.com/v?ct=301989888&rn=20&pn=0&db=0&s=25&ie=utf-8&word=" wdfield="word">è§é¢</a><a href="http://map.baidu.com/m?word=&fr=ps01000" wdfield="word">å°å¾</a><a href="http://wenku.baidu.com/search?word=&lm=0&od=0&ie=utf-8" wdfield="word">æåº</a><a href="//www.baidu.com/more/">æ´å¤Â»</a></p>
<p id="wrapper_wrapper">
<p id="content_left">
<p class="nors">
<p class="norsSuggest">
<h3 class="norsTitle">å¾æ±æï¼æ¨è¦è®¿é®ç页é¢ä¸åå¨ï¼</h3>
<p class="norsTitle2">温馨æ示ï¼</p>
<ol>
<li>请æ£æ¥æ¨è®¿é®çç½åæ¯å¦æ£ç¡®</li>
<li>å¦ææ¨ä¸è½ç¡®è®¤è®¿é®çç½åï¼è¯·æµè§<a href="//www.baidu.com/more/index.html">ç¾åº¦æ´å¤</a>页é¢æ¥çæ´å¤ç½åã</li>
<li>åå°é¡¶é¨éæ°åèµ·æç´¢</li>
<li>å¦æä»»ä½æè§æ建议ï¼è¯·åæ¶<a href="http://qingting.baidu.com/index">åé¦ç»æ们</a>ã</li>
</ol>
</p>
</p>
</p>
</p>
<p id="foot">
<span id="help" style="float:left;padding-left:121px">
<a href="http://help.baidu.com/question" target="_blank">帮å©</a>
<a href="http://www.baidu.com/search/jubao.html" target="_blank">举æ¥</a>
<a href="http://jianyi.baidu.com" target="_blank">ç»ç¾åº¦æ建议</a>
</span>
</p>
</p></body>
<script>
(function(){
var bds = {
util: {}
};
var c = document.getElementById('kw').parentNode;
bds.util.getWinWidth = function(){
return window.document.documentElement.clientWidth;
};
bds.util.setFormWidth = function(){
var width = bds.util.getWinWidth();
if(width < 1217) {bds.util.setClass(c, 'ip_short', 'add')}
else {bds.util.setClass(c, 'ip_short', 'remove')};
};
bds.util.setClass = function(obj, class_name, set) {
var ori_class = obj.className,
has_class_p = -1,
ori_class_arr = [],
new_class = '';
if(ori_class.length) ori_class_arr = ori_class.split(' ');
for( i in ori_class_arr) {
if(ori_class_arr[i] == class_name) has_class_p = i;
}
if( set == 'remove' && has_class_p >= 0) {
ori_class_arr.splice(has_class_p, 1);
new_class = ori_class_arr.join(' ');
obj.className = new_class;
} else if( set == 'add' && has_class_p < 0) {
ori_class_arr.push(class_name);
new_class = ori_class_arr.join(' ');
obj.className = new_class;
}
}
bds.util.setFormWidth();
if (typeof document.addEventListener != "undefined") {
window.addEventListener('resize', bds.util.setFormWidth, false);
document.getElementById('kw').addEventListener('focus', function(){bds.util.setClass(c,'iptfocus', 'add');}, false);
document.getElementById('kw').addEventListener('blur', function(){bds.util.setClass(c,'iptfocus', 'remove');}, false);
} else {
window.attachEvent('onresize', bds.util.setFormWidth, false);
document.getElementById('kw').attachEvent('onfocus', function(){bds.util.setClass(c,'iptfocus', 'add');}, false);
document.getElementById('kw').attachEvent('onblur', function(){bds.util.setClass(c,'iptfocus', 'remove');}, false);
}
})();
</script>
</html>
高洛峰2017-04-18 09:43:09
POST is not possible, use GET to request this URL http://www.baidu.com/s?wd=python
You can test it like this under bash:
curl http://www.baidu.com/s\?wd\=python > python.html
Then open this python.html
and take a look
天蓬老师2017-04-18 09:43:09
You are talking about Baidu’s search prompt list. They use get requests, not post
>>> r=requests.get('https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=pyth
on')
>>> r.text
'window.baidu.sug({q:"python",p:false,s:["python基础教程","python set","python j
son","python mysql","python web开发","python requests","python for循环","python3
","python环境变量设置","python 多线程"]});'
The URL you enter directly is https://www.baidu.com/baidu?word=python&ie=utf-8&tn=98012088_2_dg&ch=9
Just requests.get directly