search

Home  >  Q&A  >  body text

网页爬虫 - 为什么用 Python 模拟登录 App Annie 时一直返回 503 状态码

#-*-encoding:utf-8-*-
import requests,  xlwt, sys
from bs4 import BeautifulSoup

# Python 2 only: reload() restores sys.setdefaultencoding, which site.py
# removes at startup, so the process-wide default encoding can be set.
reload(sys)
sys.setdefaultencoding('utf-8')

# Values sent as the Referer and User-Agent request headers below.
referer = "https://www.appannie.com/account/login/?_ref=header"
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/56.0.2924.87 Safari/537.36'
)

# Request headers shared by the HTTP calls made in main().
header = {
    'User-Agent': user_agent,
    'Referer': referer,
    'Host': 'www.appannie.com',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain,*/*',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'X-NewRelic-ID': 'VwcPUFJXGwEBUlJSDgc=',
    'X-Requested-With': 'XMLHttpRequest',
}


def main():
    """Log in to App Annie and print the parsed Google Play top-chart page.

    Steps:
      1. GET the login page so the session receives the ``csrftoken`` cookie.
      2. POST the credentials together with that token.
      3. GET the US / game top-chart page for a fixed date and print the
         parsed HTML.

    Raises:
        KeyError: if the login page did not set a ``csrftoken`` cookie.
        Exception: if the login POST returns a non-2xx status code.
    """
    url = 'https://www.appannie.com/account/login/'
    s = requests.Session()

    # The first GET exists only to populate the session's cookie jar.
    s.get(url, headers=header)
    key = s.cookies['csrftoken']

    data = {
        'csrfmiddlewaretoken': key,
        'next': '/dashboard/home/',
        'username': '1195615991@qq.com',
        'password': 'xxxxx',
    }

    # Send the browser-like headers on the POST as well; without them the
    # request goes out with requests' default User-Agent and no Referer.
    # NOTE(review): this is the likely cause of the 503 response - confirm.
    req = s.post(url, data=data, headers=header)

    # Floor division keeps the 2xx check correct on Python 3 too, where
    # "/" would turn e.g. 201 into 2.01 and wrongly raise.
    if 2 != req.status_code // 100:
        raise Exception("Error while logging in, code: %d" % (req.status_code))

    n = '2017-04-11'
    url_1 = 'https://www.appannie.com/apps/google-play/top-chart/?country=US&category=game&device=&date={}'.format(n)

    # The session already carries every cookie it received, so there is no
    # need to pass the login response's cookies explicitly.
    req_1 = s.get(url_1, headers=header).content
    soup = BeautifulSoup(req_1, 'lxml')
    # Parenthesised form prints the same on Python 2 and is valid Python 3.
    print(soup)

# Run the login + fetch flow only when this file is executed as a script.
if "__main__" == __name__:
    main()
高洛峰高洛峰2788 days ago712

reply all(1)I'll reply

  • 怪我咯

    怪我咯2017-04-18 10:35:46

    两个关键点:
    1. 请求头(headers)中要带上浏览器的 User-Agent
    2. POST 时要提交 csrfmiddlewaretoken 参数(取自 cookie 中的 csrftoken)
    
    # coding: utf-8
    
    import requests
    # Login endpoint used for both the initial GET and the credential POST.
    login_url = 'https://www.appannie.com/account/login'

    # One session for the whole flow so cookies and headers persist.
    sess = requests.Session()
    sess.headers.update({
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
    })

    # The GET is made only so the session picks up the csrftoken cookie.
    sess.get(login_url)
    csrf_token = sess.cookies.get('csrftoken')

    # Form fields submitted with the login request.
    payload = {
        'csrfmiddlewaretoken': csrf_token,
        'next':'/dashboard/home/',
        'username':'XXXX',
        'password':'XXXX'
    }

    response = sess.post(login_url, data=payload)
    print(response.status_code)

    reply
    0
  • Cancelreply