Startseite > Backend-Entwicklung > Python-Tutorial > Python-Crawler greifen auf mobile APP-Daten zu

Python-Crawler greifen auf mobile APP-Daten zu

大家讲道理 · Original · 2016-11-09 11:18:42 · 2630 Aufrufe

Das folgende Skript erfasst die Themendaten der App „超级课程表“ (Super-Stundenplan).

#!/usr/local/bin/python2.7
# -*- coding: utf8 -*-
"""
  超级课程表话题抓取
"""
import urllib2
from cookielib import CookieJar
import json
 
 
def fetch_data(json_data):
    """Read the paging timestamp and the topic posts out of one API response.

    Each post that has non-empty ``content`` is printed (school name and
    content) and collected.

    Args:
        json_data: Decoded JSON response (a dict). It must contain a ``data``
            dict with the keys ``timestampLong`` and ``messageBOs``.

    Returns:
        A ``(timestampLong, topicList)`` tuple. ``topicList`` holds one dict
        per kept message with the keys ``content``, ``schoolName``,
        ``messageId``, ``gender`` and ``time``.

    Raises:
        KeyError: If ``data``, ``timestampLong``, ``messageBOs`` or any
            per-message field of a kept message is missing.
    """
    data = json_data['data']
    timestampLong = data['timestampLong']
    topicList = []
    for each in data['messageBOs']:
        # Messages without (or with empty) content are skipped entirely.
        if not each.get('content', False):
            continue
        topicDict = {
            'content': each['content'],
            'schoolName': each['schoolName'],
            'messageId': each['messageId'],
            'gender': each['studentBO']['gender'],
            'time': each['issueTime'],
        }
        # A single pre-formatted argument prints "school content" both with
        # the Python 2 print statement and with the print function.
        print('%s %s' % (each['schoolName'], each['content']))
        topicList.append(topicDict)
    return timestampLong, topicList
 
 
def load(timestamp, headers, url):
    """Load more: keep requesting further pages of topic posts.

    Starting from ``timestamp``, POSTs the paging request to ``url`` through
    the module-level ``opener`` and hands each successful response to
    ``fetch_data`` (which prints the posts). The timestamp returned by
    ``fetch_data`` is used for the next request.

    Args:
        timestamp: Paging timestamp to start from, as returned by
            ``fetch_data``.
        headers: Base HTTP headers. The dict is copied per request and is
            not modified.
        url: Endpoint that serves the topic messages.

    Returns:
        ``False`` when a response does not report ``status == 1``; ``None``
        when paging stops because the timestamp no longer changes. The
        fetched posts themselves are not returned.
    """
    # Iterate instead of recursing once per page, so a long feed cannot
    # exhaust the interpreter's recursion limit.
    while True:
        loadData = 'timestamp=%s&phoneBrand=Meizu&platform=1&genderType=-1&topicId=19&phoneVersion=16&selectType=3&channel=MXMarket&phoneModel=M040&versionNumber=7.2.1&' % timestamp
        # Use a per-request copy with the real body length; a fixed value is
        # only right for one particular timestamp width.
        requestHeaders = dict(headers)
        requestHeaders['Content-Length'] = str(len(loadData))
        req = urllib2.Request(url, loadData, requestHeaders)
        loadResult = opener.open(req).read()
        loadJson = json.loads(loadResult)
        if loadJson.get('status', False) != 1:
            print('load fail')
            print(loadResult)
            return False
        print('load successful!')
        nextTimestamp, _ = fetch_data(loadJson)
        if nextTimestamp == timestamp:
            # Same timestamp means the identical request would be sent again
            # forever; stop paging here.
            return None
        timestamp = nextTimestamp
 
# Script body: log in to the app backend, fetch the first page of topic
# posts, then hand over to load() for the following pages.
loginUrl = 'http://120.55.151.61/V2/StudentSkip/loginCheckV4.action'
topicUrl = 'http://120.55.151.61/V2/Treehole/Message/getMessageByTopicIdV3.action'
# Request headers shared by all requests. Content-Length is filled in per
# request from the actual body length further down.
# NOTE(review): responses are passed to json.loads() without decompression;
# if the server honours 'Accept-Encoding: gzip', parsing would fail -- confirm.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.1.1; M040 Build/JRO03H)',
    'Host': '120.55.151.61',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip',
    }

# --- Login ---
# NOTE(review): device code, account and password values are hard-coded in
# the source; consider loading them from configuration instead.
loginData = 'phoneBrand=Meizu&platform=1&deviceCode=868033014919494&account=FCF030E1F2F6341C1C93BE5BBC422A3D&phoneVersion=16&password=A55B48BB75C79200379D82A18C5F47D6&channel=MXMarket&phoneModel=M040&versionNumber=7.2.1&'
headers['Content-Length'] = str(len(loginData))
# The cookie jar keeps the session cookies from the login response so the
# same opener can be reused for the topic requests.
cookieJar = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
req = urllib2.Request(loginUrl, loginData, headers)
loginResult = opener.open(req).read()
loginStatus = json.loads(loginResult).get('data', False)
# Check the parsed status, not the raw response body: the body is truthy
# whenever the server answers at all, so login failures were never reported.
if loginStatus:
    print('login successful!')
else:
    print('login fail')
    print(loginResult)

# --- Fetch topics ---
topicData = 'timestamp=0&phoneBrand=Meizu&platform=1&genderType=-1&topicId=19&phoneVersion=16&selectType=3&channel=MXMarket&phoneModel=M040&versionNumber=7.2.1&'
headers['Content-Length'] = str(len(topicData))
topicRequest = urllib2.Request(topicUrl, topicData, headers)
topicHtml = opener.open(topicRequest).read()
topicJson = json.loads(topicHtml)
topicStatus = topicJson.get('status', False)
print(topicJson)
if topicStatus == 1:
    print('fetch topic success!')
    timestamp, topicList = fetch_data(topicJson)
    # load() pages through the remaining posts itself; the branch below only
    # runs if it ever returns a truthy response dict.
    data = load(timestamp, headers, topicUrl)
    if data:
        timestamp, topicList = fetch_data(data)

Hinweis:

Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn

Vorheriger Artikel: Python erhält aktuelle Daten basierend auf dem Aktiencode | Nächster Artikel: Python erhält aktuelle Daten basierend auf dem Aktiencode

In Verbindung stehende Artikel

Mehr sehen