
Home  >  Q&A  >  body text

python - How to scrape web content that is overwritten by later page content / rendered by JS?

I want to extract the current selling price of each item on the web page, along with the ITEM_ID of the corresponding product.

I am using Python 2.7 with the requests library. The code is as follows:

import requests
import sys
headers = {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.3; zh-cn; M032 Build/IML74K) AppleWebKit/533.1 (KHTML, like Gecko)Version/4.0 MQQBrowser/4.1 Mobile Safari/533.1'}
r = requests.get('', headers = headers,stream=True)
print r.request.headers['User-Agent']
print r.text
f = open('/workspace/test.txt', 'w')
f.write (r.text)

The response I get back contains most of the information on the web page, but not the selling price and item ID that I need. However, this information is visible through the browser's Inspect Element tool. The code snippet is as follows:

This snippet is easy to get by inspecting the element, but it does not appear in the page source, so I am confused about how to obtain it.
I found the following fragment in the page source. I wonder whether it is how the data is loaded via AJAX:

<script src="/static/csgo/js/page.js"></script>


var price_sort = 1;
var lock_sort = 1;
var status_locked = 0;
var get_params = function (_page_no, _page_size, _status_locked, _price_sort, _lock_sort) {
    var data = {};
    data['page_no'] = _page_no;
    data['page_size'] = _page_size;
    data['status_locked'] = _status_locked;
    data['price_sort'] = _price_sort;
    data['lock_sort'] = _lock_sort;

    return data;

var load_data = function (params) {
    if (!AjaxLogin.is_login_box(params.page_no > 1)) {
        return false;
        'type': "GET",
        'url': "/h1z1/433850/get_list_tmp/568258/" + params.status_locked + "/" + params.page_no + "/" + params.price_sort + "/" + params.lock_sort +"?steamid=",
        'data': {},
        'dataType': "json",
        beforeSend: function () {
        success: function (data) {
            var $t_body = $('#js-tbody-data');
            var $page = $('#js-page-html');

            if (data.succ) {


            } else {

        error: function (XmlHttpRequest, textStatus, errorThrown) {

var load = function () {
    var param = get_params(1, 20, status_locked, price_sort, lock_sort);

var change_sort_style = function ($dom, _sort) {
    if (_sort == 1) {
    } else if (_sort == 2) {
    } else {

var page_callback = function (page_no) {
    var param = get_params(page_no, 20, status_locked, price_sort, lock_sort);
$(function () {

        var pid = "568258";
$('#js-dota-tabs .js-item'), $('#js-dota-panel .js-item'), 'click');


    var tradeBtn = $('#js-btn-tradeBuy');
    var weaponKey = $('#js-pop-weaponKey');

    var $sort_lock = $('#js-sort-lock');
    var $sort_price = $('#js-sort-price');
    var $page = $('#js-page-html');

    $page.on('click', '.js-page', function () {
        var page_no = $(this).attr('page_no');

    $page.on('click', '.js-page-jump', function () {
        var page_no = $(this).prev().val();

    $page.on('keyup', '.js-page-jump-no', function (e) {
        $(this).val($(this).val().replace(/[^0-9.]/g, ''));
        if (!$(this).val()) {

    $page.on('keydown', '.js-page-jump-no', function (e) {
        var curKey = e.which;
        if (curKey == 13) {
            page_no = $('.js-page-jump-no').val();

    $sort_lock.on('click', function(){
            lock_sort = 2;
        } else {
            lock_sort = 1;
        change_sort_style($(this), lock_sort);
        price_sort = 0;
        change_sort_style($sort_price, price_sort);

    $sort_price.on('click', function () {
        if (price_sort == 1) {
            price_sort = 2;
        } else {
            price_sort = 1;
        change_sort_style($(this), price_sort);
        lock_sort = 0;
        change_sort_style($sort_lock, lock_sort);

    $('#js-find-locked').on('click', function () {
        if ($(this).prop('checked')) {
            status_locked = 1;
        } else {
            status_locked = 0;
        lock_sort = 1;
        price_sort = 1;
        change_sort_style($sort_lock, 0);
        change_sort_style($sort_price, 0);


    $('#js-buy-count').on('keyup', function () {
        $(this).val($(this).val().replace(/[^0-9]/g, ''));
        var value = $(this).val();
        if (value > 100) {
    $('#js-money-start').on('keyup', function () {
        $(this).val($(this).val().replace(/[^0-9.]/g, ''));
        var value = $(this).val();
        if (value > 99999) {
    $('#js-money-end').on('keyup', function () {
        $(this).val($(this).val().replace(/[^0-9.]/g, ''));
        var value = $(this).val();
        if (value > 99999) {

Judging from Inspect Element, I suspect that the sale history, which is rendered at the same location as the sale listing, overwrites the current information.
Judging from the page source, I think there is an AJAX or JS rendering step.
As a novice, I really don't know how to solve this problem, so I would appreciate any advice from the experts.

PHPzPHPz2849 days ago1289

reply all(1)I'll reply

  • 给我你的怀抱

    给我你的怀抱2017-05-18 10:57:01

    # coding: utf-8
    import requests
    headers = {'X-Requested-With':'XMLHttpRequest'}
    url = ''
    r = requests.get(url, headers=headers)
    print r.text

  • Cancelreply