搜尋

首頁  >  問答  >  主體

node.js - nodejs 用superagent抓取网页gbk编码乱码的问题

网上找了很多办法,比如 iconv-lite 之类的都试过了,就是不行。

// Reproduction of the problem: fetch a GBK-encoded page with superagent and
// try to convert the body to readable text with iconv-lite.
// NOTE(review): `href` is assigned without a declaration here; presumably it
// is declared in surrounding code that is not shown.
href = 'http://www.qq.com/';
            console.log(href);
            superagent.get(href).end(function (err, res) {
                // res.text is already a JavaScript string, i.e. superagent has
                // decoded the raw bytes before this callback runs.
                var str = res.text;
                // new Buffer(str) re-encodes that string (UTF-8 by default), so
                // `buf` does not hold the original GBK bytes of the response.
                var buf = new Buffer(str);
                // Decoding non-GBK bytes as GBK is what yields the garbled output.
                str = iconv.decode(buf, 'GBK');
                console.log(str);


            });  
        
        折腾了一夜了,可有解决方案?
        
        
        
        
巴扎黑巴扎黑2779 天前736

全部回覆(3)我來回復

  • 怪我咯

    怪我咯2017-04-17 14:03:04

    http://web-engineer.cn/article/29

    回覆
    0
  • 阿神

    阿神2017-04-17 14:03:04

    // superagent-charset wraps superagent and adds a .charset() method that
    // decodes the response body with the given encoding.
    var charset = require('superagent-charset');
    var superagent = charset(require('superagent'));
    
    var href = 'http://www.qq.com/';
    superagent.get(href).charset('gbk').end(function (err, res) {
      // On a failed request `res` may be missing, so report the error instead
      // of dereferencing it.
      if (err) {
        console.error(err);
        return;
      }
      console.log(res.text);
    });

    用 superagent-charset 模組


    剛抽空改了下這個模組,https://github.com/52cik/superagent-charset
    不知道作者什麼時候合併我的程式碼,
    你可以直接npm i 52cik/superagent-charset 來安裝使用。

    使用方法:

    // Same wrapper as above, using the fork that can detect the encoding itself.
    var charset = require('superagent-charset');
    var superagent = charset(require('superagent'));
    
    var href = 'http://www.qq.com/';
    superagent
      .get(href)
      .charset() // with no argument the encoding is detected automatically
      .end(function (err, res) {
        // On a failed request `res` may be missing, so report the error
        // instead of dereferencing it.
        if (err) {
          console.error(err);
          return;
        }
        console.log(res.text);
      });

    回覆
    0
  • 伊谢尔伦

    伊谢尔伦2017-04-17 14:03:04

    不是superagent,使用nodejs提供的http模組
    解碼後最終的檔案內容是UTF-8的

    new Buffer(string[, encoding]) 本身就會有一個轉碼的過程,encoding 預設為 UTF-8,也就是把傳入的字串當作 UTF-8 來編碼成位元組。
    如下,
    先用 GBK 編碼,toString 之後再 new Buffer,最後用 GBK 解碼,得到的結果就有問題:

    // Demonstrates the failure mode: a GBK byte sequence that passes through a
    // JavaScript string can no longer be decoded as GBK.
    // buffer2 holds the GBK-encoded bytes of the literal.
    var buffer2=iconv.encode("不是superagent,使用nodejs提供的http模块","GBK");
    // buffer2.toString() interprets those GBK bytes as UTF-8 (the Buffer
    // default), and new Buffer(...) re-encodes the resulting string as UTF-8,
    // so the bytes handed to iconv.decode are no longer the original GBK bytes.
    var str=iconv.decode(new Buffer(buffer2.toString()), "GBK");
    console.log(str);

    貼個使用原生的範例:

    var http = require('http');
    var iconv = require('iconv-lite');
    var zlib = require('zlib');

    // Upper bound on the number of raw body bytes kept in memory (16 MiB).
    var MAX_RESPONSE_BYTES = 1024 * 1024 * 16;

    /**
     * Fetch http://www.qq.com with the built-in http module, collect the raw
     * response bytes, decompress them when the server sent gzip/deflate, and
     * decode the result from GBK before printing it.
     *
     * The body is kept as Buffers the whole time; converting it to a string
     * before GBK decoding would corrupt the bytes.
     */
    var clientRequest = http.request('http://www.qq.com', function (response) {
        var chunks = [];
        var receivedLength = 0;
        var limitExceeded = false;
        // Needed later to decide whether the body must be decompressed.
        var contentEncoding = response.headers['content-encoding'];

        response.on('data', receiveData);
        response.on('end', processReceivedData);
        response.on('error', function (error) {
            console.error(error);
        });

        // Collect one raw chunk; stop collecting once the size cap is hit.
        function receiveData(chunk) {
            if (!chunk.length || limitExceeded) {
                return;
            }

            if (receivedLength + chunk.length > MAX_RESPONSE_BYTES) {
                limitExceeded = true;
                console.error('response exceeds ' + MAX_RESPONSE_BYTES + ' bytes, body truncated');
                return;
            }

            chunks.push(chunk);
            receivedLength += chunk.length;
        }

        // Join the chunks, decompress if required, then decode from GBK.
        function processReceivedData() {
            var responseBuffer = Buffer.concat(chunks, receivedLength);

            function decodeAndReturnResponse(error, bodyBuffer) {
                if (error) {
                    console.error(error);
                    return;
                }
                var responseBody = iconv.decode(bodyBuffer, "GBK");
                console.log(responseBody);
            }

            if (contentEncoding && /(gzip|deflate)/.test(contentEncoding)) {
                console.log("zlib.unzip");
                zlib.unzip(responseBuffer, decodeAndReturnResponse);
            } else {
                decodeAndReturnResponse(undefined, responseBuffer);
            }
        }
    });

    // Without this handler a connection failure is thrown as an uncaught error.
    clientRequest.on('error', function (error) {
        console.error(error);
    });

    clientRequest.end();

    回覆
    0
  • 取消回覆