recherche

Accueil  >  Questions et réponses  >  Corps du texte

php - curl 抓取某些页面的时候,提示 503 nginx 服务不可用??

尝试抓取的页面,结果:

换其他链接不会出现这样的问题,请问如何解决这个问题??

有没有能够正常抓取到 http://www.chwfsc.com/ 这个网站内容的范例??

阿神阿神2905 Il y a quelques jours6878

Répondre à tous (2) · Je répondrai

  • PHP中文网

    PHP中文网2017-04-11 10:40:16

    // Fetch the site's home page with cURL, sending a browser-like User-Agent,
    // and dump whatever curl_exec() returns.

    // Set the HTTP output character encoding to UTF-8.
    mb_http_output('utf-8');
    $url="http://www.chwfsc.com/";
    $ch=curl_init();
    curl_setopt($ch,CURLOPT_URL,$url);
    // 0 = do not include the response headers in the returned data.
    curl_setopt($ch,CURLOPT_HEADER,0);
    // NOTE(review): NOBODY requests a body-less (HEAD-style) transfer, but
    // CURLOPT_POST is enabled a few lines below; the two conflict and the
    // effective request depends on the order they are set — confirm intent.
    curl_setopt($ch,CURLOPT_NOBODY, true);
    // Make curl_exec() return the response instead of printing it.
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
    // NOTE(review): CURLOPT_ENCODING controls the Accept-Encoding header
    // (content codings such as gzip/deflate), not the character set; 'utf8'
    // is probably not a meaningful value here — verify.
    curl_setopt($ch,CURLOPT_ENCODING,'utf8');
    // Issue the request as an HTTP POST (no POST fields are set in this snippet).
    curl_setopt($ch,CURLOPT_POST,1);
    // Present a desktop Chrome User-Agent string to the server.
    curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11');
    // $html holds the value returned by curl_exec(); it is not checked for failure.
    $html=curl_exec($ch);
    curl_close($ch);
    var_dump($html);
    可以了,亲测可用

    répondre
    0
  • PHP中文网

    PHP中文网2017-04-11 10:40:16

    封装个curl试试

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    15

    16

    17

    18

    19

    20

    21

    22

    23

    24

    25

    26

    27

    28

    29

    30

    31

    32

    33

    34

    35

    36

    37

    38

    39

    40

    41

    42

    43

    44

    45

    46

    <code>function task($url = "url") {
        // Request $url with cURL, sending the spoofed client-IP headers built
        // by randIp(), and hand the response back to the caller.
        //
        // @param string $url  Address to request. Defaults to the original
        //                     placeholder "url" so existing task() calls keep
        //                     working unchanged.
        // @return string|bool The value returned by curl_exec(): the response
        //                     body on success, false when the transfer failed
        //                     (the cURL error text is printed in that case).
        $headers = randIp();

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        // NOTE(review): certificate verification is switched off, so HTTPS
        // peers are not authenticated. Kept as in the original; enable it
        // unless the target really has a broken certificate chain.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
        // Present a browser-like User-Agent.
        curl_setopt($curl, CURLOPT_USERAGENT,  "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0");
        // Abort the transfer after 300 seconds so a stalled server cannot hang the script.
        curl_setopt($curl, CURLOPT_TIMEOUT, 300);
        // 0 = keep the response headers out of the returned data.
        curl_setopt($curl, CURLOPT_HEADER, 0);
        // Return the response from curl_exec() instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

        $tmpInfo = curl_exec($curl);

        if (curl_errno($curl)) {
            print "Error: " . curl_error($curl);
        }

        // Release the handle on every path; the error text above must be read
        // before closing.
        curl_close($curl);

        return $tmpInfo;
    }

    // Build spoofed client-IP request headers from a random IPv4 address.
    // The address is drawn from a fixed table of ranges that the original
    // author describes as domestic (China) IP addresses.
    //
    // @return string[] Two header lines, "CLIENT-IP:<ip>" and
    //                  "X-FORWARDED-FOR:<ip>", both carrying the same address.
    function randIP(){
        // Each entry is the [first, last] address of one range as a signed
        // 32-bit integer; ranges at or above 128.0.0.0 appear as negative
        // numbers, which long2ip() maps back to the dotted form noted beside them.
        $ip_long = array(
            array(607649792, 608174079),     // 36.56.0.0-36.63.255.255
            array(1038614528, 1039007743),   // 61.232.0.0-61.237.255.255
            array(1783627776, 1784676351),   // 106.80.0.0-106.95.255.255
            array(2035023872, 2035154943),   // 121.76.0.0-121.77.255.255
            array(2078801920, 2079064063),   // 123.232.0.0-123.235.255.255
            array(-1950089216, -1948778497), // 139.196.0.0-139.215.255.255
            array(-1425539072, -1425014785), // 171.8.0.0-171.15.255.255
            array(-1236271104, -1235419137), // 182.80.0.0-182.92.255.255
            array(-770113536, -768606209),   // 210.25.0.0-210.47.255.255
            array(-569376768, -564133889),   // 222.16.0.0-222.95.255.255
        );

        // Take the upper bound from the table itself so adding or removing a
        // range cannot leave the index out of step.
        $rand_key = mt_rand(0, count($ip_long) - 1);
        $ip = long2ip(mt_rand($ip_long[$rand_key][0], $ip_long[$rand_key][1]));

        $headers = array(
            'CLIENT-IP'       => $ip,
            'X-FORWARDED-FOR' => $ip,
        );

        // Flatten to the "Name:value" strings that CURLOPT_HTTPHEADER expects.
        $headerArr = array();
        foreach ($headers as $n => $v) {
            $headerArr[] = $n . ':' . $v;
        }

        return $headerArr;
    }</code>

    répondre
    0
  • Annuler · Répondre