首页 >php教程 >PHP源码 >php采集远程文章简单类

php采集远程文章简单类

PHP中文网
PHP中文网原创
2016-05-25 17:10:461443浏览

跳至

db = $db;
    }

    function geturlfile($url) {
        $url = trim($url);
        $content = '';
        if (extension_loaded('curl')) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            $content = curl_exec($ch);
            curl_close($ch);
        } else {
            $content = file_get_contents($url);
        }
        return trim($content);
    }

    function get_all_url($code) {
        preg_match_all('/"\' ]+)["|\']?\s*[^>]*>([^>]+)/is', $code, $arr);
        return array('name' => $arr[2], 'url' => $arr[1]);
    }

    function get_sub_content($str, $start, $end) {
        $start = trim($start);
        $end = trim($end);
        if ($start == '' || $end == '') {
            return $str;
        }
        $str = explode($start, $str);
        $str = explode($end, $str[1]);
        return $str[0];
    }
   
    function vd($var) {
        echo "\r\n";
        echo "\r\n";
        var_dump($var);
        echo "\r\n\r\n";
        echo "";
    }

}

?>geturlfile($url);
//定义采集列表区间
$start = '';
$end = '';
//获取区间内的文章URL和TITLE
$code = $gather->get_sub_content($html, $start, $end);
$newsAry = $gather->get_all_url($code);
//打印出结果
//$gather->vd($newsAry);
$tarGetUrl = $newsAry['url'][0];
//获取目标网址HTML
$html = $gather->geturlfile($tarGetUrl);
//定义采集列表区间
$start = '';
$end = '';
//获取区间内的文章URL和TITLE
$code = $gather->get_sub_content($html, $start, $end);
$killHtml = '';
$killHtml2 = '';
$code = str_replace($killHtml, "", $code);
$code = str_replace($killHtml2, "", $code);
$gather->vd($code);
?>

                   

声明:
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn