Home  >  Article  >  Backend Development  >  php根据网址获取网页标题和描述

php根据网址获取网页标题和描述

WBOY
WBOY · Original
2016-06-23 13:08:56 · 916 browse

namespace Xuyaoxiang;

/**
 * Fetches a web page with cURL and extracts a single piece of text from it
 * (title, meta description, ...) using a named regular expression.
 *
 * The page is converted to $target_code before the extraction pattern is
 * applied, so the returned text is in the target encoding whenever the source
 * encoding could be detected and is known to mbstring.
 */
class Snoopy
{
    /**
     * Named PCRE patterns. Capture group 1 of each pattern is the extracted value.
     *
     * - title:       non-greedy and dot-all, so minified pages and titles that
     *                span several lines are handled.
     * - description: case-insensitive; accepts both `>` and `/>` tag endings.
     * - charset:     also used internally to detect the page encoding.
     */
    private $pattern_array = array(
        'title'       => '/<title[^>]*>(.*?)<\/title>/is',
        'description' => '/<meta\s+name="description"\s+content="(.*?)"\s*\/?>/is',
        'charset'     => '/charset=["\']?([\w-]+)/i',
    );

    /** URL of the page to fetch. */
    public $url;

    /** Encoding the page is converted to before extraction. */
    public $target_code = "utf-8";

    /** User-Agent header sent with the request (imitates a desktop browser). */
    public $user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36';

    /**
     * @param string $url URL of the page to fetch.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Registers an additional extraction pattern.
     *
     * Existing patterns cannot be overwritten: rules can only be added.
     *
     * @param string $key Name of the new pattern.
     * @param string $val PCRE pattern; capture group 1 is the extracted value.
     * @return false|null false when $key is already registered, otherwise nothing.
     */
    public function set_pattern($key, $val)
    {
        if (isset($this->pattern_array[$key])) {
            return false;
        }

        $this->pattern_array[$key] = $val;
    }

    /**
     * Downloads $this->url and returns the text captured by the named pattern.
     *
     * @param string $pattern_key Name of a registered pattern ('title', 'description', ...).
     * @return string|false Trimmed capture group 1, '' when the pattern does not
     *                      match, or false when the key is empty/unknown or the
     *                      page could not be fetched.
     */
    public function get_pege_content($pattern_key)
    {
        if (!is_scalar($pattern_key) || $pattern_key == '' || !isset($this->pattern_array[$pattern_key])) {
            return false;
        }

        $curl = curl_init();
        if ($curl === false) {
            return false;
        }

        // URL to fetch.
        curl_setopt($curl, CURLOPT_URL, $this->url);
        // Do not include the response headers in the returned body.
        curl_setopt($curl, CURLOPT_HEADER, 0);
        // Return the body as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_USERAGENT, $this->user_agent);

        $data = curl_exec($curl);
        // Read the Content-Type before the handle is released; it may carry the charset.
        $content_type = curl_getinfo($curl, CURLINFO_CONTENT_TYPE);
        // Release the handle on every path, including failed requests.
        curl_close($curl);

        if (!is_string($data) || $data === '') {
            return false;
        }

        $data = $this->to_target_encoding($data, $this->detect_charset($data, $content_type));

        if (preg_match($this->pattern_array[$pattern_key], $data, $content) !== 1 || !isset($content[1])) {
            return '';
        }

        return trim($content[1]);
    }

    /**
     * Finds the declared page encoding, checking the HTTP Content-Type value
     * first and the document body second.
     *
     * @param string $data         Response body.
     * @param mixed  $content_type Content-Type reported by cURL (may be null/false).
     * @return string Lower-cased charset name, or '' when none is declared.
     */
    private function detect_charset($data, $content_type)
    {
        foreach (array($content_type, $data) as $haystack) {
            if (is_string($haystack)
                && preg_match($this->pattern_array['charset'], $haystack, $found) === 1
            ) {
                return strtolower($found[1]);
            }
        }

        return '';
    }

    /**
     * Converts the body to the target encoding when the source encoding is
     * known and differs from it; otherwise returns the body unchanged.
     *
     * @param string $data         Response body.
     * @param string $page_charset Lower-cased source charset, '' when unknown.
     * @return string
     */
    private function to_target_encoding($data, $page_charset)
    {
        if ($page_charset === '' || $page_charset === strtolower($this->target_code)) {
            return $data;
        }

        try {
            $converted = mb_convert_encoding($data, $this->target_code, $page_charset);
        } catch (\ValueError $e) {
            // PHP 8 throws for encoding names mbstring does not know; keep the raw body.
            return $data;
        }

        // Older PHP versions return false (with a warning) for unknown encodings.
        return is_string($converted) ? $converted : $data;
    }
}

// Usage example: print the title of a page.
$snoopy = new Snoopy("http://www.qq.com");
$title = $snoopy->get_pege_content('title');
print_r($title);


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn