>  기사  >  백엔드 개발  >  自定义HTTP抓包跟过滤

自定义HTTP抓包跟过滤

WBOY
WBOY원래의
2016-06-13 12:11:121133검색

自定义HTTP抓包和过滤
定义一个http抓包类,发送数据到一个自定义的接受脚本,可以发送成功,并收取数据,但是发送到外网,却不行,分析过在浏览器下发送HTTP请求时的request header 信息,通过模拟请求,但超时...

<br />//定义一个HTTP抓包类,其实也可以用curl。。。。。<br /><br /><?php <br />ini_set('error_reporting', E_ALL);<br />class Httpwrap<br />{<br />	private $hostInfo=null;<br />	<br />	private $requestLine=null;<br />	private $requestHeader=null;<br />	private $emptyLine="\r\n";<br />	private $requestBody=null;<br />	private $requestEntity=null;<br />	<br />	private $responseEntity=null;<br />	private $responseHeader=null;<br />	private $responseBody=null;<br />	private $emptyLinePos=null;<br />	<br />	private $connect=null;<br />	private $errNo=null;<br />	private $errStr=null;<br />	<br />	<br />	public function __construct($url)<br />	{<br />		$this->hostInfo=parse_url($url);<br />		$this->setRequestHeader(array('Host' => $this->hostInfo['host']));<br />		$this->setRequestHeader(array('Connection' => 'keep-alive'));<br />	}<br />	//设置HTTP请求行信息,例如: GET /resources  HTTP/1.1<br />	//但为了避免漏掉url中?开始的查询信息,有必要进行判断<br />	public function setRequestLine($method)<br />	{<br />		//如果是POST请求,则自动添加content-type头信息<br />		if(strtolower($method)=='post')<br />		{<br />			$this->setRequestHeader(array('Content-type' => 'application/x-www-form-urlencoded'));<br />		}<br />		if(!empty($this->hostInfo['query']))<br />		{<br />			$this->requestLine=strtoupper($method)."  ".$this->hostInfo['path']."?".$this->hostInfo['query']."  HTTP/1.1 \r\n";<br />		}<br />		else<br />		{<br />			$this->requestLine=strtoupper($method)."  ".$this->hostInfo['path']."  HTTP/1.1 \r\n";<br />		}<br />	}<br />	//设置HTTP请求头。<br />	//接收参数是数组类型,通过迭代拼接key:value,并换行<br />	public function setRequestHeader($header)<br />	{<br />		foreach($header as $key => $value)<br />		{<br />			$this->requestHeader .=$key.":".$value."\r\n";<br />		}<br />	}<br />	//设置HTTP请求体<br />	//接收参数是数组类型,通过迭代拼接key=value,因为最后一席拼接会有一个多余的&,所以有必要去掉<br />	public function setRequestBody($body)<br />	{<br />		foreach($body as $key => $value)<br />		{<br />			$this->requestBody .=$key.'='.$value.'&';<br />		}<br />		$offset=strrpos($this->requestBody, '&');<br />		$this->requestBody=substr($this->requestBody, 0, $offset);<br />	}<br />	//组装  请求行+请求头+请求体,并根据请求体的长度,自动填充请求头的content-length字段<br />	public function setRequestEntity()<br />	{<br />		if(!empty($this->requestBody))<br />		{<br />			$contentLength=strlen($this->requestBody);<br />			$this->setRequestHeader(array('Content-length' => $contentLength));<br />			<br />			$this->requestEntity=$this->requestLine.$this->requestHeader."\r\n".$this->requestBody;<br />		}<br />		else<br />		{<br />			$this->requestEntity=$this->requestLine.$this->requestHeader."\r\n";<br />		}<br />	}<br />	//解析主机名的函数,暂时没有用上.......<br />	public function parseHost($url)<br />	{<br />		$pat='#http://([^/]+)#i';<br />		if(preg_match($pat, $url, $match))<br />		{<br />			return $match[1];<br />		}<br />		else<br />		{<br />			echo '匹配主机信息失败<br />';<br />		}<br />	}<br />	//创建到主机的连接<br />	public function createConnect()<br />	{<br />		$this->connect=fsockopen($this->hostInfo['host'], 80, $this->errNo, $this->errStr) or die('连接主机失败'.$this->errStr);<br />	}<br />	//发送请求<br />	public function sendRequest()<br />	{<br />		$this->setRequestEntity();<br />		echo $this->requestEntity;<br />		exit();<br />		$this->createConnect();<br />		$entityLength=strlen($this->requestEntity);<br />		if($entityLength != fwrite($this->connect, $this->requestEntity, $entityLength))<br />		{<br />			die('写入数据失败<br />');<br />		}<br />		else<br />		{<br />			$this->receiveResponse();<br />		}<br />	}<br />	//接受请求,并依次拼接响应体<br />	public function receiveResponse()<br />	{<br />		while(!feof($this->connect))<br />		{<br />			$this->responseEntity .= fread($this->connect, 1024);<br />		}<br />	}<br />	//计算响应头与响应体之间的空行的位置<br />	public function calculateEmptyLinePos()<br />	{<br />		$this->emptyLinePos=strpos($this->responseEntity,"\r\n\r\n",0);<br />	}<br />	//接受响应体的头部....<br />	public function receiveResponseHeader()<br />	{<br />		$this->calculateEmptyLinePos();<br />		$this->responseHeader=substr($this->responseEntity, 0, $this->emptyLinePos);<br />		echo $this->responseHeader;<br />	}<br />	//接收响应体的body部分<br />	public function receiveResponseBody()<br />	{<br />		$this->calculateEmptyLinePos();<br />		$this->responseBody=substr($this->responseEntity, $this->emptyLinePos);<br />	}<br />	//返回请求结果<br />	public function getResponse()<br />	{<br />		return $this->responseEntity;<br />	}<br />	<br /><br />	public function parseResponse()<br />	{}<br />	public function __destruct()<br />	{<br />		//fclose($this->connect);<br />	} <br />}<br />set_time_limit(60);<br />$http=new Httpwrap("http://www.mmkao.com/Beautyleg/");<br />//设置HTTP请求行<br />$http->setRequestLine("get");<br />//设置HTTP头<br />$http->setRequestHeader(array("Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));<br />$http->setRequestHeader(array("Accept-Language" => "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3"));<br />$http->setRequestHeader(array("Accept-Encoding" => "gzip, deflate"));<br />$http->setRequestHeader(array("User-Agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36"));<br />//$http->setRequestHeader(array("Cookie" => "BAIDU_DUP_lcr=http://www.baidu.com/s?wd=beautyleg&rsv_spt=1&issp=1&f=3&rsv_bp=0&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_sug3=6&rsv_sug4=415&rsv_sug1=3&oq=beauty&rsv_sug2=0&rsp=0&inputT=2363; safedog-flow-item=8471BA510DA33350ED344AC374D3044A; bdshare_firstime=1415165097782; cscpvrich_fidx=6; AJSTAT_ok_pages=2; AJSTAT_ok_times=2; CNZZDATA3811623=cnzz_eid%3D253823549-1415164312-http%253A%252F%252Fwww.baidu.com%252F%26ntime%3D1415169712"));<br />//发送数据<br />$http->sendRequest();<br />//$http->receiveResponseHeader();<br /><br />?><br /><br />通过这个类给领一个自定义的脚本,可以发送和接收数据,领一个脚本如下:<br /><br /><?php <br />if(!empty($_POST))<br />{<br />	$str=implode(',',$_POST);<br />	file_put_contents('./post.txt', $str,FILE_APPEND);<br />	echo $str;<br />}<br /><br />?><br />但是给这个网站发送请求时,却超时:网站是:<br />http://www.mmkao.com/Beautyleg/<br />通过chrome给这个网站首页发送请求时的header头信息:<br />Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8<br />Accept-Encoding:gzip,deflate,sdch<br />Accept-Language:zh,en;q=0.8,zh-TW;q=0.6,zh-CN;q=0.4,ja;q=0.2<br />Cache-Control:max-age=0<br />Connection:keep-alive<br />Cookie:BAIDU_DUP_lcr=http://www.baidu.com/s?wd=beautyleg&rsv_spt=1&issp=1&f=3&rsv_bp=0&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_sug3=6&rsv_sug4=415&rsv_sug1=3&oq=beauty&rsv_sug2=0&rsp=0&inputT=2363; safedog-flow-item=8471BA510DA33350ED344AC374D3044A; bdshare_firstime=1415165097782; cscpvrich_fidx=7; AJSTAT_ok_pages=3; AJSTAT_ok_times=2; CNZZDATA3811623=cnzz_eid%3D253823549-1415164312-http%253A%252F%252Fwww.baidu.com%252F%26ntime%3D1415169712<br />DNT:1<br />Host:www.mmkao.com<br />User-Agent:Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36<br />Response Headersview source<br /><br />//通过相同的包装,并调用Httpwrap发送请求时,却提示超时,是在不知道哪里出问题........<br /><br />针对这个网站写了一个过滤出图片链接的类:<br /><br /><?php  <br />class Parseimage<br />{<br />	private $responseBody=null;<br />	private $imgLink=null;<br />	private $pageNum=null;<br />	private header=null;<br />	private body=null;<br />	<br />	public function __construct($body)<br />	{<br />		$this->responseBody=$body;<br />	}<br />	//匹配图片src开始的链接地址<br />	public function feedImage()<br />	{<br />		$pat='#<img (.*?)src="(.*?)(?=")#i';<br / alt="自定义HTTP抓包跟过滤" >		if(preg_match_all($pat, $body, $match))<br />		{<br />			foreach($match[2] as $link)<br />			{<br />				$this->imgLink[]=$link;<br />			}<br />		}<br />		else<br />		{<br />			echo '匹配失败图片链接地址失败'."<br />";<br />		}<br />	}<br />	//提取head部分<br />	public function filterHeader($body)<br />	{<br />		$pat='#<head>[\s\S]+</head>#im';<br />		if(preg_match($pat, $body, $match))<br />		{<br />			$this->header=$match[0];<br />		}<br />		else<br />		{<br />			echo '匹配head部分失败'."<br />";<br />		}<br />	}<br />	//提取body部分<br />	public function filterBody($body)<br />	{<br />		$pat='#<body>[\s\S]+</body>#im';<br />		if(preg_match($pat, $body, $match))<br />		{<br />			$this->body=$match[0];<br />		}<br />		else<br />		{<br />			echo '匹配body部分失败'."<br />";<br />		}<br />	}<br />	//提取分页信息,这个只能针对性的匹配,不能通用<br />	public function rollPage($body)<br />	{<br />		$pat='#[\x{4e00}-\x{9fa5}]+\s*\d\s+?/\s+?\d+\s*[\x{4e00}-\x{9fa5}]*#ui';<br />		if(preg_match($pat, $body, $match))<br />		{<br />			$patNum='#/\s*(\d\d*)#';<br />			if(preg_match($patNum, $match[0], $num))<br />			{<br />				$this->pageNum=$num[1];<br />			}<br />			else<br />			{<br />				echo '提取分页具体值失败'."<br />";<br />			}<br />		}<br />		else<br />		{<br />			echo '提取分页统计失败'."<br />";<br />		}<br />	}<br /><br />?><br /><br /><br /><br />附注:  这两个类,,都通过了内网的测试,并成功过滤出图片的链接,但是给http://www.mmkao.com/Beautyleg/发送请求时,却提示超时,,不知道哪里出了问题。。。。。。<br /><br /><br /><br /><br />

------解决思路----------------------
$url = 'http://www.mmkao.com/Beautyleg/';<br />print_r(get_headers($url));
Array<br />(<br />    [0] => HTTP/1.1 200 OK<br />    [1] => Connection: close<br />    [2] => Date: Wed, 05 Nov 2014 08:53:09 GMT<br />    [3] => Content-Length: 13889<br />    [4] => Content-Type: text/html<br />    [5] => Content-Location: http://www.mmkao.com/Beautyleg/index.html<br />    [6] => Last-Modified: Wed, 05 Nov 2014 05:39:09 GMT<br />    [7] => Accept-Ranges: bytes<br />    [8] => ETag: "e8939ad2baf8cf1:693"<br />    [9] => Server: IIS<br />    [10] => X-Powered-By: WAF/2.0<br />    [11] => Set-Cookie: safedog-flow-item=8471BA510DA33350ED344AC374D3044A; expires=Sat, 12-Dec-2150 10:26:25 GMT; domain=mmkao.com; path=/<br />)<br /><br />

성명:
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.