抓取网页的内容 fetch
抓取网页的文本内容 (去除HTML标签) fetchtext
抓取网页的链接,表单 fetchlinks fetchform
支持代理主机
支持基本的用户名/密码验证
支持设置 user_agent, referer(来路), cookies 和 header content(头文件)
支持浏览器重定向,并能控制重定向深度
能把网页中的链接扩展成高质量的url(默认)
提交数据并且获取返回值
支持跟踪HTML框架
支持重定向的时候传递cookies
include "Snoopy.class.php"; $snoopy = new Snoopy; $snoopy->proxy_host = "http://www.9it.me"; $snoopy->proxy_port = "80"; $snoopy->agent = "(compatible; MSIE 4.01; MSN 2.5; AOL 4.0; Windows 98)"; $snoopy->referer = "http://www.9it.me"; $snoopy->cookies["SessionID"] = 238472834723489l; $snoopy->cookies["favoriteColor"] = "RED"; $snoopy->rawheaders["Pragma"] = "no-cache"; $snoopy->maxredirs = 2; $snoopy->offsiteok = false; $snoopy->expandlinks = false; $snoopy->user = "joe"; $snoopy->pass = "bloe"; if($snoopy->fetchtext("http://www.9it.me")) { echo "<pre class="brush:php;toolbar:false">".htmlspecialchars($snoopy->results)."\n"; } else echo "error fetching document: ".$snoopy->error."\n";
<?php $url = "http://www.9it.me"; include("snoopy.php"); $snoopy = new Snoopy; $snoopy->fetch($url); //获取所有内容 echo $snoopy->results; //显示结果 //可选以下 $snoopy->fetchtext //获取文本内容(去掉html代码) $snoopy->fetchlinks //获取链接 $snoopy->fetchform //获取表单 ?>
<?php $formvars["username"] = "admin"; $formvars["pwd"] = "admin"; $action = "http://www.9it.me";//</a>表单提交地址 $snoopy->submit($action,$formvars);//$formvars为提交的数组 echo $snoopy->results; //获取表单提交后的 返回的结果 //可选以下 $snoopy->submittext; //提交后只返回 去除html的 文本 $snoopy->submitlinks;//提交后只返回 链接 ?>
<?php $formvars["username"] = "lanfengye"; $formvars["pwd"] = "lanfengye"; $action = "http://www.9it.me"; include "snoopy.php"; $snoopy = new Snoopy; $snoopy->cookies["PHPSESSID"] = 'fc106b1918bd522cc863f36890e6fff7'; //伪装sessionid $snoopy->agent = "(compatible; MSIE 4.01; MSN 2.5; AOL 4.0; Windows 98)"; //伪装浏览器 $snoopy->referer = "http://www.9it.me"; //伪装来源页地址 http_referer $snoopy->rawheaders["Pragma"] = "no-cache"; //cache 的http头信息 $snoopy->rawheaders["X_FORWARDED_FOR"] = "127.0.0.101"; //伪装ip $snoopy->submit($action,$formvars); echo $snoopy->results; ?>
<?php $snoopy->proxy_host = "http://www.9it.me"; $snoopy->proxy_port = "8080"; //使用代理 $snoopy->maxredirs = 2; //重定向次数 $snoopy->expandlinks = true; //是否补全链接 在采集的时候经常用到 // 例如链接为 /images/taoav.gif 可改为它的全链接 <a href="http://www.9it.me/images/taoav.gif">http://www.9it.me/images/taoav.gif</a> $snoopy->maxframes = 5 //允许的最大框架数 //注意抓取框架的时候 $snoopy->results 返回的是一个数组 $snoopy->error //返回报错信息 ?>
以上就介绍了snoopy(强大的PHP采集类) 详细介绍,包括了方面的内容,希望对PHP教程有兴趣的朋友有所帮助。