开源中国个人帐号信息抓取实例,代码仅供参考;稍作改进,即可快速制作一个通过命令行管理自己帐号的工具。实例使用 Snoopy 和 simple_html_dom 两个包,均可直接在开源中国下载。
// Command-line example: log in to oschina.net and list the messages in the
// account's inbox. The script is written to run top to bottom with the
// placeholder values (cookie, e-mail, password, user name) filled in.

// Required third-party libraries (both can be downloaded from oschina.net).
// require_once: the script cannot do anything useful without them.
require_once "Snoopy.class.php";    // HTTP client used to fetch pages and submit forms
require_once "simple_html_dom.php"; // HTML parsing library

$snoopy = new Snoopy;
// Optional proxy settings and HTTP credentials:
// $snoopy->proxy_host="###";
// $snoopy->proxy_port="8888";
// $snoopy->user="***";
// $snoopy->pass="****";
$snoopy->agent = "(Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:24.0) Gecko/20131117 Firefox/24.0 PaleMoon/24.1.2)";
$snoopy->referer = "http://www.oschina.net/";

$snoopy->cookies["oscid"] = '******';

// Request the login page before submitting the login form.
// A failure here is reported but not fatal, as in the original script.
if (!$snoopy->fetch("https://www.oschina.net/home/login?goto_page=http://www.oschina.net/")) {
    echo "打开登录页面失败: ".$snoopy->error."\n";
}

// Authenticate: the inbox can only be read by a logged-in user.
// NOTE(review): this form is posted over plain http while the login page above
// uses https — confirm whether the endpoint is reachable over https and switch.
$submit_url = "http://www.oschina.net/action/user/hash_login";
$submit_vars = array(
    "email"      => "*****",
    "pwd"        => sha1("*******"), // the SHA-1 hex digest is sent, not the plain password
    "save_login" => 1,
    "submit"     => "现在登录",
);
if (!$snoopy->submit($submit_url, $submit_vars)) {
    echo "提交登录请求失败: ".$snoopy->error."\n";
}
echo "认证情况\n";

// print $snoopy->results;

// After authenticating, fetch the inbox page.
if (!$snoopy->fetch("http://my.oschina.net/******/admin/inbox")) {
    echo "获取收件箱页面失败: ".$snoopy->error."\n";
    exit(1);
}
echo "开源中国个人帐号信息测试程序\n";
// "H" = 24-hour clock; the 12-hour "h" is ambiguous without an AM/PM marker.
echo "时间:".date("Y-m-d H:i:s")."\n";
// echo iconv("UTF-8","GBK//IGNORE",$snoopy->results);
$result = $snoopy->results;

// print $result;

// Parse the page with simple_html_dom.
$html = str_get_html($result);
if (!$html) {
    // str_get_html() can return false (e.g. for an empty document).
    echo "无法解析收件箱页面\n";
    exit(1);
}

// Every <li> carrying an id attribute inside <ul class="Msgs"> is one message.
$messages = $html->find('ul.Msgs li[id]');
foreach ($messages as $message) {
    // Query the matched node directly instead of re-parsing its HTML.
    // find(..., 0) returns null when nothing matches, so guard before reading.
    $msgNode  = $message->find('div.msg', 0);
    $dateNode = $message->find('div.bottom span.date', 0);

    echo "===================\n";
    echo $message."\n"; // raw HTML of the message item
    echo "-----------\n下面是上面html信息的分析提取处理:\n";
    echo '信息id: '.$message->id."\n";
    echo '信息: '.($msgNode ? trim($msgNode->plaintext) : '')."\n";
    echo '日期: '.($dateNode ? trim($dateNode->plaintext) : '').' '."\n";
    print("===================\n");
}
-
复制代码
|