Accueil > Article > Développement back-end > 用PHP抓取百度贴吧邮箱数据,php抓取贴吧邮箱_PHP教程
注:本程序可能非常适合那些做百度贴吧营销的朋友。
去逛百度贴吧的时候,经常会看到楼主分享一些资源,要求留下邮箱,楼主才给发。
对于一个热门的帖子,留下的邮箱数量是非常多的,楼主需要一个一个的去复制那些回复的邮箱,然后再粘贴发送邮件,不是被折磨死就是被累死。无聊至极写了一个抓取百度贴吧邮箱数据的程序,需要的拿走。
程序实现了一键抓取帖子全部邮箱和分页抓取邮箱两个功能,界面懒得做了,效果如下:
老规矩,直接贴源码
<?php
/*
 * Baidu Tieba e-mail scraper (single-file tool).
 *
 * Renders two GET forms and, depending on the submitted `type`:
 *   - "getAll": fetches pages 1..`page` of the thread given in `url`
 *               and lists every e-mail address found;
 *   - "getNow": fetches exactly the URL given in `url2` and lists the
 *               e-mail addresses found on that single page.
 *
 * NOTE: declare(strict_types=1) is intentionally not used here because it
 * must be the very first statement of a file, and this block may be preceded
 * by other content.
 */

// Sample thread used to pre-fill the forms when nothing was submitted.
$defaultThreadUrl = 'http://tieba.baidu.com/p/2314539885';

// Sanity cap so a single request cannot trigger an unbounded number of fetches.
$maxPages = 500;

$type = queryParam('type');
$url  = queryParam('url', $defaultThreadUrl);
$url2 = queryParam('url2', $defaultThreadUrl . '?pn=1');
$page = queryParam('page', '1');
?>
<form action="" method="get">
    <input type="hidden" value="getAll" name="type" />
    <table>
        <tr>
            <td>帖子链接:</td><td><input type="text" name="url" value="<?php echo escapeHtml($url); ?>" /></td>
            <td>页数:</td><td><input type="text" name="page" value="<?php echo escapeHtml($page); ?>" /></td>
        </tr>
        <tr>
            <td colspan=2><input type="submit" value="抓取全部邮箱数据" /></td>
        </tr>
    </table>
</form>
<form action="" method="get">
    <input type="hidden" value="getNow" name="type" />
    <table>
        <tr>
            <td>帖子链接:</td><td><input type="text" name="url2" value="<?php echo escapeHtml($url2); ?>" /></td>
        </tr>
        <tr>
            <td colspan=2><input type="submit" value="抓取当前页邮箱数据" /></td>
        </tr>
    </table>
</form>
<?php
if ($type !== '') {
    $counts = 0;

    if ($type === 'getAll') {
        // Clamp the requested page count to 1..$maxPages.
        $pages = max(1, min((int) $page, $maxPages));

        // Tieba pages are 1-based (`pn=1` is the first page). Each iteration
        // must request a different page; fetching the bare thread URL every
        // time would just return page 1 over and over.
        for ($i = 1; $i <= $pages; $i++) {
            $counts += printEmailsFrom(buildPageUrl($url, $i));
        }
    } elseif ($type === 'getNow') {
        $counts += printEmailsFrom($url2);
    }

    echo '<h2>共采集到数据:' . $counts . '条</h2>';
}

/**
 * Reads a GET parameter as a string.
 *
 * @param string $name    Parameter name.
 * @param string $default Value returned when the parameter is missing,
 *                        is not a string (e.g. `?x[]=1`), or is empty.
 *
 * @return string
 */
function queryParam(string $name, string $default = ''): string
{
    $value = $_GET[$name] ?? '';

    return (is_string($value) && $value !== '') ? $value : $default;
}

/**
 * Escapes a value for safe output inside HTML text or a quoted attribute.
 *
 * @param string $value Raw value.
 *
 * @return string
 */
function escapeHtml(string $value): string
{
    return htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Returns $url with its `pn` (page number) query parameter set to $page.
 *
 * Other query parameters are preserved; any `#fragment` is dropped because
 * it is never sent to the server.
 *
 * @param string $url  Thread URL, with or without a query string.
 * @param int    $page 1-based page number.
 *
 * @return string
 */
function buildPageUrl(string $url, int $page): string
{
    $fragmentPos = strpos($url, '#');
    if ($fragmentPos !== false) {
        $url = substr($url, 0, $fragmentPos);
    }

    $params = [];
    $queryPos = strpos($url, '?');
    if ($queryPos !== false) {
        parse_str(substr($url, $queryPos + 1), $params);
        $url = substr($url, 0, $queryPos);
    }

    $params['pn'] = $page;

    return $url . '?' . http_build_query($params, '', '&');
}

/**
 * Downloads a page over HTTP(S).
 *
 * Only http/https URLs are accepted (also after redirects) so the tool
 * cannot be pointed at file://, gopher:// and similar schemes. TLS
 * verification is left at cURL's secure defaults.
 *
 * NOTE(review): the URL is user-supplied, so this can still reach arbitrary
 * hosts. If the script is exposed publicly, add a host allow-list
 * (e.g. tieba.baidu.com) here.
 *
 * @param string $url Absolute URL to fetch.
 *
 * @return string|null Response body, or null when the URL is rejected or
 *                     the transfer fails.
 */
function fetchPage(string $url): ?string
{
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return null;
    }

    $ch = curl_init();
    if ($ch === false) {
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL             => $url,
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_MAXREDIRS       => 5,
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_CONNECTTIMEOUT  => 10,
        CURLOPT_TIMEOUT         => 30,
    ]);

    $body = curl_exec($ch);
    curl_close($ch);

    return is_string($body) ? $body : null;
}

/**
 * Fetches $url, prints every e-mail address found (one per line) and
 * returns how many were printed. Prints a short notice when the fetch fails.
 *
 * @param string $url Page to scrape.
 *
 * @return int Number of addresses printed.
 */
function printEmailsFrom(string $url): int
{
    $html = fetchPage($url);
    if ($html === null) {
        echo '<p>抓取失败:' . escapeHtml($url) . '</p>';

        return 0;
    }

    $emails = getEmail($html);
    foreach ($emails as $email) {
        echo escapeHtml($email) . "<br />";
    }

    return count($emails);
}

/**
 * Extracts all e-mail-like substrings from a piece of text.
 *
 * Matching is case-insensitive and allows hyphens in the first domain label.
 * Matches are returned in order of appearance; duplicates are kept.
 *
 * @param mixed $str Text to scan. Non-string values (e.g. `false` from a
 *                   failed download, or null) are treated as an empty string.
 *
 * @return array List of matched addresses (empty when none are found).
 */
function getEmail($str): array
{
    $pattern = '/[a-z0-9\-_.]+@[a-z0-9\-]+\.[a-z0-9\-_.]+/i';

    if (!is_string($str) || $str === '') {
        return [];
    }

    if (preg_match_all($pattern, $str, $emailArr) === false) {
        // PCRE failure (e.g. backtrack limit on huge input): report no matches.
        return [];
    }

    return $emailArr[0];
}
?>