Heim >Backend-Entwicklung >PHP-Tutorial >百度小偷:去除竞价与广告_PHP教程
<ol class="dp-xml">
<li class="alt"><span>&lt;?php</span></li>
<li><span>if(isset($_GET['wd'])){</span></li>
<li class="alt"><span>    $keywords = $_GET['wd'];</span></li>
<li><span>}</span></li>
<li class="alt"><span>$uri = isset($_SERVER['HTTP_X_REWRITE_URL']) ? $_SERVER['HTTP_X_REWRITE_URL'] : $_SERVER['REQUEST_URI'];</span></li>
<li><span>$content = file_get_contents("http://www.baidu.com".$uri);</span></li>
<li class="alt"><span> </span></li>
<li><span>if($uri=='/' || empty($uri)){</span></li>
<li class="alt"><span>    display($content);</span></li>
<li><span>}</span></li>
<li class="alt"><span> </span></li>
<li><span>//删除"来百度推广"链接</span></li>
<li class="alt"><span>$start = preg_quote("&lt;div class=\"r ec_bdtg\"&gt;");</span></li>
<li><span>$content = preg_replace("/".$start."((?!&lt;br&gt;).)*&lt;br&gt;"."/is",'',$content);</span></li>
<li class="alt"><span> </span></li>
<li><span>//删除左侧竞价</span></li>
<li class="alt"><span>$start = "&lt;table((?!&gt;).)*&gt;";</span></li>
<li><span>$content = preg_replace_callback("/".$start."((?!&lt;\/table&gt;).)*&lt;\/table&gt;/is","search_result_filter",$content);</span></li>
<li class="alt"><span> </span></li>
<li><span>//删除剩下的多个&lt;br&gt;</span></li>
<li class="alt"><span>$content = str_ireplace("&lt;br&gt; &lt;br&gt;",'',$content);</span></li>
<li><span>//删除剩下的多个&lt;br&gt;</span></li>
<li class="alt"><span>$content = str_ireplace("&lt;br/&gt; ",'',$content);</span></li>
<li><span>//删除右侧推广链接</span></li>
<li class="alt"><span>$start = preg_quote("&lt;div id=\"ec_im_container\"&gt;");</span></li>
<li><span>$content = preg_replace("/".$start."((?!&lt;style).)*"."/is","&lt;br /&gt;&lt;/div&gt;",$content);</span></li>
<li class="alt"><span> </span></li>
<li><span>display($content);</span></li>
<li class="alt"><span>/**</span></li>
<li><span> * 搜索结果过滤器</span></li>
<li class="alt"><span> */</span></li>
<li><span>function search_result_filter($matches){</span></li>
<li class="alt"><span>    //背景#f5f5f5的是广告</span></li>
<li><span>    if(strpos($matches[0],"bgcolor=\"#f5f5f5\"")) return '';</span></li>
<li class="alt"><span>    //ID4位数的是广告</span></li>
<li><span>    if(preg_match("/&lt;table(.*?)id=\"(\d{4,})\"(.*?)&gt;((?!&lt;\/table&gt;).)*&lt;\/table&gt;/is",$matches[0])){</span></li>
<li class="alt"><span>        return '';</span></li>
<li><span>    }</span></li>
<li class="alt"><span>    return $matches[0];</span></li>
<li><span>}</span></li>
<li class="alt"><span> </span></li>
<li><span>function display($content) {</span></li>
<li class="alt"><span>    echo $content;</span></li>
<li><span>}</span></li>
</ol>
<p> </p> <p align="left"></p> <div style="display:none;"> <span id="url" itemprop="url">http://www.bkjia.com/PHPjc/486023.html</span><span id="indexUrl" itemprop="indexUrl">www.bkjia.com</span><span id="isOriginal" itemprop="isOriginal">true</span><span id="isBasedOnUrl" itemprop="isBasedOnUrl">http://www.bkjia.com/PHPjc/486023.html</span><span id="genre" itemprop="genre">TechArticle</span><span id="description" itemprop="description">功用如题。 重写规则和Wordpress一致。 兼容IIS、Apache、Nginx。 只有一个文件实现所有功能。 ? php if(isset($_GET[wd])){ $ keywords =$_GET[wd]; } $ uri =...</span> </div> <div class="art_confoot"></div>