本文介绍如何使用 PHP 编写爬虫抓取歌词,具有一定的参考价值,现分享给大家,有需要的朋友可以参考。
<?php
header("Content-type:text/html;charset=utf-8");

/**
 * Small recursive lyrics crawler.
 *
 * Starting from $url, it collects every song link on the page, downloads each
 * song page, follows the lyrics-file URL found inside the "lyricCont" element,
 * and appends the lyrics (with bracketed tags stripped) to $txt. Song pages
 * are then searched for further song links and crawled the same way. The
 * script terminates once $count lyrics have been written.
 */
class Small_crawler
{
    protected $regular;    // regex matching one song anchor: <a href="/song/ID" ...>...</a>
    protected $url;        // site root the crawl starts from
    protected $i;          // number of lyrics written so far
    protected $count;      // maximum number of lyrics to write
    protected $lyrics_ze;  // regex matching the lyrics container element of a song page
    protected $lyrics_lrc; // regex matching the quoted lyrics-file URL inside that element
    protected $txt;        // path of the output file
    protected $visited = []; // song paths already fetched, used as a set (path => true)

    public function __construct()
    {
        // One anchor per match: "[^>]*" stays inside the opening tag and ".*?" stops
        // at the first closing </a>, so several links on one line are all found.
        $this->regular = "/<a href=\"\/song\/[0-9]+\"[^>]*>.*?<\/a>/";
        $this->lyrics_ze = "/<p id=\"lyricCont\".*<\/p>/";
        $this->lyrics_lrc = "/\"http:\/\/.*\"/";
        $this->url = 'http://music.baidu.com/';
        $this->txt = "playlist.txt";
        $this->i = 0;
        $this->count = 110;
    }

    /**
     * Start the crawl from the site root.
     *
     * Prints a message and terminates the script when the start page cannot
     * be downloaded or contains no song links.
     */
    public function perform()
    {
        $data = file_get_contents($this->url);
        if ($data !== false && preg_match_all($this->regular, $data, $mar)) {
            $this->lyrics($mar);
        } else {
            echo '已没有可用信息';
            exit;
        }
    }

    /**
     * Process a set of song links: save the lyrics of every linked song, then
     * recurse into the song links found on those pages.
     *
     * @param array $data preg_match_all() result for $this->regular; only
     *                    $data[0] (the full anchor strings) is used
     */
    public function lyrics($data)
    {
        if (!is_array($data) || !isset($data[0]) || !is_array($data[0])) {
            return;
        }

        // Link sets found on the pages of this level, crawled after all lyrics
        // of this level have been written.
        $pending = [];

        foreach ($data[0] as $anchor) {
            // The href value is the text between the first pair of double quotes.
            $parts = explode('"', $anchor);
            if (empty($parts[1])) {
                continue;
            }

            $path = $parts[1];
            if (isset($this->visited[$path])) {
                continue; // already crawled; avoids duplicate output and link cycles
            }
            $this->visited[$path] = true;

            // Each song page is downloaded once and used for both the lyrics
            // and the follow-up links.
            $page = file_get_contents($this->song_url($path));
            if ($page === false) {
                continue;
            }

            $this->save_lyrics($page);

            if (preg_match_all($this->regular, $page, $links)) {
                $pending[] = $links;
            }
        }

        foreach ($pending as $links) {
            $this->lyrics($links);
        }
    }

    /**
     * Write one lyrics text to the output file, followed by a numbered
     * separator line. Terminates the script once $count lyrics were written.
     *
     * @param string $lyrics_file raw content of a lyrics file
     * @throws RuntimeException when the output file cannot be written
     */
    public function write($lyrics_file)
    {
        if ($this->i >= $this->count) {
            exit; // limit reached: stop the whole crawl
        }

        // Strip each bracketed tag on its own, so text between two tags on the
        // same line is kept and a stray "[" never swallows following lines.
        $acs = preg_replace('/\[[^\]\r\n]*\]/', '', $lyrics_file);
        $separator = "\r\n".$this->i.".-----------------------------------------------------------\r\n";

        // file_put_contents opens, appends and closes in one call, so no file
        // handle is left open between writes.
        if (file_put_contents($this->txt, $acs.$separator, FILE_APPEND) === false) {
            throw new RuntimeException('Unable to write to '.$this->txt);
        }

        $this->i++;
    }

    /**
     * Build the absolute URL of a song page from a site-relative path,
     * joining with exactly one slash.
     *
     * @param string $path href taken from a song anchor, e.g. "/song/123"
     * @return string
     */
    protected function song_url($path)
    {
        return rtrim($this->url, '/').'/'.ltrim($path, '/');
    }

    /**
     * Locate the lyrics-file URL on a song page, download the file and hand
     * it to write(). Does nothing when any step fails.
     *
     * @param string $page HTML of a song page
     */
    protected function save_lyrics($page)
    {
        if (!preg_match($this->lyrics_ze, $page, $container)) {
            return;
        }
        if (!preg_match($this->lyrics_lrc, $container[0], $quoted)) {
            return;
        }

        // The match starts with a double quote, so element 1 is the URL itself.
        $parts = explode('"', $quoted[0]);
        if (empty($parts[1])) {
            return;
        }

        $lyrics_file = file_get_contents($parts[1]);
        if ($lyrics_file === false) {
            return;
        }

        $this->write($lyrics_file);
    }
}

// Run the crawler.
$ts = new Small_crawler();
$ts->perform();
?>
相关推荐:
以上是PHP 爬虫抓取歌词的详细内容。更多信息请关注PHP中文网其他相关文章!