Heim  >  Artikel  >  Backend-Entwicklung  >  php 爬虫

php 爬虫

巴扎黑
巴扎黑Original
2016-11-23 14:06:33 · 1192 Aufrufe

<?php

/**
 * Three-level crawler for http://i8i8.cc.
 *
 * 1. Fetch the index page and collect the category links
 *    (/cclass/9_1.html through /cclass/15_1.html).
 * 2. Fetch every category page and collect the movie entries listed on it.
 * 3. Fetch every movie detail page and extract name, actor, catalog, year,
 *    update time and description with regular expressions.
 *
 * The extracted fields are not persisted yet; the database connection is
 * opened for the (currently disabled) category insert noted further down.
 *
 * NOTE(review): the update-time and description patterns contain Chinese
 * literals and are matched byte-wise (no /u modifier), so this file presumably
 * has to be saved in the same encoding as the fetched pages - confirm.
 */

/**
 * Download the body of a page with cURL.
 *
 * @param string $url Absolute URL to fetch.
 *
 * @return string|false The response body, or false when the transfer failed.
 */
function crawler_fetch_page($url)
{
    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $url);
    // Return the body as a string instead of echoing it.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}

header("Content-Type:text/html; charset=gb2312");

$base_url = 'http://i8i8.cc';

// Level 1: index page -> category links.
$output1 = crawler_fetch_page($base_url . '/');
if ($output1 === false) {
    exit('Failed to fetch the index page.');
}

// Group 2 is the category path, group 6 the link text.
$regx1 = '/(<a href=")(\/cclass\/((9|10|11|12|13|14|15)_1\.html))(">)(.*)(<\/a>)/';
preg_match_all($regx1, $output1, $result, PREG_SET_ORDER);

// mysqli replaces the mysql_* extension, which no longer exists in PHP 7+.
// Credentials are quoted strings; barewords are undefined constants.
$conn = mysqli_connect('localhost', 'test', '111111', 'test');
if (!$conn) {
    exit('Database connection failed: ' . mysqli_connect_error());
}
mysqli_set_charset($conn, 'gb2312');

foreach ($result as $key => $category) {
    // Level 2: category page -> movie entries.
    $url2 = $base_url . $category[2];
    $output2 = crawler_fetch_page($url2);
    if ($output2 === false) {
        // Best effort: skip categories that cannot be downloaded.
        continue;
    }

    // Group 3 is the href of the movie detail page.
    $regx2 = '/(<li>)(<a href=")(.*\.html)(.*)(<img src=")(.*)(" alt=)(.*)(<span>)(.*)(<\/span>)/';
    preg_match_all($regx2, $output2, $movie, PREG_SET_ORDER);

    foreach ($movie as $entry) {
        // Level 3: movie detail page -> individual fields.
        // The first two bytes of the href are dropped before prefixing the
        // host (presumably a leading ".." in a relative link - confirm).
        $url3 = $base_url . substr($entry[3], 2);
        $output3 = crawler_fetch_page($url3);
        if ($output3 === false) {
            continue;
        }

        $regx_name = '/(<div class=")(row_right)("><strong>)(.*)<\/strong>/';
        preg_match_all($regx_name, $output3, $movie_name, PREG_SET_ORDER);

        $regx_actor = '/(<div class="img">)(.*)(artlist&keyword=)(.*)(>)(.*)(<\/a>)/';
        preg_match_all($regx_actor, $output3, $movie_actor, PREG_SET_ORDER);

        $regx_catalog = '/(<div class="row_right"><A href="\/cclass\/(.*).html">)(.*)(<\/A> )/';
        preg_match_all($regx_catalog, $output3, $movie_catalog, PREG_SET_ORDER);

        // The year text is in group 2.
        $regx_year = '/<a href="\/year\/(.*).html">(.*)<\/a>/';
        preg_match_all($regx_year, $output3, $movie_year, PREG_SET_ORDER);

        // The update time is in group 2.
        $regx_update = '/(更新时间:<\/div><div class="row_right">)(.*)<\/div>/';
        preg_match_all($regx_update, $output3, $movie_update, PREG_SET_ORDER);

        // The description is in group 2.
        $regx_desc = '/(主演。)(.*)<\/div>/';
        preg_match_all($regx_desc, $output3, $movie_desc, PREG_SET_ORDER);
    }

    // Storing the category ($key, $category[6]) in t_catalog is still
    // disabled. When enabling it, bind both values through a prepared
    // statement (mysqli_prepare / mysqli_stmt_bind_param) rather than
    // interpolating scraped text into the SQL string.
}

mysqli_close($conn);
?>


Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn
Vorheriger Artikel: PHP extract() 函数 | Nächster Artikel: php类和对象——Traits