Heim  >  Artikel  >  Backend-Entwicklung  >  PHP-Crawler

PHP-Crawler

巴扎黑
巴扎黑Original
2016-11-23 14:06:33 · 1225 Aufrufe

<?php

/*
 * Three-level crawler for http://i8i8.cc:
 *   1. fetch the front page and collect the category links
 *      (/cclass/9_1.html ... /cclass/15_1.html),
 *   2. fetch each category page and collect the movie entries listed on it,
 *   3. fetch each movie detail page and run the field regexes
 *      (name, actor, catalog, year, update time, description) against it.
 *
 * The extracted fields are not persisted yet: the only database statement in
 * the original script was a commented-out insert into t_catalog (see the TODO
 * at the end of the category loop).
 */

header("Content-Type:text/html; charset=gb2312");

/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Absolute URL to request.
 * @return string|false The response body, or false when the handle could not
 *                      be created or the transfer failed.
 */
function crawler_fetch($url)
{
    $handle = curl_init();
    if ($handle === false) {
        return false;
    }

    curl_setopt($handle, CURLOPT_URL, $url);
    // Return the body from curl_exec() instead of echoing it.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    $body = curl_exec($handle);
    curl_close($handle);

    return is_string($body) ? $body : false;
}

$baseUrl = "http://i8i8.cc";

$output1 = crawler_fetch($baseUrl . "/");
if ($output1 === false) {
    exit("Failed to fetch the category index from " . $baseUrl . "/");
}

// Group 2 is the category path (/cclass/N_1.html), group 6 is the link text.
$regx1 = '/(<a href=")(\/cclass\/((9|10|11|12|13|14|15)_1\.html))(">)(.*)(<\/a>)/';
if (!preg_match_all($regx1, $output1, $result, PREG_SET_ORDER)) {
    // 0 matches or a PCRE error: nothing to crawl.
    $result = [];
}

// The legacy mysql_* extension no longer exists in PHP 7+; use mysqli.
// Reporting is switched off so that failures surface as false return values
// on every PHP version and can be handled explicitly below.
mysqli_report(MYSQLI_REPORT_OFF);
$conn = mysqli_connect('localhost', 'test', '111111', 'test');
if ($conn === false) {
    exit("Database connection failed: " . mysqli_connect_error());
}
// Equivalent of the original "SET NAMES gb2312", but also keeps the client
// library's escaping functions aware of the connection charset.
if (!mysqli_set_charset($conn, 'gb2312')) {
    trigger_error("Could not switch the connection to gb2312: " . mysqli_error($conn), E_USER_WARNING);
}

foreach ($result as $key => $category) {
    $url2 = $baseUrl . $category[2];
    $output2 = crawler_fetch($url2);
    if ($output2 === false) {
        // Skip categories that cannot be downloaded instead of matching
        // against a failed response.
        trigger_error("Failed to fetch category page " . $url2, E_USER_WARNING);
        continue;
    }

    // Group 3 is the href of the movie detail page (up to ".html").
    $regx2 = '/(<li>)(<a href=")(.*\.html)(.*)(<img src=")(.*)(" alt=)(.*)(<span>)(.*)(<\/span>)/';
    if (!preg_match_all($regx2, $output2, $movie, PREG_SET_ORDER)) {
        $movie = [];
    }

    foreach ($movie as $entry) {
        // Drop the first two characters of the captured href before
        // prefixing the host (presumably a leading ".." in a relative link;
        // verify against the live markup).
        $url3 = $baseUrl . substr($entry[3], 2);

        $output3 = crawler_fetch($url3);
        if ($output3 === false) {
            trigger_error("Failed to fetch movie page " . $url3, E_USER_WARNING);
            continue;
        }

        $regx_name = '/(<div class=")(row_right)("><strong>)(.*)<\/strong>/';
        preg_match_all($regx_name, $output3, $movie_name, PREG_SET_ORDER);

        $regx_actor = '/(<div class="img">)(.*)(artlist&keyword=)(.*)(>)(.*)(<\/a>)/';
        preg_match_all($regx_actor, $output3, $movie_actor, PREG_SET_ORDER);

        $regx_catalog = '/(<div class="row_right"><A href="\/cclass\/(.*).html">)(.*)(<\/A> )/';
        preg_match_all($regx_catalog, $output3, $movie_catalog, PREG_SET_ORDER);

        // Year text is in group 2.
        $regx_year = '/<a href="\/year\/(.*).html">(.*)<\/a>/';
        preg_match_all($regx_year, $output3, $movie_year, PREG_SET_ORDER);

        // NOTE(review): the two patterns below contain non-ASCII literals and
        // have no "u" modifier, so they compare raw bytes. They can only match
        // if this file is saved in the same encoding as the fetched page (the
        // script declares gb2312) - confirm.
        // Update time is in group 2.
        $regx_update = '/(更新时间:<\/div><div class="row_right">)(.*)<\/div>/';
        preg_match_all($regx_update, $output3, $movie_update, PREG_SET_ORDER);

        // Description text is in group 2.
        $regx_desc = '/(主演。)(.*)<\/div>/';
        preg_match_all($regx_desc, $output3, $movie_desc, PREG_SET_ORDER);
    }

    // TODO: persist the category. The original (disabled) statement inserted
    // ($key, $category[6]) into t_catalog via string interpolation; when this
    // is enabled, use a prepared statement (mysqli_prepare / bind_param)
    // because the values come from scraped, untrusted HTML.
}

mysqli_close($conn);
?>


Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn
Vorheriger Artikel: PHP-Extract()-Funktion | Nächster Artikel: PHP-Extract()-Funktion