Home  >  Article  >  php教程  >  多线程 QQ 号码爬虫

多线程 QQ 号码爬虫

PHP中文网
PHP中文网Original
2016-05-25 16:58:341386browse

php代码

<?php
/*
Homepage: http://www.php.cn
*/
if(!extension_loaded(&#39;pthreads&#39;)) die (&#39;Please install pthreads&#39;);

include_once(&#39;Snoopy.class.php&#39;);

class CrawlerWorker extends Worker {

	protected  static $dbh;
	public function __construct() {

	}
	public function run(){
	/*
		$dbhost = &#39;db.example.com&#39;;			// 数据库服务器
	    $dbuser = &#39;example.com&#39;;        	// 数据库用户名
        $dbpw = &#39;password&#39;;             	// 数据库密码
		$dbname = &#39;example&#39;;				// 数据库名

		self::$dbh  = new PDO("mysql:host=$dbhost;port=3306;dbname=$dbname", $dbuser, $dbpw, array(
			PDO::MYSQL_ATTR_INIT_COMMAND => &#39;SET NAMES \&#39;UTF8\&#39;&#39;,
			PDO::MYSQL_ATTR_COMPRESS => true,
			PDO::ATTR_PERSISTENT => true
			)
		);
	*/
	}
	protected function getInstance(){
        return self::$dbh;
    }

}

/* the collectable class implements machinery for Pool::collect */
class Crawler extends Stackable {
	public $depth = 3;
	private static $level = 0;
	public function __construct($qq) {
		$this->qq = $qq;
	}
	public function run() {

		try {
			$dbh  = $this->worker->getInstance();
			$this->recursion(array($this->qq));
		}
		catch(PDOException $e) {
			$error = sprintf("%s,%s\n", $mobile, $id );
			file_put_contents("mobile_error.log", $error, FILE_APPEND);
		}
		//printf("runtime: %s, %s\n", date(&#39;Y-m-d H:i:s&#39;), $this->worker->getThreadId());
		//$lst = $this->qzone($this->qq);
		//print_r($lst);
	}
	public function recursion($qqs){
		
		if( self::$level <= $this->depth){
			self::$level++;
		}else if(self::$level > 0){
			self::$level--;
		}
		printf("Level: %s\n", self::$level);
		//sleep(1);
		usleep(mt_rand(10000,1000000));
		if(self::$level >= $this->depth){
			return;
		}
		
		foreach($qqs as $uin) {
			$lst = $this->qzone($uin);
			print_r($lst);
			$this->recursion($lst);
		}
	}

	public function qzone($qq){
		$url = &#39;http://m.qzone.com/mqz_get_visitor?g_tk=1191852101&res_mode=0&res_uin=&#39;.$qq.&#39;&offset=0&count=100&page=1&format=json&t=1401762986882&sid=dODKVcYv6azjN87cxXQ5mao1xgakYjHg18c8aa5e0201%3D%3D&#39;;
		$snoopy = new Snoopy;
		 
		// need an proxy?
		//$snoopy->proxy_host = "my.proxy.host";
		//$snoopy->proxy_port = "8080";
		 
		// set browser and referer:
		$snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
		$snoopy->referer = "http://m.qzone.com/";
		 
		// set some cookies:
		//$snoopy->cookies["SessionID"] = &#39;238472834723489&#39;;
		//$snoopy->cookies["favoriteColor"] = "blue";
		 
		// set an raw-header:
		$snoopy->rawheaders["Pragma"] = "no-cache";
		 
		// set some internal variables:
		$snoopy->maxredirs = 2;
		$snoopy->offsiteok = false;
		$snoopy->expandlinks = false;
		 
		// set username and password (optional)
		//$snoopy->user = "joe";
		//$snoopy->pass = "bloe";
		 
		// fetch the text of the website www.google.com:
		if($snoopy->fetchtext($url)){ 
			// other methods: fetch, fetchform, fetchlinks, submittext and submitlinks

			// response code:
			//print "response code: ".$snoopy->response_code."<br/>\n";
		 
			// print the headers:
			//print "<b>Headers:</b><br/>";
			//while(list($key,$val) = each($snoopy->headers)){
			//	print $key.": ".$val."<br/>\n";
			//}

			// print the texts of the website:
			//print_r( json_decode($snoopy->results) );
			
			$results = array();
			$tmp = json_decode($snoopy->results);
			
			if($tmp){
				if(property_exists($tmp, &#39;data&#39;)){
					foreach( $tmp->data->list as $lst ){
						$results[] = $lst->uin;
					}
				}
			}
			return ($results);
			
		}
		else {
			print "Snoopy: error while fetching document: ".$snoopy->error."\n";
		}		
	}
}

$pool = new Pool(100, \CrawlerWorker::class, []);

#foreach (range(1000, 100000) as $number) {
#	$pool->submit(new Crawler($number));
#}

$pool->submit(new Crawler(&#39;13721218&#39;));
$pool->submit(new Crawler(&#39;291379&#39;));
//$pool->submit(new Crawler(&#39;xxx&#39;));
//$pool->submit(new Crawler(&#39;xxx&#39;));
//$pool->submit(new Crawler(&#39;xxx&#39;));
// 以此类推
//$pool->submit(new Crawler(&#39;nnn&#39;));

$pool->shutdown();
?>
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn