Maison >développement back-end >tutoriel php >PHP多线程批量采集下载美女图片的实现代码_PHP教程
使用 curl 的并发请求功能(curl_multi,通常被称为“多线程”,实际上是在单个线程内并行处理多个请求)。curl 可以为每个请求设置超时时间,遇到很慢的 URL 资源时可以果断地放弃,这样不会阻塞其他请求;另外多个请求并行执行,效率应该比较高。
下面是代码实现:
/**
 * Fetch several URLs concurrently with the curl_multi API.
 *
 * NOTE: this is a method of the HttpImg class used by the script below; the
 * class declaration and its other members (e.g. getMicrotime()) are not part
 * of this listing.
 *
 * @author http://www.lai18.com
 * @param array $array   URLs to request; the array keys are preserved in the result
 * @param int   $timeout per-request timeout in seconds (numeric strings are accepted)
 * @return array response bodies keyed like $array; failed requests are omitted,
 *               so the result is an empty array when nothing could be fetched
 */
public function Curl_http($array, $timeout = '15') {
    $res    = array();
    $conn   = array();
    $failed = array();

    $mh = curl_multi_init(); // one multi handle drives all easy handles

    foreach ($array as $k => $url) {
        $ch = curl_init($url);
        if ($ch === false) {
            continue;
        }

        curl_setopt($ch, CURLOPT_TIMEOUT, (int) $timeout); // give up on slow resources
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)');
        curl_setopt($ch, CURLOPT_MAXREDIRS, 7);            // follow at most 7 redirects
        curl_setopt($ch, CURLOPT_HEADER, false);           // body only, no response headers
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);       // follow 3xx redirects
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);       // return the body as a string instead of printing it
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        // The URLs may come from user input: never allow file://, gopher://, etc.,
        // neither for the initial request nor for redirects.
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

        curl_multi_add_handle($mh, $ch);
        $conn[$k] = $ch;
    }

    // Start the transfers.
    $active = 0;
    do {
        $mrc = curl_multi_exec($mh, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);

    // Keep driving them until every transfer has finished.
    while ($active && $mrc == CURLM_OK) {
        if (curl_multi_select($mh) == -1) {
            // select() can fail immediately on some builds; sleep briefly
            // instead of spinning at 100% CPU, then still call exec below.
            usleep(100000);
        }
        do {
            $mrc = curl_multi_exec($mh, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    }

    // Per-transfer results are only reported through the multi handle's
    // message queue; curl_errno() on the easy handle is not reliable here.
    while (($info = curl_multi_info_read($mh)) !== false) {
        if ($info['result'] !== CURLE_OK) {
            $key = array_search($info['handle'], $conn, true);
            if ($key !== false) {
                $failed[$key] = true;
            }
        }
    }

    foreach ($conn as $k => $ch) {
        if (!isset($failed[$k])) {
            $res[$k] = curl_multi_getcontent($ch);
        }
        // Always detach before closing, for failed handles too.
        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
    }

    curl_multi_close($mh);

    return $res;
}

// ---------------------------------------------------------------------------
// Driver script: fetch the listed pages, extract image links, download them.
//
// SECURITY NOTE(review): the URLs come straight from the query string, so this
// endpoint makes the server fetch arbitrary http(s) addresses (SSRF), including
// internal hosts. Restrict access or validate hosts before exposing it.
// ---------------------------------------------------------------------------

// Request parameters.
$callback = isset($_GET['callback']) ? $_GET['callback'] : ''; // read but not used in this listing
$hrefs    = (isset($_GET['hrefs']) && is_string($_GET['hrefs'])) ? $_GET['hrefs'] : '';

// Comma-separated list of page URLs; drop empty entries.
$urlarray = array();
foreach (explode(',', trim($hrefs, ',')) as $href) {
    $href = trim($href);
    if ($href !== '') {
        $urlarray[] = $href;
    }
}
$date = date('Ymd');

$img   = new HttpImg();
$stime = $img->getMicrotime(); // start time

$data = $img->Curl_http($urlarray, 20); // HTML of the list pages

// One target directory per day; create it only when missing.
$dir = './img/' . $date;
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
    exit("无法创建目录" . $dir);
}

foreach ((array) $data as $k => $v) {
    // Group 3 captures the image URL from href=/src= attributes, quoted or not.
    // Relative URLs are not resolved against the page URL.
    preg_match_all('/(href|src)=(["\']?)([^ "\'>]+\.(jpg|png|gif))\2/i', (string) $v, $found);

    if (empty($found[3])) {
        continue;
    }

    // Download every distinct image of this page in parallel.
    $dataimg = $img->Curl_http(array_unique($found[3]), 20);
    $j = 0;
    foreach ((array) $dataimg as $kk => $vv) {
        if ($vv == '') {
            continue;
        }

        // time() + 4 random digits can collide inside one second; retry
        // rather than silently overwrite an image that was just saved.
        do {
            $basename = time() . "_" . rand(1000, 9999) . ".jpg"; // always stored with a .jpg extension
            $fname    = $dir . "/" . $basename;
        } while (file_exists($fname));

        if (file_put_contents($fname, $vv) === false) {
            continue;
        }
        $j++;
        echo "创建第" . $j . "张图片" . $fname . "<br/>";
    }
}

$etime = $img->getMicrotime(); // end time
echo "用时" . ($etime - $stime) . "秒";
exit;