复制代码 代码如下:
/**
* HOST: components.arrow.com
*/
//set_time_limit(0);
// base function
/**
 * Perform an HTTP GET request with cURL and return the raw response.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Extra query parameters; an array is encoded with
 *                              http_build_query(), a string is appended as-is.
 * @param array        $header  HTTP header lines passed to CURLOPT_HTTPHEADER.
 * @param int          $timeout Connect timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @param string       $reffer  Value for the Referer header; skipped when empty.
 * @param string       $proxy   Proxy host; when non-empty the request goes through it.
 * @return array 'result' holds the return value of curl_exec(); 'error' is set
 *               only when cURL reports a non-zero error number.
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $query = is_array($data) ? http_build_query($data) : $data;
        // strpos() returns 0 for a match at the first character, so the result
        // must be compared against false instead of being tested for truthiness.
        $separator = (strpos($url, '?') !== false) ? '&' : '?';
        $url .= $separator . $query;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Follow redirects and fetch the page they lead to.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // NOTE(review): proxy credentials are hard-coded in source; they should
        // come from configuration or the environment. Left unchanged here so
        // existing callers keep working.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (curl_errno($ch) !== 0) {
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
// (Web-page "copy code" marker) The code continues below:
/**
 * Send an HTTP POST request with cURL and hand back the raw response.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    POST payload, passed unchanged to CURLOPT_POSTFIELDS.
 * @param array        $header  HTTP header lines; only applied when non-empty.
 * @param int          $timeout Connect timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @return array 'result' holds the return value of curl_exec(); 'error' is set
 *               only when cURL reports a non-zero error number.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 15, $port = 80)
{
    $handle = curl_init();

    // Connection settings.
    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($handle, CURLOPT_PORT, $port);

    // Custom headers are optional.
    if (!empty($header)) {
        curl_setopt($handle, CURLOPT_HTTPHEADER, $header);
    }

    // Request body.
    curl_setopt($handle, CURLOPT_POST, 1);
    curl_setopt($handle, CURLOPT_POSTFIELDS, $data);

    $response = array('result' => curl_exec($handle));
    if (curl_errno($handle) != 0) {
        $response['error'] = "Error:\n" . curl_error($handle);
    }

    curl_close($handle);

    return $response;
}
/**
 * Fetch the HTML source of one search-result list page.
 *
 * @param string $keywords Search keywords.
 * @param int    $start    Offset of the first record to request.
 * @return boolean|string false when $start is negative or the request reports
 *                        an error; otherwise the response body from curl_post().
 */
function getListHtml($keywords, $start = 0)
{
    // NOTE(review): the comparison on this line was lost when the source was
    // extracted from a web page ("if ($start {"). "< 0" is reconstructed from
    // getListNextPage() returning -1 when there is no next page - confirm
    // against the original script.
    if ($start < 0) {
        return false;
    }

    $postData = array(
        'search_token' => $keywords,
        'start' => $start,
        'limit' => 100,
    );

    // The keywords become a URL path segment, so they must be percent-encoded;
    // otherwise spaces or reserved characters produce a malformed request URL.
    $url = 'http://components.arrow.com/part/search/' . rawurlencode($keywords);

    $result = curl_post($url, http_build_query($postData));
    if (isset($result['error'])) {
        return false;
    }

    return $result['result'];
}
/**
 * Extract the detail-page link targets from a list page.
 *
 * @param string $html HTML source of a list page.
 * @return array List of href values (site-relative paths); empty when nothing matches.
 */
function getListHref($html)
{
    // NOTE(review): the original regular expression was destroyed when this
    // code was extracted from a web page (only "'/" survived). The pattern
    // below is a best-effort reconstruction: it captures the href of every
    // anchor that points at a "/part/detail/..." path. The "/part/detail/"
    // prefix is an assumption about the site's URL layout - verify it against
    // a real list page before relying on it.
    $pattern = '/<a\s[^>]*href="(\/part\/detail\/[^"]+)"/is';

    if (preg_match_all($pattern, (string) $html, $matches)) {
        return $matches[1];
    }

    // No matching links.
    return array();
}
/**
 * Read the "start" offset of the next result page from a list page.
 *
 * @param string $html HTML source of a list page.
 * @return number The third argument of the page's buildPagination() call as an
 *                integer, or -1 when no such call is found.
 */
function getListNextPage($html)
{
    $pattern = '/<script>buildPagination\(\'\d+\',\'\d+\',\'(\d+)\',\d+\);<\/script>/isU';
    if (preg_match($pattern, (string) $html, $matches)) {
        return intval($matches[1]);
    }

    return -1;
}

/**
 * Collect the detail-page links from every list page of a search.
 *
 * @param string $keywords Search keywords.
 * @return boolean|array false when $keywords is empty; otherwise the merged
 *                       list of links (empty when the first page has none).
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if ($html === false) {
        // The first page could not be fetched, so there is nothing to report.
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // No results.
        return array();
    }

    $nextPage = getListNextPage($html);
    while ($nextPage > 0) {
        $html = getListHtml($keywords, $nextPage);
        if ($html === false) {
            // A failed page ends the crawl; keep what was collected so far.
            break;
        }
        $hrefList = array_merge($hrefList, getListHref($html));
        $nextPage = getListNextPage($html);
    }

    return $hrefList;
}

/**
 * Fetch one detail page and extract the part information from it.
 *
 * @param string $url Site-relative URL of the detail page.
 * @return array|boolean false when $url is empty; an empty array when the
 *                       request fails or the part number cannot be found;
 *                       otherwise the populated record described below.
 */
function getDetail($url)
{
    if (empty($url)) {
        return false;
    }

    $host = 'http://components.arrow.com';
    $url = $host . $url;

    $response = curl_get($url);
    if (isset($response['error'])) {
        return array();
    }
    $html = $response['result'];

    $result = array(
        'sup_part' => '',   // supplier part number
        'sup_id' => '',     // supplier ID
        'mfg_part' => '',   // manufacturer part number
        'mfg_name' => '',   // manufacturer name
        'cat_name' => '',   // category name
        'para' => '',       // attributes (becomes a name => value array when found)
        'desc' => '',       // description
        'pdf_url' => '',    // datasheet (PDF) URL
        'sup_stock' => '',  // stock quantity
        'min_purch' => '',  // minimum order quantity
        'price' => '',      // price (becomes a quantity => price array when found)
        'img_url' => '',    // image URL
        'createtime' => '', // creation time
        'datacode' => '',   // batch / lot number
        'package' => '',    // package type
        'page_url' => '',   // page URL
    );

    // mfg_part - required; without it the page is treated as unusable.
    // (A leftover debug dump-and-die() used to abort the whole crawl here.)
    $pattern = '/<li>[\s\n]*<strong>Part No:\s*<\/strong>(.+)<\/li>/isU';
    if (!preg_match($pattern, $html, $matches)) {
        return array();
    }
    $result['mfg_part'] = trim($matches[1]);

    // mfg_name
    $pattern = '/<li>[\s\n]*<strong>Manufacturer: <\/strong>(.+)<\/li>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['mfg_name'] = trim($matches[1]);
    }

    // cat_name
    $pattern = '/displayCategory\(\'(.[^\']+)\'\);/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['cat_name'] = str_replace('|', '>', trim($matches[1]));
    }

    // para - collected in a real array first; writing array offsets into the
    // '' placeholder fails on PHP >= 7.1.
    $tablepattern = '/<table\s+id="part_specs".[^>]*>(.+)<\/table>/isU';
    if (preg_match($tablepattern, $html, $matches)) {
        $pattern = '/<tr>[\s\n]*<td><strong>(.+)<\/strong><\/td><td>(.+)<\/td>[\s\n]*<\/tr>/isU';
        if (preg_match_all($pattern, $matches[1], $rows)) {
            $para = array();
            foreach ($rows[1] as $k => $name) {
                $name = trim($name);
                $value = trim($rows[2][$k]);
                if ('Package Type' == $name) {
                    $result['package'] = $value;
                    continue;
                }
                $para[$name] = $value;
            }
            if (!empty($para)) {
                $result['para'] = $para;
            }
        }
    }

    // desc
    $pattern = '/<div\s+id="part_title">.+<h4>(.+)<\/h4>[\s\n]*<\/div>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['desc'] = trim($matches[1]);
    }

    // pdf_url
    $pattern = '/<li\s+class="datasheet">[\s\n]*<strong>Datasheet:<\/strong><a\s+href="(.[^"]+)"/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['pdf_url'] = $host . trim($matches[1]);
    }

    // sup_stock
    $pattern = '/<td\s+id="inv_1"\s+class="li_inv">([\d,]+)<\/td>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['sup_stock'] = str_replace(',', '', trim($matches[1]));
    }

    // min_purch
    $pattern = '/<span\s+id="multiples">[\s\n]*<strong>Multiple:\s*<\/strong>(.+)<\/span>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['min_purch'] = trim($matches[1]);
    }

    // price - same reasoning as "para": build an array, assign it when filled.
    $price = array();
    $pattern = '/<div\s+id="price_1"\s+class="li_price">(.[^<]+)<\/div>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $price[1] = trim($matches[1]);
    }
    $pattern = '/<div\s+id="price_1"\s+class="li_price">[\s\n]*<span.[^>]+title="(.[^"]+)">/isU';
    if (preg_match($pattern, $html, $matches)) {
        // The URL is read from an HTML attribute, where "&" is written "&amp;".
        $priceurl = str_replace('&amp;', '&', $matches[1]);
        $json = curl_get($priceurl);
        $json = $json['result'];
        if (!empty($json)) {
            $jsonresult = json_decode($json, true);
            // Only walk the price breaks when the decoded structure has them.
            if (isset($jsonresult['parts'][0]['webprice']['resale'])
                && is_array($jsonresult['parts'][0]['webprice']['resale'])
            ) {
                foreach ($jsonresult['parts'][0]['webprice']['resale'] as $break) {
                    if (isset($break['minqty'], $break['price'])) {
                        $price[$break['minqty']] = $break['price'];
                    }
                }
            }
        }
    }
    if (!empty($price)) {
        $result['price'] = $price;
    }

    // img_url
    // NOTE(review): this pattern was corrupted by lazy-load markup injected
    // into the published listing; it is reconstructed here as "first src
    // attribute of the <img> inside #part_image" - confirm against a real page.
    $pattern = '/<div\s+id="part_image">[\s\n]*<img\s+src="(.[^"\s]+)"/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['img_url'] = trim($matches[1]);
    }

    // page_url
    $result['page_url'] = $url;

    return $result;
}

/**
 * Entry point: search by keyword and fetch the details of every hit.
 *
 * @param string $keywords Search keywords.
 * @return array One getDetail() result per link found; empty when the search
 *               yields no links or $keywords is empty.
 */
function getData($keywords)
{
    $hrefList = getListHrefAll($keywords);
    $result = array();

    // getListHrefAll() returns false for empty keywords; nothing to iterate.
    if (empty($hrefList)) {
        return $result;
    }

    foreach ($hrefList as $href) {
        $result[] = getDetail($href);
    }

    return $result;
}

// Test Script
$keywords = (isset($_GET['keywords']) && is_string($_GET['keywords'])) ? trim($_GET['keywords']) : '';
$result = getData($keywords);

print_r($result);

Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

Video Face Swap
Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Article

Hot Tools

mPDF
mPDF is a PHP library that can generate PDF files from UTF-8 encoded HTML. The original author, Ian Back, wrote mPDF to output PDF files "on the fly" from his website and handle different languages. It is slower than original scripts like HTML2FPDF and produces larger files when using Unicode fonts, but supports CSS styles etc. and has a lot of enhancements. Supports almost all languages, including RTL (Arabic and Hebrew) and CJK (Chinese, Japanese and Korean). Supports nested block-level elements (such as P, DIV).

SublimeText3 Chinese version
Chinese version, very easy to use

WebStorm Mac version
Useful JavaScript development tools

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver Mac version
Visual web development tools
