Home  >  Article  >  Backend Development  >  Share 蟧.gif" alt="Script Home" />Home Page Web Production Script Column Network Programming Database Script Download CMS Tutorial Electronic Books Graphic Design Media_PHP Tutorial

Share 蟧.gif" alt="Script Home" />Home Page Web Production Script Column Network Programming Database Script Download CMS Tutorial Electronic Books Graphic Design Media_PHP Tutorial

WBOY
WBOYOriginal
2016-07-13 10:39:48 · 1124 views

复制代码 代码如下:

/**
 * HOST: www.icbase.com
 */
//set_time_limit(0);
// base function
/**
 * Perform an HTTP GET request with cURL.
 *
 * @param string       $url     Target URL; $data is appended to its query string
 * @param array|string $data    Query parameters (arrays are encoded with http_build_query()) or a ready-made query string
 * @param array        $header  Header lines passed to CURLOPT_HTTPHEADER
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT)
 * @param int          $port    Remote port (CURLOPT_PORT)
 * @param string       $reffer  Referer value; not sent when empty
 * @param string       $proxy   Proxy host; when set, the fixed proxy port and credentials below are used
 * @return array 'result' holds the return value of curl_exec(); 'error' is present only when cURL reported an error
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $query = is_array($data) ? http_build_query($data) : $data;
        // Compare against false explicitly: strpos() returns 0 (falsy) for a leading '?'.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $query;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects and return the final page

    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // NOTE(review): proxy credentials are hard-coded in source; move them to configuration.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        // "\n" (newline) was previously written as a literal "n".
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

复制代码 代码如下:

/**
 * Perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL
 * @param array|string $data    Request body handed to CURLOPT_POSTFIELDS as-is
 * @param array        $header  Header lines; CURLOPT_HTTPHEADER is only set when this is non-empty
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT)
 * @param int          $port    Accepted for signature compatibility; not applied to the request
 * @return array 'result' holds the return value of curl_exec(); 'error' is present only when cURL reported an error
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 5, $port = 80)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        // "\n" (newline) was previously written as a literal "n".
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

/**
 * Extract the value of a named hidden <input> field from HTML.
 *
 * NOTE(review): the pattern assumes the usual ASP.NET WebForms markup
 * (name attribute before value, double-quoted) - confirm against the live page.
 *
 * @param string $html HTML source code
 * @param string $name Field name, e.g. __VIEWSTATE
 * @return string|false Field value, or false when the field is not found
 */
function getListHiddenField($html, $name)
{
    $pattern = '/<input\b[^>]*\bname="' . preg_quote($name, '/') . '"[^>]*\bvalue="([^"]*)"/i';
    if (preg_match($pattern, $html, $matches)) {
        return $matches[1];
    }
    return false;
}

/**
 * Get the html source code of the list page.
 *
 * Page 1 is fetched with GET; the hidden form fields found in it are kept
 * and posted back to request any later page, so page 1 must be fetched
 * before other pages of the same search.
 *
 * Side effect: the constants __VIEWSTATE, __PREVIOUSPAGE and
 * __EVENTVALIDATION are defined from the first successful page-1 fetch.
 *
 * @param string $keywords Search keywords
 * @param int $page page number (0 is treated as 1)
 * @return boolean|string HTML source, or false on a negative page, a request error or missing form fields
 */
function getListHtml($keywords, $page=1)
{
    // Hidden form fields from the most recent page-1 fetch. Constants cannot be
    // redefined, so they would go stale when a second keyword is searched.
    static $formState = array();
    $fields = array('__VIEWSTATE', '__PREVIOUSPAGE', '__EVENTVALIDATION');

    if ($page < 0) {
        return false;
    }
    $page = $page == 0 ? 1 : intval($page);

    if ($page == 1) {
        $result = curl_get('http://www.icbase.com/ProResult.aspx', array('ProKey' => $keywords));
        if (isset($result['error'])) {
            return false;
        }
        $html = $result['result'];

        // Collect the ASP.NET postback fields; all three are required.
        $state = array();
        foreach ($fields as $field) {
            $value = getListHiddenField($html, $field);
            if ($value === false) {
                return false;
            }
            $state[$field] = $value;
        }
        $formState = $state;

        // Keep defining the constants for existing callers, but an already
        // defined constant is no longer treated as a failure.
        foreach ($state as $field => $value) {
            if (!defined($field)) {
                define($field, $value);
            }
        }

        return $html;
    }

    if (empty($formState)) {
        // No page-1 fetch in this request: fall back to constants defined elsewhere.
        $state = array();
        foreach ($fields as $field) {
            if (!defined($field)) {
                return false;
            }
            $state[$field] = constant($field);
        }
        $formState = $state;
    }

    $data = array(
        '__EVENTTARGET' => 'pager',
        '__EVENTARGUMENT' => $page,
        '__VIEWSTATE' => $formState['__VIEWSTATE'],
        '__PREVIOUSPAGE' => $formState['__PREVIOUSPAGE'],
        '__EVENTVALIDATION' => $formState['__EVENTVALIDATION'],
    );
    // Encode the keywords so spaces, '&' or '#' cannot break the query string.
    $result = curl_post('http://www.icbase.com/ProResult.aspx?ProKey=' . urlencode($keywords), $data);
    if (isset($result['error'])) {
        return false;
    }

    return $result['result'];
}

/**
* Collect the link targets found in a list page.
*
* Runs preg_match_all() over the page and returns the first capture group.
*
* NOTE(review): the pattern below looks corrupted (HTML-like text and
* backslashes appear to have been stripped). As written it contains no
* capture group, and the "/" in the middle of the string ends the pattern,
* so ">/isU" is read as the modifier list. preg_match_all() should therefore
* fail and the function falls through to the empty-array branch. The
* original expression has to be restored from the target page's markup.
*
* @param string $html html source code
* @return array Captured hrefs, or an empty array when nothing matched
*/
function getListHref($html)
{
 $pattern = '/[sn]*]/>/isU';
 if (preg_match_all($pattern, $html, $matches))
 {
 return $matches[1];
 } else {
 // No matches
 return array();
 }
}

/**
* Get the next page number from a list page.
*
* NOTE(review): the pattern below looks corrupted (the pager markup and the
* capture group appear to have been stripped). As written it has no capture
* group, so $matches[1] is never set and a successful match yields
* intval(null), i.e. 0, instead of a page number. The original expression
* has to be restored from the target page's markup.
*
* @param string $html html source code
* @return number intval() of the first capture group on a match, -1 when the pattern does not match
*/
function getListNextPage($html)
{
 $pattern = '/]>.+>/isU';
 if (preg_match($pattern, $html, $matches))
 {
 return intval($matches[1]);
 } else {
 return -1;
 }
}

/**
 * Get all hrefs in the list, following the pager from page to page.
 *
 * @param string $keywords Search keywords
 * @return boolean|array false when $keywords is empty; otherwise the hrefs
 *                       collected from every page that could be fetched
 *                       (empty array when the first page has no results)
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if ($html === false) {
        // First page could not be fetched: nothing to collect.
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // No results
        return array();
    }

    // Pages already requested. Without this, a pager that keeps reporting an
    // already-fetched page would make the loop run forever.
    $visited = array(1 => true);

    $nextPage = getListNextPage($html);
    while ($nextPage > 0 && !isset($visited[$nextPage])) {
        $visited[$nextPage] = true;

        $html = getListHtml($keywords, $nextPage);
        if ($html === false) {
            // Request failed: keep what was collected so far.
            break;
        }

        $hrefList = array_merge($hrefList, getListHref($html));
        $nextPage = getListNextPage($html);
    }

    return $hrefList;
}

/**
* Get details page information.
*
* With $is_url set, $url is normalised to start with "/" and the page is
* fetched from www.icbase.com through curl_get(); a fetch error terminates
* the script via exit(). Otherwise $url is treated as HTML that was already
* downloaded. The HTML is then run through a series of regular expressions
* that fill the result array.
*
* NOTE(review): this function does not parse as written. The text appears
* to have been damaged when HTML was stripped from the listing:
*  - "$ result" and "$ matches" contain a stray space, and the 'mfg_name'
*    entry of the result array contains a stray ";";
*  - the patterns for mfg_part, pdf_url and img_url are cut off and fused
*    with the following "if" statement;
*  - the remaining patterns contain "[sn]", "[ sn]", "d+" and "[d.]" where
*    whitespace/digit escapes were presumably intended;
*  - the description is stored under the key 'desc ' (trailing space), not
*    under the 'desc' key initialised in the result array;
*  - 'para' and 'price' start as '' and are later written with array
*    offsets.
* The original expressions have to be restored from the target page's markup.
*
* In the visible code sup_part, sup_id, cat_name, min_purch, createtime,
* datacode and package are initialised but never assigned afterwards.
*
* @param string $url The url address or the captured html source code is distinguished according to @see $is_url
* @param int $is_url 1 uses url address 0 Directly process html source code
* @return boolean|array false when $url is empty, an empty array when the mfg_part pattern does not match, otherwise the result array
*/
function getDetail($url, $is_url = 1)
{
if ( empty($url) )
{
return false;
}
$host = 'www.icbase.com';
// Default: treat the argument as ready-made HTML; replaced below when it is a URL.
$html = $url;
if ($is_url) {
// Make sure the path starts with exactly one "/".
$url = '/' .ltrim ($url, '/');
$result = curl_get($host . $url);
if ( isset($result['error']) )
{
// NOTE(review): "$ result" is not a valid variable reference; presumably $result.
exit($ result['error']);
}
$html = $result['result'];
}

// Result skeleton; every field starts out as a string.
$result = array(
'sup_part' => '', // Supplier model
'sup_id' => '', // Supplier ID
'mfg_part' => '', // Manufacturer model
// NOTE(review): stray ";" in the next entry makes the array literal invalid.
'mfg_name' => ; '', //Manufacturer name
'cat_name' => '', // Category name
'para' => '', // Attribute
'desc' => ' ', // Description
'pdf_url' => '', // PDF address
'sup_stock' => '', // Stock
'min_purch' => '', // Minimum order quantity
'price' => '', // Price
'img_url' => '', // Image address
'createtime' => '', // Creation time
'datacode' => '', // Batch number
'package' => '', // Package
'page_url' => '', // Page address
) ;

// mfg_part
$pattern = '/Product Model(.[^<]+) if (preg_match($pattern, $html, $matches))
{
$result['mfg_part'] = trim($matches[1]);
} else {
// This item does not exist, and the instructions are nowhere to be found
return array();
}

// mfg_name
$pattern = '/Manufacturer[ sn]*(.+)/isU';
if (preg_match($pattern, $html, $matches))
{
$result['mfg_name' ] = trim($matches[1]);
}

// para
$pattern = '/(.+)/isU';
if (preg_match($pattern, $html, $matches))
{
if (preg_match_all('/(.+)/isU', $matches[1], $matches))
{
$count = count($matches[1 ]);
$count = intval($count / 2 );
foreach ($matches[1] as $k=>$v)
{
if ($k >= $count)
{
break;
}
if (trim($v) == 'Description')
{
// desc
$result['desc '] = trim($matches[1][$count + $k]);
continue;
}
$v = trim($v);
$result['para'] [$v] = trim($matches[1][$count + $k]);
}
}
}

// pdf_url
$pattern = '/ Details if (preg_match($pattern, $html, $matches ))
{
$result['pdf_url'] = trim($matches[1]);
}

// sup_stock
// NOTE(review): as written, "[sn]" matches the letters s/n and "d+" the
// letter d; presumably whitespace and digit escapes were intended - confirm.
$pattern = '/[sn]*(d+)/isU';
if (preg_match($pattern, $html, $matches))
{
$result['sup_stock'] = trim($matches[1]);
}

// price
// Maps capture group 1 to capture group 2 prefixed with the yen/yuan sign.
// NOTE(review): $result['price'] was initialised to '' above and is
// written with an array offset here - confirm this works on the target
// PHP version.
$pattern = '/]+>(d+)+]+>.[^d]*([d.]+) /isU';
if (preg_match_all($pattern, $html, $matches))
{
foreach ($matches[1] as $k=>$v)
{
$result['price'][$v] = '¥' . $matches[2][$k];
}
}

//img_url
$pattern = '/Pictures if (preg_match($pattern, $html, $ matches))
{
$result['img_url'] = trim($matches[1]);
}

// page_url
if ($is_url)
{
$result['page_url'] = $host . $url;
}

return $result;
}

/**
 * Final call function: search for the keywords and fetch the detail
 * information of every hit.
 *
 * @param string $keywords Search keywords
 * @return array One getDetail() result per list href, in list order; empty
 *               when the search produced no hrefs
 */
function getData($keywords)
{
    $hrefList = getListHrefAll($keywords);
    $result = array();

    // getListHrefAll() returns false for empty keywords; there is nothing to iterate then.
    if (!is_array($hrefList)) {
        return $result;
    }

    foreach ($hrefList as $href) {
        $result[] = getDetail($href);
    }

    return $result;
}

// Test Script
// Read the search keywords from the query string. A missing or non-string
// "keywords" parameter is treated as an empty search instead of raising
// an undefined-index notice or a type error in trim().
$keywords = (isset($_GET['keywords']) && is_string($_GET['keywords'])) ? trim($_GET['keywords']) : '';
$result = getData($keywords);

print_r($result);

www.bkjia.com true http://www.bkjia.com/PHPjc/728095.html TechArticle 复制代码 代码如下: <?php /** * HOST: www.icbase.com */ //set_time_limit(0); // base function function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port...
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn