Home >Backend Development >PHP Tutorial >Capture the supplier program of 'Weiku Electronic Market'_PHP Tutorial

Capture the supplier program of 'Weiku Electronic Market'_PHP Tutorial

WBOY
WBOY · Original
2016-07-13 17:48:55 · 905 views

/**
* Capture the supplier main program of "Weiku Electronic Market"
* author Lee.
* Last modify $Date: 2012-2-3 9:30:21 $
* Note: This program is executed according to the encoding GB2312, because the "Weiku Electronic Market" website is encoded in GB2312, and the database must also be consistent
*/ 
class weiku {
    /** @var string Model / part number currently being searched. */
    private $key;

    /** @var int Number of result pages detected for the current model. */
    private $pageNum;

    /** @var mysqli|null Connection opened on first use and reused for every query. */
    private $db = null;

    /** @var int Hard upper bound on result pages so page counting can never loop forever. */
    private $maxPages = 100;

    /**
     * Entry point: search one model number and store every supplier found.
     *
     * @param string $key Model / part number to search for
     * @return void
     */
    public function go($key) {
        $this->key = $key;
        $this->pageNum = $this->getPageNum();
        $this->getInfo();
    }

    /**
     * Walk every result page, collect the supplier shop links and store them.
     *
     * A single loop covers both the one-page and the multi-page case
     * (page 1 is simply the only iteration when $pageNum is 1).
     *
     * @return void
     */
    private function getInfo() {
        for ($page = 1; $page <= $this->pageNum; $page++) {
            $shopUrls = $this->shopUrlMatchReArr($this->getContent($page));
            $this->isAddSuccess($this->shopAddContact($shopUrls));
        }
    }

    /**
     * Scrape and store each contact page, printing the outcome per supplier.
     *
     * @param array $arr Contact-page URLs
     * @return void
     */
    private function isAddSuccess($arr) {
        foreach ($arr as $contactUrl) {
            if ($this->execAdd($this->getInfoByShopUrl($contactUrl))) {
                echo 'Add Success!!';
            } else {
                echo 'Add Faild!!';
            }
        }
    }

    /**
     * Insert one supplier record unless it is empty or already stored.
     *
     * Values come from scraped (untrusted) HTML, so they are bound as
     * parameters of a prepared statement instead of being pasted into SQL.
     *
     * @param array $infoArr Supplier fields as built by getInfoByShopUrl()
     * @return bool True when a row was inserted; false when the company name
     *              is empty, the company already exists, or the insert failed
     */
    private function execAdd($infoArr) {
        if (empty($infoArr['company'])) {
            return false;
        }

        $mysqli = $this->getDb();
        if ($this->isExists($mysqli, $infoArr)) {
            return false; // the company is already stored
        }

        $stmt = $mysqli->prepare(
            'INSERT INTO weiku(company,person,phone,fax,mobile,qq,msn,email,address,shopUrl) '
            . 'VALUES (?,?,?,?,?,?,?,?,?,?)'
        );
        if (!$stmt) {
            return false;
        }

        // Copy into a local row so every column has a string value to bind.
        $fields = array('company', 'person', 'phone', 'fax', 'mobile', 'qq', 'msn', 'email', 'address', 'shopUrl');
        $row = array();
        foreach ($fields as $field) {
            $row[$field] = isset($infoArr[$field]) ? (string) $infoArr[$field] : '';
        }

        $stmt->bind_param(
            'ssssssssss',
            $row['company'], $row['person'], $row['phone'], $row['fax'], $row['mobile'],
            $row['qq'], $row['msn'], $row['email'], $row['address'], $row['shopUrl']
        );
        $inserted = $stmt->execute();
        $stmt->close();

        return $inserted;
    }

    /**
     * Return the shared database connection, opening it on first use.
     *
     * NOTE(review): the credentials are hard-coded here; they should come
     * from configuration rather than from source code.
     *
     * @return mysqli
     * @throws RuntimeException When the connection cannot be established
     */
    private function getDb() {
        if ($this->db === null) {
            $mysqli = new mysqli('localhost', 'root', '1715544', 'weiku');
            if ($mysqli->connect_errno) {
                throw new RuntimeException('Database connection failed: ' . $mysqli->connect_error);
            }
            // The scraped site and the table are both GB2312 (see file header).
            $mysqli->set_charset('gb2312');
            $this->db = $mysqli;
        }
        return $this->db;
    }

    /**
     * Check whether a company with the same name is already stored.
     *
     * When the lookup itself cannot be run the method answers true, so a
     * failing lookup never leads to an insert.
     *
     * @param mysqli $mysqli  Open database connection
     * @param array  $infoArr Supplier fields; only 'company' is read
     * @return bool
     */
    private function isExists($mysqli, $infoArr) {
        $stmt = $mysqli->prepare('SELECT company FROM weiku WHERE company = ? LIMIT 1');
        if (!$stmt) {
            return true;
        }

        $company = (string) $infoArr['company'];
        $stmt->bind_param('s', $company);
        if (!$stmt->execute()) {
            $stmt->close();
            return true;
        }

        $stmt->store_result();
        $found = $stmt->num_rows > 0;
        $stmt->close();

        return $found;
    }

    /**
     * Download a supplier contact page and extract its contact fields.
     *
     * NOTE(review): every pattern ends directly after its lazy `(.*)` group,
     * which under the `U` modifier captures an empty string. The closing
     * markup of each pattern appears to have been lost (presumably HTML that
     * was stripped when this listing was published). The patterns are kept
     * verbatim and must be restored against the real page markup.
     *
     * @param string $url Contact-page URL
     * @return array Keys: company, person, phone, fax, mobile, qq, msn,
     *               email, address, shopUrl
     */
    private function getInfoByShopUrl($url) {
        $re = $this->getUrlInfo($url);

        $patterns = array(
            'company' => '/公司名称:(.*)/Usi',
            'person'  => '/联系人:(.*)/Usi',
            'phone'   => '/电话:(.*)/Usi',
            'fax'     => '/传真:(.*)/Usi',
            'mobile'  => '/手机:(.*)/Usi',
            'qq'      => '/QQ:(.*)/Usi',
            'msn'     => '/MSN:(.*)/Usi',
            'email'   => '/E-Mail:(.*)/Usi',
            'address' => '/公司地址:(.*)/Usi'
        );

        $raw = array();
        foreach ($patterns as $field => $pattern) {
            $raw[$field] = $this->firstMatch($pattern, $re);
        }

        return array(
            'company' => $this->stripATags($raw['company']),
            'person'  => trim($raw['person']),
            'phone'   => trim($raw['phone']),
            'fax'     => trim($raw['fax']),
            'mobile'  => trim($raw['mobile']),
            'qq'      => $this->formatQqMsn($raw['qq']),
            'msn'     => $this->formatQqMsn($raw['msn'], 'MSN'),
            'email'   => $this->stripATags($raw['email']),
            'address' => trim($raw['address']),
            'shopUrl' => $url
        );
    }

    /**
     * Return capture group 1 of the first match, or '' when nothing matches.
     *
     * @param string $pattern PCRE pattern containing one capture group
     * @param string $subject Text to search
     * @return string
     */
    private function firstMatch($pattern, $subject) {
        if (preg_match($pattern, $subject, $m) && isset($m[1])) {
            return $m[1];
        }
        return '';
    }

    /**
     * Reduce a search-result page to the list of supplier shop URLs.
     *
     * NOTE(review): the patterns below look like they lost their HTML markup
     * when this listing was published (several are empty or a bare `.*`).
     * As written, '/.*/' blanks every line of the page and the final pattern
     * has no capture group, so no URL is ever produced. They are kept
     * verbatim; restore them from the original program before relying on
     * this method.
     *
     * @param string $re HTML of one search-result page
     * @return array Shop URLs containing "http://", original keys preserved
     */
    private function shopUrlMatchReArr($re) {
        $re = preg_replace('//', '', $re);
        $re = preg_replace('//', '', $re);
        $re = preg_replace('/[A-Z]/', '', $re);
        $re = preg_replace('/[0-9]/', '', $re);
        $re = preg_replace('/.*/', '', $re);
        $re = preg_replace('/.*/', '', $re);
        $re = preg_replace('/营业执照/', '', $re);
        $re = preg_replace('/该企业更多资质>>/', '', $re);
        $re = preg_replace('/点此反馈/', '', $re);
        $re = preg_replace('/首页/', '', $re);
        $re = preg_replace('/IC/', '', $re);
        $re = preg_replace('/简洁/', '', $re);
        $re = preg_replace('/信用/', '', $re);
        $re = preg_replace('/.*更多报价信息>>/', '', $re);
        $re = preg_replace('/.*/', '', $re);
        $re = preg_replace('/

.*/', '', $re);
        preg_match_all('/.+/Usi', $re, $arr);

        // Group 1 only exists once the pattern above regains its capture group.
        $urls = isset($arr[1]) ? $arr[1] : array();

        return $this->formatUrlArr(array_unique($urls));
    }

    /**
     * Keep only the entries that contain "http://"; keys are preserved.
     *
     * @param array $arr Candidate URLs
     * @return array
     */
    private function formatUrlArr($arr) {
        return array_filter($arr, array($this, 'isExistsHttp'));
    }

    /**
     * Extract QQ or MSN account names from the alt="QQ:..." / alt="MSN:..."
     * attributes of a contact cell and join them with single spaces.
     *
     * @param string $str HTML fragment
     * @param string $e   Label to look for in the alt attribute ('QQ' or 'MSN')
     * @return string Space-separated accounts, '' when none are found
     */
    private function formatQqMsn($str, $e='QQ') {
        if (empty($str)) {
            return '';
        }
        preg_match_all('/alt="' . preg_quote($e, '/') . ':(.+)"/Usi', $str, $arr);
        return implode(' ', $arr[1]);
    }

    /**
     * Append "/contact.html" to every shop URL; keys are preserved.
     *
     * @param array $arr Shop URLs
     * @return array Contact-page URLs
     */
    private function shopAddContact($arr) {
        foreach ($arr as $k => $shopUrl) {
            $arr[$k] = $shopUrl . '/contact.html';
        }
        return $arr;
    }

    /**
     * Replace each <a ...>text</a> element with its inner text.
     *
     * NOTE(review): the published pattern was '/(.+)/' with replacement '1',
     * which turns any value into the literal "1". The anchor markup and the
     * backreference appear to have been stripped from the listing; this is a
     * reconstruction from the method's documented purpose — confirm against
     * the original program.
     *
     * @param string $site HTML fragment that may wrap its text in an <a> tag
     * @return string
     */
    private function stripATags($site) {
        return trim(preg_replace('/<a\b[^>]*>(.*)<\/a>/Usi', '$1', $site));
    }

    /**
     * Tell whether the string contains "http://" (case-insensitive).
     *
     * @param string $url
     * @return bool
     */
    private function isExistsHttp($url) {
        return stripos($url, 'http://') !== false;
    }

    /**
     * Download one search-result page for the current model.
     *
     * @param int $page 1-based page number
     * @return string Page HTML, '' when the download fails
     */
    private function getContent($page=1) {
        return $this->getUrlInfo($this->getUrl($this->key, $page));
    }

    /**
     * Count the result pages by following "next page" links from page 1.
     *
     * Counting stops at the first page without an active next-page link, at
     * a failed download, or at $maxPages.
     *
     * @return int Number of pages, at least 1
     */
    private function getPageNum() {
        $page = 1;
        while ($page < $this->maxPages) {
            $html = $this->getContent($page);
            if ($html === '' || !$this->hasNextPage($html)) {
                break;
            }
            $page++;
        }
        return $page;
    }

    /**
     * Tell whether the page offers a clickable "下一页" (next page) link.
     *
     * NOTE(review): the original markers for "has next page" and "last page"
     * were lost from the listing. This assumes an active link is the text
     * 下一页 inside an <a> element and a disabled one is not — verify against
     * the live page markup.
     *
     * @param string $html Search-result page HTML
     * @return bool
     */
    private function hasNextPage($html) {
        return preg_match('/<a\b[^>]*>(?:\s|<(?!\/a\b)[^>]*>)*下一页/i', $html) === 1;
    }

    /**
     * Build the search URL for a model number and page.
     *
     * The keyword is URL-encoded so model numbers containing "/", spaces or
     * "&" (for example "OM8373PS/N3/A") cannot corrupt the query string.
     *
     * @param string $str  Model / part number
     * @param int    $page 1-based page number
     * @return string
     */
    private function getUrl($str, $page=1) {
        $keyword = urlencode($str);
        $page = (int) $page;
        return "http://www.dzsc.com/ic/sell_search.html?keyword={$keyword}&ic_sel=supplygoods&Submit=%26%23160%3B&page={$page}";
    }

    /**
     * Download a URL and return its body.
     *
     * @param string $url
     * @return string Response body, '' when the download fails
     */
    private function getUrlInfo($url) {
        $re = file_get_contents($url);
        return $re === false ? '' : $re;
    }
}

/*
Program running idea: According to the IC search function of "Huaqiang Electronic Network", enter the model number to search, and then grab the supplier information

Database structure
CREATE TABLE `weiku` (
`id` mediumint(8) unsigned NOT NULL auto_increment COMMENT 'ID',
`company` varchar(300) default NULL COMMENT 'company name',
`person` varchar(200) default NULL COMMENT 'Contact',
`phone` varchar(300) default NULL COMMENT 'phone',
`fax` varchar(300) default NULL COMMENT 'Fax',
`mobile` varchar(300) default NULL COMMENT 'mobile phone',
`qq` varchar(200) default NULL COMMENT 'QQ',
`msn` varchar(200) default NULL COMMENT 'MSN',
`email` varchar(300) default NULL COMMENT 'email',
`address` varchar(500) default NULL COMMENT 'Company address',
`shopUrl` varchar(200) default NULL COMMENT 'Weiku.com store address',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=gb2312
*/

// Driver: search every model number once and store the suppliers found.
$k = new weiku();
// Entries are trimmed before de-duplication so a stray space (' STK392-040')
// neither defeats array_unique() nor ends up in the search keyword.
$arr = array_unique(array_map('trim', array('MAX3232', 'AML8613', 'MT6225A', 'OM8373PS/N3/A', 'PT7313', 'MAX8212ESA', 'TL431', 'S3C2440', 'TMS320F2812PGFA', 'PCM1704', 'AN6717', 'CA3162E', 'CA3161E', 'LM393N', 'DS18B20', 'SHT10', 'AML8613', 'AN6717', 'LM393N', 'CA3161E', 'CA3162E', 'PCM1704', ' STK392-040', 'K1667', 'MAX232', 'STM32F103', 'LM358')));
foreach ($arr as $v) {
    $k->go($v);
}
?>
/**
* Capture the supplier main program of "Weiku Electronic Market"
* author Lee.
* Last modify $Date: 2012-2-3 9:30:21 $
* Note: This program is executed according to the encoding GB2312, because the "Weiku Electronic Market" website is encoded in GB2312, and the database must also be consistent
*/
class weiku {
    /** @var string Model / part number currently being searched. */
    private $key;

    /** @var int Number of result pages detected for the current model. */
    private $pageNum;

    /** @var mysqli|null Connection opened on first use and reused for every query. */
    private $db = null;

    /** @var int Hard upper bound on result pages so page counting can never loop forever. */
    private $maxPages = 100;

    /**
     * Entry point: search one model number and store every supplier found.
     *
     * @param string $key Model / part number to search for
     * @return void
     */
    public function go($key) {
        $this->key = $key;
        $this->pageNum = $this->getPageNum();
        $this->getInfo();
    }

    /**
     * Walk every result page, collect the supplier shop links and store them.
     *
     * A single loop covers both the one-page and the multi-page case
     * (page 1 is simply the only iteration when $pageNum is 1).
     *
     * @return void
     */
    private function getInfo() {
        for ($page = 1; $page <= $this->pageNum; $page++) {
            $shopUrls = $this->shopUrlMatchReArr($this->getContent($page));
            $this->isAddSuccess($this->shopAddContact($shopUrls));
        }
    }

    /**
     * Scrape and store each contact page, printing the outcome per supplier.
     *
     * @param array $arr Contact-page URLs
     * @return void
     */
    private function isAddSuccess($arr) {
        foreach ($arr as $contactUrl) {
            if ($this->execAdd($this->getInfoByShopUrl($contactUrl))) {
                echo 'Add Success!!';
            } else {
                echo 'Add Faild!!';
            }
        }
    }

    /**
     * Insert one supplier record unless it is empty or already stored.
     *
     * Values come from scraped (untrusted) HTML, so they are bound as
     * parameters of a prepared statement instead of being pasted into SQL.
     *
     * @param array $infoArr Supplier fields as built by getInfoByShopUrl()
     * @return bool True when a row was inserted; false when the company name
     *              is empty, the company already exists, or the insert failed
     */
    private function execAdd($infoArr) {
        if (empty($infoArr['company'])) {
            return false;
        }

        $mysqli = $this->getDb();
        if ($this->isExists($mysqli, $infoArr)) {
            return false; // the company is already stored
        }

        $stmt = $mysqli->prepare(
            'INSERT INTO weiku(company,person,phone,fax,mobile,qq,msn,email,address,shopUrl) '
            . 'VALUES (?,?,?,?,?,?,?,?,?,?)'
        );
        if (!$stmt) {
            return false;
        }

        // Copy into a local row so every column has a string value to bind.
        $fields = array('company', 'person', 'phone', 'fax', 'mobile', 'qq', 'msn', 'email', 'address', 'shopUrl');
        $row = array();
        foreach ($fields as $field) {
            $row[$field] = isset($infoArr[$field]) ? (string) $infoArr[$field] : '';
        }

        $stmt->bind_param(
            'ssssssssss',
            $row['company'], $row['person'], $row['phone'], $row['fax'], $row['mobile'],
            $row['qq'], $row['msn'], $row['email'], $row['address'], $row['shopUrl']
        );
        $inserted = $stmt->execute();
        $stmt->close();

        return $inserted;
    }

    /**
     * Return the shared database connection, opening it on first use.
     *
     * NOTE(review): the credentials are hard-coded here; they should come
     * from configuration rather than from source code.
     *
     * @return mysqli
     * @throws RuntimeException When the connection cannot be established
     */
    private function getDb() {
        if ($this->db === null) {
            $mysqli = new mysqli('localhost', 'root', '1715544', 'weiku');
            if ($mysqli->connect_errno) {
                throw new RuntimeException('Database connection failed: ' . $mysqli->connect_error);
            }
            // The scraped site and the table are both GB2312 (see file header).
            $mysqli->set_charset('gb2312');
            $this->db = $mysqli;
        }
        return $this->db;
    }

    /**
     * Check whether a company with the same name is already stored.
     *
     * When the lookup itself cannot be run the method answers true, so a
     * failing lookup never leads to an insert.
     *
     * @param mysqli $mysqli  Open database connection
     * @param array  $infoArr Supplier fields; only 'company' is read
     * @return bool
     */
    private function isExists($mysqli, $infoArr) {
        $stmt = $mysqli->prepare('SELECT company FROM weiku WHERE company = ? LIMIT 1');
        if (!$stmt) {
            return true;
        }

        $company = (string) $infoArr['company'];
        $stmt->bind_param('s', $company);
        if (!$stmt->execute()) {
            $stmt->close();
            return true;
        }

        $stmt->store_result();
        $found = $stmt->num_rows > 0;
        $stmt->close();

        return $found;
    }

    /**
     * Download a supplier contact page and extract its contact fields.
     *
     * NOTE(review): every pattern ends directly after its lazy `(.*)` group,
     * which under the `U` modifier captures an empty string. The closing
     * markup of each pattern appears to have been lost (presumably HTML that
     * was stripped when this listing was published). The patterns are kept
     * verbatim and must be restored against the real page markup.
     *
     * @param string $url Contact-page URL
     * @return array Keys: company, person, phone, fax, mobile, qq, msn,
     *               email, address, shopUrl
     */
    private function getInfoByShopUrl($url) {
        $re = $this->getUrlInfo($url);

        $patterns = array(
            'company' => '/公司名称:(.*)/Usi',
            'person'  => '/联系人:(.*)/Usi',
            'phone'   => '/电话:(.*)/Usi',
            'fax'     => '/传真:(.*)/Usi',
            'mobile'  => '/手机:(.*)/Usi',
            'qq'      => '/QQ:(.*)/Usi',
            'msn'     => '/MSN:(.*)/Usi',
            'email'   => '/E-Mail:(.*)/Usi',
            'address' => '/公司地址:(.*)/Usi'
        );

        $raw = array();
        foreach ($patterns as $field => $pattern) {
            $raw[$field] = $this->firstMatch($pattern, $re);
        }

        return array(
            'company' => $this->stripATags($raw['company']),
            'person'  => trim($raw['person']),
            'phone'   => trim($raw['phone']),
            'fax'     => trim($raw['fax']),
            'mobile'  => trim($raw['mobile']),
            'qq'      => $this->formatQqMsn($raw['qq']),
            'msn'     => $this->formatQqMsn($raw['msn'], 'MSN'),
            'email'   => $this->stripATags($raw['email']),
            'address' => trim($raw['address']),
            'shopUrl' => $url
        );
    }

    /**
     * Return capture group 1 of the first match, or '' when nothing matches.
     *
     * @param string $pattern PCRE pattern containing one capture group
     * @param string $subject Text to search
     * @return string
     */
    private function firstMatch($pattern, $subject) {
        if (preg_match($pattern, $subject, $m) && isset($m[1])) {
            return $m[1];
        }
        return '';
    }

    /**
     * Reduce a search-result page to the list of supplier shop URLs.
     *
     * NOTE(review): the patterns below look like they lost their HTML markup
     * when this listing was published (several are empty or a bare `.*`).
     * As written, '/.*/' blanks every line of the page and the final pattern
     * has no capture group, so no URL is ever produced. They are kept
     * verbatim; restore them from the original program before relying on
     * this method.
     *
     * @param string $re HTML of one search-result page
     * @return array Shop URLs containing "http://", original keys preserved
     */
    private function shopUrlMatchReArr($re) {
        $re = preg_replace('//', '', $re);
        $re = preg_replace('//', '', $re);
        $re = preg_replace('/[A-Z]/', '', $re);
        $re = preg_replace('/[0-9]/', '', $re);
        $re = preg_replace('/.*/', '', $re);
        $re = preg_replace('/.*/', '', $re);
        $re = preg_replace('/营业执照/', '', $re);
        $re = preg_replace('/该企业更多资质>>/', '', $re);
        $re = preg_replace('/点此反馈/', '', $re);
        $re = preg_replace('/首页/', '', $re);
        $re = preg_replace('/IC/', '', $re);
        $re = preg_replace('/简洁/', '', $re);
        $re = preg_replace('/信用/', '', $re);
        $re = preg_replace('/.*更多报价信息>>/', '', $re);
        $re = preg_replace('/.*/', '', $re);
        $re = preg_replace('/

.*/', '', $re);
        preg_match_all('/.+/Usi', $re, $arr);

        // Group 1 only exists once the pattern above regains its capture group.
        $urls = isset($arr[1]) ? $arr[1] : array();

        return $this->formatUrlArr(array_unique($urls));
    }

    /**
     * Keep only the entries that contain "http://"; keys are preserved.
     *
     * @param array $arr Candidate URLs
     * @return array
     */
    private function formatUrlArr($arr) {
        return array_filter($arr, array($this, 'isExistsHttp'));
    }

    /**
     * Extract QQ or MSN account names from the alt="QQ:..." / alt="MSN:..."
     * attributes of a contact cell and join them with single spaces.
     *
     * @param string $str HTML fragment
     * @param string $e   Label to look for in the alt attribute ('QQ' or 'MSN')
     * @return string Space-separated accounts, '' when none are found
     */
    private function formatQqMsn($str, $e='QQ') {
        if (empty($str)) {
            return '';
        }
        preg_match_all('/alt="' . preg_quote($e, '/') . ':(.+)"/Usi', $str, $arr);
        return implode(' ', $arr[1]);
    }

    /**
     * Append "/contact.html" to every shop URL; keys are preserved.
     *
     * @param array $arr Shop URLs
     * @return array Contact-page URLs
     */
    private function shopAddContact($arr) {
        foreach ($arr as $k => $shopUrl) {
            $arr[$k] = $shopUrl . '/contact.html';
        }
        return $arr;
    }

    /**
     * Replace each <a ...>text</a> element with its inner text.
     *
     * NOTE(review): the published pattern was '/(.+)/' with replacement '1',
     * which turns any value into the literal "1". The anchor markup and the
     * backreference appear to have been stripped from the listing; this is a
     * reconstruction from the method's documented purpose — confirm against
     * the original program.
     *
     * @param string $site HTML fragment that may wrap its text in an <a> tag
     * @return string
     */
    private function stripATags($site) {
        return trim(preg_replace('/<a\b[^>]*>(.*)<\/a>/Usi', '$1', $site));
    }

    /**
     * Tell whether the string contains "http://" (case-insensitive).
     *
     * @param string $url
     * @return bool
     */
    private function isExistsHttp($url) {
        return stripos($url, 'http://') !== false;
    }

    /**
     * Download one search-result page for the current model.
     *
     * @param int $page 1-based page number
     * @return string Page HTML, '' when the download fails
     */
    private function getContent($page=1) {
        return $this->getUrlInfo($this->getUrl($this->key, $page));
    }

    /**
     * Count the result pages by following "next page" links from page 1.
     *
     * Counting stops at the first page without an active next-page link, at
     * a failed download, or at $maxPages.
     *
     * @return int Number of pages, at least 1
     */
    private function getPageNum() {
        $page = 1;
        while ($page < $this->maxPages) {
            $html = $this->getContent($page);
            if ($html === '' || !$this->hasNextPage($html)) {
                break;
            }
            $page++;
        }
        return $page;
    }

    /**
     * Tell whether the page offers a clickable "下一页" (next page) link.
     *
     * NOTE(review): the original markers for "has next page" and "last page"
     * were lost from the listing. This assumes an active link is the text
     * 下一页 inside an <a> element and a disabled one is not — verify against
     * the live page markup.
     *
     * @param string $html Search-result page HTML
     * @return bool
     */
    private function hasNextPage($html) {
        return preg_match('/<a\b[^>]*>(?:\s|<(?!\/a\b)[^>]*>)*下一页/i', $html) === 1;
    }

    /**
     * Build the search URL for a model number and page.
     *
     * The keyword is URL-encoded so model numbers containing "/", spaces or
     * "&" (for example "OM8373PS/N3/A") cannot corrupt the query string.
     *
     * @param string $str  Model / part number
     * @param int    $page 1-based page number
     * @return string
     */
    private function getUrl($str, $page=1) {
        $keyword = urlencode($str);
        $page = (int) $page;
        return "http://www.dzsc.com/ic/sell_search.html?keyword={$keyword}&ic_sel=supplygoods&Submit=%26%23160%3B&page={$page}";
    }

    /**
     * Download a URL and return its body.
     *
     * @param string $url
     * @return string Response body, '' when the download fails
     */
    private function getUrlInfo($url) {
        $re = file_get_contents($url);
        return $re === false ? '' : $re;
    }
}

/*
Program running idea: According to the IC search function of "Huaqiang Electronic Network", enter the model number to search, and then grab the supplier information

Database structure
CREATE TABLE `weiku` (
`id` mediumint(8) unsigned NOT NULL auto_increment COMMENT 'ID',
`company` varchar(300) default NULL COMMENT 'company name',
`person` varchar(200) default NULL COMMENT 'Contact',
`phone` varchar(300) default NULL COMMENT 'phone',
`fax` varchar(300) default NULL COMMENT 'Fax',
`mobile` varchar(300) default NULL COMMENT 'mobile phone',
`qq` varchar(200) default NULL COMMENT 'QQ',
`msn` varchar(200) default NULL COMMENT 'MSN',
`email` varchar(300) default NULL COMMENT 'email',
`address` varchar(500) default NULL COMMENT 'Company address',
`shopUrl` varchar(200) default NULL COMMENT 'Weiku.com store address',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=gb2312
*/

// Driver: search every model number once and store the suppliers found.
$k = new weiku();
// Entries are trimmed before de-duplication so a stray space (' STK392-040')
// neither defeats array_unique() nor ends up in the search keyword.
$arr = array_unique(array_map('trim', array('MAX3232', 'AML8613', 'MT6225A', 'OM8373PS/N3/A', 'PT7313', 'MAX8212ESA', 'TL431', 'S3C2440', 'TMS320F2812PGFA', 'PCM1704', 'AN6717', 'CA3162E', 'CA3161E', 'LM393N', 'DS18B20', 'SHT10', 'AML8613', 'AN6717', 'LM393N', 'CA3161E', 'CA3162E', 'PCM1704', ' STK392-040', 'K1667', 'MAX232', 'STM32F103', 'LM358')));
foreach ($arr as $v) {
    $k->go($v);
}
?>


Excerpted from Lee.’s column

Source: www.bkjia.com — http://www.bkjia.com/PHPjc/478400.html (TechArticle). Excerpt: <?php /** * Capture the supplier main program of "Weiku Electronic Market" * author Lee. * Last modify $Date: 2012-2-3 9:30:21 $ * Note: This program is executed according to the encoding GB2312, because the "Weiku Electronic Market" website...
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn