Home > Backend Development > PHP Tutorial > Capture the 'Global IC Procurement Network' supplier program_PHP tutorial

Capture the 'Global IC Procurement Network' supplier program_PHP tutorial

WBOY
WBOY · Original
2016-07-13 17:48:51 · 933 views

<?php
/**
 * Grab the "Global IC Procurement Network (http://www.qic.com.cn/)" supplier main program.
 *
 * Loads the project configuration and then declares the qic scraper class.
 * NOTE(review): config.inc.php presumably provides the Model class used by
 * the scraper - it is not declared anywhere in this file; confirm.
 *
 * author Lee.
 * Last modify $Date: 2012-2-7 09:35:21 $
 */
require_once './config.inc.php';
class qic {
    /** First supplier shop ID to fetch (inclusive). */
    private $startId;

    /** Last supplier shop ID to fetch (inclusive). */
    private $endId;

    /**
     * @param int $startId first shop ID to scrape; defaults to the range the script originally hard-coded
     * @param int $endId   last shop ID to scrape (inclusive)
     */
    public function __construct($startId = 27688, $endId = 55185) {
        $this->startId = (int) $startId;
        $this->endId = (int) $endId;
    }

    /**
     * Walk every shop ID in [startId, endId], scrape the supplier page and
     * store suppliers whose company name is not in the `qic` table yet.
     * Echoes one status string per processed supplier.
     */
    public function go() {
        for ($i = $this->startId; $i <= $this->endId; $i++) {
            $infoArr = $this->getInfoByUrl($this->getUrl($i));
            if (empty($infoArr['company'])) {
                // Nothing usable was scraped for this ID.
                continue;
            }

            $m = new Model();

            // The company name comes from remote markup, so it must not be
            // pasted into the WHERE fragment verbatim (a single quote would
            // break or alter the query).
            // NOTE(review): addslashes() is a stop-gap; if Model offers its own
            // escaping or parameter binding, use that instead - TODO confirm.
            $company = addslashes($infoArr['company']);
            if ($m->isExists('qic', "company='{$company}'")) {
                echo 'Data Exists!!';
                continue;
            }

            echo $this->addInfoInDB($m, $infoArr) ? 'Add Success!!' : 'Add Faild!!';
        }
    }

    /**
     * Insert one scraped supplier record into the `qic` table.
     *
     * @param object $m       object exposing insert($table, $columns, $values)
     * @param array  $infoArr record as produced by getInfoByUrl()
     * @return mixed whatever $m->insert() returns; go() treats it as a truthy/falsy success flag
     */
    private function addInfoInDB($m, $infoArr) {
        $columns = array('company', 'person', 'phone', 'mobile', 'fax', 'qq', 'msn', 'email', 'address', 'website', 'shopUrl');

        $values = array();
        foreach ($columns as $column) {
            $values[] = $infoArr[$column];
        }

        return $m->insert('qic', $columns, $values);
    }

    /**
     * Fetch a supplier page and extract its contact fields.
     *
     * Always returns the full set of keys; a field that cannot be found (or a
     * page that cannot be fetched) yields an empty string, so callers can rely
     * on empty($info['company']) to detect "nothing scraped".
     *
     * NOTE(review): the patterns below are reproduced exactly as found. They
     * look damaged by an HTML rendering step: the line breaks and bullet
     * characters are presumably where markup tags used to be, and `s*` was
     * presumably `\s*`. They must be restored against the live page markup
     * before this scraper can match anything - TODO confirm.
     *
     * @param string $url supplier page address
     * @return array map of column name => scraped value, plus 'shopUrl' => $url
     */
    private function getInfoByUrl($url) {
        $re = $this->getContent($url);
        if (!is_string($re)) {
            // Fetch failed; fall through with empty content so every field is ''.
            $re = '';
        }

        $patterns = array(
            'company' => '/

(.*)
/Usi',
            'person' => '/
  • s*联 系 人:(.*)
  • /Usi',
            'phone' => '/
  • s*电 话:(.*)
  • /Usi',
            'mobile' => '/
  • s*手 机:(.*)
  • /Usi',
            'fax' => '/
  • s*传 真:(.*)
  • /Usi',
            'qq' => '/
  • s*QQ:(.*)
  • /Usi',
            'msn' => '/
  • s*MSN:(.*)
  • /Usi',
            'email' => '/
  • s*邮 箱:(.*)
  • /Usi',
            'address' => '/公司地址:(.*)/Usi',
            'website' => '/公司网址:(.*)/Usi',
        );

        // Fields whose raw capture needs a non-default formatString() mode.
        $formats = array('qq' => 'qm', 'msn' => 'qm', 'website' => 'a');

        $infoArr = array();
        foreach ($patterns as $field => $pattern) {
            $matches = array();
            preg_match_all($pattern, $re, $matches);
            // First occurrence of capture group 1, or '' when nothing matched.
            $raw = isset($matches[1][0]) ? $matches[1][0] : '';
            $type = isset($formats[$field]) ? $formats[$field] : 'default';
            $infoArr[$field] = $this->formatString($raw, $type);
        }
        $infoArr['shopUrl'] = $url;

        return $infoArr;
    }

    /**
     * Get page content.
     *
     * @param string $url
     * @return string|false page body, or false when file_get_contents() fails
     */
    private function getContent($url) {
        return file_get_contents($url);
    }

    /**
     * Normalise a scraped value.
     *
     * @param string $str  raw captured text
     * @param string $type 'default' trims the value; 'qm' (used for QQ and MSN)
     *                     returns the text found between single quotes; 'a'
     *                     (used for the website field) see note in the body;
     *                     any other type yields ''
     * @return string
     */
    private function formatString($str, $type = 'default') {
        $str = trim((string) $str);

        switch ($type) {
            case 'default':
                return empty($str) ? '' : $str;

            case 'qm': // Handle QQ / MSN values
                if (empty($str)) {
                    return '';
                }
                $arr = array();
                preg_match_all("/'(.+)'/si", $str, $arr);
                // No quoted part present: report an empty value instead of
                // reading an undefined offset.
                return isset($arr[1][0]) ? trim($arr[1][0]) : '';

            case 'a':
                // The replacement is a backreference to group 1; as a bare '1'
                // it overwrote every value with the digit 1.
                // NOTE(review): the pattern presumably wrapped the group in
                // anchor-tag markup that has been lost; as written it leaves
                // the value unchanged - TODO restore against the real markup.
                return preg_replace('/(.+)/', '\1', $str);

            default:
                return '';
        }
    }

    /**
     * Build the supplier page address for a shop ID.
     *
     * @param int $shopId
     * @return string
     */
    private function getUrl($shopId) {
        return "http://www.qic.com.cn/specialstore/tsh_{$shopId}.html";
    }
}
         

     
    /*
     * Table structure
     CREATE TABLE `qic` (
        `id` mediumint(8) unsigned NOT NULL auto_increment,
        `company` varchar(500) NOT NULL,
        `person` varchar(500) NOT NULL,
        `phone` varchar(300) NOT NULL,
        `mobile` varchar(300) NOT NULL,
        `fax` varchar(300) NOT NULL,
        `qq` varchar(300) NOT NULL,
        `msn` varchar(500) NOT NULL,
        `email` varchar(500) NOT NULL,
        `address` varchar(500) NOT NULL,
        `website` varchar(500) NOT NULL,
        `shopUrl` varchar(200) NOT NULL,
        PRIMARY KEY  (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8
     * */ 
     
    // Script entry point: create the scraper with its default ID range and
    // run the full fetch-and-store loop.
    $q = new qic(); 
    $q->go(); 
    ?> 

    Excerpted from Lee.'s column

    www.bkjia.com | true | http://www.bkjia.com/PHPjc/478402.html | TechArticle | ?php /*** Capture the global IC procurement network (http://www.qic.com.cn/) supplier main program * author Lee. * Last modify $Date: 2012-2-7 09:35:21 $*/ require_once ./config.inc.php; class qic...
    Statement:
    The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn