Home  >  Article  >  Backend Development  >  PHP keyword Baidu ranking batch check_PHP tutorial

Batch-checking Baidu keyword rankings with PHP (PHP tutorial)

WBOY
WBOYOriginal
2016-07-20 11:09:38 · 1789 views






关键字百度排名批量查





/**
 * Echo the statistics snippet when the page is served from baiek.com.
 *
 * The snippet itself is an empty string in this listing, so the function
 * currently produces no visible output.
 *
 * @return void
 */
function tongji()
{
 // Superglobals are case-sensitive: it must be $_SERVER['HTTP_HOST'].
 // The host header is absent on the CLI, so fall back to an empty string.
 $myhost = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
 // Dots are escaped so that they only match a literal "."
 if (preg_match("/(www\\.)?baiek\\.com/i", $myhost, $myout))
 {
  echo '';
 }
}

/**
 * Format a single libxml error as one line of HTML-friendly text.
 *
 * @param object $error Error object exposing level, code, message, file and
 *                      line (as returned by libxml_get_errors()).
 * @return string The formatted message, prefixed with a line break and
 *                terminated by a newline.
 */
function libxml_display_error($error)
{
 $return = "<br/>\n";
 // Constant names are case-sensitive; the LIBXML_ERR_* constants are upper-case.
 switch ($error->level)
 {
  case LIBXML_ERR_WARNING:
   $return .= "warning $error->code: ";
   break;
  case LIBXML_ERR_ERROR:
   $return .= "error $error->code: ";
   break;
  case LIBXML_ERR_FATAL:
   $return .= "fatal error $error->code: ";
   break;
 }
 $return .= trim($error->message);
 if ($error->file)
 {
  $return .= " in $error->file";
 }
 // The property is "line"; the trailing "\n" terminates the message.
 $return .= " on line {$error->line}\n";
 return $return;
}

/**
 * Print every libxml error collected so far, then empty libxml's error buffer.
 *
 * @return void
 */
function libxml_display_errors()
{
 foreach (libxml_get_errors() as $xml_error)
 {
  $formatted = libxml_display_error($xml_error);
  print $formatted;
 }
 libxml_clear_errors();
}

// Path of the keyword list file, taken from the "kf" query-string parameter.
// NOTE(review): this path comes straight from the request and is passed to
// file_exists() and DOMDocument::load(); restrict it to a known directory if
// the script can be reached by untrusted users.
$kfname = '';
if (isset($_GET['kf']))
{
 $kfname = trim($_GET['kf']);
}
$version = '';
echo <<< eoth

eoth;
if (!file_exists($kfname) || !is_readable($kfname))
{
 tongji();
 exit('Please enter the correct Keyword list file (xml format, please refer to the readme file)! ');
}

// Collect libxml errors internally so they can be printed in our own format.
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->load($kfname);
if (!$doc->schemaValidate('kw.xsd'))
{
 print 'Error found in keyword list xml file! ';
 libxml_display_errors();
 tongji();
 exit;
}

// Extract the keyword list and corresponding domain name from the xml file.
// Resulting shape: $kwlist[group]['domain'] and $kwlist[group]['kword'][n],
// with both group and keyword indexes starting at 1.
$kwlist = array();
$g = 1;
$kgroups = $doc->getElementsByTagName("kgroup");
foreach ($kgroups as $kgroup)
{
 // DOM property names (nodeValue, firstChild) are case-sensitive.
 $kwlist[$g]['domain'] = trim($kgroup->getElementsByTagName("kdomain")->item(0)->nodeValue);
 // Always define 'kword' so a group without keywords can still be iterated.
 $kwlist[$g]['kword'] = array();
 $kwords = $kgroup->getElementsByTagName("kword");
 $w = 1;
 foreach ($kwords as $kword)
 {
  if ($kword->hasChildNodes())
  {
   $kwlist[$g]['kword'][$w] = trim($kword->firstChild->nodeValue);
  }
  // Empty <kword> elements still consume an index.
  $w++;
 }
 $g++;
}
?>


/**
 * Estimate the weight contributed by one search result.
 *
 * The score starts from a base weight looked up by the last digit of the
 * rank, is halved for every URL level beyond the first (host, path segments
 * and query parameters each count as one level), and is scaled by
 * 9 / 10^page, where a page holds 10 results.
 *
 * @param string $h URL of the search result.
 * @param int    $i Rank of the result.
 * @return string The score formatted by number_format() with one decimal.
 */
function get_dm_weight($h, $i)
{
 // Which page of the search results this rank falls on (10 results per page).
 $p = ceil($i / 10);

 // Base weight keyed by the last digit of the rank.
 $i_weight = array(1 => 52, 2 => 15, 3 => 10, 4 => 5, 5 => 5, 6 => 4, 7 => 1, 8 => 3, 9 => 2, 0 => 3);

 // Remove the protocol (such as http://) and the #fragment part of the url.
 // "~" is used as the delimiter so the "://" in the pattern needs no escaping.
 $h_str = preg_replace('~^.*?://(.*?)(#.*)?$~i', '$1', $h);

 // Split into "host/path" and the optional query string.
 $h_arr = explode("?", $h_str, 2);
 $h0_arr = explode("/", $h_arr[0]);

 // Number of host/path levels; a trailing "/" does not add a level.
 $level_l = count($h0_arr);
 if ($h0_arr[$level_l - 1] === '')
 {
  $level_l--;
 }

 // Number of query parameters (0 when the query string is missing or empty).
 $level_r = 0;
 if (isset($h_arr[1]) && $h_arr[1] != '')
 {
  $level_r = count(explode("&", $h_arr[1]));
 }

 $dw = $i_weight[$i % 10] * pow(0.5, ($level_l + $level_r - 1)) * 9 / pow(10, $p);
 return number_format($dw, 1);
}

/**
 * Fetch Baidu result pages for one keyword and print where a domain ranks.
 *
 * Requests up to $max_srh_page result pages (50 results per page via rn=50)
 * with Snoopy, scans each page with regular expressions, and echoes a row
 * fragment for every result whose URL matches $d. It also accumulates a
 * rough competition score through get_dm_weight() and records which ranks
 * are occupied by baidu.com URLs.
 *
 * NOTE(review): several preg_match()/preg_match_all() patterns further down
 * are incomplete in this listing (they end right after the opening "/"), so
 * the function cannot run as-is; restore them from the original script.
 *
 * @param string $d Domain to look for; it is interpolated into a regex unescaped.
 * @param string $k Keyword to search for (converted from UTF-8 to GBK below).
 * @return void Output is echoed directly.
 */
function fetch_baidu($d, $k)
{
 // Convert the keyword from UTF-8 to GBK and URL-encode it for the query string.
 $urlw = urlencode(iconv("utf-8","gbk//ignore",$k));
 $max_srh_page = 2; // Baidu returns 50 results per page; fetching 2 pages checks only the first 100 results. The maximum can be set to 16
 $baidu_ids = array(); // ranks occupied by Baidu's own sub-sites
 $baidu_mus = array(); // ranks occupied by high-quality sites such as the Baidu open platform
 $isrank = 0; // $isrank = 1: this keyword ranks for the current domain; $isrank = 0: it does not
 $all_count = 0; // test variable, used to confirm the matching rules do not miss any search result
 $dm_weight = 0; // looks at top-level, second-level, directory and inner pages in the results as a rough indicator of keyword competition; very inaccurate, for reference only
 for ($page_no = 1; $page_no <= $max_srh_page; $page_no++)
{
// Hard cap: never request more than 16 pages, whatever $max_srh_page says.
if ($page_no > 16) break;
  // NOTE(review): $fail_try is reset to 1 on every iteration, including the
  // retries triggered by $page_no-- below, so the "> 5" guard never fires and
  // a page that keeps failing is retried indefinitely.
  $fail_try = 1;
  $pn = ($page_no - 1) * 50;
  $url = "http://www.baidu.com/s?wd={$urlw}&pn={$pn}&rn=50";
  $snoopy = new snoopy;
//  $snoopy->proxy_host = "127.0.0.1"; // optional proxy IP for scraping, to avoid being blocked by Baidu for fetching too often
//  $snoopy->proxy_port = "80";   // port used by the proxy
  $snoopy->fetch($url);
  $contents = iconv("gbk","utf-8//ignore",$snoopy->results);
  unset($snoopy);
//  echo $contents;
  // The fetch counts as failed unless the page contains this marker text.
  if (!preg_match("/此内容系百度根据您的指令自动搜索的结果/i",$contents,$out))
  {
   if ($fail_try > 5)
   {
    continue;
   }
   else
   {
    $fail_try++;
    // Undo the loop increment so the same page is requested again after a 30 second pause.
    $page_no--;
    sleep(30);
    continue;
   }
  }
  // Promoted-listing counts are taken once per call, from the first page that was fetched successfully.
  if (!isset($ebaidu))
  {
   $ebaidu = array('lt' => 0, 'lb' => 0, 'r' => 0); // number of Baidu promoted listings, split into top-left, bottom-left and right side
   // NOTE(review): the patterns from here on are truncated in this listing; restore them before use.
   if (preg_match_all("/ {
$ebaidu['lt'] = count($out_lt[0]);
}
if (preg_match_all("/

/i", $contents, $out_lt))
   {
    $ebaidu['lt'] = count($out_lt[0]);
   }
   if (preg_match_all("/
/i", $contents, $out_lb))
   {
    $ebaidu['lb'] = count($out_lb[0]);
   }
   if (preg_match_all("/
)|(
))[s|s]*?(((.*?))|(
))/i", $contents, $out_mu))
  {
   foreach ($out_mu[0] as $om_key => $om_val)
   {
    $om_id1 = $out_mu[3][$om_key];
    $om_href1 = $out_mu[4][$om_key];
    $om_id2 = $out_mu[6][$om_key];
    $om_href2 = $out_mu[7][$om_key];
    $om_title = strip_tags($out_mu[10][$om_key]);
    $baidu_mus[] = $om_id1 . $om_id2;
    $dm_weight = $dm_weight + get_dm_weight($om_href1 . $om_href2, $om_id1 . $om_id2);
    if (preg_match("/://(w*?.)*?baidu.com//i", $om_href1 . $om_href2, $om_domain))
    {
     $baidu_ids[] = $om_id1 . $om_id2;
    }
    if (preg_match("/://(w*?.)*?{$d}//i", $om_href1 . $om_href2, $om_domain))
    {
     echo '
' . $k . '' . $om_id1 . $om_id2 . '' . $om_title . '';
     $isrank = 1;
    }
   }
  }
  if (preg_match_all("/

()?(.*?)

.*?.*? ((d{4}-d{1,2}-d{1,2})|(d+小时前)|(d+分钟前)) .*?.*?
/i", $contents, $out_all))
  {
   foreach ($out_all[0] as $o_key => $o_val)
   {
    $o_id = $out_all[1][$o_key];
    $o_href = $out_all[3][$o_key];
    $o_title = strip_tags($out_all[4][$o_key]);
    $o_cache = $out_all[6][$o_key] . $out_all[7][$o_key] . $out_all[8][$o_key];
    $dm_weight = $dm_weight + get_dm_weight($o_href, $o_id);
    if (preg_match("/://(w*?.)*?baidu.com//i", $o_href, $o_domain))
    {
     $baidu_ids[] = $o_id;
    }
    if (preg_match("/://(w*?.)*?{$d}//i", $o_href, $o_domain))
    {
     echo '
' . $k . '' . $o_id . '' . $dm_weight . '%' . $o_title . '' . $o_cache . '';
     $isrank = 1;
    }
   }
  }
  // $all_count checks whether the regexes above matched every search result, paying special attention to Baidu's own sites, the Baidu open platform and Baidu apps, which differ from ordinary results
  if (isset($out_mu[0])) $all_count = $all_count + count($out_mu[0]);
  if (isset($out_all[0])) $all_count = $all_count + count($out_all[0]);
//  echo '
总共找到' . $all_count . '个匹配
';
  if (!preg_match("/  {
   break;
  }
 }
 if (count($baidu_mus) >= 1)
 {
  $bmus = implode(",", $baidu_mus);
 }
 else
 {
  $bmus = '';
 }
 if (count($baidu_ids) >= 1)
 {
  $bids = implode(",", $baidu_ids);
 }
 else
 {
  $bids = '';
 }
 if ($isrank == 1)
 {
  echo '上' . $ebaidu['lt'] . '下' . $ebaidu['lb'] . '右' . $ebaidu['r'] . '' . $bids . '' . $bmus . '';
 }
 else
 {
  echo '
' . $k . '0' . $dm_weight . '%上' . $ebaidu['lt'] . '下' . $ebaidu['lb'] . '右' . $ebaidu['r'] . '' . $bids . '' . $bmus . '';
 }
 unset($ebaidu);
}
// Collect Baidu ranking information for every domain/keyword pair and display it.
set_include_path(".");
include "snoopy.class.php";
// Scraping sleeps between retries, so lift the execution time limit.
set_time_limit(0);
// Send out anything already buffered and switch to implicit flushing so each
// row appears as soon as it is echoed. ob_flush()/ob_end_clean() raise a
// notice when no output buffer is active, hence the ob_get_level() checks.
if (ob_get_level() > 0)
{
 ob_flush();
}
flush();
if (ob_get_level() > 0)
{
 ob_end_clean();
}
ob_implicit_flush(true);
echo '
';
echo '关键字排名竞争度网页标题百度快照百度推广数量百度占位百度mu占位';
foreach ($kwlist as $d_ks)
{
 $dm = $d_ks['domain'];
 echo '
域名:' . $dm . '';
 // A group may carry no keywords at all, in which case 'kword' is not set.
 $group_keywords = isset($d_ks['kword']) ? $d_ks['kword'] : array();
 foreach ($group_keywords as $k)
 {
  fetch_baidu($dm, $k);
 }
}
echo '
';
?>


// Call tongji() once more at the very end of the page.
tongji();
?>


www.bkjia.com | true | http://www.bkjia.com/PHPjc/444787.html | TechArticle | !doctype html public -//w3c//dtd xhtml 1.0 transitional//en http://www.w3.org/tr/xhtml1/dtd/xhtml1-transitional.dtd html xmlns=http://www.w3.org/1999/xhtml head meta http-equiv=con...
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn