Heim  >  Artikel  >  Backend-Entwicklung  >  php 关键字百度排名批量查_PHP教程

php 关键字百度排名批量查_PHP教程

WBOY
WBOYOriginal
2016-07-20 11:09:38 · 1784 Aufrufe






关键字百度排名批量查





/**
 * Emit the statistics snippet when the script is served from baiek.com.
 *
 * Reads the request host from $_SERVER['HTTP_HOST'] (treated as empty when it
 * is not set, e.g. on the command line) and echoes the snippet only when the
 * host contains "baiek.com", optionally prefixed by "www.".
 *
 * NOTE(review): the echoed string is empty in this copy of the script; it
 * looks like markup was stripped from it — confirm against the original.
 *
 * @return void
 */
function tongji()
{
    // Superglobal names and their keys are case-sensitive.
    $myhost = isset($_SERVER['HTTP_HOST']) ? (string) $_SERVER['HTTP_HOST'] : '';

    // Dots are escaped so they match a literal "." rather than any character.
    if (preg_match('/(www\.)?baiek\.com/i', $myhost)) {
        echo '';
    }
}

/**
 * Format a single libxml error as one line of text.
 *
 * The line consists of a severity label with the error code, the trimmed
 * message, the file name when one is present, and the line number.
 *
 * @param object $error Error object exposing level, code, message, file and
 *                      line (as returned by libxml_get_errors()).
 * @return string The formatted message, prefixed with "<br/>\n" and ending
 *                with "\n".
 */
function libxml_display_error($error)
{
    // NOTE(review): the prefix in this copy was a raw line break followed by a
    // stray "n", i.e. a stripped tag plus an escape that lost its backslash.
    // "<br/>\n" is the form used by the PHP manual example this mirrors.
    $return = "<br/>\n";

    // The LIBXML_ERR_* constants are case-sensitive and must be upper case.
    switch ($error->level) {
        case LIBXML_ERR_WARNING:
            $return .= "warning $error->code: ";
            break;
        case LIBXML_ERR_ERROR:
            $return .= "error $error->code: ";
            break;
        case LIBXML_ERR_FATAL:
            $return .= "fatal error $error->code: ";
            break;
    }

    $return .= trim($error->message);
    if ($error->file) {
        $return .= " in $error->file";
    }

    // Braces keep the property name separate from the trailing newline escape.
    $return .= " on line {$error->line}\n";

    return $return;
}

/**
 * Print every error currently held in the libxml error buffer, then empty it.
 *
 * Each error is formatted by libxml_display_error() and written to output in
 * the order libxml_get_errors() returns them.
 *
 * @return void
 */
function libxml_display_errors()
{
    foreach (libxml_get_errors() as $libxmlError) {
        echo libxml_display_error($libxmlError);
    }

    // Reset the buffer so the same errors are not reported twice.
    libxml_clear_errors();
}

// Path of the keyword-list XML file, taken from the "kf" query parameter.
// Superglobal names are case-sensitive; a non-string value (e.g. kf[]=x) is
// ignored so trim() never receives an array.
// NOTE(review): this is a caller-supplied path that is later opened by
// DOMDocument::load(); restrict it to a known directory if the script is
// reachable by untrusted users.
$kfname = '';
if (isset($_GET['kf']) && is_string($_GET['kf'])) {
    $kfname = trim($_GET['kf']);
}
$version = '';

// NOTE(review): this was a heredoc whose body (apparently the page header
// markup) is missing from this copy; only the lowercased closing marker
// survived. An empty heredoc keeps the script valid until the markup is restored.
echo <<<EOTH

EOTH;

// is_file() also rejects directories, which file_exists() would have accepted.
if (!is_file($kfname) || !is_readable($kfname)) {
    tongji();
    exit('请输入正确的关键字清单文件(xml格式,请参考readme文件)!!');
}

// Collect libxml diagnostics internally so they can be printed in one place.
libxml_use_internal_errors(true);
$doc = new DOMDocument();

// A document that fails to load is reported the same way as one that fails
// schema validation against kw.xsd.
if (!$doc->load($kfname) || !$doc->schemaValidate('kw.xsd')) {
    print '关键字列表xml文件发现错误!';
    libxml_display_errors();
    tongji();
    exit;
}

// Extract the keyword list and its domain for every <kgroup> in the XML file.
// Resulting shape: $kwlist[group]['domain'] is a string and
// $kwlist[group]['kword'][n] holds the keywords; both indexes start at 1.
$kwlist = array();
$g = 1;
$kgroups = $doc->getElementsByTagName('kgroup');
foreach ($kgroups as $kgroup) {
    // DOM property names (nodeValue, firstChild) are case-sensitive.
    $kwlist[$g] = array(
        'domain' => trim($kgroup->getElementsByTagName('kdomain')->item(0)->nodeValue),
        // Always present, so a group without keywords can still be iterated.
        'kword' => array(),
    );

    $kwords = $kgroup->getElementsByTagName('kword');
    $w = 1;
    foreach ($kwords as $kword) {
        if ($kword->hasChildNodes()) {
            $kwlist[$g]['kword'][$w] = trim($kword->firstChild->nodeValue);
        }
        // The index advances for empty <kword> elements too, so keys may have gaps.
        $w++;
    }
    $g++;
}
?>

<?php
/**
 * Compute the competition weight contributed by one search result.
 *
 * The weight starts from a per-position base value, is halved for every
 * additional URL path level and query parameter, and is divided by ten for
 * every result page (ten results per page).
 *
 * @param string     $h URL of the search result.
 * @param int|string $i Rank of the result; non-numeric input is treated as 0.
 * @return string The weight formatted by number_format() with one decimal.
 */
function get_dm_weight($h, $i)
{
    // Ranks arrive as regex captures (strings); cast once so the modulo and
    // division below never operate on a non-numeric string.
    $rank = (int) $i;

    // Result page the rank falls on.
    $page = ceil($rank / 10);

    // Base weight by position within a page, keyed by rank modulo 10
    // (key 0 is the tenth position).
    $positionWeights = array(1 => 52, 2 => 15, 3 => 10, 4 => 5, 5 => 5, 6 => 4, 7 => 1, 8 => 3, 9 => 2, 0 => 3);

    // Drop the scheme (e.g. "http://") and any "#fragment". "~" is the
    // delimiter because the pattern contains both "/" and "#".
    $stripped = preg_replace('~^.*?://(.*?)(#.*)?$~i', '$1', (string) $h);

    $parts = explode('?', $stripped, 2);

    // Path depth: number of "/"-separated segments, ignoring a trailing slash.
    $segments = explode('/', $parts[0]);
    $pathDepth = count($segments);
    if ($segments[$pathDepth - 1] === '') {
        $pathDepth--;
    }

    // Query depth: number of "&"-separated parameters, 0 when there is no query.
    $queryDepth = 0;
    if (isset($parts[1]) && $parts[1] !== '') {
        $queryDepth = count(explode('&', $parts[1]));
    }

    $dw = $positionWeights[$rank % 10] * pow(0.5, $pathDepth + $queryDepth - 1) * 9 / pow(10, $page);

    return number_format($dw, 1);
}

/**
 * Query Baidu for one keyword and echo ranking rows for one domain.
 *
 * For each result page the function builds a search URL (keyword converted
 * from utf-8 to gbk and URL-encoded, offset "pn", "rn=50"), fetches it through
 * a snoopy instance, converts the response from gbk to utf-8 and scans it with
 * regular expressions. Results whose URL matches $d are echoed as rows, results
 * on baidu.com hosts have their positions recorded, and get_dm_weight() is
 * accumulated over all matched results. A summary is echoed after the loop.
 * The function has no return statement; all output is echoed.
 *
 * NOTE(review): in this copy the "for" header, several preg_match_all()
 * patterns and the final preg_match() pattern are truncated (it looks like
 * markup was stripped from them), so the function does not parse as-is. The
 * missing patterns cannot be recovered from this file.
 *
 * NOTE(review): $d is interpolated into a regular expression without
 * escaping; $ebaidu is only assigned once a page passes the marker check but
 * is read unconditionally after the loop — confirm both against the original.
 *
 * @param string $d Domain to look for in the result URLs.
 * @param string $k Keyword to search for (expected in utf-8).
 */
function fetch_baidu($d, $k)
{
 $urlw = urlencode(iconv("utf-8","gbk//ignore",$k));
 $max_srh_page = 2; // Baidu is queried at 50 results per page; 2 pages are fetched, so the rank is only checked within the first 100 results; may be raised to at most 16
 $baidu_ids = array(); // rank positions occupied by Baidu-family sub-sites
 $baidu_mus = array(); // rank positions occupied by premium sites such as the Baidu Open Platform
 $isrank = 0; // $isrank = 1: this keyword ranks for the current domain; $isrank = 0: it does not
 $all_count = 0; // test variable used to confirm the matching rules do not miss any search result
 $dm_weight = 0; // looks at top-level, second-level, directory and inner pages in the results to roughly reflect how competitive a keyword is; very inaccurate, for reference only
 // NOTE(review): the loop header below is truncated; it presumably bounded
 // $page_no by $max_srh_page and incremented it — confirm against the original.
 for ($page_no = 1; $page_no  {
  if ($page_no > 16) break;
  // NOTE(review): the retry counter is reset on every iteration, including
  // retries, so the "> 5" give-up branch below looks unreachable.
  $fail_try = 1;
  // Result offset: 50 results per page.
  $pn = ($page_no - 1) * 50;
  $url = "http://www.baidu.com/s?wd={$urlw}&pn={$pn}&rn=50";
  $snoopy = new snoopy;
//  $snoopy->proxy_host = "127.0.0.1"; // optional proxy IP for scraping, to avoid being blocked by Baidu for fetching too often
//  $snoopy->proxy_port = "80";   // port used by the proxy
  $snoopy->fetch($url);
  $contents = iconv("gbk","utf-8//ignore",$snoopy->results);
  unset($snoopy);
//  echo $contents;
  // The page is only processed when it contains this marker text; otherwise
  // the same page is retried after a 30-second pause.
  if (!preg_match("/此内容系百度根据您的指令自动搜索的结果/i",$contents,$out))
  {
   if ($fail_try > 5)
   {
    continue;
   }
   else
   {
    $fail_try++;
    $page_no--;
    sleep(30);
    continue;
   }
  }
  // The promoted-result counts are taken from the first page that passes the check.
  if (!isset($ebaidu))
  {
   $ebaidu = array('lt' => 0, 'lb' => 0, 'r' => 0); // number of Baidu promoted results, split into left-top, left-bottom and right
   if (preg_match_all("/    {
    $ebaidu['lt'] = count($out_lt[0]);
   }
   if (preg_match_all("/

/i", $contents, $out_lt))
   {
    $ebaidu['lt'] = count($out_lt[0]);
   }
   if (preg_match_all("/
/i", $contents, $out_lb))
   {
    $ebaidu['lb'] = count($out_lb[0]);
   }
   if (preg_match_all("/
   {
    $ebaidu['r'] = count($out_r[0]);
   }
  }
  if (preg_match_all("/((
)|(
))[s|s]*?(((.*?))|(
))/i", $contents, $out_mu))
  {
   foreach ($out_mu[0] as $om_key => $om_val)
   {
    $om_id1 = $out_mu[3][$om_key];
    $om_href1 = $out_mu[4][$om_key];
    $om_id2 = $out_mu[6][$om_key];
    $om_href2 = $out_mu[7][$om_key];
    $om_title = strip_tags($out_mu[10][$om_key]);
    $baidu_mus[] = $om_id1 . $om_id2;
    $dm_weight = $dm_weight + get_dm_weight($om_href1 . $om_href2, $om_id1 . $om_id2);
    if (preg_match("/://(w*?.)*?baidu.com//i", $om_href1 . $om_href2, $om_domain))
    {
     $baidu_ids[] = $om_id1 . $om_id2;
    }
    if (preg_match("/://(w*?.)*?{$d}//i", $om_href1 . $om_href2, $om_domain))
    {
     echo '
' . $k . '' . $om_id1 . $om_id2 . '' . $om_title . '';
     $isrank = 1;
    }
   }
  }
  if (preg_match_all("/

()?(.*?)

.*?.*? ((d{4}-d{1,2}-d{1,2})|(d+小时前)|(d+分钟前)) .*?.*?
/i", $contents, $out_all))
  {
   foreach ($out_all[0] as $o_key => $o_val)
   {
    $o_id = $out_all[1][$o_key];
    $o_href = $out_all[3][$o_key];
    $o_title = strip_tags($out_all[4][$o_key]);
    $o_cache = $out_all[6][$o_key] . $out_all[7][$o_key] . $out_all[8][$o_key];
    $dm_weight = $dm_weight + get_dm_weight($o_href, $o_id);
    if (preg_match("/://(w*?.)*?baidu.com//i", $o_href, $o_domain))
    {
     $baidu_ids[] = $o_id;
    }
    if (preg_match("/://(w*?.)*?{$d}//i", $o_href, $o_domain))
    {
     echo '
' . $k . '' . $o_id . '' . $dm_weight . '%' . $o_title . '' . $o_cache . '';
     $isrank = 1;
    }
   }
  }
  // $all_count checks whether the regexes above matched every search result, paying special attention to Baidu-family sites, the Baidu Open Platform and Baidu apps, which differ from ordinary results
  if (isset($out_mu[0])) $all_count = $all_count + count($out_mu[0]);
  if (isset($out_all[0])) $all_count = $all_count + count($out_all[0]);
//  echo '
总共找到' . $all_count . '个匹配
';
  if (!preg_match("/  {
   break;
  }
 }
 if (count($baidu_mus) >= 1)
 {
  $bmus = implode(",", $baidu_mus);
 }
 else
 {
  $bmus = '';
 }
 if (count($baidu_ids) >= 1)
 {
  $bids = implode(",", $baidu_ids);
 }
 else
 {
  $bids = '';
 }
 if ($isrank == 1)
 {
  echo '上' . $ebaidu['lt'] . '下' . $ebaidu['lb'] . '右' . $ebaidu['r'] . '' . $bids . '' . $bmus . '';
 }
 else
 {
  echo '
' . $k . '0' . $dm_weight . '%上' . $ebaidu['lt'] . '下' . $ebaidu['lb'] . '右' . $ebaidu['r'] . '' . $bids . '' . $bmus . '';
 }
 unset($ebaidu);
}
// Collect and display Baidu ranking information for every domain and keyword.
set_include_path(".");
// require_once stops immediately with a clear error if the class file is
// missing, rather than failing later when the class is first used.
require_once "snoopy.class.php";
set_time_limit(0);

// Push any buffered output to the client and switch to unbuffered output so
// rows appear as they are produced. The ob_* calls are guarded because they
// raise a notice when no output buffer is active.
if (ob_get_level() > 0) {
    ob_flush();
}
flush();
if (ob_get_level() > 0) {
    ob_end_clean();
}
ob_implicit_flush(true);

// NOTE(review): the echoed strings below look like they lost their table
// markup in this copy; they are kept exactly as found.
echo '
';
echo '关键字排名竞争度网页标题百度快照百度推广数量百度占位百度mu占位';
foreach ($kwlist as $d_ks) {
    $dm = $d_ks['domain'];
    echo '
域名:' . $dm . '';

    // A group without any keyword has nothing to query.
    if (empty($d_ks['kword'])) {
        continue;
    }
    foreach ($d_ks['kword'] as $k) {
        fetch_baidu($dm, $k);
    }
}
echo '
';
?>


<?php
// The opening tag above was missing in this copy, which left the call below
// outside PHP mode where it would have been emitted as literal text.
tongji();
?>


www.bkjia.comtruehttp://www.bkjia.com/PHPjc/444787.htmlTechArticle!doctype html public -//w3c//dtd xhtml 1.0 transitional//en http://www.w3.org/tr/xhtml1/dtd/xhtml1-transitional.dtd html xmlns=http://www.w3.org/1999/xhtml head meta http-equiv=con...

Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn
Vorheriger Artikel: php初学篇-php文件上传教程_PHP教程
Nächster Artikel: php 多风格的分页类(支持ajax分页)_PHP教程

In Verbindung stehende Artikel

Mehr sehen