search
Home > php教程 > php手册 > PHP获取网页上任意表格中内容的通用程序的制作

程序|网页

  转载请联系作者:Email: pengwuwang@21cn.com   darlingpeng@sina.com

  在工作中,经常需要对网页上的表格内容进行处理。但由于表格制作过程中的随意性,跨行、跨列的情况经常出现,所以我编写了下面这几个函数来获取表格的内容。程序中重要的地方已加了注释,在此不再重复说明。经过测试,效果很好,所以拿出来与大家共享。
//作者: 王朋武
//日期: 2005,3,31
//目的: 获取网页上(任意跨行跨列的)表格中的内容
/**
 * Expand the first rowspan cell found in a flattened table.
 *
 * The table is expected as one row per "\n"-separated line, each line being a
 * run of <td ...>...</td> cells. Rows are scanned from the last to the first
 * and cells within a row from left to right. The first cell whose opening tag
 * carries a numeric rowspan attribute is handed to fun_add_row_td(), which
 * copies its content into the rows it spans. Only one cell is expanded per
 * call, so callers repeat the call until the string stops changing.
 *
 * @param string $l_str Flattened table markup.
 * @return string The table with one rowspan cell expanded, or $l_str unchanged
 *                when no opening tag carries a rowspan attribute.
 */
function fun_proc_rowspan($l_str)
{
    $rows = explode("\n", $l_str);

    for ($i = count($rows) - 1; $i >= 0; $i--) {
        if (trim($rows[$i]) === '') {
            continue;
        }

        // Splitting on "<td" leaves whatever precedes the first cell in slot 0,
        // so real cells are numbered from 1.
        $cells = preg_split('/<td\b/i', $rows[$i]);
        for ($j = 1, $n = count($cells); $j < $n; $j++) {
            // Look for the attribute inside the opening tag only (before the
            // first ">"), so cell text that merely mentions "rowspan" is ignored.
            if (!preg_match('/^[^>]*\srowspan\s*=\s*["\']?\s*(\d+)/i', $cells[$j], $span)) {
                continue;
            }

            // Cell content is whatever sits between the opening tag and </td>
            // (or the end of the piece when the closing tag is missing).
            $content = preg_match('/^[^>]*>(.*?)(?:<\/td\s*>|$)/is', $cells[$j], $body)
                ? $body[1]
                : '';

            $rows = fun_add_row_td($rows, $i, $j, (int) $span[1], $content);

            return implode("\n", $rows);
        }
    }

    // Nothing to expand: hand back the table itself, not a leftover row.
    return $l_str;
}
/**
 * Copy the content of one rowspan cell into the rows it spans.
 *
 * In the starting row the rowspan attribute is removed from the cell. In each
 * following spanned row a plain <td>content</td> cell is inserted at the same
 * position; when that row has fewer cells than the position, the new cell is
 * appended so the content is never dropped.
 *
 * @param array  $l_a      Table rows, each a string of <td ...>...</td> cells.
 * @param int    $r        Index in $l_a of the row holding the rowspan cell.
 * @param int    $l        1-based position of that cell within its row.
 * @param int    $add_n    Number of rows the cell spans (its rowspan value).
 * @param string $add_cont Cell content to copy into the spanned rows.
 * @return array The updated rows.
 */
function fun_add_row_td($l_a, $r, $l, $add_n, $add_cont)
{
    $r = (int) $r;
    $l = (int) $l;

    // A rowspan below 1 must still lose its attribute, so treat it as 1; never
    // walk past the last row when the attribute overstates the table height.
    $stop = min($r + max(1, (int) $add_n), count($l_a));

    for ($i = $r; $i < $stop; $i++) {
        if (!isset($l_a[$i])) {
            continue;
        }

        // Split on "<td", discard any text before the first cell, then put the
        // "<td" back so every element is one complete cell.
        $cells = preg_split('/<td\b/i', (string) $l_a[$i]);
        array_shift($cells);
        foreach ($cells as $k => $rest) {
            $cells[$k] = '<td' . $rest;
        }

        if ($i === $r) {
            // Starting row: drop the rowspan attribute so the cell is not
            // picked up again on the next pass.
            if (isset($cells[$l - 1])) {
                $cells[$l - 1] = preg_replace(
                    '/^(<td\b[^>]*?)\s+rowspan\b(?:\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+))?/i',
                    '$1',
                    $cells[$l - 1],
                    1
                );
            }
        } else {
            // Spanned row: insert the copy at the same column, or append it
            // when the row is shorter than that.
            $at = max(0, min($l - 1, count($cells)));
            array_splice($cells, $at, 0, ['<td>' . $add_cont . '</td>']);
        }

        $l_a[$i] = implode('', $cells);
    }

    return $l_a;
}
/**
 * Expand the first colspan cell found in a flattened table.
 *
 * The table is expected as one row per "\n"-separated line, each line being a
 * run of <td ...>...</td> cells. Rows are scanned from the first to the last
 * and cells within a row from left to right. The first cell whose opening tag
 * carries a numeric colspan attribute is handed to fun_add_col_td(), which
 * repeats it across the columns it spans. Only one cell is expanded per call,
 * so callers repeat the call until the string stops changing.
 *
 * @param string $l_str Flattened table markup.
 * @return string The table with one colspan cell expanded, or $l_str unchanged
 *                when no opening tag carries a colspan attribute.
 */
function fun_proc_colspan($l_str)
{
    $rows = explode("\n", $l_str);

    foreach ($rows as $i => $row) {
        if (trim($row) === '') {
            continue;
        }

        // Splitting on "<td" leaves whatever precedes the first cell in slot 0,
        // so real cells are numbered from 1.
        $cells = preg_split('/<td\b/i', $row);
        for ($j = 1, $n = count($cells); $j < $n; $j++) {
            // Look for the attribute inside the opening tag only (before the
            // first ">"), so cell text that merely mentions "colspan" is ignored.
            if (!preg_match('/^[^>]*\scolspan\s*=\s*["\']?\s*(\d+)/i', $cells[$j], $span)) {
                continue;
            }

            // Cell content is whatever sits between the opening tag and </td>
            // (or the end of the piece when the closing tag is missing).
            $content = preg_match('/^[^>]*>(.*?)(?:<\/td\s*>|$)/is', $cells[$j], $body)
                ? $body[1]
                : '';

            $rows = fun_add_col_td($rows, $i, $j, (int) $span[1], $content);

            return implode("\n", $rows);
        }
    }

    // Nothing to expand: hand back the table itself, not a leftover row.
    return $l_str;
}
/**
 * Repeat one colspan cell across the columns it spans.
 *
 * The colspan attribute is removed from the cell, and ($add_n - 1) plain
 * <td>content</td> cells are inserted directly after it. This also works when
 * the spanning cell is the last cell of its row.
 *
 * @param array  $l_a      Table rows, each a string of <td ...>...</td> cells.
 * @param int    $r        Index in $l_a of the row holding the colspan cell.
 * @param int    $l        1-based position of that cell within its row.
 * @param int    $add_n    Number of columns the cell spans (its colspan value).
 * @param string $add_cont Cell content to repeat.
 * @return array The updated rows; unchanged when row $r does not exist.
 */
function fun_add_col_td($l_a, $r, $l, $add_n, $add_cont)
{
    if (!isset($l_a[$r])) {
        return $l_a;
    }

    $l = (int) $l;
    $copies = str_repeat('<td>' . $add_cont . '</td>', max(0, (int) $add_n - 1));

    // Split on "<td" and discard any text before the first cell; the "<td" is
    // put back as each cell is written out.
    $cells = preg_split('/<td\b/i', (string) $l_a[$r]);
    array_shift($cells);

    $row = '';
    foreach ($cells as $k => $rest) {
        $cell = '<td' . $rest;
        if ($k + 1 === $l) {
            // Drop the colspan attribute so the cell is not picked up again on
            // the next pass, then emit the copies for the spanned columns.
            $cell = preg_replace(
                '/^(<td\b[^>]*?)\s+colspan\b(?:\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+))?/i',
                '$1',
                $cell,
                1
            );
            $cell .= $copies;
        }
        $row .= $cell;
    }

    $l_a[$r] = $row;

    return $l_a;
}
// Driver: read test.htm, flatten one table into "\n"-separated rows of <td>
// cells, expand every rowspan/colspan, then print each row on its own line
// with the cells separated by spaces.
$l_str = file_get_contents("test.htm"); // fetch the page
if ($l_str === false) {
    // file_get_contents() has already raised a warning; carry on with nothing to print.
    $l_str = '';
}

// Keep only what lies between the last <table ...> tag and the </table> after it.
$tableStart = strripos($l_str, '<table');
if ($tableStart !== false) {
    $tagEnd = strpos($l_str, '>', $tableStart);
    $l_str = $tagEnd === false ? '' : (string) substr($l_str, $tagEnd + 1);
}
$tableEnd = stripos($l_str, '</table');
if ($tableEnd !== false) {
    $l_str = (string) substr($l_str, 0, $tableEnd);
}

$l_str = str_replace(["\r", "\n"], '', $l_str);       // drop the page's own line breaks
$l_str = preg_replace('/<\/tr\s*>/i', "\n", $l_str);  // one table row per line
$l_str = strip_tags($l_str, '<td>');                  // keep only <td> ... </td>

// Each call expands a single spanning cell, so repeat while any remain. Stop
// as soon as a call makes no progress so a malformed attribute cannot spin forever.
while (preg_match('/<td\b[^>]*\srowspan\s*=\s*["\']?\s*\d/i', $l_str)) {
    $expanded = fun_proc_rowspan($l_str);
    if ($expanded === $l_str) {
        break;
    }
    $l_str = $expanded;
}
while (preg_match('/<td\b[^>]*\scolspan\s*=\s*["\']?\s*\d/i', $l_str)) {
    $expanded = fun_proc_colspan($l_str);
    if ($expanded === $l_str) {
        break;
    }
    $l_str = $expanded;
}
// The table is fully expanded at this point.

$l_a = explode("\n", $l_str);
foreach ($l_a as $row) {
    $row = trim($row);
    if ($row === '') {
        continue;
    }

    // Every </td> ends one cell; strip_tags() removes the opening <td ...>.
    foreach (preg_split('/<\/td\s*>/i', $row) as $val) {
        echo strip_tags($val) . " "; // a space between the columns
    }
    echo "<br>"; // one table row per output line
}
//附测试用例和测试结果:
/***********测试用表格*************************************************************



Untitled Document



 
   
   
   
   
   
   
   
 
 
   
   
   
   
 
 
   
   
   
   
   
 
 
   
   
   
   
   
 
 
   
   
   
   
 
 
   
   
   
   
   
 
 
   
   
   
 
 
   
   
 
 
   
 
 
   
   
   
   
   
 
 
   
   
   
   
 
 
   
   
 
 
   
   
 
 
   
   
   
 
1 2 3 4 5 6 7
a1 22 a2 a3
b1 b2 b3 b4 33
c1 c2 c3 c4 44
d1 d2 d3 d4
e1 e2 e3 e4 e5
f1 f2 f3
g1 g2
h1
i1 i2 i3 i4 i5
j1 j2 j3 j4
k1 k2
l1 l2
m1 m2 m3


*********************************************************************************/
/**************
Statement
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot AI Tools

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Undress AI Tool

Undress AI Tool

Undress images for free

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Article

Hot Tools

Notepad++7.3.1

Notepad++7.3.1

Easy-to-use and free code editor

SecLists

SecLists

SecLists is the ultimate security tester's companion. It is a collection of various types of lists that are frequently used during security assessments, all in one place. SecLists helps make security testing more efficient and productive by conveniently providing all the lists a security tester might need. List types include usernames, passwords, URLs, fuzzing payloads, sensitive data patterns, web shells, and more. The tester can simply pull this repository onto a new test machine and he will have access to every type of list he needs.

MantisBT

MantisBT

Mantis is an easy-to-deploy web-based defect tracking tool designed to aid in product defect tracking. It requires PHP, MySQL and a web server. Check out our demo and hosting services.

ZendStudio 13.5.1 Mac

ZendStudio 13.5.1 Mac

Powerful PHP integrated development environment

SublimeText3 Chinese version

SublimeText3 Chinese version

Chinese version, very easy to use