程序|网页
转载请联系作者。Email:pengwuwang@21cn.com 或 darlingpeng@sina.com
在工作中,经常需要对网页上的表格内容进行处理。但是,由于表格制作过程中的随意性,跨行、跨列的情况经常发生,所以我写了下面这几个函数,用来获取(任意跨行跨列的)表格内容。程序中重要的地方已加了注释,在此不再重复说明。经过测试,效果非常好,所以拿出来与大家共享。
//作者: 王朋武
//日期: 2005,3,31
//目的: 获取网页上(任意跨行跨列的)表格中的内容
//日期: 2005,3,31
//目的: 获取网页上(任意跨行跨列的)表格中的内容
function fun_proc_rowspan($l_str)
{
$l_a = explode("\n", $l_str);
for($i=count($l_a)-1; $i>=0; $i--) {
$l_str = trim($l_a[$i]);
if(empty($l_str)) continue;
$l_str = eregi_replace("
$l_b = explode("", $l_str); //如有n个,则分成(n+1)个组,最开始一项为空.
for($j=0; $j 如 +----+----+----+----+----+----+----+
+----+----+----+----+----+----+----+此一行保存7格, 前6格都有rowspan=2
+-----------------------------+----+此一行只保存2格
+----------------------------------+此一行只保存1格
*/
$l_str = trim($l_b[$j]);
if(eregi("rowspan", $l_str)) {
$rowspan = preg_replace("/^ $rowcont = preg_replace("/^(.+)/", "\\1", $l_str);
$l_a = fun_add_row_td($l_a, $i, $j, $rowspan, $rowcont);
$l_str = implode("\n", $l_a);
return $l_str;
}
}//end of for j
}//end of for i
return $l_str;
}
{
$l_a = explode("\n", $l_str);
for($i=count($l_a)-1; $i>=0; $i--) {
$l_str = trim($l_a[$i]);
if(empty($l_str)) continue;
$l_str = eregi_replace("
for($j=0; $j
+----+----+----+----+----+----+----+此一行保存7格, 前6格都有rowspan=2
+-----------------------------+----+此一行只保存2格
+----------------------------------+此一行只保存1格
*/
$l_str = trim($l_b[$j]);
if(eregi("rowspan", $l_str)) {
$rowspan = preg_replace("/^
$l_a = fun_add_row_td($l_a, $i, $j, $rowspan, $rowcont);
$l_str = implode("\n", $l_a);
return $l_str;
}
}//end of for j
}//end of for i
return $l_str;
}
function fun_add_row_td($l_a, $r, $l, $add_n, $add_cont) //l_a是数组, r是插入的开始行,l是开始的列, add_n是跨多少行, add_cont是加的内容
{
for($i=$r; $i if($i == $r) {
$l_str = eregi_replace("
$l_b = explode("", $l_str);
$l_c = "";
for($j=1; $j if($j == $l) $l_c .= eregi_replace("rowspan", "", $l_b[$j]); //去掉rowspan
else $l_c .= $l_b[$j];
}
$l_a[$i] = $l_c;
continue;
}
{
for($i=$r; $i if($i == $r) {
$l_str = eregi_replace("
$l_c = "";
for($j=1; $j
else $l_c .= $l_b[$j];
}
$l_a[$i] = $l_c;
continue;
}
$l_str = eregi_replace("
分隔符
$l_b = explode("", $l_str); //以分组
$l_c = "";
for($j=1; $j if($j == $l) {
$l_c .= "
".$add_cont." ";
}
$l_c .= $l_b[$j];
}//end of for j
$l_a[$i] = $l_c; //更新后的新内容
}//end of for i
return $l_a;
}
$l_b = explode("", $l_str); //以分组
$l_c = "";
for($j=1; $j if($j == $l) {
$l_c .= "
}
$l_c .= $l_b[$j];
}//end of for j
$l_a[$i] = $l_c; //更新后的新内容
}//end of for i
return $l_a;
}
function fun_proc_colspan($l_str)
{
$l_a = explode("\n", $l_str);
for($i=0; $i $l_str = trim($l_a[$i]);
if(empty($l_str)) continue;
{
$l_a = explode("\n", $l_str);
for($i=0; $i
if(empty($l_str)) continue;
$l_str = eregi_replace("
$l_b = explode("", $l_str); //如有n个,则分成(n+1)个组,最开始一项为空.
for($j=1; $j $l_str = trim($l_b[$j]);
if(eregi(" $colspan = preg_replace("/^ $colcont = preg_replace("/^(.+)/", "\\1", $l_str);
$l_a = fun_add_col_td($l_a, $i, $j, $colspan, $colcont);
$l_str = implode("\n", $l_a);
return $l_str;
}
}//end of for j
}//end of for i
return $l_str;
}
for($j=1; $j
if(eregi("
$l_a = fun_add_col_td($l_a, $i, $j, $colspan, $colcont);
$l_str = implode("\n", $l_a);
return $l_str;
}
}//end of for j
}//end of for i
return $l_str;
}
function fun_add_col_td($l_a, $r, $l, $add_n, $add_cont) //l_a是数组, r是插入的开始行,l是开始的列, add_n是跨多少列, add_cont是加的内容
{
$l_str = eregi_replace("
分隔符
$l_b = explode("", $l_str); //以分组
$l_c = "";
for($j=1; $j if($j == $l) {
$l_c .= eregi_replace("colspan", "", $l_b[$j]); //去掉colspan
continue;
}
if($j == $l+1) {
for($k=0; $k $l_c .= "
".$add_cont." ";
}
$l_c .= $l_b[$j];
}//end of for j
$l_a[$r] = $l_c; //更新后的新内容
{
$l_str = eregi_replace("
$l_b = explode("", $l_str); //以分组
$l_c = "";
for($j=1; $j if($j == $l) {
$l_c .= eregi_replace("colspan", "", $l_b[$j]); //去掉colspan
continue;
}
if($j == $l+1) {
for($k=0; $k $l_c .= "
}
$l_c .= $l_b[$j];
}//end of for j
$l_a[$r] = $l_c; //更新后的新内容
return $l_a;
}
}
$l_str = file_get_contents("test.htm"); //获取网页内容
$l_str = eregi_replace(".*
$l_str = eregi_replace(".*", "", $l_str);
$l_str = eregi_replace("\r", "", $l_str); //去掉\r
$l_str = eregi_replace("\n", "", $l_str); //去掉\n
$l_str = eregi_replace("", "\n", $l_str); //使表格的一行成为一行数据
$l_str = strip_tags($l_str, ""); //只保留
, HTML表记
$l_str = eregi_replace("\r", "", $l_str); //去掉\r
$l_str = eregi_replace("\n", "", $l_str); //去掉\n
$l_str = eregi_replace("", "\n", $l_str); //使表格的一行成为一行数据
$l_str = strip_tags($l_str, "
while(eregi(" while(eregi(" /////至此,表格处理已完成.
$l_str = eregi_replace("", "
", $l_str); //给每一格之间加上标记
$l_str = strip_tags($l_str, "
");
", $l_str); //给每一格之间加上标记
$l_str = strip_tags($l_str, "
");
$l_a = explode("\n", $l_str);
for($i=0; $i $l_str = trim($l_a[$i]);
if(empty($l_str)) continue;
$l_b = explode("
", $l_str);
foreach($l_b as $val) {
echo $val." "; //表格每列内容间加空格
}
echo "
"; //表格的一行显示为一行
}
for($i=0; $i
if(empty($l_str)) continue;
$l_b = explode("
", $l_str);
foreach($l_b as $val) {
echo $val." "; //表格每列内容间加空格
}
echo "
"; //表格的一行显示为一行
}
//附测试用例和测试结果:
/***********测试用表格*************************************************************
1 | 2 | 3 | 4 | 5 | 6 | 7 |
a1 | 22 | a2 | a3 | |||
b1 | b2 | b3 | b4 | 33 | ||
c1 | c2 | c3 | c4 | 44 | ||
d1 | d2 | d3 | d4 | |||
e1 | e2 | e3 | e4 | e5 | ||
f1 | f2 | f3 | ||||
g1 | g2 | |||||
h1 | ||||||
i1 | i2 | i3 | i4 | i5 | ||
j1 | j2 | j3 | j4 | |||
k1 | k2 | |||||
l1 | l2 | |||||
m1 | m2 | m3 |
*********************************************************************************/
/**************
Statement
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

AI Hentai Generator
Generate AI Hentai for free.

Hot Article
R.E.P.O. Energy Crystals Explained and What They Do (Yellow Crystal)
3 weeks agoBy尊渡假赌尊渡假赌尊渡假赌
R.E.P.O. Best Graphic Settings
3 weeks agoBy尊渡假赌尊渡假赌尊渡假赌
Assassin's Creed Shadows: Seashell Riddle Solution
2 weeks agoByDDD
R.E.P.O. How to Fix Audio if You Can't Hear Anyone
3 weeks agoBy尊渡假赌尊渡假赌尊渡假赌
WWE 2K25: How To Unlock Everything In MyRise
4 weeks agoBy尊渡假赌尊渡假赌尊渡假赌

Hot Tools

Atom editor mac version download
The most popular open source editor

ZendStudio 13.5.1 Mac
Powerful PHP integrated development environment

SublimeText3 Chinese version
Chinese version, very easy to use

WebStorm Mac version
Useful JavaScript development tools

VSCode Windows 64-bit Download
A free and powerful IDE editor launched by Microsoft