Home  >  Article  >  Backend Development  >  National People's Congress copy data processing program_Input_PHP tutorial

Processing program for "人大复印资料" (Renda reprinted materials): input — PHP tutorial

WBOY
WBOYOriginal
2016-07-21 16:05:37 · 691 views

// Parses articles from the "人大复印资料" reprint collection into the database.
// (人大 presumably abbreviates Renmin University here rather than the National
// People's Congress — confirm.)
// Author: Kong Xiuxiang. Date: 2001/4/10
//
// The script dispatches on $position: the default branch below starts the
// session, rejects callers without $auth_passed, and is followed by the upload
// form; the "process" branch performs the import.
switch($position) {
default:
session_start();
// Refuse to continue unless $auth_passed is set (presumably a variable
// registered in the session by a login page — confirm).
if (!isset($auth_passed)) {
echo "This function is only authorized Only users can use it. ";
return -1;
}

// NOTE(review): session_name() is called after session_start(); per the PHP
// manual it has to be called before session_start() to affect the session, so
// here it most likely only changes what a later session_name() call returns.
if(isset($u_name)) {
session_name($u_name);

// NOTE(review): the lines below look like the remains of a commented-out
// greeting echo whose markup and backslashes were lost; as written they are
// not valid PHP. Compare with the original source before running this file.
//echo "

".session_name().": This program will serve you wholeheartedly.
n";
//echo "
";
}

?>

Article added to database
</HEAD>

ACTION= "" METHOD = "POST">







<TD rowspan=5>Query description








File Name<TD>
Index file name<INPUT NAME = "index_file" TYPE = "text" VALUE="" SIZE = "30">
Index Title
Article Category










break;
// "process" branch: set up counters, connect to MySQL, define the regular
// expressions used to pick fields out of each article record, then read the
// uploaded file and split it into records ($poem_array).
case "process":
session_start();
require "config.php3";

$UploadAction=0;
// NOTE(review): the trailing space and single backslashes look like extraction
// damage (perhaps "\\index\\" originally) — confirm.
$index_path="\index\ ";
$added=0; // Flag: the index file already exists.
$die=0; // Flag set when a record is rejected (see the import loop).
$data_exist=0; // Number of articles that were already in the database.
$data_insert=0; // Number of newly inserted articles.
$repeat=0; // Flag: the current article is a duplicate.
$TimeLimit=0; // Script time limit in seconds; the default is 30, and 0 means no limit.
set_time_limit($TimeLimit);

//$UploadPath = AddSlashes(dirname ($PATH_TRANSLATED));
$FileName = $UploadPath.$UploadFile_name; // Upload file name
//If(($UploadFile != "none")&&($UploadFile != ""))
// Abort via xueroom_error_exit() (defined outside this view) when no file was uploaded.
If(($UploadFile == "none")||($UploadFile == "")){
$page=$PHP_SELF;
xueroom_error_exit("The file name cannot be empty, or the file is not uploaded Success. ",$page);
}
//$file_data=file($UploadFile);
// Connection settings ($hostname, $dbusername, $dbpassword, $dbname) are
// presumably defined by config.php3.
$link=@MYSQL_CONNECT($hostname,$dbusername,$dbpassword) OR DIE("Cannot connect Database! ");
@mysql_select_db("$dbname") or die("Cannot select database!");

// Field patterns, one per labelled header line of a record.
// NOTE(review): these literals contain bare "n" and "s*" where "\n" and "\s*"
// would be expected, unescaped "[" / "]" around some labels, and English label
// text; they appear to have lost their backslashes and been machine-translated
// during extraction. Restore them from the original source before use.
$patterne="/[s*document number s*](.+) n/U"; // Document number
$patternf="/[Original source](.+)n/U"; // Original source
$patterng="/[Original issue number](. +)n/U"; // Original issue number
$patternh="/[Divided into s*category s*number](.+)n/U"; // Category number
$patternb=" /【Copy issue number】(.+)n/U"; // Reprint issue number

// Earlier versions of the title pattern, kept for reference:
//$patternc="/.*s*【Title】(.+)n【.*/ Us";

//$patternc="/[Title](.+)n[.*/Us";
// Title. Author's note (2001/3/13): the newline before the next bracket was
// added because in a word whose bytes are d0a1 bee4 the middle bytes a1be
// coincide with the internal code of the opening bracket character.
$patternc="/[s*subscript s*title s*](.+)n【.*/Us";
$patternd="/[s*authored by s* by s*](.+)n/U"; // Author
$patterna="/(.+【s* Positive s*text s*】)(.+)$/Us"; // Header up to the body label (group 1), then the body (group 2).
$pattern11="/[About the author](.+)[.*/Us"; // About the author
$pattern12="/[Abstract](.+)[.*/Us"; // Abstract



// Read the whole uploaded file into $data.
$fp_o=fopen("$UploadFile",'r');
$data=fread($fp_o,filesize($UploadFile)); // File contents as one string
fclose($fp_o);
// Split the file into article records.
// NOTE(review): the separator was presumably "/\n\s+\n\s+\n\s+/" before the
// backslashes were lost — confirm.
$poem_array=preg_split("/ns+ns+ns+/",$data);
// Patterns removed from titles by the import loop (trailing notes).
$replacement[0]="/(Note.*)$ /s"; // The word "Note" is required, otherwise "(upper/lower)" would be deleted too.
$replacement[1]="/(Note.*)ns*/s"; // For one article where a subtitle follows the note.
//echo$poem_array[1];
$s=sizeof($poem_array);
// Import loop: walk every record split out of the uploaded file, pull the
// header fields out with the $pattern* expressions, and insert the article
// into the `artical` table unless a row with the same resource number, author
// and category is already stored.
//
// Reads:  $poem_array, $s, $patterna..$patternh, $pattern12, $replacement, $catalog
// Writes: $data_exist / $data_insert (counters), $f_data (index lines, one per
//         article), $die (set to 1 when a record is rejected), $id
//
// NOTE(review): the legacy mysql_* API has no prepared statements, so values
// are escaped with addslashes() as elsewhere in this file. addslashes() is not
// charset-aware; move to mysqli/PDO prepared statements when the connection
// code is migrated.
if (!isset($f_data)) {
    $f_data = "";   // appended to below; make sure it starts out defined
}
for ($i = 0; $i < $s; $i++) {
    $t_data = $poem_array[$i];

    // $patterna splits a record into its header ($matchesa[1]) and body text
    // ($matchesa[2]); records that do not match are skipped silently.
    // PREG_SET_ORDER is a preg_match_all() flag and is rejected by
    // preg_match() in current PHP, so no flag is passed any more.
    if (!preg_match($patterna, $t_data, $matchesa)) {
        continue;
    }
    $artical_data = $matchesa[2];   // body text
    $data = $matchesa[1];           // article header

    // Header fields; a field that is not found becomes the empty string.
    $date_rep = preg_match($patternb, $data, $matchesb) ? $matchesb[1] : "";              // reprint issue number
    if (preg_match($patternc, $data, $matchesc)) {
        // title, with trailing notes removed
        $artical_title = preg_replace($replacement, "", trim($matchesc[1]));
    } else {
        $artical_title = "";
    }
    $artical_author    = preg_match($patternd, $data, $matchesd) ? trim($matchesd[1]) : "";  // author
    $resource_id       = preg_match($patterne, $data, $matchese) ? trim($matchese[1]) : "";  // Renda resource number
    $origin_periodical = preg_match($patternf, $data, $matchesf) ? trim($matchesf[1]) : "";  // source periodical
    $date_temp         = preg_match($patterng, $data, $matchesg) ? $matchesg[1] : "";        // publication date
    $artical_type      = preg_match($patternh, $data, $matchesh) ? trim($matchesh[1]) : "";  // category
    $content_brif      = preg_match($pattern12, $data, $matches12) ? $matches12[1] : "";     // abstract

    // A record without a title or resource number is reported and skipped.
    if ($artical_title == "" || $resource_id == "") {
        $a = $i + 1;
        echo "标题或人大编号为空,不合法。第 $a 篇没有加入数据库\n";
        $die = 1;
        continue;
    }

    // Build the publication timestamp from YYYYMM[DD]. The day is only present
    // when the original appeared in a newspaper; otherwise fall back to the
    // 1st. The date digits must form a valid date or MySQL stores zeros.
    if (strlen(trim(substr($date_temp, 6, 2))) != 0) {
        // Fixed: the separator between month and day was missing.
        $date_pub = substr($date_temp, 0, 4) . "-" . substr($date_temp, 4, 2) . "-" . substr($date_temp, 6, 2) . " 00:00:00";
    } else {
        $date_pub = substr($date_temp, 0, 4) . "-" . substr($date_temp, 4, 2) . "-01 01:01:01";
    }
    $date_reprinted = substr($date_rep, 0, 4) . "-" . substr($date_rep, 4, 2) . "-01 01:01:01";

    // The abstract, when present, is stored in front of the body.
    if ($content_brif) {
        $artical_data = $content_brif . " \r\n " . $artical_data;
    }
    // Measure before escaping so the stored size is not inflated by backslashes.
    $artical__size = strlen($artical_data);

    // Escape every value taken from the uploaded file. The raw values are kept
    // for the duplicate comparison and for the index text.
    $sql_resource_id       = addslashes($resource_id);
    $sql_origin_periodical = addslashes($origin_periodical);
    $sql_date_pub          = addslashes($date_pub);
    $sql_artical_author    = addslashes($artical_author);
    $sql_artical_title     = addslashes($artical_title);
    $sql_artical_data      = addslashes($artical_data);
    $sql_artical_type      = addslashes($artical_type);
    $sql_date_reprinted    = addslashes($date_reprinted);

    // NOTE(review): $catalog is inserted unquoted exactly as before; it is not
    // set in this block (presumably a numeric form field) — validate it where
    // it enters the script.
    $query = "INSERT INTO artical (resource_id, origin_periodical,date_pub, artical_author, artical_title, artical_data, artical_type,artical__size, date_reprinted,catalog) VALUES ('$sql_resource_id', '$sql_origin_periodical','$sql_date_pub', '$sql_artical_author', '$sql_artical_title', '$sql_artical_data', '$sql_artical_type','$artical__size', '$sql_date_reprinted',$catalog)";
    $q = "select artical_id,resource_id,artical_title,artical_author,artical_type from artical where resource_id='$sql_resource_id'";

    // Several rows may share one resource number and the real duplicate is not
    // necessarily the first one returned, so every candidate is checked
    // (author's note, 2001/3/16).
    $repeat = 0;
    $r = @mysql_query($q);
    if ($r) {
        while ($count = mysql_fetch_array($r)) {
            if ($artical_type == $count[4] && $artical_author == $count[3]) {
                $data_exist += 1;
                $id = $count[0];
                $title = $count[2];
                $author = $count[3];
                $repeat = 1;
                break;
            }
        }
    }

    if ($repeat) {
        // Concatenate explicitly: inside a double-quoted string "$title作者"
        // is parsed as one variable name, because bytes >= 0x80 are valid in
        // PHP identifiers.
        $f_data .= $title . "作者:" . $author . " \r\n";
        $repeat = 0;
    } else {
        $result = @mysql_query($query);
        if ($result) {
            $data_insert += 1;
            $id = mysql_insert_id();
            $f_data .= $artical_title . "作者:" . $artical_author . "\r\n";
        } else {
            // Insert failed: show the header of the offending record.
            echo $data;
            echo "数据写入失败\n";
        }
    }
}

// Wrap-up: restore the default time limit, close the database link, write the
// index entries for this upload through indexfile() (defined outside this
// view) and report the totals to the user.
//
// NOTE(review): several literals below appear to have lost their HTML markup
// in extraction (for example the link text echoed near the end). Only the
// literal "rn" sequences, which are stripped "\r\n" escapes, have been
// restored here; the markup itself must be recovered from the original source.
set_time_limit(30);
mysql_close($link);
$dte_created = date('Y-m-d H:i:s');
$total = $data_exist + $data_insert;

if ($index_title == "") {
    $index_title = "学而斋资料";
}
$index_title = $index_title . "_" . $data_search;

// Page text for direct output. It is not read again in the visible code (the
// branch that echoed it is disabled) but is still built as before.
$html_header = "$index_title";
$html_header .= "\n$index_title\n\r\n";
$html_header .= "$index_describe 发现了 $total 条/上载日期:$dte_created\n\r\n";
$html_header .= "\r\n";
$data = $html_header . $f_data;
$data .= "\n";

if (strlen($index_file) != 0) {
    $in_file = "$index_file";
    $index_file = $index_path . $in_file;
} else {
    // NOTE(review): $index_file itself stays empty on this path and is passed
    // to indexfile() as such — confirm that indexfile() handles that.
    $in_file = "temp.html";
}

// indexfile() presumably opens (or creates) the index file and returns a
// writable handle — confirm against its definition.
$fp = indexfile($index_file, $index_title, $index_describe);
fputs($fp, "发现了 $total 条");
fputs($fp, "查询日期:$dte_created\n\r\n");
fputs($fp, "\n\r\n");
fputs($fp, $f_data);
fputs($fp, "\n");
fclose($fp);
echo "文件提取";

// $added is initialised to 0 earlier in this branch; per the original comment
// it is a global flag for "the index file already existed" (presumably set
// inside indexfile()).
if ($added) {
    echo "索引文件" . $in_file . "已经存在,新索引已经加入到文件的最后。\n";
} else {
    // Register the new index file in the master index.
    $total_index = $index_path . "k_index.html";
    $file_link = "$index_describe\n\r\n";
    $fp_i = fopen($total_index, 'a+');
    if ($fp_i) {
        fputs($fp_i, $file_link);
        fclose($fp_i);
    } else {
        // fopen() returns false on failure; report it instead of writing to
        // an invalid handle.
        echo "无法打开总索引文件" . $total_index . "。\n";
    }
}
echo "点这里看新加入的文件索引\n";

if ($data_exist > 0) {
    echo "重复的文章共" . $data_exist . "篇。\n";
}
if ($data_insert > 0) {
    echo "新加入的文章共" . $data_insert . "篇。\n";
}
echo "上传文件" . $UploadFile_name . "共有文章" . $total . "篇。";
echo "\n返回 ";
echo "";
break;
  }
?> 

www.bkjia.com | true | http://www.bkjia.com/PHPjc/315649.html | TechArticle | ? //本程序是为解析人大复印资料的文章到数据库专用。 //编者:孔秀祥。日期:2001/4/10 switch($position) { default: session_start(); if (!isset($auth_pa...
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn