ホームページ >バックエンド開発 >PHPチュートリアル >パスに中国語が含まれている場合、PHP の解析が失敗する

パスに中国語が含まれている場合、PHP の解析が失敗する

WBOY
WBOYオリジナル
2016-06-13 12:09:101045ブラウズ

路径中包含中文时,PHP解析失败

<br /><?php  <br />header("Content-Type: text/html; charset=utf-8");<br />//static $count=0;<br /><br />//抓包,识别ttx扩展名的文件,提取前100个字节,并插入数据库<br />function read100Bytes(&$extAndDir)<br />{<br />	$conn=new mysqli("localhost","root","cai123","test");<br />	if(mysqli_connect_errno())<br />	{<br />		die(mysqli_connect_error());<br />	}<br />	$conn->query("set names utf8");<br />	$sql="insert into spiderTXT(file_name,file_link,file_100_byte) values(?,?,?)";<br />	$stmt=$conn->prepare($sql);<br />	$stmt->bind_param("sss",$file_name,$file_link,$file_100_byte);<br />	<br />	<br />	$textFile=$extAndDir["text"];<br /><br />		foreach($textFile as $link)<br />		{<br />			if(strtolower(pathinfo($link)["extension"])=="txt")<br />			{<br />				if(file_exists($link) && is_readable($link))<br />				{<br />						$fd=fopen($link,"r") or die("打开文件失败".__line__);<br />						$content=fread($fd,100);<br />						//return $content;<br />						$file_name=pathinfo($link)["basename"];<br />						$file_link=pathinfo($link)["dirname"];<br />						$file_100_byte=$content;<br />						$stmt->execute();<br />						$stmt->close();<br />						fclose($fd);<br />				}<br />				else<br />				{<br />					die("文件不存在或者不可读".__line__);<br />				}<br />				<br />			}<br />			<br />		}<br />	<br />	<br />	<br />	$conn->close();<br />}<br />//根据扩展名分类<br />function extFilter(&$extAndDir,&$fileList,&$extFormat)<br />{<br />	$ext=$extAndDir[0];<br />	$link=$extAndDir[1];<br />	if(in_array(strtolower($ext),$extFormat["img"],true))<br />	{<br />		array_push($fileList["img"],$link);<br />	}<br />	else if(in_array(strtolower($ext),$extFormat["video"],true))<br />	{<br />		array_push($fileList["video"],$link);<br />	}<br />	else if(in_array(strtolower($ext),$extFormat["sound"],true))<br />	{<br />		array_push($fileList["sound"],$link);<br />	}<br />	else if(in_array(strtolower($ext),$extFormat["text"],true))<br />	{<br />		array_push($fileList["text"],$link);<br />	}<br />	else if(in_array(strtolower($ext),$extFormat["pack"],true))<br />	{<br />		array_push($fileList["pack"],$link);<br />	}<br />	else if(in_array(strtolower($ext),$extFormat["execute"],true))<br />	{<br />		array_push($fileList["execute"],$link);<br />	}<br />	else<br />	{<br />		array_push($fileList["other"],$link);<br />	}<br />}<br /><br />//提取路径名,因为使用dirname无法提取完整的路径名,所以写了这个<br />function getDirName($path)<br /> {<br />	$pat="#(\.\w{2,3})$#";<br />	//如果传的是路径+文件如 c:\sys.ini, 则提取路径名<br />	if(preg_match($pat,$path,$match))<br />	{<br />		//echo "extension name: ".$match[1];<br />		$pat2="#^(.*)/(\w{1,100}\.\w{2,3})$#i";<br />		if(preg_match($pat2, $path,$mat))<br />		{<br />			return $mat[1];<br />		}<br />		else<br />		{<br />			echo "not match".__line__."<br />";<br />		}<br />	}<br />	else<br />	{<br />		return $path;<br />	}<br /> }<br /> <br /><br />//读取路径,并根据文件的扩展名分类<br />function readFileList($path,&$extFormat)<br />{<br />	static $fileList=array("img"=>array(), "video"=>array(), "sound"=>array(),"text"=>array(),"pack"=>array(),"execute"=>array(),"other"=>array());<br />	$encoding=mb_detect_encoding($path,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));<br />			//echo $encoding;<br />			//exit();<br />	//if (in_array($encoding,array("cp936","gbk" ,"gb2312"),true))<br />	//{<br />		$path=iconv($encoding,"utf-8",$path);<br />	//}<br />	<br />	$dirName=getDirName($path);<br />	//$dirName=dirname($path);<br />	<br />	//根据文件夹的名字,命名数组的关键字<br />/*	$pat="#/(.+)$#i";<br />	preg_match($pat, $dirName,$match);<br />	$match=explode("/",$match[1]);<br />	$packs=array_pop($match);<br />	$fileList["$packs"]=null;<br />*/	<br />	if ($fd=opendir($dirName))<br />	{<br />		while($fileName=readdir($fd))<br />		{<br />			$encoding=mb_detect_encoding($fileName,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));<br />			//echo $encoding;<br />			//exit();<br />			//if (in_array($encoding,array("cp936","gbk" ,"gb2312"),true))<br />			//{<br />				$fileName=iconv($encoding,"utf-8",$fileName);<br />			//}<br />			<br />			if($fileName !="." && $fileName !="..")<br />			{<br />			<br />				if(is_file($dirName.'/'.$fileName))<br />				{	<br />					@$extName=pathinfo($path."/".$fileName)["extension"];<br />					$linkName=$dirName."/".$fileName;<br />				<br />					$extAndDir=array($extName,$linkName);<br />					extFilter($extAndDir,$fileList,$extFormat);<br />					//read100Bytes($extAndDir);<br />					<br />				}<br />				else if(is_dir($dirName.'/'.$fileName))<br />				{<br />					//$fileName=iconv("gb2312","utf-8",$fileName);<br />					readFileList($dirName.'/'.$fileName,$extFormat);<br />					//echo $dirName.'/'.$fileName."<br />";<br />					//die("读取目录出错");<br />				}<br />			}	<br />		<br />		}<br />		return $fileList;<br />	}<br />	@closedir($fd);<br />}<br />//迭代数据<br />function iter(&$arr)<br />{<br />	if(!is_array($arr))<br />		return;<br />	//$num=count($arr);<br />	$list=array();<br />	//echo "length".$num;<br />	foreach($arr as $name=>$val)<br />	{<br />		foreach($val as $item)<br />		array_push($list,$item);<br />	}<br />	return $list;<br />}<br /><br />//组装批量插入数据<br />function mysqliInsert(&$arr)<br />{<br />	$conn= new MySQLi("localhost", "root", "cai123", "test");<br />	if(!$conn)<br />	{<br />		die("连接服务器失败".$conn->error);<br />	}<br />	$conn->query("set names utf8");<br />	$sql="INSERT INTO img(link) VALUES";<br />	foreach ($arr as $key=>$val)<br />	{<br />		foreach($val as $ads)<br />		{<br />			$sql .= "('".$ads."'),";<br />			<br />		}<br />	}<br />	//去掉最后一个逗号,<br />	$sql=substr($sql,0, strlen($sql)-1);<br />	//echo $sql;<br />	<br />	$res = $conn->query($sql);<br />	<br />	if(!$res)<br />	{<br />		die("插入数据库失败".$conn->error);<br />	}<br />	else<br />	{<br />		if($conn->affected_rows>0)<br />		{<br />			exit();<br />		}<br />		else<br />		{<br />			die("没有产生影响");<br />		}<br />	}<br />	$conn->close();<br />	<br />}<br />//预处理批量插入数据<br />function stmtInsert(&$arr)<br />{<br />	$conn= new mysqli("localhost", "root", "cai123", "test");<br />	if(mysqli_connect_errno())<br />	{<br />		die(mysqli_connect_error());<br />	}<br />	$conn->query("set names utf8");<br />	$sql="insert into img(link) values(?)";<br />	$stmt=$conn->prepare($sql);<br />	$stmt->bind_param("s",$items);<br />	<br />	foreach($arr as $key=>$links)<br />	{<br />		foreach ($links as $item)<br />		{<br />			$items=$item;<br />			$b=$stmt->execute();<br />			if(!$b)<br />			{<br />				die($conn->error);	<br />			}<br />		}<br />	}<br />	$stmt->close();<br />	$conn->close();<br />}<br />//<br />function showFile()<br />{<br />	$conn= new MySQLi("localhost", "root", "cai123", "test");<br />	if(!$conn)<br />	{<br />		die("连接服务器失败".$conn->error);<br />	}<br />	$conn->query("set names utf8");<br />	$sql=" SELECT * FROM img GROUP BY link";<br />	$res=$conn->query($sql);<br />	if(!$res)<br />	{<br />		die("查询失败");<br />	}<br />	while($row=$res->fetch_assoc())<br />	{<br />		echo "<img src='".$row["link"]."' width='400px' /><br />";<br />	}<br />	$res->free();<br />}<br /><br />//转换window环境下路径的默认分隔符\为PHP识别更好的/<br />function transPathSep($path)<br />{<br />	//$encoding=mb_detect_encoding($path,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));<br />	//$path=iconv($encoding,"utf-8",$path);<br />	<br />	$system=$_SERVER["SERVER_SOFTWARE"];<br />	$pat="#\((.*?)\)#";<br />	$sysVer=null;<br />	if(preg_match($pat,$system,$match))<br />	{<br />		$sysVer=$match[1];<br />	}<br />	else<br />	{<br />		die("匹配系统类型失败");<br />	}<br />	if(strtolower($sysVer)=="win32")<br />	{<br />		$realPath=str_replace("\\","/",$path);<br />		return $realPath;<br />	}<br />}<br /><br />//static $fileList=array("img"=>array(), "video"=>array(), "sound"=>array(),"text"=>array(),"pack"=>array());<br />//在此处可以决定把什么类型的文件,归类到哪种类型<br />$imgFormat=array("img"=>array("jpg", "jpeg","png", "bmp","gif","ico"),<br />				"video"=>array("avi","dvix","div","xvid","mpge","mpg","dat","wmv","asx","rm","rmvb","mov","qt","3gp","3g2","mp4","m4v","flv"),<br />				"sound"=>array("mp3","wav","wma","ape","mod","aiff","voc","vov","asf"),<br />				"text"=>array("word","txt","pdf","chm","ppt","ini","html","css","js"),<br />				"pack"=>array("rar" ,"zip","7z","cab","arj","lzh","tar","gz","ace","uue","bz2","jar","iso","mpq"),<br />				"execute"=>array("exe","bat","msi"));<br />				<br />	<br />	<br />/* 遇到一个问题,当路径中出现中文时,会提示打开目录失败,且显示为乱码,比如$dir="E:\Books\php\php 程序设计";时会出现,CSDN的可用分只有33了..............蛋疼<br />*/<br />$dir="C:\Windows\Boot";<br /><br />$path=transPathSep($dir);<br /><br /> //echo pathinfo($path)["dirname"];<br />$arr=readFileList($path,$imgFormat);<br /><br />echo "<pre class="brush:php;toolbar:false">";<br />print_r($arr);<br />echo "
";
//read100Bytes($arr);

echo "
OK";
?>




------解决思路----------------------
你这样能不出错吗?
        while($fileName=readdir($fd))
        {
            $encoding=mb_detect_encoding($fileName,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
            $fileName=iconv($encoding,"utf-8",$fileName); //非utf-8的文件名被转成了utf-8的,那么文件还能存在吗?
             
            if($fileName !="." && $fileName !="..")
            {
             
                if(is_file($dirName.'/'.$fileName))

声明:
この記事の内容はネチズンが自主的に寄稿したものであり、著作権は原著者に帰属します。このサイトは、それに相当する法的責任を負いません。盗作または侵害の疑いのあるコンテンツを見つけた場合は、admin@php.cn までご連絡ください。