Home  >  Article  >  Backend Development  >  Convert file encodings in batches or individually

Convert file encodings in batches or individually

WBOY
WBOYOriginal
2016-07-25 08:48:46 · 910 views
Convert the encoding of files, for example from GBK to UTF-8. You can convert a single file or every file in a directory, optionally recursing into subdirectories.
One limitation: the conversion is not safely repeatable. For example, if a GBK file is converted to UTF-8 and the same GBK-to-UTF-8 conversion is then run again, the result is garbled. I originally tried to detect the encoding before converting, but that did not seem to work: when I checked one file against both GBK and UTF-8, both checks returned true. I don't understand why.
  /**
   * Convert the character encoding of a single file or of every file in a directory.
   *
   * Files are rewritten in place. Depends on the filesystem functions and the
   * mbstring extension.
   *
   * @example
   *
   * include_once 'ConvertEncode.php';
   * $convert = new ConvertEncode();
   * try {
   *     $convert->setPath('my', true, true); // directory, recursive
   *     //$convert->setPath('my.php');       // single file
   *     $convert->setEncode('GBK', 'UTF-8');
   *     $convert->convert();
   * } catch (ConvertException $e) {
   *     echo $e->getMessage();
   * }
   */
  class ConvertEncode {
      /**
       * The encoding to convert to
       * @var string
       */
      private $_to_encoding;

      /**
       * The encoding the files are currently in
       * @var string
       */
      private $_from_encoding;

      /**
       * Directory or single file to be converted
       * @var string
       */
      private $_path;

      /**
       * Whether $_path should be treated as a directory
       * @var boolean
       */
      private $_directory;

      /**
       * Whether to descend into subdirectories; only used for directories
       * @var boolean
       */
      private $_recursion;

      /**
       * Absolute paths of the files found by the last directory scan
       * @var array
       */
      private $_files = array();

      /**
       * Constructor
       *
       * @throws ConvertException when the mbstring extension is not loaded
       */
      public function __construct() {
          if( ! function_exists('mb_convert_encoding') ) {
              throw new ConvertException('mbstring extension be required');
          }
      }

      /**
       * Set the directory or single file to be converted
       *
       * @param string  $path   directory or file
       * @param boolean $is_dir whether $path is a directory
       * @param boolean $rec    whether to descend into subdirectories
       * @return boolean always true
       */
      public function setPath($path, $is_dir = false, $rec = false) {
          $this->_path = $path;
          $this->_directory = $is_dir;
          $this->_recursion = $rec;
          return true;
      }

      /**
       * Set the source encoding and the target encoding
       *
       * Surrounding whitespace is stripped, because mbstring rejects names
       * such as 'GBK ' as unknown encodings.
       *
       * @param string $encode_from the encoding before conversion
       * @param string $encode_to   the encoding to convert to
       * @return boolean always true
       */
      public function setEncode($encode_from, $encode_to) {
          $this->_from_encoding = trim((string) $encode_from);
          $this->_to_encoding = trim((string) $encode_to);
          return true;
      }

      /**
       * Run the conversion on the configured file or directory
       *
       * @throws ConvertException when no path or encodings have been set, or
       *                          when the file/directory conversion fails
       * @return boolean always true
       */
      public function convert() {
          if( (string) $this->_path === '' ) {
              throw new ConvertException('No path has been set, call setPath() first.');
          }
          if( (string) $this->_from_encoding === '' || (string) $this->_to_encoding === '' ) {
              throw new ConvertException('No encodings have been set, call setEncode() first.');
          }
          if( $this->_directory ) {
              return $this->_convertDirectory();
          }
          return $this->_convertFile();
      }

      /**
       * Convert the single file stored in $_path
       *
       * @throws ConvertException when the path is missing, is not a file, is
       *                          not readable and writable, or cannot be rewritten
       * @return boolean always true
       */
      private function _convertFile() {
          if( ! file_exists($this->_path) ) {
              $message = $this->_path . ' does not exist.';
              throw new ConvertException($message);
          }
          if( ! is_file($this->_path) ) {
              $message = $this->_path . ' is not a file.';
              throw new ConvertException($message);
          }
          if( ! $this->_isWR() ) {
              $message = $this->_path . ' must can be read and write.';
              throw new ConvertException($message);
          }
          $this->_convertOne( realpath($this->_path) );
          return true;
      }

      /**
       * Convert every file found in the directory stored in $_path
       *
       * A failure on one file does not stop the batch: the remaining files are
       * still processed and all failure messages are reported together at the end.
       *
       * @throws ConvertException when the path is missing, is not a directory,
       *                          is not readable and writable, contains no
       *                          files, or when one or more files failed
       * @return boolean always true
       */
      private function _convertDirectory() {
          if( ! file_exists($this->_path) ) {
              $message = $this->_path . ' does not exist.';
              throw new ConvertException($message);
          }
          if( ! is_dir($this->_path) ) {
              $message = $this->_path . ' is not a directory.';
              throw new ConvertException($message);
          }
          if( ! $this->_isWR() ) {
              $message = $this->_path . ' must can be read and write.';
              throw new ConvertException($message);
          }
          // Start from a clean list so that calling convert() more than once
          // does not process the files of an earlier scan a second time.
          $this->_files = array();
          $this->_scanDirFiles();
          if( empty($this->_files) ) {
              $message = $this->_path . ' is a empty directory.';
              throw new ConvertException($message);
          }
          $failures = array();
          foreach( $this->_files as $file ) {
              try {
                  $this->_convertOne($file);
              } catch( ConvertException $e ) {
                  $failures[] = $e->getMessage();
              }
          }
          if( ! empty($failures) ) {
              throw new ConvertException( implode(PHP_EOL, $failures) );
          }
          return true;
      }

      /**
       * Convert one file in place
       *
       * The file is left untouched when its content is not valid in the source
       * encoding, when it already looks converted, or when the conversion
       * would not change any byte.
       *
       * @param string $file path of the file to convert
       * @throws ConvertException when the file cannot be read, converted or written
       * @return boolean true if the file was rewritten, false if it was skipped
       */
      private function _convertOne($file) {
          if( ! is_readable($file) || ! is_writable($file) ) {
              throw new ConvertException($file . ' must can be read and write.');
          }
          $original = file_get_contents($file);
          if( $original === false ) {
              throw new ConvertException($file . ' could not be read.');
          }
          if( ! mb_check_encoding($original, $this->_from_encoding) ) {
              return false;
          }
          if( $this->_isAlreadyUtf8Target($original) ) {
              return false;
          }
          $converted = mb_convert_encoding($original, $this->_to_encoding, $this->_from_encoding);
          // Older PHP versions return false on failure; writing that value
          // would truncate the file to zero bytes.
          if( ! is_string($converted) ) {
              throw new ConvertException($file . ' could not be converted.');
          }
          if( $converted === $original ) {
              return false;
          }
          if( file_put_contents($file, $converted) === false ) {
              throw new ConvertException($file . ' could not be written.');
          }
          return true;
      }

      /**
       * Guess whether the content has already been converted to a UTF-8 target
       *
       * mb_check_encoding() alone cannot tell: UTF-8 bytes frequently also form
       * valid sequences in legacy multi-byte encodings such as GBK, so a second
       * run would convert the file again and garble it. UTF-8 is strict enough
       * that content with high-bit bytes which validates as UTF-8 is treated
       * as converted.
       *
       * This is a heuristic: a short legacy-encoded text whose bytes happen to
       * form valid UTF-8 is skipped instead of converted. Content without any
       * high-bit byte is never reported as converted, so sources such as
       * UTF-16 holding only ASCII characters are still processed.
       *
       * @param string $content raw file content
       * @return boolean
       */
      private function _isAlreadyUtf8Target($content) {
          $target = strtoupper( str_replace(array('-', '_'), '', $this->_to_encoding) );
          if( $target !== 'UTF8' ) {
              return false;
          }
          if( preg_match('/[\x80-\xFF]/', $content) !== 1 ) {
              return false;
          }
          return mb_check_encoding($content, 'UTF-8');
      }

      /**
       * Determine whether $_path is both readable and writable
       *
       * @return boolean true if it can be read and written, otherwise false
       */
      private function _isWR() {
          return is_readable($this->_path) && is_writable($this->_path);
      }

      /**
       * Collect the absolute paths of all files in a directory into $_files
       *
       * Entries whose name starts with a dot ('.', '..' and hidden files) are
       * skipped. Subdirectories are only entered when recursion is enabled.
       *
       * @param string $dir directory to scan; the empty string means $_path
       * @return boolean false if the directory could not be resolved or
       *                 listed, otherwise true
       */
      private function _scanDirFiles($dir = '') {
          $target = empty($dir) ? $this->_path : $dir;
          $resolved = realpath($target);
          if( $resolved === false ) {
              // Without this check the base path would collapse to the
              // filesystem root and unrelated files could be picked up.
              return false;
          }
          $entries = scandir($target);
          if( $entries === false ) {
              return false;
          }
          $base_path = $resolved . DIRECTORY_SEPARATOR;
          foreach( $entries as $name ) {
              if( strpos($name, '.') === 0 ) {
                  continue;
              }
              $full_path = $base_path . $name;
              if( is_dir($full_path) ) {
                  if( $this->_recursion ) {
                      $this->_scanDirFiles($full_path);
                  }
              }
              elseif( is_file($full_path) ) {
                  $this->_files[] = $full_path;
              }
          }
          return true;
      }
  }
  /**
   * Exception type thrown by ConvertEncode when a conversion cannot be carried out.
   *
   * Adds nothing to Exception; it exists so callers can catch conversion
   * failures separately from other exceptions.
   */
  class ConvertException extends Exception
  {
  }
复制代码


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn