Home  >  Article  >  Backend Development  >  How to use Python to batch encode and convert text files to and from each other

How to use Python to batch encode and convert text files to and from each other

WBOY
WBOYforward
2023-04-18 12:43:04 · 1264 views

On Windows, C source files are saved in GB2312 by default, so their text turns into garbled characters when the files are moved to Linux, because Linux and macOS default to UTF-8. To deal with this, I wrote a small Python script that converts the encoding of the files under the specified paths.

from sys import argv
import os
from chardet import detect
from codecs import lookup

# Optional config file (relative to the current working directory) listing
# the file extensions to convert, separated by spaces or newlines.
CONFIG_FILE = '.any2any'
# Extensions used when no config file is present.
DEFAULT_CONFIG = '''
.c
.h
.cpp
.hpp
.hxx
.cc
.cxx
.C
.c++
.m
.cs
.rs
.java
.kt
.php
.pm
.pl
.py
.sh
.go
.xml
.htm
.html
.css
.js
.jsx
.vue
.txt
.csv
'''

# Keep the extensions in a tuple so they can be passed straight to
# str.endswith when filtering file paths.
if not os.path.exists(CONFIG_FILE):
    file_extension = tuple(DEFAULT_CONFIG.split())
else:
    with open(CONFIG_FILE, 'r') as config_file:
        file_extension = tuple(config_file.read().split())
print(f"将转换 {' '.join(file_extension)}")


def bytes_encoding(b: bytes, length: int = 1024) -> str:
    '''
    Return the encoding name detected for the given bytes.

    Only the first ``length`` bytes of ``b`` are passed to ``detect``; the
    ``'encoding'`` entry of its result is returned unchanged.
    '''
    sample = b[:length]
    detection = detect(sample)
    return detection['encoding']


def any2any(b: bytes, encoding: str) -> bytes:
    '''
    Re-encode bytes from their detected encoding into ``encoding``.

    The source encoding is guessed with ``bytes_encoding``. The input is
    returned unchanged when no encoding could be detected (for example an
    empty file) or when source and target resolve to the same codec.

    :param b: raw bytes to convert.
    :param encoding: name of the target encoding.
    :return: the bytes re-encoded in ``encoding``.
    :raises LookupError: if the target or detected encoding is unknown.
    :raises UnicodeError: if the bytes cannot be decoded or re-encoded.
    '''
    file_encoding = bytes_encoding(b)
    if not file_encoding:
        # Nothing was detected; leave the data alone rather than crash in
        # lookup() or corrupt the content.
        return b

    source = lookup(file_encoding)
    target = lookup(encoding)
    # Compare canonical codec names so that spelling variants such as
    # 'utf-8' / 'UTF-8' / 'utf8' are recognised as the same encoding.
    if source.name == target.name:
        return b

    if source.name == 'gb2312':
        # GB18030 is a superset of GB2312 (and GBK), so decoding with it
        # yields the same text for pure GB2312 input while also accepting
        # files that contain characters outside the GB2312 repertoire.
        source = lookup('gb18030')

    text = source.decode(b)[0]
    return target.encode(text)[0]


def allfileset(path: str = '.', filepathset: set = None) -> set:
    '''
    Recursively collect the absolute paths of all files under ``path``.

    :param path: a directory to walk, or a single file path. A non-directory
        ``path`` is added to the result as-is (made absolute).
    :param filepathset: optional set to add the paths to; a fresh set is
        created when omitted, so results never leak between calls.
    :return: the set that was filled (``filepathset`` itself when given).
    '''
    if filepathset is None:
        filepathset = set()

    if not os.path.isdir(path):
        filepathset.add(os.path.abspath(path))
        return filepathset

    for item in os.listdir(path):
        filepath = os.path.join(path, item)
        if os.path.isfile(filepath):
            filepathset.add(os.path.abspath(filepath))
        elif os.path.isdir(filepath):
            allfileset(filepath, filepathset)
        # Entries that are neither a regular file nor a directory (e.g.
        # broken symlinks) are skipped instead of being reported as files.
    return filepathset


def is_valid_inputs() -> bool:
    '''
    Check that the command-line arguments are usable.

    Returns True only when at least one argument follows the program name
    and every such argument is an existing path.
    '''
    candidates = argv[1:]
    if not candidates:
        return False
    for candidate in candidates:
        if not os.path.exists(candidate):
            return False
    return True


def is_valid_encoding(encoding: str) -> bool:
    '''
    Report whether ``encoding`` names a codec known to Python.

    :param encoding: the encoding name to check.
    :return: True if ``codecs.lookup`` accepts the name, False otherwise.
    '''
    try:
        lookup(encoding)
    except (LookupError, TypeError, ValueError):
        # LookupError: unknown codec name. TypeError / ValueError: input
        # that cannot be a codec name at all. Anything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return False
    return True


def choice_encoding() -> str:
    '''
    Ask the user interactively which encoding to convert to.

    Option 1 selects GB18030 and option 2 selects UTF-8. Option 3 prompts
    for an encoding name until ``is_valid_encoding`` accepts it. Any other
    answer prints a notice and ends the program.

    :return: the chosen target encoding name.
    :raises SystemExit: when the user picks none of the offered options.
    '''
    # Surrounding whitespace is stripped so that an answer like "1 " is
    # still recognised.
    choice = input('''!!!在转换前注意备份文件!!!
要转换到什么编码?
1. GB18030(Windows下常用,C语言不会乱码)
2. UTF-8(非Windows下通用,例如Linux和macOS)
3. 其他
> ''').strip()
    if choice == '1':
        return 'GB18030'
    if choice == '2':
        return 'UTF-8'
    if choice == '3':
        choice = input('输入你想转换到的编码:').strip()
        while not is_valid_encoding(choice):
            choice = input('不存在该编码,重新输入:').strip()
        return choice
    print('不做任何操作')
    # Raise SystemExit directly: the exit() helper is installed by the site
    # module for interactive use and is not guaranteed to exist in every
    # environment (for example frozen executables).
    raise SystemExit


def main():
    '''
    Convert every matching file given on the command line, in place.

    Validates the arguments, asks for the target encoding, collects the
    files under each argument whose names end with one of the configured
    extensions, and rewrites each of them in the chosen encoding. A file
    that cannot be converted is reported and skipped so that it does not
    abort the rest of the batch.
    '''
    if not is_valid_inputs():
        print("未收到任何要转换的文件或文件夹路径,或参数错误,请把要转换的文件或文件夹拖动到程序上。")
        return

    encoding = choice_encoding()
    filepathset = set()
    for path in argv[1:]:
        # Collect into a fresh set and add only the matching paths. The
        # previous code discarded the result of set.union() while
        # allfileset() filled the shared set in place, so the extension
        # filter never took effect and every file was rewritten.
        found = allfileset(path, set())
        filepathset.update(p for p in found if p.endswith(file_extension))

    if not filepathset:
        print('没有任何可以转换的文件,请检查程序下是否有.any2any配置文件,用空格或换行间隔要转换的文件类型,例如 .c .cpp .cs')
        return

    for path in filepathset:
        try:
            with open(path, 'rb') as f:
                filebytes = any2any(f.read(), encoding)
            # The file is reopened for writing only after the conversion
            # succeeded, so a failed conversion leaves it untouched.
            with open(path, 'wb') as f:
                f.write(filebytes)
        except (UnicodeError, LookupError, TypeError, OSError) as err:
            # Undecodable content, an unknown or undetected source encoding,
            # or an I/O problem: report it and continue with the next file.
            print(f'{path} 转换失败: {err}')
            continue
        print(f'{path} 已转换到 {encoding}')
    print('转换已完成')

# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always wait for Enter before exiting, even when main() raised or
        # ended the program early -- presumably so that a console window
        # opened by dragging files onto the program stays readable.
        input('按任意键退出...')

You can use PyInstaller to package the script into an executable file, so that you can carry it with you or share it with others.

Install pyinstaller

conda install pyinstaller

Then package the Python file with the command below. The -i option adds an icon to the .exe file, and the -F option bundles everything into a single executable file; the script to package is given as the last argument.

pyinstaller -i icon.ico -F any2any.py

The final packaged executable file is placed in the dist directory.

The above is the detailed content of How to use Python to batch encode and convert text files to and from each other. For more information, please follow other related articles on the PHP Chinese website!

Statement:
This article is reproduced from yisu.com. If there is any infringement, please contact admin@php.cn to have it removed.