Home  >  Article  >  Backend Development  >  C# solution for reading garbled Chinese files

C# solution for reading garbled Chinese files

大家讲道理
大家讲道理Original
2016-11-10 16:30:15 · 1481 views

// Read the whole file as text and hand it to ProcessData.
// The reader is given GB2312 as its encoding and has byte-order-mark detection
// switched on (third constructor argument), so a file that starts with a
// Unicode BOM is decoded with the detected encoding instead.
string FileContent;

// The two-argument FileStream constructor requests read/write access, which
// fails on read-only files; only read access is needed here.
// The using blocks guarantee both the stream and the reader are released even
// when ReadToEnd throws.
using (FileStream aFile = new FileStream(SingleFile, FileMode.Open, FileAccess.Read))
using (StreamReader sr = new StreamReader(aFile, Encoding.GetEncoding("gb2312"), true))
{
    FileContent = sr.ReadToEnd();
}

ProcessData Pd = new ProcessData();
Pd.ProceData(FileContent);

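This StreamReader constructor takes three parameters. The last one (detectEncodingFromByteOrderMarks) tells the reader to detect Unicode encodings such as UTF-8 from a byte order mark at the start of the file. Most Chinese text files are encoded as GB2312, so when no byte order mark is found the reader falls back to the GB2312 encoding passed as the second parameter.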


The framework has built-in detection of Unicode byte order marks; see the code below.

/// <summary>
/// Checks the first bytes of <c>byteBuffer</c> for a Unicode byte order mark
/// and, when one is recognised, switches <c>encoding</c>, <c>decoder</c> and
/// <c>charBuffer</c> over to the matching encoding.
/// </summary>
/// <remarks>
/// Marks recognised, in the order they are tested:
/// FE FF (UTF-16 big-endian), FF FE (UTF-16 little-endian, or UTF-32
/// little-endian when followed by 00 00), EF BB BF (UTF-8) and
/// 00 00 FE FF (UTF-32 big-endian).
/// </remarks>
private void DetectEncoding()
{
    // Nothing is examined or changed until at least two bytes are available.
    if (this.byteLen < 2)
    {
        return;
    }

    this._detectEncoding = false;

    // Length of the recognised mark; stays 0 when no mark matched.
    int bomLength = 0;

    if (this.byteBuffer[0] == 0xfe && this.byteBuffer[1] == 0xff)
    {
        // FE FF: UTF-16, big-endian.
        this.encoding = new UnicodeEncoding(true, true);
        bomLength = 2;
    }
    else if (this.byteBuffer[0] == 0xff && this.byteBuffer[1] == 0xfe)
    {
        // FF FE alone is UTF-16 little-endian; FF FE 00 00 is UTF-32 little-endian.
        if (this.byteLen >= 4 && this.byteBuffer[2] == 0 && this.byteBuffer[3] == 0)
        {
            this.encoding = new UTF32Encoding(false, true);
            bomLength = 4;
        }
        else
        {
            this.encoding = new UnicodeEncoding(false, true);
            bomLength = 2;
        }
    }
    else if (this.byteLen >= 3
        && this.byteBuffer[0] == 0xef
        && this.byteBuffer[1] == 0xbb
        && this.byteBuffer[2] == 0xbf)
    {
        // EF BB BF: UTF-8.
        this.encoding = Encoding.UTF8;
        bomLength = 3;
    }
    else if (this.byteLen >= 4
        && this.byteBuffer[0] == 0
        && this.byteBuffer[1] == 0
        && this.byteBuffer[2] == 0xfe
        && this.byteBuffer[3] == 0xff)
    {
        // 00 00 FE FF: UTF-32, big-endian.
        this.encoding = new UTF32Encoding(true, true);
        bomLength = 4;
    }
    else if (this.byteLen == 2)
    {
        // Exactly two bytes and no match yet: turn detection back on.
        // NOTE(review): presumably so the check is retried once more bytes
        // have been read - confirm against the caller.
        this._detectEncoding = true;
    }

    if (bomLength > 0)
    {
        // CompressBuffer is defined outside this snippet; presumably it drops
        // the mark bytes from the front of the buffer - confirm.
        this.CompressBuffer(bomLength);

        // Rebuild the decoder and the character buffer for the new encoding.
        this.decoder = this.encoding.GetDecoder();
        this._maxCharsPerBuffer = this.encoding.GetMaxCharCount(this.byteBuffer.Length);
        this.charBuffer = new char[this._maxCharsPerBuffer];
    }
}


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn