Sometimes the actual encoding of an HTML file differs from the encoding declared in its meta tag. The following program fixes this: it reads each file as GB2312, rewrites the charset declared in the meta tag to UTF-8, and saves the file as UTF-8. It depends on the jsoup and commons-io libraries.
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Locale;

import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

- public class main {
-
- /**
- * @param args
- * @throws IOException
- */
- public static void main(String[] args) throws IOException {
- // TODO Auto-generated method stub
-
- File input = new File("C:\Users\jack\Desktop \New Folder\jdk-zh");
- Iterator it = FileUtils.iterateFiles(input, null, true);
- while (it.hasNext()) {
- File file = it.next();
- Document doc = Jsoup.parse(file, "gb2312");
- Elements content = doc.getElementsByAttributeValueStarting("content", "text/html;");
- for (Element meta : content) {
- meta.attr("content ", "text/html; charset=utf-8");
- System.out
- .println("Modify content--------" + file.getName() + "---");
- }
- FileUtils.writeStringToFile(file, doc.html(),"utf-8");
- }
- }
- }
-
Copy code
|