本篇文章主要透過實例程式碼介紹了JAVA讀取PDF、WORD文檔,需要的朋友可以參考下
讀取PDF文件jar引用
<dependency> <groupid>org.apache.pdfbox</groupid> pdfbox</artifactid> <version>1.8.13</version> </dependency>
讀取WORD檔案jar引用
<dependency> <groupid>org.apache.poi</groupid> poi-scratchpad</artifactid> <version>3.16-beta1</version> </dependency> <dependency> <groupid>org.apache.poi</groupid> poi</artifactid> <version>3.16-beta1</version> </dependency>
讀取WORD檔案方法
/** * * @Title: getTextFromWord * @Description: 读取word * @param filePath * 文件路径 * @return: String 读出的Word的内容 */ public static String getTextFromWord(String filePath) { String result = null; File file = new File(filePath); FileInputStream fis = null; try { fis = new FileInputStream(file); @SuppressWarnings("resource") WordExtractor wordExtractor = new WordExtractor(fis); result = wordExtractor.getText(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (fis != null) { try { fis.close(); } catch (IOException e) { e.printStackTrace(); } } } return result; }
讀取PDF檔案方法
/** * * @Title: getTextFromPdf * @Description: 读取pdf文件内容 * @param filePath * @return: 读出的pdf的内容 */ public static String getTextFromPdf(String filePath) { String result = null; FileInputStream is = null; PDDocument document = null; try { is = new FileInputStream(filePath); PDFParser parser = new PDFParser(is); parser.parse(); document = parser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); result = stripper.getText(document); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (is != null) { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } if (document != null) { try { document.close(); } catch (IOException e) { e.printStackTrace(); } } } return result; }
以上是詳解JAVA讀取PDF、WORD文件的方法的詳細內容。更多資訊請關注PHP中文網其他相關文章!