/**
 * Extracts the body text and author of a PDF document read from {@code stream}.
 *
 * <p>Parsing is best-effort: if the parser throws, the failure is logged and the
 * returned object is built from whatever the handler and metadata hold at that
 * point. This method does not close {@code stream}.
 *
 * @param stream PDF content to parse
 * @param path   location of the document; must not be null, its string form is
 *               stored as the file path of the result
 * @return a {@code DocumentContent} carrying the extracted text, the file path and,
 *         when an "Author" metadata entry exists, the author
 */
public DocumentContent readPath(InputStream stream, Path path)
{
    // BodyContentHandler's default write limit is 100,000 characters; raise it so
    // that large documents do not make Tika fail once that limit is reached.
    final int writeLimit = 100 * 1024 * 1024;
    BodyContentHandler handler = new BodyContentHandler(writeLimit);
    Metadata metadata = new Metadata();
    ParseContext pcontext = new ParseContext();

    // PDF documents are parsed by PDFParser, a subclass of AbstractParser.
    PDFParser pdfparser = new PDFParser();
    try {
        pdfparser.parse(stream, handler, metadata, pcontext);
    } catch (Exception e) {
        // Deliberately not rethrown: callers still receive a (possibly partial)
        // result. Log with the path so the failing document can be identified.
        java.util.logging.Logger.getLogger(getClass().getName()).log(
                java.util.logging.Level.WARNING,
                "Failed to parse PDF content: " + path,
                e);
    }

    DocumentContent documentContent = new DocumentContent();
    // Metadata names are compared after trimming; if several names match, the
    // last one visited wins.
    // NOTE(review): newer Tika releases may expose the author only under
    // "dc:creator" — confirm "Author" is populated by the Tika version in use.
    for (String name : metadata.names()) {
        if ("Author".equals(name.trim())) {
            documentContent.setAuthor(metadata.get(name));
        }
    }
    documentContent.setText(handler.toString());
    documentContent.setFilePath(path.toString());
    return documentContent;
}