// Plain-text import of Word documents, implemented with Apache POI.
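// The entity classes used below (Question and its options) can be ignored; the part worth reading is how the two Word formats (.doc and .docx) are read and how their text is wrapped into objects.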
public static List<Question> parseWord(String in) throws Exception{ List<String> list=new ArrayList<>(); if(iswordtype(in)==1){ InputStream is = new FileInputStream(in); WordExtractor extractor = new WordExtractor(is); String paraTexts2[]=extractor.getCommentsText(); String paraTexts3[]=extractor.getMainTextboxText(); String paraTexts[] = extractor.getParagraphText(); for (int i=0; i<paraTexts.length; i ) { if(StringUtils.isNotBlank(StringUtils.trim(paraTexts[i]))){ list.add(StringUtils.trim(paraTexts[i])); } } //获取文件属性 //printInfo(extractor.getSummaryInformation()); close(is); }else if(iswordtype(in)==2){ OPCPackage oPCPackage = POIXMLDocument.openPackage(in); XWPFDocument doc = new XWPFDocument(oPCPackage); POIXMLTextExtractor ex = new XWPFWordExtractor(doc); POIXMLProperties.CoreProperties coreProps = ex.getCoreProperties(); //printCoreProperties(coreProps); List<XWPFParagraph> paras = doc.getParagraphs(); for (XWPFParagraph para : paras) { //当前段落的属性 //CTPPr pr = para.getCTP().getPPr(); //System.out.println(para.getText()); if(StringUtils.isNotBlank(StringUtils.trim(para.getText()))){ list.add(StringUtils.trim(para.getText())); } } }else{ System.out.println("格式不对"); return null; } List<Question> newlist=null; Question question = null; Question.QuestionOption questionOptionDto=null; List<Question.QuestionOption> qolist=null; StringBuilder questionOption=null; if(list!=null && list.size()>0){ newlist=new ArrayList<>(); question=new Question(); questionOption=new StringBuilder(); qolist=new ArrayList<>(); questionOptionDto=new Question.QuestionOption(); int type=0; for (int i = 0; i < list.size(); i ) { if(list.get(i).contains("、判断")||list.get(i).contains("、是非")||list.get(i).contains("、对错")){ type=3; question.setQuestiontype(type); question.setStem(list.get(i)); System.out.println("n" list.get(i)); continue; }else if(list.get(i).contains("、单选")||list.get(i).contains("、单项")){ type=1; question.setQuestiontype(type); question.setStem(list.get(i)); 
System.out.println("n" list.get(i)); continue; }else if(list.get(i).contains("、多选")){ type=2; question.setQuestiontype(type); question.setStem(list.get(i)); System.out.println("n" list.get(i)); continue; }else if(list.get(i).startsWith("、案例") || list.get(i).startsWith("、病案")){ type=5; question.setQuestiontype(type); question.setStem(list.get(i)); System.out.println("n" list.get(i)); continue; }else if(list.get(i).contains("、主观")){ type=4; question.setQuestiontype(type); question.setStem(list.get(i)); System.out.println("n" list.get(i)); continue; }else{ if(isTitele(StringUtils.trim(list.get(i)))){ question.setStem(list.get(i)); //questionOption.append("[{"optionCont":"); System.out.println("类型: " type); System.out.println(list.get(i)); }else{ if(type!=0){ System.out.println(list.get(i)); //String[] option=list.get(i).split("^([a-zA-Z] [-\:].*)"); if(StringUtils.trim(list.get(i)).startsWith("答案:")){ questionOptionDto.setOptionCont(list.get(i)); qolist.add(questionOptionDto); question.setQuestionOptionList(qolist); qolist=new ArrayList<>(); questionOptionDto=new Question.QuestionOption(); newlist.add(question); question=new Question(); //questionOption.append("}]"); //questionOption = new StringBuilder(String.format(questionOption.toString(), list.get(i).split("答案:")[1])); //question.setQuestionOptions(questionOption.toString()); //newlist.add(question); //question=new Question(); //questionOption = new StringBuilder(); }else{ // questionOption.append(list.get(i) "n"); questionOptionDto.setOptionCont(list.get(i)); qolist.add(questionOptionDto); questionOptionDto=new Question.QuestionOption(); } }else{ System.out.println(list.get(i)); } } } } } return newlist;
}
/** Numbered question title: one or more digits followed by one of {@code - 、 | .}. */
private static final Pattern WORD_TITLE_PATTERN = Pattern.compile("^\\d+[-、|.].*");

/** Zero or more ASCII digits; note that the empty string matches. */
private static final Pattern WORD_NUMERIC_PATTERN = Pattern.compile("[0-9]*");

/** Choice option: one or more ASCII letters followed by {@code -} or {@code :}. */
private static final Pattern WORD_OPTION_PATTERN = Pattern.compile("^[a-zA-Z]+[-:].*");

/** Chinese ordinal markers ("一、" to "八、") that identify a top-level heading. */
private static final String[] WORD_BIG_TITLE_MARKERS = {
    "一、", "二、", "三、", "四、", "五、", "六、", "七、", "八、"
};

/**
 * Determines the Word document type from the file extension (case-insensitive).
 *
 * @param path file path or name; may be {@code null}
 * @return 1 for {@code .doc}, 2 for {@code .docx}, 3 for anything else (including null)
 */
public static int iswordtype(String path) {
    if (path == null) {
        return 3;
    }
    if (wordPathEndsWith(path, ".doc")) {
        return 1;
    }
    if (wordPathEndsWith(path, ".docx")) {
        return 2;
    }
    return 3;
}

/** Case-insensitive {@code endsWith}; returns false when the path is shorter than the suffix. */
private static boolean wordPathEndsWith(String path, String suffix) {
    return path.regionMatches(true, path.length() - suffix.length(), suffix, 0, suffix.length());
}

/**
 * Closes the given input stream, printing the stack trace if closing fails.
 *
 * @param is the stream to close; ignored when {@code null}
 */
public static void close(InputStream is) {
    if (is == null) {
        return;
    }
    try {
        is.close();
    } catch (IOException e) {
        // Best effort: a failure to close is reported but not propagated.
        e.printStackTrace();
    }
}

/**
 * Prints selected core properties of a document to standard output.
 *
 * @param coreProps the core properties to print
 */
private static void printCoreProperties(POIXMLProperties.CoreProperties coreProps) {
    System.out.println("开始信息----------------");
    System.out.println(coreProps.getCategory()); // category
    System.out.println(coreProps.getCreator());  // creator
    System.out.println(coreProps.getCreated());  // creation time
    System.out.println(coreProps.getTitle());    // title
    System.out.println("结束信息----------------");
}

/**
 * Prints selected fields of a SummaryInformation to standard output.
 *
 * @param info the summary information to print
 */
private static void printInfo(SummaryInformation info) {
    System.out.println(info.getAuthor());    // author
    System.out.println(info.getCharCount()); // character count
    System.out.println(info.getPageCount()); // page count
    System.out.println(info.getTitle());     // title
    System.out.println(info.getSubject());   // subject
}

/**
 * Prints selected fields of a DocumentSummaryInformation to standard output.
 *
 * @param info the document summary information to print
 */
private static void printInfo(DocumentSummaryInformation info) {
    System.out.println(info.getCategory()); // category
    System.out.println(info.getCompany());  // company
}

/**
 * Tells whether the string starts with a question number, i.e. one or more digits followed by
 * one of {@code - 、 | .}.
 *
 * @param str the text to test; may be {@code null}
 * @return {@code true} if the whole string matches the numbered-title pattern
 */
public static boolean isTitele(String str) {
    return str != null && WORD_TITLE_PATTERN.matcher(str).matches();
}

/**
 * Tells whether the string consists only of ASCII digits.
 *
 * @param str the text to test; may be {@code null}
 * @return {@code true} if every character is a digit; the empty string also yields
 *         {@code true}, {@code null} yields {@code false}
 */
public static boolean isNumeric(String str) {
    return str != null && WORD_NUMERIC_PATTERN.matcher(str).matches();
}

/**
 * Tells whether the string looks like a choice option, i.e. one or more ASCII letters followed
 * by {@code -} or {@code :}.
 *
 * @param str the text to test; may be {@code null}
 * @return {@code true} if the whole string matches the option pattern
 */
public static boolean isSelecteTitele(String str) {
    return str != null && WORD_OPTION_PATTERN.matcher(str).matches();
}

/**
 * Tells whether the string contains one of the top-level heading markers "一、" to "八、".
 *
 * @param str the text to test; may be {@code null}
 * @return {@code true} if any marker occurs anywhere in the string
 */
public static boolean isBigTilete(String str) {
    if (str == null) {
        return false;
    }
    for (String marker : WORD_BIG_TITLE_MARKERS) {
        if (str.contains(marker)) {
            return true;
        }
    }
    return false;
}