1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
| package com.huawei.classroom.student.h13;
import java.io.*; import java.util.*;
/**
 * Loads a UTF-8 text file, blanks out punctuation, splits the text into chapters on
 * headings of the form {@code 第…回}, and answers simple frequency questions about it.
 *
 * <p>Element 0 of the chapter array is whatever precedes the first heading; both query
 * methods skip it and only look at the chapters that follow a heading.
 */
public class Analysis {

    /** Upper bound on the number of chapters; also the length of the array returned by {@link #getStringFrequent}. */
    private static final int MAX_CHAPTERS = 120;

    /** Every Unicode punctuation character, plus the four curly quotes, is replaced by a space. */
    private static final String PUNCTUATION_REGEX = "[\\pP‘’“”]";

    /**
     * Chapter heading: a space, 第, one to five numeral characters, 回, a space.
     *
     * <p>NOTE(review): the commas inside the character class are literal members of the
     * class (not separators), and 百 is not part of it, so a heading such as 第一百回
     * would not be recognised — confirm against the actual input text.
     */
    private static final String CHAPTER_HEADING_REGEX = " 第[一,二,三,四,五,六,七,八,九,十,零]{1,5}回 ";

    /**
     * Characters that can never be part of a two-character word. The line feed is listed
     * next to the carriage return so that pairs straddling a line break are not counted.
     */
    private static final Set<Character> IGNORED_CHARS =
            new HashSet<>(Arrays.asList(' ', '\r', '\t', '\n'));

    /** Text pieces produced by splitting on chapter headings; index 0 is the text before the first heading. */
    private final String[] chapters;

    /**
     * Reads the file, replaces punctuation with spaces and splits the result into chapters.
     *
     * @param filename path of the UTF-8 encoded text file
     * @throws Exception if the file cannot be opened or read
     */
    public Analysis(String filename) throws Exception {
        String text = readFromTxt(filename);
        text = text.replaceAll(PUNCTUATION_REGEX, " ");
        this.chapters = splitContentToChapter(text);
    }

    /**
     * Reads the whole file into a string, decoding it as UTF-8.
     *
     * @param filename path of the file to read
     * @return the complete file content
     * @throws IOException if the file cannot be opened or read
     */
    private String readFromTxt(String filename) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[1024];
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (Reader reader = new InputStreamReader(
                new FileInputStream(filename), java.nio.charset.StandardCharsets.UTF_8)) {
            int read;
            while ((read = reader.read(buffer)) != -1) {
                text.append(buffer, 0, read);
            }
        }
        return text.toString();
    }

    /**
     * Returns the most frequent two-character sequences found in the chapters, most
     * frequent first. Pairs containing an ignored character (space, tab, CR, LF) are skipped.
     *
     * @param n maximum number of entries wanted
     * @return at most {@code n} two-character strings ordered by descending count; fewer
     *         when the text holds fewer distinct pairs, empty when {@code n <= 0}
     */
    public List<String> getTopNWords(int n) {
        Map<String, Integer> frequency = new HashMap<>();
        for (int chapter = 1; chapter < chapters.length; chapter++) {
            String content = chapters[chapter];
            for (int pos = 0; pos + 1 < content.length(); pos++) {
                if (IGNORED_CHARS.contains(content.charAt(pos))
                        || IGNORED_CHARS.contains(content.charAt(pos + 1))) {
                    continue;
                }
                frequency.merge(content.substring(pos, pos + 2), 1, Integer::sum);
            }
        }

        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(frequency.entrySet());
        ranked.sort((left, right) -> right.getValue().compareTo(left.getValue()));

        // Clamp so that asking for more entries than exist returns what is available
        // instead of running off the end of the list.
        int limit = Math.min(Math.max(n, 0), ranked.size());
        List<String> top = new ArrayList<>(limit);
        for (int rank = 0; rank < limit; rank++) {
            top.add(ranked.get(rank).getKey());
        }
        return top;
    }

    /**
     * Splits the text on chapter headings.
     *
     * @param content punctuation-free text
     * @return the pieces between headings; element 0 is the text before the first heading
     */
    private String[] splitContentToChapter(String content) {
        return content.split(CHAPTER_HEADING_REGEX);
    }

    /**
     * Counts, chapter by chapter, how often {@code str} occurs. Overlapping occurrences
     * are each counted (for example {@code "aa"} occurs twice in {@code "aaa"}).
     *
     * @param str the text to look for
     * @return an array of length 120 where element {@code k} is the number of occurrences
     *         in the chapter following the {@code (k + 1)}-th heading; unused slots stay 0
     * @throws Exception (an {@link IllegalStateException}) if the text was split into more
     *         than 120 chapters
     */
    public int[] getStringFrequent(String str) throws Exception {
        if (chapters.length > MAX_CHAPTERS + 1) {
            throw new IllegalStateException("拆分的章节数量不对");
        }
        int[] counts = new int[MAX_CHAPTERS];
        int length = str.length();
        for (int chapter = 1; chapter < chapters.length; chapter++) {
            String content = chapters[chapter];
            int lastStart = content.length() - length;
            int count = 0;
            // Advance one position at a time so overlapping matches are all seen.
            for (int pos = 0; pos <= lastStart; pos++) {
                if (content.startsWith(str, pos)) {
                    count++;
                }
            }
            counts[chapter - 1] = count;
        }
        return counts;
    }
}
|