-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLab2NGram.java
More file actions
114 lines (95 loc) · 2.27 KB
/
Lab2NGram.java
File metadata and controls
114 lines (95 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import java.util.*;
/**
 * Finds the most frequent character n-gram among the words of a text.
 */
class NGramOP
{
    /**
     * Replaces every character that is not an ASCII letter (A-Z, a-z) with a
     * space and removes leading and trailing whitespace.
     *
     * @param p raw input text
     * @return the text reduced to ASCII letters and spaces
     */
    public String strCleanup(String p)
    {
        // One space per removed character, so word boundaries are preserved.
        return p.replaceAll("[^A-Za-z]", " ").trim();
    }

    /**
     * Splits {@code param} on spaces, collects every substring of length
     * {@code ngram} from each word, and prints the most frequent one prefixed
     * by a label ("Unigram", "Bigram", "Trigram", or "&lt;n&gt;-gram" for
     * larger sizes). Ties are resolved in favor of the lexicographically
     * smallest n-gram (String natural ordering). Words shorter than
     * {@code ngram} contribute nothing; if no n-gram exists at all, the label
     * is printed followed by an empty string.
     *
     * The method also echoes the input and prints per-word trace lines.
     * NOTE(review): the "strList:"/"inputStr:"/separator lines look like
     * leftover diagnostics; they are kept because they are part of the
     * observable output - confirm before removing.
     *
     * @param param text whose words are separated by spaces
     * @param ngram n-gram length, must be at least 1
     * @throws IllegalArgumentException if {@code ngram} is less than 1
     */
    public void NGramBreakDown(String param, int ngram)
    {
        if (ngram < 1)
        {
            // A non-positive size would otherwise produce empty n-grams or an
            // out-of-range substring call deep inside the loop.
            throw new IllegalArgumentException("ngram must be at least 1, got " + ngram);
        }
        System.out.println(param);

        List<String> words = splitWords(param);
        Map<String, Integer> counts = countNGrams(words, ngram);

        System.out.println(labelFor(ngram) + " " + mostFrequent(counts));
    }

    /** Returns the non-blank tokens of {@code text}, tracing each one. */
    private static List<String> splitWords(String text)
    {
        List<String> words = new ArrayList<>();
        for (String token : text.split(" "))
        {
            // Consecutive spaces yield empty tokens; skip them.
            if (token.trim().length() > 0)
            {
                words.add(token);
                System.out.println("strList: " + token);
            }
        }
        return words;
    }

    /** Counts every length-{@code ngram} substring of each word, keyed in sorted order. */
    private static Map<String, Integer> countNGrams(List<String> words, int ngram)
    {
        // TreeMap keeps keys sorted so the tie-break below is deterministic.
        Map<String, Integer> counts = new TreeMap<>();
        for (String word : words)
        {
            System.out.println("inputStr: " + word);
            if (word.length() >= ngram)
            {
                int lastStart = word.length() - ngram;
                for (int begin = 0; begin <= lastStart; begin++)
                {
                    String gram = word.substring(begin, begin + ngram);
                    Integer seen = counts.get(gram);
                    counts.put(gram, seen == null ? 1 : seen + 1);
                }
                System.out.println("=======================================================");
            }
        }
        return counts;
    }

    /** Returns the key with the highest count; the smallest key wins ties; "" when empty. */
    private static String mostFrequent(Map<String, Integer> counts)
    {
        int maxFreq = 0;
        String maxStr = "";
        for (Map.Entry<String, Integer> entry : counts.entrySet())
        {
            // Strict comparison: the first (smallest) key with the top count is kept.
            if (entry.getValue() > maxFreq)
            {
                maxFreq = entry.getValue();
                maxStr = entry.getKey();
            }
        }
        return maxStr;
    }

    /** Returns the output label for an n-gram size. */
    private static String labelFor(int ngram)
    {
        switch (ngram)
        {
            case 1:
                return "Unigram";
            case 2:
                return "Bigram";
            case 3:
                return "Trigram";
            default:
                // Sizes above 3 previously produced no result line at all.
                return ngram + "-gram";
        }
    }
}
/**
 * Console entry point: reads a paragraph and an n-gram size from standard
 * input and hands them to {@link NGramOP}.
 */
public class LabStr
{
    /**
     * Expected input on standard input: a first line holding the number of
     * text lines, then that many lines of text, then a line holding the
     * n-gram size. The text lines are joined with single spaces, cleaned with
     * {@code strCleanup} and passed to {@code NGramBreakDown}.
     *
     * @param args unused
     * @throws NumberFormatException if a numeric line is not a valid integer
     * @throws java.util.NoSuchElementException if the input ends early
     */
    public static void main(String[] args)
    {
        // try-with-resources closes the scanner even when reading or parsing fails.
        try (Scanner sc = new Scanner(System.in))
        {
            // trim() tolerates stray whitespace around the number.
            int lines = Integer.parseInt(sc.nextLine().trim());

            // StringBuilder avoids re-copying the whole paragraph on every line.
            StringBuilder para = new StringBuilder();
            for (int i = 0; i < lines; i++)
            {
                para.append(sc.nextLine()).append(' ');
            }

            int grams = Integer.parseInt(sc.nextLine().trim());

            NGramOP nGramObj = new NGramOP();
            String cleaned = nGramObj.strCleanup(para.toString());
            nGramObj.NGramBreakDown(cleaned, grams);
        }
    }
}