import java.io.*; import java.nio.file.*; import java.nio.charset.*; import java.util.*; class Suffixbaum { String text; int n,k,l; Node root; void printAll(Node node) { if (node.longAndFrequent) { System.out.println(text.substring(node.i, node.i + node.stringDepth)); } for (char key : node.children.keySet()) { Node child = node.children.get(key); printAll(child); } } void unmark(Node node) { Node slink = root; while (slink.stringDepth < node.stringDepth-1) { slink = slink.children.get(text.charAt(node.i+slink.stringDepth+1)); } if (slink.numberOfOccurrences == node.numberOfOccurrences) slink.longAndFrequent = false; for (char key : node.children.keySet()) { Node child = node.children.get(key); unmark(child); } } void annotateLongAndFrequent(Node node) { if (node.numberOfOccurrences >= k && node.stringDepth >= l) { node.longAndFrequent = true; } for (char key : node.children.keySet()) { Node child = node.children.get(key); annotateLongAndFrequent(child); } } void annotateStringDepth(Node node, int depth) { node.stringDepth = depth; for (char key : node.children.keySet()) { Node child = node.children.get(key); annotateStringDepth(child, depth + child.endIndex - child.startIndex + 1); } } void annotateNumberOfOccurrences(Node node) { if (node.children.isEmpty()) { // I am a leaf node.numberOfOccurrences = 1; } else { node.numberOfOccurrences = 0; for (char key : node.children.keySet()) { Node child = node.children.get(key); annotateNumberOfOccurrences(child); node.numberOfOccurrences += child.numberOfOccurrences; } } } void construct() { root = new Node(-1,-1,-1); // insert suffixes from left to right for (int i = 0; i < n; ++i) { // insert suffix starting at i into the tree Node current = root; int pos = i; while (pos < n) { Node next = current.children.get(text.charAt(pos)); if (next != null) { // current node contains child with next character int posOnEdge = next.startIndex; while (text.charAt(pos) == text.charAt(posOnEdge) && posOnEdge <= next.endIndex) { ++pos; ++posOnEdge; } if (posOnEdge > next.endIndex) { // entire edge is read => go to next node current = next; } else { // I have to split the edge (next,current) Node newNode = new Node(next.startIndex, posOnEdge-1, i); Node newLeaf = new Node(pos, n-1, i); // re-link current.children.put(text.charAt(newNode.startIndex), newNode); next.startIndex = posOnEdge; // shorten the edge newNode.children.put(text.charAt(posOnEdge), next); newNode.children.put(text.charAt(pos), newLeaf); pos = n; } } else { // create new leaf and insert into the list of current's // children Node newLeaf = new Node(pos, n-1, i); current.children.put(text.charAt(pos), newLeaf); pos = n; } } } } Suffixbaum(String t, int k, int l) { text = t; n = text.length(); this.l = l; this.k = k; construct(); } } class Node { int startIndex; int endIndex; HashMap children; // satellite info: int stringDepth; int numberOfOccurrences; boolean longAndFrequent; int i; // number of the suffix that inserted this node Node(int s, int e, int i) { startIndex = s; endIndex = e; children = new HashMap(); this.i = i; // non-initialized values: stringDepth = -1; numberOfOccurrences = -1; longAndFrequent = false; } } public class StringAnalyzer { static String content; static Suffixbaum S; static String readFile(String path, Charset encoding) throws IOException { byte[] encoded = Files.readAllBytes(Paths.get(path)); return new String(encoded, encoding); } public static void main(String args[]) { try { content = readFile(args[0], StandardCharsets.US_ASCII); } catch (IOException e) {} S = new Suffixbaum(content + "\0", Integer.parseInt(args[1]), Integer.parseInt(args[2])); S.annotateStringDepth(S.root, 0); S.annotateNumberOfOccurrences(S.root); S.annotateLongAndFrequent(S.root); S.unmark(S.root); S.printAll(S.root); } }