From 1f7817ade8498b4ab38519ebb7b3e22877f50a5d Mon Sep 17 00:00:00 2001 From: Siyong Date: Mon, 20 Jul 2020 18:43:01 -0700 Subject: String Suffix: Terminator char pro tip --- content/6_Advanced/String_Suffix.mdx | 50 +++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/content/6_Advanced/String_Suffix.mdx b/content/6_Advanced/String_Suffix.mdx index ebfe55e..1b05ae8 100644 --- a/content/6_Advanced/String_Suffix.mdx +++ b/content/6_Advanced/String_Suffix.mdx @@ -206,6 +206,8 @@ Naively, this would take up $O(N^2)$ memory, but *path compression* enables it t A suffix array can be generated by the suffix tree by taking the dfs traversal of the suffix tree. + + @@ -213,7 +215,7 @@ A suffix array can be generated by the suffix tree by taking the dfs traversal o ```cpp -int N, sa[MN];//length of string, suffix array +int N, sa[MN], ctr;//length of string, suffix array, counter struct Edge { @@ -239,6 +241,8 @@ void dfs(int n=0, int d=0) + + ### Generate Suffix Tree from Suffix Array Of course, the above operation can be reversed as well. @@ -246,6 +250,15 @@ Each element in the suffix array corresponds to a leaf in the suffix tree. The LCP array stores information about the Lowest Common Ancestor of two adjacent elements in the suffix array. Using these two pieces of information, we can construct the suffix tree from the suffix array in linear time. + + +Frequently, string suffix structures are greatly simplified by adding a 'terminator' character, such as `$` or `-`, to the end of the string. +In the following samples, these terminators are either explicitly added or assumed to be present in the string already. + + + + + @@ -255,15 +268,11 @@ Using these two pieces of information, we can construct the suffix tree from the int N; char s[MN]; -int sa[MN], lcp[MN]; //suffix and lcp array -struct Edge -{ -public: - int n, l, r; //node, edge covers s[l..r] - Edge(int n=-1, int l=-1, int r=-1):n(n), l(l), r(r) {} - explicit operator bool() const {return n!=-1;} -} c[MN*2][MK]; // edges of suffix tree -int d[MN*2];//length of string in suffix tree +int sa[MN]; //suffix array +int lcp[MN]; //lcp[i] stores the longest common prefix between s[sa[i-1]..] and s[sa[i]..] + +Edge c[MN*2][MK]; // edges of suffix tree +int d[MN*2];//length of string corresponding to a node in the suffix tree int q[MN*2], Q, ctr, rm[MN];//q is used as stack. ctr counts number of nodes in tree std::stack ins[MN]; void build_tree() @@ -272,34 +281,39 @@ void build_tree() for(int i=N-1;i>=1;--i) { while(Q&&lcp[q[Q]]>lcp[i])--Q; - if(lcp[q[Q]]!=lcp[i]) ++rm[q[Q]]; + if(lcp[q[Q]]!=lcp[i]) ++rm[q[Q]];//Right bound of the range where lcp is the longest common prefix q[++Q]=i; } q[0]=0, Q=0; for(int i=1;ilcp[i])--Q; - if(lcp[q[Q]]!=lcp[i]) ins[q[Q]].push(i); + if(lcp[q[Q]]!=lcp[i]) ins[q[Q]].push(i);//Left bound of the range where lcp first becomes a longest common prefix q[++Q]=i; } - q[0]=0, Q=0; + //The left and right bounds computed above can be interpreted as the dfs preorder and postorder + q[0]=0, Q=0;//This q array now holds the stack of ancestors for every new node created auto nn=[&](int l, int dd) { ++ctr; d[ctr]=dd; - int p=q[Q], r=l+dd; + int p=q[Q]; + // p is the parent of this node, per definition of stack q + int r=l+dd; + //s[l..r] is the string corresponding to the node that we are inserting l+=d[p]; + //d[p] is the length of the parent, so s[l..l+d[p]] would have already been covered by the node's ancestors c[p][s[l]]={ctr,l,r}; return ctr; }; for(int i=0;i=0); for(int x;!ins[i].empty();ins[i].pop()) { x=ins[i].top(); x=nn(sa[x], lcp[x]); + // sa[x+1] would be equivalent, per definition of lcp q[++Q]=x; } nn(sa[i], N-sa[i]); @@ -311,11 +325,15 @@ void build_tree() + + ### Generate Suffix Tree from Suffix Automaton One interesting thing about Suffix Trees and Suffix Automata is that the link tree of a Suffix Automaton is equivalent to the Suffix Tree of the reversed string. Since Suffix Automata are much easier to create than Suffix Trees, we can use this as an alternate method to build a Suffix Tree, all in linear time too! + + @@ -349,6 +367,8 @@ void build_tree() + + ## Palindromic Tree (Still don't know what these are!! Benq help!) -- cgit v1.2.3-70-g09d2