aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiyong2020-07-20 18:43:01 -0700
committerSiyong2020-07-20 18:43:01 -0700
commit1f7817ade8498b4ab38519ebb7b3e22877f50a5d (patch)
tree32f6038f2a73588d4cedcc26a71cbb8031d0fa6f
parent24573c1fb2a6cee38f5e0a9d8db26c6b6bb63c4c (diff)
String Suffix: Terminator char pro tip
-rw-r--r--content/6_Advanced/String_Suffix.mdx50
1 files changed, 35 insertions, 15 deletions
diff --git a/content/6_Advanced/String_Suffix.mdx b/content/6_Advanced/String_Suffix.mdx
index ebfe55e..1b05ae8 100644
--- a/content/6_Advanced/String_Suffix.mdx
+++ b/content/6_Advanced/String_Suffix.mdx
@@ -206,6 +206,8 @@ Naively, this would take up $O(N^2)$ memory, but *path compression* enables it t
A suffix array can be generated by the suffix tree by taking the dfs traversal of the suffix tree.
+<Spoiler title="Sample Code: Suffix Array from Suffix Tree">
+
<LanguageSection>
<CPPSection>
@@ -213,7 +215,7 @@ A suffix array can be generated by the suffix tree by taking the dfs traversal o
<!-- https://codeforces.com/edu/course/2/lesson/2/2/practice/contest/269103/submission/85759835 -->
```cpp
-int N, sa[MN];//length of string, suffix array
+int N, sa[MN], ctr;//length of string, suffix array, counter
struct Edge
{
@@ -239,6 +241,8 @@ void dfs(int n=0, int d=0)
</LanguageSection>
+</Spoiler>
+
### Generate Suffix Tree from Suffix Array
Of course, the above operation can be reversed as well.
@@ -246,6 +250,15 @@ Each element in the suffix array corresponds to a leaf in the suffix tree.
The LCP array stores information about the Lowest Common Ancestor of two adjacent elements in the suffix array.
Using these two pieces of information, we can construct the suffix tree from the suffix array in linear time.
+<Info title="Pro Tip!">
+
+Frequently, string suffix structures are greatly simplified by adding a 'terminator' character, such as `$` or `-`, to the end of the string.
+In the following samples, these terminators are either explicitly added or assumed to be present in the string already.
+
+</Info>
+
+<Spoiler title="Sample Code: Suffix Tree from Suffix Array">
+
<LanguageSection>
<CPPSection>
@@ -255,15 +268,11 @@ Using these two pieces of information, we can construct the suffix tree from the
int N;
char s[MN];
-int sa[MN], lcp[MN]; //suffix and lcp array
-struct Edge
-{
-public:
- int n, l, r; //node, edge covers s[l..r]
- Edge(int n=-1, int l=-1, int r=-1):n(n), l(l), r(r) {}
- explicit operator bool() const {return n!=-1;}
-} c[MN*2][MK]; // edges of suffix tree
-int d[MN*2];//length of string in suffix tree
+int sa[MN]; //suffix array
+int lcp[MN]; //lcp[i] stores the longest common prefix between s[sa[i-1]..] and s[sa[i]..]
+
+Edge c[MN*2][MK]; // edges of suffix tree
+int d[MN*2];//length of string corresponding to a node in the suffix tree
int q[MN*2], Q, ctr, rm[MN];//q is used as stack. ctr counts number of nodes in tree
std::stack<int> ins[MN];
void build_tree()
@@ -272,34 +281,39 @@ void build_tree()
for(int i=N-1;i>=1;--i)
{
while(Q&&lcp[q[Q]]>lcp[i])--Q;
- if(lcp[q[Q]]!=lcp[i]) ++rm[q[Q]];
+ if(lcp[q[Q]]!=lcp[i]) ++rm[q[Q]];//Right bound of the range where lcp is the longest common prefix
q[++Q]=i;
}
q[0]=0, Q=0;
for(int i=1;i<N;++i)
{
while(Q&&lcp[q[Q]]>lcp[i])--Q;
- if(lcp[q[Q]]!=lcp[i]) ins[q[Q]].push(i);
+ if(lcp[q[Q]]!=lcp[i]) ins[q[Q]].push(i);//Left bound of the range where lcp first becomes a longest common prefix
q[++Q]=i;
}
- q[0]=0, Q=0;
+ //The left and right bounds computed above can be interpreted as the dfs preorder and postorder
+ q[0]=0, Q=0;//This q array now holds the stack of ancestors for every new node created
auto nn=[&](int l, int dd)
{
++ctr;
d[ctr]=dd;
- int p=q[Q], r=l+dd;
+ int p=q[Q];
+ // p is the parent of this node, per definition of stack q
+ int r=l+dd;
+ //s[l..r] is the string corresponding to the node that we are inserting
l+=d[p];
+ //d[p] is the length of the parent, so s[l..l+d[p]] would have already been covered by the node's ancestors
c[p][s[l]]={ctr,l,r};
return ctr;
};
for(int i=0;i<N;++i)
{
Q-=rm[i];
- assert(Q>=0);
for(int x;!ins[i].empty();ins[i].pop())
{
x=ins[i].top();
x=nn(sa[x], lcp[x]);
+ // sa[x+1] would be equivalent, per definition of lcp
q[++Q]=x;
}
nn(sa[i], N-sa[i]);
@@ -311,11 +325,15 @@ void build_tree()
</LanguageSection>
+</Spoiler>
+
### Generate Suffix Tree from Suffix Automaton
One interesting thing about Suffix Trees and Suffix Automata is that the link tree of a Suffix Automaton is equivalent to the Suffix Tree of the reversed string.
Since Suffix Automata are much easier to create than Suffix Trees, we can use this as an alternate method to build a Suffix Tree, all in linear time too!
+<Spoiler title="Sample Code: Suffix Tree from Suffix Automaton">
+
<LanguageSection>
<CPPSection>
@@ -349,6 +367,8 @@ void build_tree()
</LanguageSection>
+</Spoiler>
+
## Palindromic Tree
(Still don't know what these are!! Benq help!)