不可重叠最长重复子串
poj1743 Musical Theme
/*
 * @prob: poj1743 Musical Theme
 * @auth: Wang Junji
 * @stat: Accepted (status recorded for the original version).
 * @date: June. 15th, 2012
 * @memo: suffix array
 *
 * Longest repeated substring whose two occurrences must not overlap.
 * The notes are replaced by the differences of neighbouring notes, so that
 * transposed themes become equal substrings. The answer is binary searched:
 * for a candidate k the sorted suffixes are split into groups whose height
 * values are all >= k, and k is feasible when one group contains two start
 * positions that are far enough apart.
 * Note that height[] is indexed 1..n, not 0..n-1.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 20010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of get_sa.
// r: difference sequence followed by a 0 terminator.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, ws, wv, sa, r, rank, height;
int n;

// Store min(a, b) in a and return a reference to a.
inline int& gmin(int& a, const int& b) {return a < b ? a : (a = b);}
// Store max(a, b) in a and return a reference to a.
inline int& gmax(int& a, const int& b) {return a > b ? a : (a = b);}

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];}

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// Fill rank[] and height[]; height[i] is the common prefix length of the
// suffixes sa[i - 1] and sa[i]. n excludes the terminator.
inline void get_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k]) ++k;
    }
    return;
}

// Is there a substring of k differences with two occurrences whose
// underlying note ranges do not overlap?
// k differences cover k + 1 notes, therefore the start positions have to
// differ by MORE than k; accepting a distance of exactly k would let the two
// themes share one note.
inline bool check(int k)
{
    int min_sa = sa[1], max_sa = sa[1];
    for (int i = 2; i < n + 1; ++i)
    {
        if (height[i] < k)
        {
            // Group boundary: evaluate the finished group, start a new one.
            if (max_sa - min_sa > k) return 1;
            max_sa = min_sa = sa[i];
        }
        else gmax(max_sa, sa[i]), gmin(min_sa, sa[i]);
    }
    return max_sa - min_sa > k;
}

int main()
{
    freopen("Musical_Theme.in", "r", stdin);
    freopen("Musical_Theme.out", "w", stdout);
    // "== 1" also stops on EOF or malformed input, where scanf returns a
    // value different from 1 and n would otherwise keep its old contents.
    while (scanf("%d", &n) == 1 && n)
    {
        for (int i = 0; i < n; ++i) scanf("%d", r + i);
        // Difference of neighbours, shifted by 100 to stay positive.
        // Assumes the shifted values stay inside the alphabet of size 200.
        for (int i = 0; i < n - 1; ++i) r[i] -= r[i + 1] - 100;
        r[--n] = 0; // n is now the number of differences, r[n] the terminator
        get_sa(r, sa, n + 1, 200);
        get_height(r, sa, n);
        // A theme needs at least 5 notes, i.e. at least 4 differences.
        // res holds (largest feasible k) + 1, the theme length in notes.
        int L = 4, R = n + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            if (check(Mid)) res = L = Mid + 1;
            else R = Mid;
        }
        printf("%d\n", res);
    }
    return 0;
}
可重叠的K次最长重复子串
poj3261 Milk Patterns
/*
 * @prob: poj3261 Milk_Patterns
 * @auth: Wang Junji
 * @stat: Accepted.
 * @date: June. 15th, 2012
 * @memo: suffix array
 *
 * Longest substring that occurs at least K times; occurrences may overlap.
 * Binary search on the length k: split the sorted suffixes into groups whose
 * height values are all >= k; k is feasible when some group holds at least K
 * suffixes. (k and K mean different things.)
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 100010;
typedef int arr[maxN];
// wa, wb, wv, ws: scratch buffers of get_sa.
// r: compressed input followed by a 0 terminator; tab: sorted distinct values.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, wv, ws, r, rank, sa, height, tab;
int n, K;

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];}

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// Fill rank[] and height[]; height[i] is the common prefix length of the
// suffixes sa[i - 1] and sa[i]. n excludes the terminator.
inline void get_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k]) ++k;
    }
    return;
}

// True when some run of consecutive ranks with height >= k inside the run
// contains at least K suffixes. pst is the first rank of the current run.
inline bool check(int k)
{
    int pst = 1;
    for (int i = 2; i < n + 1; ++i) if (height[i] < k)
    {
        // Ranks pst .. i - 1 form a finished run of i - pst suffixes.
        if (i - pst >= K) return 1;
        else pst = i;
    }
    return n + 1 - pst >= K;
}

int main()
{
    freopen("Milk_Patterns.in", "r", stdin);
    freopen("Milk_Patterns.out", "w", stdout);
    scanf("%d%d", &n, &K);
    for (int i = 0; i < n; ++i) scanf("%d", r + i), tab[i] = r[i];
    // Coordinate compression: every value becomes its 1-based index among
    // the sorted distinct values, leaving 0 free for the terminator.
    std::sort(tab, tab + n);
    int cnt = std::unique(tab, tab + n) - tab;
    for (int i = 0; i < n; ++i)
        r[i] = std::lower_bound(tab, tab + cnt, r[i]) - tab + 1;
    r[n] = 0;
    get_sa(r, sa, n + 1, cnt + 1);
    get_height(r, sa, n);
    // Binary search for the largest k accepted by check().
    int L = 1, R = n + 1, res = 0;
    while (L < R)
    {
        int Mid = (L + R) >> 1;
        check(Mid) ? (res = Mid, L = Mid + 1) : (R = Mid);
    }
    printf("%d\n", res);
    return 0;
}
不相同的子串的个数
spoj694 Distinct Substrings
spoj705 New Distinct Substrings
/*
 * @prob: spoj694 & spoj705
 * @auth: Wang Junji
 * @stat: Accepted.
 * @date: June. 16th, 2012
 * @memo: suffix array
 *
 * Number of distinct substrings.
 * Every substring is a prefix of some suffix, so the task is to count the
 * distinct prefixes over all suffixes. Suffix i offers n - i prefixes; taken
 * in lexicographic order, suffix sa[i] contributes n - sa[i] - height[i]
 * prefixes that did not appear before, and these values are summed up.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 50010;
typedef int arr[maxN];
char str[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa.
// r: character codes of str followed by a 0 terminator.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, ws, wv, r, rank, sa, height;
int n, T;

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];} /* cmp */

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    } /* for */
    return;
} /* calc_sa */

// Fill rank[] and height[]; height[i] is the common prefix length of the
// suffixes sa[i - 1] and sa[i]. n excludes the terminator.
inline void calc_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k]) ++k;
    } /* for */
    return;
} /* calc_height */

int main()
{
    freopen("substr.in", "r", stdin);
    freopen("substr.out", "w", stdout);
    scanf("%d", &T);
    while (T--)
    {
        scanf("%s", str);
        n = strlen(str);
        // Character codes are used directly as symbols; the alphabet size
        // passed below is 128, so codes are presumably 7-bit ASCII.
        for (int i = 0; i < n; ++i) r[i] = str[i];
        r[n] = 0;
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        // Suffix sa[i] has n - sa[i] prefixes, height[i] of which are shared
        // with the previous suffix in sorted order.
        int ans = 0;
        for (int i = 1; i < n + 1; ++i) ans += n - sa[i] - height[i];
        printf("%d\n", ans);
    } /* while */
    return 0;
} /* main */
最长回文子串
ural1297 Palindrome
/*
 * @prob: NOI1297 Palindrome (the title above this listing says ural1297)
 * @auth: Wang Junji
 * @stat: Accepted.
 * @date: June. 16th, 2012
 * @memo: suffix array
 *
 * Longest palindromic substring.
 * The string and its reverse are concatenated with a separator that does not
 * occur in the text, which turns the task into longest-common-prefix queries
 * between two suffixes of the combined string. Every centre is tried for odd
 * and for even length and the longest result is kept.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 2010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa.
// r: symbols of the combined string (character code minus ' ') plus a 0.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, ws, wv, r, rank, sa, height;
// f: sparse table over height[]; pos: length of the original string.
int f[20][maxN], n, pos;
char str[maxN];

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];} /* cmp */

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    } /* for */
    return;
} /* calc_sa */

// Fill rank[] and height[]. The extra "&& r[i + k]" stops the extension at
// a 0 symbol, so a common prefix never runs across the separator (which is
// mapped to 0 in main) or the terminator.
inline void calc_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k] && r[i + k]) ++k;
    } /* for */
    return;
} /* calc_height */

// Build the sparse table: f[q][i] is the minimum of height[] over the
// 2^q entries starting at index i.
inline void rmq_init()
{
    for (int i = 1; i < n + 1; ++i) f[0][i] = height[i];
    for (int q = 0; 1 << q < n; ++q)
        for (int i = 1; i + (1 << q) < n + 2; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + (1 << q)]);
} /* rmq_init */

// Longest common prefix of the suffixes starting at a and b (a != b):
// the minimum of height[] over the ranks strictly after the smaller rank up
// to the larger rank, answered with two overlapping power-of-two windows.
inline int LCP(int a, int b)
{
    a = rank[a], b = rank[b];
    if (a > b) std::swap(a, b);
    ++a;
    // After the loop and the decrement, 2^q is the largest power of two not
    // exceeding the window length b - a + 1.
    int q = 0;
    while (1 << q < b - a + 2) ++q;
    --q;
    return std::min(f[q][a], f[q][b - (1 << q) + 1]);
} /* LCP */

int main()
{
    freopen("Palindrome.in", "r", stdin);
    freopen("Palindrome.out", "w", stdout);
    scanf("%s", str);
    pos = strlen(str);
    // Build "text + ' ' + reversed text". strncpy copies exactly pos bytes
    // and writes no terminator; strlen below relies on the rest of the
    // global buffer still being zero.
    str[pos] = ' ';
    strncpy(str + pos + 1, str, pos);
    n = strlen(str);
    std::reverse(str + pos + 1, str + n);
    for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
    r[n] = 0;
    calc_sa(r, sa, n + 1, 128);
    calc_height(r, sa, n);
    rmq_init();
    // ans: best length so far, res: start of the best palindrome.
    int ans = 0, res = 0;
    for (int i = 0; i < pos; ++i)
    {
        // Odd length, centred on i: position n - i - 1 is the mirror image
        // of i inside the reversed copy.
        int ths = LCP(i, n - i - 1);
        ths <<= 1, --ths;
        if (ths > ans) ans = ths, res = i - (ths >> 1);
        // Even length, centred between i - 1 and i. For i == 0 this reads
        // rank[n], which calc_height never assigns.
        ths = LCP(i, n - i);
        ths <<= 1;
        if (ths > ans) ans = ths, res = i - (ths >> 1);
    } /* for */
    for (int i = res; i < res + ans; ++i) putchar(str[i]);
    printf("\n");
    return 0;
} /* main */
连续重复子串
poj2406 Power Strings
/*
 * @prob: poj2406 Power_Strings
 * @auth: Wang Junji
 * @stat: Accepted.
 * @date: June. 15th, 2012
 * @memo: brute-force matching
 *
 * For every input string print the largest count c such that the string is
 * c copies of one block. Candidate block lengths are tried in increasing
 * order; the first divisor of the length that is a period wins.
 */
#include <cstdio>
#include <cstring>

const int maxN = 1000010;
char str[maxN];
int n, ans;

// True when str[i] equals str[i + len] for every i with i + len < n,
// i.e. when len is a period of the current string.
inline bool check(int len)
{
    const char* left = str;
    const char* right = str + len;
    const char* const stop = str + n;
    while (right < stop)
    {
        if (*left != *right) return false;
        ++left;
        ++right;
    }
    return true;
}

int main()
{
    freopen("Power_Strings.in", "r", stdin);
    freopen("Power_Strings.out", "w", stdout);
    for (;;)
    {
        // Input ends at EOF or at a line consisting of a single dot.
        if (scanf("%s", str) == EOF) break;
        if (strcmp(str, ".") == 0) break;
        n = strlen(str);
        int block = 1;
        while (block <= n)
        {
            if (n % block == 0 && check(block))
            {
                ans = n / block;
                break;
            }
            ++block;
        }
        printf("%d\n", ans);
    }
    return 0;
}
重复次数最多的连续重复子串
poj3693 Maximum repetition substring
/*
 * @prob: poj3693 Maximum repetition substring
 * @auth: Wang Junji
 * @stat: Accepted (status recorded for the original version).
 * @date: June. 15th, 2012
 * @memo: suffix array
 *
 * Substring with the largest number of consecutive repetitions; ties are
 * broken by lexicographic order.
 * Enumerate the period len. A substring made of cnt >= 2 blocks of length
 * len covers at least one pair of sample points str[i], str[i + len] with i
 * a multiple of len, so it is enough to look at those pairs: with
 * K = LCP(i, i + len) the run starting at i has K / len + 1 blocks, and when
 * K is not a multiple of len the run may gain one block by starting
 * len - K % len positions earlier.
 * For the lexicographic order all periods that reach the best count are
 * recorded; start positions are then tried in suffix-array order and the
 * first one that works with some recorded period is the answer.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 100010;
typedef int arr[maxN];
// wa, wb, wv, ws: scratch buffers of get_sa.
// r: symbols ('a' -> 1, ...) followed by a 0 terminator.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
// tab[0 .. top): distinct periods that reach the best repetition count.
arr wa, wb, wv, ws, r, rank, height, sa, tab;
int f[20][maxN], n, top;
char str[maxN];

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];}

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// Fill rank[] and height[]; height[i] is the common prefix length of the
// suffixes sa[i - 1] and sa[i]. n excludes the terminator.
inline void get_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k]) ++k;
    }
    return;
}

// Build the sparse table: f[q][i] is the minimum of height[] over the
// 2^q entries starting at index i.
inline void rmq_init()
{
    for (int i = 1; i < n + 1; ++i) f[0][i] = height[i];
    for (int q = 0; 1 << q < n; ++q)
        for (int i = 1; i + (1 << q) < n + 2; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + (1 << q)]);
    return;
}

// Longest common prefix of the suffixes starting at a and b.
// Both arguments must be distinct positions in [0, n): rank[] is only
// filled for those positions.
inline int LCP(int a, int b)
{
    a = rank[a], b = rank[b];
    if (a > b) std::swap(a, b);
    ++a;
    // After the loop and the decrement, 2^q is the largest power of two not
    // exceeding the window length b - a + 1.
    int q = 0;
    for (; 1 << q < b - a + 2; ++q);
    --q;
    return std::min(f[q][a], f[q][b - (1 << q) + 1]);
}

int main()
{
    freopen("substr.in", "r", stdin);
    freopen("substr.out", "w", stdout);
    int Case = 0;
    while (scanf("%s", str) != EOF && strcmp(str, "#"))
    {
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i] - 'a' + 1;
        r[n] = 0;
        get_sa(r, sa, n + 1, 27);
        get_height(r, sa, n);
        rmq_init();

        // Default answer: one repetition of the smallest single character,
        // which is the first character of the smallest suffix.
        int _cnt = 1, _pos = sa[1], _len = 1;
        top = 0; // periods of an earlier test case must not leak in

        for (int len = 1; len < n; ++len)
            for (int i = 0; i + len < n; i += len)
            {
                int K = LCP(i, i + len), cnt = K / len + 1;
                int pos = i - len + K % len;
                // Shifted start: use the exact block count of the run that
                // begins at pos, so cnt is always backed by a real run.
                if (pos > -1 && K % len)
                    cnt = std::max(cnt, LCP(pos, pos + len) / len + 1);
                if (cnt > _cnt) _cnt = cnt, top = 0;
                // len only grows, so comparing with the last entry keeps
                // tab free of duplicates and bounded by n entries.
                if (cnt == _cnt && (top == 0 || tab[top - 1] != len))
                    tab[top++] = len;
            }

        if (_cnt > 1)
        {
            bool flag = 0;
            for (int i = 1; i < n + 1 && !flag; ++i)
            {
                int ths = sa[i];
                for (int j = 0; j < top; ++j)
                    // The second argument of LCP must stay inside the text.
                    if (ths + tab[j] < n
                        && LCP(ths, ths + tab[j]) / tab[j] + 1 == _cnt)
                    {
                        _pos = ths, _len = tab[j];
                        flag = 1;
                        break;
                    }
            }
        }

        printf("Case %d: ", ++Case);
        for (int i = _pos; i < _pos + _cnt * _len; ++i) putchar(str[i]);
        printf("\n");
    }
    return 0;
}
最长公共子串
poj2774 Long Long Message
/*
 * @prob: poj2774 Long Long Message
 * @auth: Wang Junji
 * @stat: Accepted.
 * @date: June. 15th, 2012
 * @memo: suffix array
 *
 * Longest common substring of two strings.
 * The two strings are joined with a special separator that is smaller than
 * every text character. Whenever two neighbouring suffixes in sorted order
 * start in different strings (one before, one after the separator), their
 * height value is a candidate; the answer is the largest candidate.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 200010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of get_sa.
// r: character codes of the joined string.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, ws, wv, r, rank, sa, height;
// n: length of the joined string; pos: index of the separator.
int n, pos;
char str[maxN];

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];}

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// Fill rank[] and height[]; height[i] is the common prefix length of the
// suffixes sa[i - 1] and sa[i]. n excludes the terminator.
inline void get_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k]) ++k;
    }
    return;
}

// Store max(a, b) in a and return a reference to a.
inline int& gmax(int& a, const int& b) {return a > b ? a : (a = b);}

int main()
{
    freopen("message.in", "r", stdin);
    freopen("message.out", "w", stdout);
    // Read the first string, append a blank as separator, then read the
    // second string directly behind it.
    scanf("%s", str);
    pos = strlen(str);
    strcat(str, " ");
    scanf("%s", str + pos + 1);
    n = strlen(str);
    // r[n] is never assigned here: the terminator symbol is the 0 that the
    // namespace-scope array r starts out with.
    for (int i = 0; i < n; ++i) r[i] = str[i];
    get_sa(r, sa, n + 1, 128);
    get_height(r, sa, n);
    int ans = 0;
    for (int i = 1; i < n + 1; ++i)
        // Neighbouring suffixes on different sides of the separator.
        if ((sa[i] < pos && sa[i - 1] > pos) || (sa[i] > pos && sa[i - 1] < pos))
            gmax(ans, height[i]);
    printf("%d\n", ans);
    return 0;
}
长度不小于K的公共子串的个数
poj3415 Common Substrings
/*
 * @prob: poj3415 Common Substrings
 * @auth: Wang Junji
 * @stat: Time Limit Exceeded (status recorded for the original version,
 *        which compared every pair of suffixes inside a group).
 * @date: June. 16th, 2012
 * @memo: suffix array + monotonic stack
 *
 * Number of triples (i, j, k) with k >= K such that the substring of A of
 * length k at i equals the substring of B of length k at j.
 * The strings are joined with a separator. A pair of suffixes (one from A,
 * one from B) with common prefix length l >= K contributes l - K + 1. The
 * sorted suffixes are scanned once per direction: first counting pairs where
 * the A suffix comes earlier in sorted order, then pairs where the B suffix
 * comes earlier. During a scan a stack of (height, count) entries with
 * increasing heights keeps the sum of contributions of all earlier suffixes
 * of the wanted side up to date in amortised constant time per step.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 200010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of get_sa.
// r: character codes of the joined string followed by a 0 terminator.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
// sta / stc: heights and multiplicities of the monotonic stack.
arr wa, wb, ws, wv, r, rank, sa, height, sta, stc;
char str[maxN];
// n: length of the joined string; pos: index of the separator;
// K: minimum length of a counted common substring.
int n, pos, K;

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];}

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// Fill rank[] and height[]; height[i] is the common prefix length of the
// suffixes sa[i - 1] and sa[i]. n excludes the terminator.
inline void get_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k]) ++k;
    }
    return;
}

// Sum of (LCP - K + 1) over all pairs of suffixes (x, y) where x precedes y
// in sorted order, every height between them is >= K, and
//   earlier_in_first == true : x starts before pos, y starts after pos
//   earlier_in_first == false: x starts after pos,  y starts before pos.
// The suffix starting exactly at pos (the separator) belongs to neither side.
inline long long count_pairs(bool earlier_in_first)
{
    long long total = 0; // result
    long long sum = 0;   // contributions of all stacked suffixes w.r.t. rank i
    int top = 0;
    for (int i = 2; i < n + 1; ++i)
    {
        if (height[i] < K)
        {
            // Nothing before rank i can pair with rank i or anything later.
            top = 0;
            sum = 0;
            continue;
        }
        // Suffix sa[i - 1] joins the candidates if it is on the wanted side;
        // its common prefix with rank i is exactly height[i].
        int merged = 0;
        const int prev = sa[i - 1];
        if (earlier_in_first ? prev < pos : prev > pos)
        {
            merged = 1;
            sum += height[i] - K + 1;
        }
        // Older candidates cannot keep a common prefix above height[i]:
        // lower them and fold them into the new stack entry.
        while (top && sta[top - 1] >= height[i])
        {
            --top;
            sum -= (long long)stc[top] * (sta[top] - height[i]);
            merged += stc[top];
        }
        sta[top] = height[i];
        stc[top] = merged;
        ++top;
        if (earlier_in_first ? sa[i] > pos : sa[i] < pos) total += sum;
    }
    return total;
}

int main()
{
    freopen("common_substr.in", "r", stdin);
    freopen("common_substr.out", "w", stdout);
    while (scanf("%d", &K) != EOF && K)
    {
        // Read A, append a blank as separator, read B directly behind it.
        scanf("%s", str);
        pos = strlen(str);
        strcat(str, " ");
        scanf("%s", str + pos + 1);
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i];
        r[n] = 0;
        get_sa(r, sa, n + 1, 128);
        get_height(r, sa, n);
        // The count can exceed the range of int, hence long long.
        long long ans = count_pairs(true) + count_pairs(false);
        printf("%lld\n", ans);
    }
    return 0;
}
每个字符串至少出现两次且互不重叠的最长子串
spoj220 Relevant Phrases of Annihilation
/*
 * @prob: spoj220 Relevant Phrases of Annihilation
 * @auth: Wang Junji
 * @stat: Time Limit Exceeded (status recorded for the original version,
 *        whose check() cleared a maxN-sized array at every group boundary).
 * @date: June. 16th, 2012
 * @memo: suffix array
 *
 * Longest substring that occurs at least twice, without overlap, in every
 * one of the N strings.
 * Binary search on the length k: group the sorted suffixes by height >= k
 * and test whether some group touches every string with a largest and a
 * smallest start position that are at least k apart.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 100010, maxM = 20;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa.
// r: symbols (character code minus ' ', so separators become 0) plus a 0.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, ws, wv, r, rank, sa, height;
// pos[i] / len[i]: start and length of string i inside the joined text.
// n: length of the joined text; N: number of strings; T: test cases left.
int pos[maxM], len[maxM], n, N, T;
char str[maxN];

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];} /* cmp */

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    } /* for */
    return;
} /* calc_sa */

// Fill rank[] and height[]. The extra "&& r[i + k]" stops the extension at
// a 0 symbol, so a common prefix never runs across a separator.
inline void calc_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k] && r[i + k]) ++k;
    } /* for */
    return;
} /* calc_height */

// Index of the string that contains text position x.
inline int plc(const int& x)
{return std::upper_bound(pos, pos + N, x) - pos - 1;} /* plc */

// Store max(a, b) in a and return a reference to a.
inline int& gmax(int& a, const int& b) {return a > b ? a : (a = b);} /* gmax */
// Store min(a, b) in a and return a reference to a.
inline int& gmin(int& a, const int& b) {return a < b ? a : (a = b);} /* gmin */

// True when every one of the N strings has two recorded start positions
// that are at least k apart. A string without any recorded position keeps
// its initial lo/hi values, whose difference is negative.
inline bool group_ok(const int* lo, const int* hi, int k)
{
    for (int j = 0; j < N; ++j) if (hi[j] - lo[j] < k) return 0;
    return 1;
} /* group_ok */

// Is there a substring of length k that occurs twice without overlap in
// every string?
inline bool check(int k)
{
    // Both tables are indexed by string number, so maxM entries suffice;
    // keeping them small also keeps the per-group reset cheap.
    static int min_sa[maxM], max_sa[maxM];
    memset(min_sa, 0x3f, sizeof min_sa);
    memset(max_sa, 0xff, sizeof max_sa);
    for (int i = 1; i < n + 1; ++i)
    {
        if (i > 1 && height[i] < k)
        {
            // Group boundary: evaluate the finished group, start a new one.
            if (group_ok(min_sa, max_sa, k)) return 1;
            memset(min_sa, 0x3f, sizeof min_sa);
            memset(max_sa, 0xff, sizeof max_sa);
        } /* if */
        int owner = plc(sa[i]);
        gmin(min_sa[owner], sa[i]), gmax(max_sa[owner], sa[i]);
    } /* for */
    return group_ok(min_sa, max_sa, k);
} /* check */

int main()
{
    freopen("phrases.in", "r", stdin);
    freopen("phrases.out", "w", stdout);
    scanf("%d", &T);
    while (T--)
    {
        scanf("%d", &N);
        int pst = 0, max_len = 0;
        for (int i = 0; i < N; ++i)
        {
            pos[i] = pst;
            scanf("%s", str + pst);
            len[i] = strlen(str + pst);
            gmax(max_len, len[i]);
            str[pst + len[i]] = ' '; // separator replaces the terminator
            pst += len[i] + 1;
        } /* for */
        // The text length comes from the write cursor, not from strlen():
        // the buffer may still hold the tail of a longer earlier test case.
        // The trailing separator at pst - 1 becomes the terminator.
        n = pst - 1;
        str[n] = 0;
        for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
        r[n] = 0; // must be reset, r keeps values of earlier test cases
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        // Binary search for the largest k accepted by check().
        int L = 0, R = max_len + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            check(Mid) ? (res = Mid, L = Mid + 1) : (R = Mid);
        } /* while */
        printf("%d\n", res);
    } /* while */
    return 0;
} /* main */
出现在大于一半的字符串中的最长子串
poj3294 Life Forms
/*
 * @prob: poj3294 Life_Forms
 * @auth: Wang Junji
 * @stat: Accepted.
 * @date: June. 16th, 2012
 * @memo: suffix array
 *
 * Longest substrings that occur in more than half of the given strings.
 * All strings are joined with a separator. Binary search on the length k:
 * split the sorted suffixes into groups whose height values are all >= k and
 * test whether the suffixes of some group come from more than half of the
 * strings. For the output the groups are scanned once more in lexicographic
 * (suffix array) order and every qualifying group is printed.
 */
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>
#include <bitset>

using std::upper_bound;

const int maxN = 120010, maxM = 110, maxL = 1010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa.
// r: symbols (character code minus ' ', so separators become 0) plus a 0.
// sa / rank / height: suffix array, its inverse and the LCP of neighbours.
arr wa, wb, ws, wv, r, rank, sa, height;
// len[i] / pos[i]: length and start of string i inside the joined text.
// n: length of the joined text; N: number of strings.
int len[maxM], pos[maxM], n, N;
char tmp_str[maxM][maxL], str[maxN];
// marked: the set of strings that own a suffix of the current group.
std::bitset <maxM> marked;

// True when positions a and b carry the same (first key, second key) pair.
inline bool cmp(int* r, int a, int b, int len)
{return r[a] == r[b] && r[a + len] == r[b + len];}

// Prefix-doubling suffix array construction using counting sorts.
//   r  : input symbols, every value must be < m
//   sa : output, sa[i] is the start of the i-th smallest suffix
//   n  : number of symbols including the terminator
//   m  : alphabet size
inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    // Counting sort of all positions by their first symbol.
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by second key: positions without a
        // second half come first, the others follow in the order of sa.
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i) if (sa[i] - j > -1) y[p++] = sa[i] - j;
        // Stable counting sort of y by first key.
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        // Assign new ranks; p ends up as the number of distinct ranks.
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// Fill rank[] and height[]. The extra "&& r[i + k]" stops the extension at
// a 0 symbol, so a common prefix never runs across a separator.
inline void calc_height(int* r, int* sa, int n)
{
    int k = 0;
    for (int i = 1; i < n + 1; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; height[rank[i++]] = k)
    {
        int j = sa[rank[i] - 1];
        if (k) --k;
        while (r[i + k] == r[j + k] && r[i + k]) ++k;
    }
    return;
}

// Store max(a, b) in a and return a reference to a.
inline int& gmax(int& a, const int& b) {return a > b ? a : (a = b);}

// True when some group of suffixes with height >= k inside the group
// contains suffixes of more than N / 2 different strings. The owner of a
// suffix is found by locating its start position among pos[].
inline bool check(int k)
{
    marked.reset();
    marked.set(upper_bound(pos, pos + N, sa[1]) - pos - 1);
    for (int i = 2; i < n + 1; ++i)
    {
        if (height[i] < k)
        {
            // Group boundary: evaluate the finished group, start a new one.
            // ">>" binds tighter than ">", so this compares with N / 2.
            if (marked.count() > N >> 1) return 1;
            marked.reset();
        }
        marked.set(upper_bound(pos, pos + N, sa[i]) - pos - 1);
    }
    return marked.count() > N >> 1;
}

int main()
{
    freopen("Life_Forms.in", "r", stdin);
    freopen("Life_Forms.out", "w", stdout);
    while (scanf("%d", &N) != EOF && N)
    {
        // A single string is its own answer.
        if (N == 1)
        {
            scanf("%s", str);
            puts(str);
            printf("\n");
            continue;
        }
        int max_len = 0;
        for (int i = 0; i < N; ++i)
            scanf("%s", tmp_str[i]), len[i] = strlen(tmp_str[i]), gmax(max_len, len[i]);
        // Join the strings with single blanks; the final strcpy leaves the
        // joined text terminated.
        pos[0] = 0;
        strcpy(str, tmp_str[0]);
        str[len[0]] = ' ';
        for (int i = 1; i < N; ++i)
        {
            pos[i] = pos[i - 1] + (len[i - 1] + 1);
            str[pos[i] - 1] = ' ';
            strcpy(str + pos[i], tmp_str[i]);
        }
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
        r[n] = 0;
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        // Binary search for the largest k accepted by check().
        int L = 0, R = max_len + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            check(Mid) ? (res = Mid, L = Mid + 1) : (R = Mid);
        }
        if (!res) printf("?\n\n");
        else
        {
            // Same grouping as in check(), now printing the first res
            // characters of the last suffix of every qualifying group.
            marked.reset();
            marked.set(upper_bound(pos, pos + N, sa[1]) - pos - 1);
            for (int i = 2; i < n + 1; ++i)
            {
                if (height[i] < res)
                {
                    if (marked.count() > N >> 1)
                    {
                        for (int j = sa[i - 1]; j < sa[i - 1] + res; ++j)
                            putchar(str[j]);
                        printf("\n");
                    }
                    marked.reset();
                }
                marked.set(upper_bound(pos, pos + N, sa[i]) - pos - 1);
            }
            // The last group ends at rank n.
            if (marked.count() > N >> 1)
            {
                for (int j = sa[n]; j < sa[n] + res; ++j) putchar(str[j]);
                printf("\n");
            }
            printf("\n");
        }
    }
    return 0;
}