Hier ist eine Möglichkeit, den Log-Faktor zu erreichen. Sei dp[i][j] wahr, wenn wir den Teilstring s[i..j] erreichen können. Dann:
dp[0][length(s)-1] ->
true
dp[0][j] ->
if s[0] != s[j+1]:
false
else:
true if any dp[0][k]
for j < k ≤ (j + longestMatchRight[0][j+1])
(The longest match we can use is
also bound by the current range.)
(Initialise left side similarly.)
Nun iterieren wir von außen nach innen:
for i = 1 to length(s)-2:
for j = length(s)-2 to i:
dp[i][j] ->
// We removed on the right
if s[i] != s[j+1]:
false
else:
true if any dp[i][k]
for j < k ≤ (j + longestMatchRight[i][j+1])
// We removed on the left
if s[i-1] != s[j]:
true if dp[i][j]
else:
true if any dp[k][j]
for (i - longestMatchLeft[i-1][j]) ≤ k < i
Wir können die längste Übereinstimmung für jedes Startpaar (i, j) in O(n^2) mit der folgenden Rekurrenz vorberechnen:
longest(i, j) ->
if s[i] == s[j]:
return 1 + longest(i + 1, j + 1)
else:
return 0
Damit können wir in O(1) prüfen, ob eine Teilstring-Übereinstimmung vorliegt, die bei den Indizes i und j beginnt. (Wir brauchen sowohl die rechte als auch die linke Richtung.)
So erhalten wir den Log-Faktor
Wir können uns eine Datenstruktur überlegen, mit der wir feststellen können, ob
any dp[i][k]
for j < k ≤ (j + longestMatchRight[i][j+1])
(And similarly for the left side.)
in O(log n) gilt, da wir diese Werte zu diesem Zeitpunkt bereits gesehen haben.
Hier ist C++-Code mit Segmentbäumen für die rechten und linken Abfragen (also O(n^2 * log n)), der den Testgenerator von Bananon enthält. Für 5000 „a“-Zeichen lief er in 3,54 s mit 420 MB ( https://ideone.com/EIrhnR ). Um den Speicher zu reduzieren, ist einer der Segmentbäume als einzelne Zeile implementiert. (Ich muss noch untersuchen, ob sich dasselbe für die Abfragen auf der linken Seite machen lässt, um den Speicher noch weiter zu reduzieren.)
#include <algorithm> // std::min
#include <cstdlib>   // rand, exit
#include <ctime>
#include <iostream>
#include <random>
#include <stdexcept> // std::length_error, std::invalid_argument
#include <string>
using namespace std;
// Maximum supported string length; every table below is sized from it.
const int MAX_N = 5000;
// Sum segment tree accessed by update()/query().
int seg[2 * MAX_N];
// One sum segment tree per first index, accessed by updateL()/queryL().
int segsL[MAX_N][2 * MAX_N];
// m[i][j][0] / m[i][j][1]: longest match left / right for positions i and j
// (filled by precalc()).
int m[MAX_N][MAX_N][2];
// dp[i][j] is non-zero when the substring s[i..j] is reachable (set by f()).
int dp[MAX_N][MAX_N];
// Shortest reachable substring length found so far; so() initialises it to n.
int best;
// Adapted from https://codeforces.com/blog/entry/18051
// Point assignment on the `seg` sum tree: stores `value` in the leaf for
// position p (0-based, tree of n leaves), then recomputes each ancestor as
// the sum of its two children, up to the root at index 1.
void update(int n, int p, int value) {
    int node = p + n;
    seg[node] = value;
    while (node > 1) {
        const int parent = node >> 1;
        seg[parent] = seg[node] + seg[node ^ 1];
        node = parent;
    }
}
// Adapted from https://codeforces.com/blog/entry/18051
// Sum of the `seg` tree over the half-open position range [l, r) for a tree
// of n leaves. Walks both boundaries upward, collecting a node whenever a
// boundary sits on an odd index.
int query(int n, int l, int r) {
    int lo = l + n;
    int hi = r + n;
    int total = 0;
    while (lo < hi) {
        if (lo & 1) {
            total += seg[lo];
            ++lo;
        }
        if (hi & 1) {
            --hi;
            total += seg[hi];
        }
        lo >>= 1;
        hi >>= 1;
    }
    return total;
}
// Adapted from https://codeforces.com/blog/entry/18051
// Point assignment on the sum tree segsL[i]: stores `value` in the leaf for
// position p (0-based, tree of n leaves), then recomputes each ancestor as
// the sum of its two children, up to the root at index 1.
void updateL(int n, int i, int p, int value) {
    int *tree = segsL[i];
    int node = p + n;
    tree[node] = value;
    while (node > 1) {
        const int parent = node >> 1;
        tree[parent] = tree[node] + tree[node ^ 1];
        node = parent;
    }
}
// Adapted from https://codeforces.com/blog/entry/18051
// Sum of the tree segsL[i] over the half-open position range [l, r) for a
// tree of n leaves. Walks both boundaries upward, collecting a node whenever
// a boundary sits on an odd index.
int queryL(int n, int i, int l, int r) {
    const int *tree = segsL[i];
    int lo = l + n;
    int hi = r + n;
    int total = 0;
    while (lo < hi) {
        if (lo & 1) {
            total += tree[lo];
            ++lo;
        }
        if (hi & 1) {
            --hi;
            total += tree[hi];
        }
        lo >>= 1;
        hi >>= 1;
    }
    return total;
}
// Code by גלעד ברקן
// Fills m[a][b] for all a, b < n with the longest common run of s anchored
// at positions a and b:
//   m[a][b][0] - run extending to the left  (towards index 0),
//   m[a][b][1] - run extending to the right (towards index n-1).
void precalc(int n, string & s) {
    // Base case for both directions: a run of length 1 if the characters
    // match, 0 otherwise. Border cells keep this value.
    for (int a = 0; a < n; ++a) {
        for (int b = 0; b < n; ++b) {
            const int same = (s[a] == s[b]) ? 1 : 0;
            m[a][b][0] = same;
            m[a][b][1] = same;
        }
    }

    // Rightward runs: processed from the end so m[a+1][b+1] is already final.
    for (int a = n - 2; a >= 0; --a) {
        for (int b = n - 2; b >= 0; --b) {
            if (s[a] == s[b])
                m[a][b][1] = m[a + 1][b + 1][1] + 1;
            else
                m[a][b][1] = 0;
        }
    }

    // Leftward runs: processed from the start so m[a-1][b-1] is already final.
    for (int a = 1; a < n; ++a) {
        for (int b = 1; b < n; ++b) {
            if (s[a] == s[b])
                m[a][b][0] = m[a - 1][b - 1][0] + 1;
            else
                m[a][b][0] = 0;
        }
    }
}
// Code by גלעד ברקן
// Fills dp[][] for the string s of length n and lowers `best` to the length
// of the shortest reachable substring.
//
// dp[i][j] is set to 1 when substring s[i..j] is reachable from the full
// string. Expects dp, seg and segsL to be zeroed for the n-bounded region and
// m to be filled by precalc() before the call (so() does both).
//
// `seg` holds the dp values of the current row i (indexed by j) for the
// "removed on the right" range queries; segsL[j] holds the dp values of
// column j (indexed by i) for the "removed on the left" range queries.
void f(int n, string & s) {
    if (n <= 0)
        return;  // nothing to solve; also avoids touching dp[0][-1]

    const int last = n - 1;

    // The full string is the starting point.
    dp[0][last] = 1;
    update(n, last, 1);
    updateL(n, last, 0, 1);

    // Right side initialisation (row i = 0), done with a plain scan.
    for (int j = n - 2; j >= 0; j--) {
        if (s[0] != s[j + 1])
            continue;
        // The match is also bound by the current range.
        const int longest = std::min(j + 1, m[0][j + 1][1]);
        for (int k = j + 1; k <= j + longest; k++)
            dp[0][j] |= dp[0][k];
        if (dp[0][j]) {
            update(n, j, 1);
            updateL(n, j, 0, 1);
            best = std::min(best, j + 1);
        }
    }

    // Left side initialisation (column j = n - 1), done with a plain scan.
    for (int i = 1; i < n; i++) {
        if (s[i - 1] != s[last])
            continue;
        // We are bound by the current range.
        const int longest = std::min(n - i, m[i - 1][last][0]);
        for (int k = i - 1; k >= i - longest; k--)
            dp[i][last] |= dp[k][last];
        if (dp[i][last]) {
            updateL(n, last, i, 1);
            best = std::min(best, n - i);
        }
    }

    for (int i = 1; i <= n - 2; i++) {
        // Start a fresh row tree. Only nodes [0, 2n) are ever touched for a
        // tree of n leaves, so there is no need to clear all 2 * MAX_N.
        std::fill(seg, seg + 2 * n, 0);
        update(n, last, dp[i][last]);

        for (int j = n - 2; j >= i; j--) {
            bool reachable = false;

            // We removed on the right: some dp[i][k], j < k <= j + longest.
            if (s[i] == s[j + 1]) {
                // We are bound by half the current range.
                const int longest = std::min(j - i + 1, m[i][j + 1][1]);
                reachable = query(n, j + 1, j + longest + 1) != 0;
            }

            // We removed on the left: some dp[k][j], i - longest <= k < i.
            // The queried range excludes i itself, so skipping this query
            // when the right side already succeeded changes nothing.
            if (!reachable && s[i - 1] == s[j]) {
                // We are bound by half the current range.
                const int longest = std::min(j - i + 1, m[i - 1][j][0]);
                reachable = queryL(n, j, i - longest, i) != 0;
            }

            if (reachable) {
                dp[i][j] = 1;
                update(n, j, 1);
                updateL(n, j, i, 1);
            }
            if (dp[i][j])
                best = std::min(best, j - i + 1);
        }
    }
}
// Entry point of the segment-tree solution: returns the value of `best`
// after running precalc() and f() on s, i.e. the length of the shortest
// reachable substring.
//
// Returns 0 for an empty string.
// Throws std::length_error when s is longer than MAX_N, because the static
// tables cannot hold it.
int so(string s) {
    const int n = static_cast<int>(s.length());
    if (n == 0)
        return 0;
    if (s.length() > static_cast<size_t>(MAX_N))
        throw std::length_error("so: input is longer than MAX_N");

    // Clear only what a run of size n can touch: tree nodes [0, 2n) and the
    // n x n corner of dp. Cells outside that region are never read for this n.
    // m needs no clearing: precalc() overwrites every m[i][j] with i, j < n.
    std::fill(seg, seg + 2 * n, 0);
    for (int i = 0; i < n; i++) {
        std::fill(segsL[i], segsL[i] + 2 * n, 0);
        std::fill(dp[i], dp[i] + n, 0);
    }

    best = n;
    precalc(n, s);
    f(n, s);
    return best;
}
// End code by גלעד ברקן
// Code by Bananon =======================================================================
// Smallest `length` argument seen by check(); my() returns result + 1.
int result;
// lps[l][i]: prefix-function value at offset i of the suffix of the input
// that starts at index l (filled by my()).
int lps[MAX_N][MAX_N];
// checked[start][length]: set once check(start, length) has been entered.
bool checked[MAX_N][MAX_N];
// Visits the state (start, length), where `length` is the offset of the last
// character relative to `start`, and records the smallest offset in `result`.
// For every border length of that substring (walked through the lps chain)
// it recurses into the two shorter states obtained by dropping that many
// characters from the end or from the front, skipping states already visited.
void check(int start, int length) {
    checked[start][length] = true;
    result = std::min(result, length);

    int border = lps[start][length];
    while (border != 0) {
        const int newLength = length - border;

        // Drop `border` characters from the end.
        if (!checked[start][newLength])
            check(start, newLength);

        // Drop `border` characters from the front.
        const int newStart = start + border;
        if (!checked[newStart][newLength])
            check(newStart, newLength);

        border = lps[start][border - 1];
    }
}
int my(string str) {
int n = str.length();
for (int l = 0; l < n; l++) {
int subLength = n - l;
lps[l][0] = 0;
checked[l][0] = false;
for (int i = 1; i < subLength; ++i) {
int j = lps[l][i - 1];
while (j > 0 && str[i + l] != str[j + l])
j = lps[l][j - 1];
if (str[i + l] == str[j + l]) j++;
lps[l][i] = j;
checked[l][i] = false;
}
}
result = n - 1;
check(0, n - 1);
return result + 1;
}
// generate =================================================================
// Draws one value from rand() and reports whether it is even.
bool rndBool() {
    const int draw = rand();
    // rand() is never negative, so testing the low bit equals `% 2 == 0`.
    return (draw & 1) == 0;
}
// Draws one value from rand() and reduces it modulo `bound`.
// `bound` must not be zero.
int rnd(int bound) {
    const int draw = rand();
    return draw % bound;
}
// Grows str in place by one random step: either appends a copy of one of its
// own prefixes to the end, or prepends a copy of one of its own suffixes to
// the front. The copied piece has a random length in [1, str.length()].
//
// Throws std::invalid_argument for an empty string: there is nothing to copy
// and rnd(0) would divide by zero.
void untrim(string & str) {
    if (str.empty())
        throw std::invalid_argument("untrim: cannot extend an empty string");

    const int copyLength = rnd(static_cast<int>(str.length())) + 1;
    if (rndBool()) {
        str.append(str.substr(0, copyLength));
    } else {
        string newStr = str.substr(str.length() - copyLength, copyLength);
        newStr.append(str);
        str = newStr;
    }
}
void rndTest(int minTestLength, string s) {
while (s.length() < minTestLength)
untrim(s);
int myAns = my(s);
int soAns = so(s);
cout << myAns << " " << soAns << '\n';
if (soAns != myAns) {
cout << s;
exit(0);
}
}
int main() {
int minTestLength;
cin >> minTestLength;
string seed;
cin >> seed;
while (true)
rndTest(minTestLength, seed);
}
Und hier ist JavaScript-Code (ohne die Verbesserung um den Log-Faktor), um zu zeigen, dass die Rekurrenz funktioniert. (Um den Log-Faktor zu erhalten, ersetzen wir die inneren k-Schleifen durch eine einzelne Bereichsabfrage.)
// When truthy, f() also prints its dp table before returning.
debug = 1
function precalc(s){
let m = new Array(s.length)
for (let i=0; i<s.length; i++){
m[i] = new Array(s.length)
for (let j=0; j<s.length; j++){
// [longest match left, longest match right]
m[i][j] = [(s[i] == s[j]) & 1, (s[i] == s[j]) & 1]
}
}
for (let i=s.length-2; i>=0; i--)
for (let j=s.length-2; j>=0; j--)
m[i][j][1] = s[i] == s[j] ? 1 + m[i+1][j+1][1] : 0
for (let i=1; i<s.length; i++)
for (let j=1; j<s.length; j++)
m[i][j][0] = s[i] == s[j] ? 1 + m[i-1][j-1][0] : 0
return m
}
// Returns the length of the shortest reachable substring of s, using the
// plain recurrence (inner k loops, no range-query structure).
// dp[i][j] is 1 when substring s[i..j] is reachable from the full string.
// Returns 0 for an empty string. When the global `debug` is truthy the dp
// table is printed before returning.
function f(s){
  const n = s.length
  // Empty input: there is no dp[0][n-1] cell to seed.
  if (n == 0)
    return 0

  // Kept local; the table must not leak into the global scope.
  const m = precalc(s)
  const last = n - 1
  let min = n

  const dp = new Array(n)
  for (let i = 0; i < n; i++)
    dp[i] = new Array(n).fill(0)

  // The full string is the starting point.
  dp[0][last] = 1

  // Right side initialisation
  for (let j = n - 2; j >= 0; j--){
    if (s[0] == s[j+1]){
      const longest = Math.min(j + 1, m[0][j+1][1])
      for (let k = j + 1; k <= j + longest; k++)
        dp[0][j] |= dp[0][k]
      if (dp[0][j])
        min = Math.min(min, j + 1)
    }
  }

  // Left side initialisation
  for (let i = 1; i < n; i++){
    if (s[i-1] == s[last]){
      const longest = Math.min(n - i, m[i-1][last][0])
      for (let k = i - 1; k >= i - longest; k--)
        dp[i][last] |= dp[k][last]
      if (dp[i][last])
        min = Math.min(min, n - i)
    }
  }

  for (let i = 1; i <= n - 2; i++){
    for (let j = n - 2; j >= i; j--){
      // We removed on the right
      if (s[i] == s[j+1]){
        // We are bound by half the current range
        const longest = Math.min(j - i + 1, m[i][j+1][1])
        for (let k = j + 1; k <= j + longest; k++)
          dp[i][j] |= dp[i][k]
      }
      // We removed on the left
      if (s[i-1] == s[j]){
        // We are bound by half the current range
        const longest = Math.min(j - i + 1, m[i-1][j][0])
        for (let k = i - 1; k >= i - longest; k--)
          dp[i][j] |= dp[k][j]
      }
      if (dp[i][j])
        min = Math.min(min, j - i + 1)
    }
  }

  if (debug){
    let str = ""
    for (const row of dp)
      str += row + "\n"
    console.log(str)
  }

  return min
}
// Prints each sample string followed by f()'s answer and a blank line.
// The parameter is unused; it is kept so existing calls stay valid.
function main(s){
  const samples = [
    "caaca",
    "bbabbbba",
    "baabbabaa",
    "bbabbba",
    "bbbabbbbba",
    "abbabaabbab",
    "abbabaabbaba",
    "aabaabaaabaab",
    "bbabbabbb"
  ]
  for (const sample of samples){
    console.log(sample)
    console.log(f(sample))
    console.log("")
  }
}
// Run f() on the sample strings.
main()