Giải bài toán 2 chuỗi con bằng nhau trong chuỗi bằng Hashing value

Tuong_Le · October 6, 2020, 4:31am

Em lấy ví dụ:
trololo
4
0 0 7 (trololo == trololo)
2 4 3 (trololo == trololo)
3 5 1 (trololo == trololo)
1 3 2 (trololo != trololo)
Output:
Yes
Yes
Yes
No
Ý tưởng giải bài toán bằng việc tính hashing value của tất cả chuỗi con bắt đầu từ s[0].

Sau đó chỉ cần so sánh giá trị hashing của 2 chuỗi cần tìm. Bài toán này, xác suất để 2 chuỗi có hashing value bằng nhau mà 2 chuỗi khác nhau là 10^(-9) rất thấp.
Trong bài làm của em, mọi thứ đều chạy ổn, ngoại trừ việc, mỗi lần check 2 chuỗi con trong chuỗi thì nó phải tạo lại hashvalue table. Em muốn khắc phục chuyện đó nhưng ko biết làm thế nào, mong các anh đọc và gợi ý giúp em. Em xin cảm ơn nhiều.

// Assignment3Problem4.cpp : This file contains the 'main' function. Program execution begins and ends there.

#include "pch.h"
#include <iostream>
#include <string>
#include <vector>
#include <math.h>
using namespace std;

class Solver {
	string s;
public:
	vector<long long> memoization(string s, long long prime, int x)
	{
		vector<long long> result(s.length() + 1);
		long long hash = 0;
		result[0] = 0;
		for (int i = 1; i <= s.length(); i++)
		{
			result[i] = ((x*result[i - 1] + s[i-1]) % prime);
		}
		return result;
	}
	Solver(string s) : s(s) 
	{
		// initialization, precalculation
		

	}
	bool ask(int a, int b, int l) {

		long long prime1 = 1000000007, prime2 = 1000000009;
		int x = 1;
		vector<long long> hash1 = memoization(s, prime1, x);
		vector<long long> hash2 = memoization(s, prime2, x);
		long long h1 = (long long)(hash1[a + l] - pow(x, l)*hash1[a]) % prime1;
		long long h2 = (long long)(hash1[b + l] - pow(x, l)*hash1[b]) % prime1;
		long long h3 = (long long)(hash2[a + l] - pow(x, l)*hash2[a]) % prime2;
		long long h4 = (long long)(hash2[b + l] - pow(x, l)*hash2[b]) % prime2;
		
		return h1 == h2 && h3 == h4;
	}
};
int main() {
	ios_base::sync_with_stdio(0), cin.tie(0);

	string s;
	int q;
	
	cin >> s >> q;
	Solver solver(s);
	for (int i = 0; i < q; i++) {
		int a, b, l;
		cin >> a >> b >> l;
		cout << (solver.ask(a, b, l) ? "Yes\n" : "No\n");
	}
}

rogp10 · October 6, 2020, 4:21am

Đây gọi là memoization, nó khác với precomputation.

Đầu tiên phải dùng mảng tĩnh (hay heap memory và biến tĩnh) để lưu trữ.

Tuong_Le · October 6, 2020, 4:26am

Cảm ơn nhận xét của anh, em sẽ chú ý hơn.

Tuong_Le · October 6, 2020, 7:34am

Code trên sai ở chỗ, x quá nhỏ, dùng hàm pow cũng hoàn toàn có thể xảy ra tràn số. Ý tưởng là (a*b)mod m = (a mod m) *(b mod m). Song lời giải này thì lại bị TLE

// // Assignment3Problem4.cpp : This file contains the 'main' function. Program execution begins and ends there.

#include <iostream>
#include <string>
#include <vector>
#include <math.h>
using namespace std;

class Solver {
	string s;
public:
	vector<long long> precomputePower(long long x, int l, long long prime)
	{
		vector<long long> result(l + 1);
		result[0] = 1;
		for (int i = 1; i <= l; i++)
		{
			result[i] = (result[i-1] % prime)*((x%prime) + prime) % prime;
		}
		return result;
	}
	
	Solver(string s) : s(s) 
	{
		// initialization, precalculation
	}
	vector<long long> memoization(long long prime, long long x)
	{
		vector<long long> result(s.length() + 1);
		long long hash = 0;
		result[0] = 0;
		for (int i = 1; i <= s.length(); i++)
		{
			result[i] = ((x*result[i - 1] + s[i - 1]) % prime + prime) % prime;
		}
		return result;
	}
	bool ask(int a, int b, int l, vector<long long> hash1, vector<long long> hash2, long long prime1, long long prime2, long long x, long long power1, long long power2) {
		
		long long h1 = ((hash1[a + l] - (power1 * hash1[a])) % prime1 + prime1) % prime1;
		long long h2 = ((hash1[b + l] - (power1 * hash1[b])) % prime1 + prime1) % prime1;
		long long h3 = ((hash2[a + l] - (power2 * hash2[a])) % prime2 + prime2) % prime2;
		long long h4 = ((hash2[b + l] - (power2 * hash2[b])) % prime2 + prime2) % prime2;
		return h1 == h2 && h3 == h4;
	}
	
};
int main() {
	ios_base::sync_with_stdio(0), cin.tie(0);
	string s;
	int q;
	cin >> s >> q;
	long long prime1 = 1000000007, prime2 = 1000000009;
	long long x = 4;
	Solver solver(s);
	vector<long long> hash1 = solver.memoization(prime1,x);
	vector<long long> hash2 = solver.memoization(prime2, x);
	vector<long long> powFun1 = solver.precomputePower(x, s.length(), prime1);
	vector<long long> powFun2 = solver.precomputePower(x, s.length(), prime2);
	for (int i = 0; i < q; i++) {
		int a, b, l;
		cin >> a >> b >> l;
		long long power1 = powFun1[l];
		long long power2 = powFun2[l];
		cout << (solver.ask(a, b, l, hash1, hash2, prime1, prime2, x, power1, power2) ? "Yes\n" : "No\n");
	}
}

Tuong_Le · October 6, 2020, 9:44am

Không hiểu tại sao, nhưng tính toán mọi thứ trong class có thể tiết kiệm được rất nhiều thời gian. Và đã hoàn thành được test case.

class{
Solver(string s) : s(s) 
	{
		// initialization, precalculation
	}
	long long prime1 = 1000000007, prime2 = 1000000009;
	int x = 4;
	vector<long long> hash1 = memoization(prime1, x);
	vector<long long> hash2 = memoization(prime2, x);
	vector<long long> powFun1 = precomputePower(x, prime1);
	vector<long long> powFun2 = precomputePower(x, prime2);

}