Given string S and a dictionary of words words, find the number of words[i] that is a subsequence of S.
Example :
Input:
S = "abcde"
words = ["a", "bb", "acd", "ace"]
Output: 3
Explanation: There are three words in words that are a subsequence of S: "a", "acd", "ace".
Note:
All words in words and S will only consists of lowercase letters.
The length of S will be in the range of [1, 50000].
The length of words will be in the range of [1, 5000].
The length of words[i] will be in the range of [1, 50].
// Optimized Brute Force with Hashset
int numMatchingSubseq(string S, vector<string>& words) { // time: O(words_len * S_len); space: O(words_len)
int res = 0, n = S.length();
unordered_set<string> valid, invalid;
for (string word : words) {
if (valid.count(word) || invalid.count(word)) {
if (valid.count(word)) ++res;
continue;
}
// Use two pointers to check
int i = 0, j = 0, m = word.length();
while (i < n && j < m) {
if (word[j] == S[i]) ++j;
++i;
}
if (j == m) {
++res;
valid.insert(word);
} else invalid.insert(word);
}
return res;
}
// Binary Search
int numMatchingSubseq(string S, vector<string>& words) { // time: O(words_len * average str_len * log(S_len)); space: O(S_len)
// Build a lookup index table
vector<vector<int> > idx(26);
for (int i = 0; i < S.length(); ++i) idx[S[i] - 'a'].push_back(i);
int res = 0;
for (const string& word : words) {
int prev = -1;
bool found = true;
for (char c : word) {
auto it = upper_bound(idx[c - 'a'].begin(), idx[c - 'a'].end(), prev);
if (it == idx[c - 'a'].end()) {
found = false;
break;
} else prev = *it;
}
if (found) ++res;
}
return res;
}
bool isSubsequence(const string& S, const string& word) {
int idx = -1;
for (char ch : word) {
idx = S.find(ch, idx + 1);
if (idx == -1) return false;
}
return true;
}
int numMatchingSubseq(string S, vector<string>& words) {
int res = 0;
for (const string& word : words) {
if (isSubsequence(S, word)) ++res;
}
return res;
}