187. Repeated DNA Sequences

All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

Example:

Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"

Output: ["AAAAACCCCC", "CCCCCAAAAA"]
// Hashmap
vector<string> findRepeatedDnaSequences(string s) { // time: O(n); space: O(n)
    unordered_map<string, int> mp;
    vector<string> res;
    string tmp;
    for (int i = 0; i + 9 < s.length(); ++i) {
        tmp = s.substr(i, 10);
        if (mp[tmp]++ == 1) res.emplace_back(tmp);
    }
    return res;
}
// Hashset
vector<string> findRepeatedDnaSequences(string s) { // time: O(n); space: O(n)
    unordered_set<string> seen, repeated;
    vector<string> res;
    string tmp;
    for (int i = 0; i + 9 < s.length(); ++i) {
        tmp = s.substr(i, 10);
        if (seen.count(tmp)) repeated.insert(tmp);
        else seen.insert(tmp);
    }
    for (const string str : repeated) res.emplace_back(str);
    return res;
}

Last updated

Was this helpful?