#include #include #include #include #include #include #include using namespace std; using namespace __gnu_cxx; typedef crope bases; bases dna, rna; struct pitem { pitem() {}; pitem(char c) : base(c), type('b') { }; char type, base; int n; bases dna; }; typedef vector pattern; struct titem { titem() {}; titem(char c) : base(c), type('b') { }; char type, base; int reference, plevel; }; typedef vector template_; void finish(); int nat(); bases consts(); char dna_nth(int n) { return (n >= dna.size()) ? 'E' : dna[n]; } void dna_print_firsts() { for (int i = 0; i < 100; i++) { if (dna_nth(i) != 'E') { cout << dna_nth(i); } } } void dna_drop(int n) { if (n > dna.size()) n = dna.size(); dna.erase(0, n); } pattern get_pattern() { int lvl = 0; pattern p; pitem pi; while (1) { char h = dna_nth(0); // cout << "h: " << h << endl; switch (h) { case 'C': dna.pop_front(); p.push_back(pitem('I')); break; case 'F': dna.pop_front(); p.push_back(pitem('C')); break; case 'P': dna.pop_front(); p.push_back(pitem('F')); break; case 'I': { char second = dna_nth(1); switch (second) { case 'C': dna_drop(2); p.push_back(pitem('P')); break; case 'P': dna_drop(2); { int n = nat(); pi.type = '!'; pi.n = n; p.push_back(pi); } break; case 'F': dna_drop(3); { bases s = consts(); pi.type = '?'; pi.dna = s; p.push_back(pi); } break; case 'I': { char third = dna_nth(2); switch (third) { case 'P': dna_drop(3); lvl += 1; pi.type = '('; p.push_back(pi); break; case 'C': case 'F': dna_drop(3); if (lvl == 0) { return p; } else { lvl -= 1; pi.type = ')'; p.push_back(pi); } break; case 'I': for (int rc = 3; rc < 10; rc++) { if (dna_nth(rc) != 'E') { rna.push_back(dna_nth(rc)); } } dna_drop(10); break; default: cout << "get_pattern, third: " << third << endl; finish(); } } break; default: cout << "get_pattern, second: " << second << endl; finish(); } } break; default: // cout << "get_pattern, h: " << h << endl; finish(); } } } void bases_print(bases bs); void print_pattern(pattern p) { for (vector::iterator it = p.begin(); it != p.end(); it++) { switch (it->type) { case 'b': cout << it->base; break; case '!': cout << "!" << it->n; break; case '?': cout << "?\""; bases_print(it->dna); cout << "\""; break; default: cout << it->type; } } } void print_template(template_ t) { for (vector::iterator it = t.begin(); it != t.end(); it++) { switch(it->type) { case 'R': cout << it->reference << "_" << it->plevel; break; case 'L': cout << "|" << it->reference << "|"; break; case 'b': cout << it->base; break; default: cout << it->type; } } } void bases_print_firsts(bases bs) { for (int i = 0; i < 20; i++) { if (i >= bs.size()) return; cout << bs[i]; } } void bases_print(bases bs) { for (int i = 0; i < bs.size(); i++) cout << bs[i]; } int nat() { // cout << "nat()" << endl; char first = dna_nth(0); switch (first) { case 'P': dna.pop_front(); return 0; case 'I': case 'F': dna.pop_front(); { int n = nat(); return 2*n; } break; case 'C': dna.pop_front(); { int n = nat(); return 2*n + 1; } break; default: cout << "nat: " << first << endl; finish(); } assert(0); } bases consts() { // cout << "consts()" << endl; char first = dna_nth(0); char second = dna_nth(1); // cout << "looking at DNA: head: " << first << " second: " << second << endl; bases s; switch (first) { case 'C': dna.pop_front(); s = consts(); s.push_front('I'); return(s); case 'F': dna.pop_front(); s = consts(); s.push_front('C'); return(s); case 'P': dna.pop_front(); s = consts(); s.push_front('F'); return(s); case 'I': if (dna_nth(1) == 'C') { dna_drop(2); s = consts(); s.push_front('P'); return(s); } break; } return s; } template_ get_template() { template_ t; titem ti; while (1) { char first = dna_nth(0); // cout << "first: " << first << endl; switch (first) { case 'C': dna.pop_front(); t.push_back(titem('I')); break; case 'F': dna.pop_front(); t.push_back(titem('C')); break; case 'P': dna.pop_front(); t.push_back(titem('F')); break; case 'I': { char second = dna_nth(1); switch (second) { case 'C': dna_drop(2); t.push_back(titem('P')); break; case 'F': case 'P': dna_drop(2); { int l = nat(); int n = nat(); ti.type = 'R'; ti.reference = n; ti.plevel = l; t.push_back(ti); } break; case 'I': { char third = dna_nth(2); switch (third) { case 'C': case 'F': dna_drop(3); return t; case 'P': dna_drop(3); { int n = nat(); ti.type = 'L'; ti.reference = n; t.push_back(ti); } break; case 'I': for (int rc = 3; rc < 10; rc++) { if (dna_nth(rc) != 'E') { rna.push_back(dna_nth(rc)); } } dna_drop(10); break; default: finish(); } } break; default: finish(); } } break; default: finish(); } } } bases quote(bases bs) { bases res; for (bases::iterator it = bs.mutable_begin(); it != bs.mutable_end(); it++) { switch (*it) { case 'I': res.push_back('C'); break; case 'C': res.push_back('F'); break; case 'F': res.push_back('P'); break; case 'P': res.push_back('I'); res.push_back('C'); break; default: return bases(); } } return res; } bases asnat(int n) { if (n == 0) { bases single; single.push_back('P'); return single; } else if (n % 2 == 0) { bases res = asnat(n/2); res.push_front('I'); return res; } else if (n % 2 == 1) { bases res = asnat(n/2); res.push_front('C'); return res; } else { cout << "asnat: " << n << endl; assert(0); } } typedef vector environment; void replace(template_ t, environment e) { bases r; for (vector::iterator it = t.begin(); it != t.end(); it++) { switch (it->type) { case 'b': r.push_back(it->base); break; case 'R': { bases pres; if (it->reference < e.size()) pres = e[it->reference]; for (int i = 0; i < it->plevel; i++) { pres = quote(pres); } r.append(pres); } break; case 'L': r.append(asnat(it->reference < e.size() ? e[it->reference].size() : 0)); break; default: cout << it->type << endl; assert(0); } } dna.insert(0, r); } environment e; int match_length; void matchreplace(pattern pat, template_ t) { e.clear(); match_length = 0; int i = 0; deque c; for (vector::iterator it = pat.begin(); it != pat.end(); it++) { switch (it->type) { case 'b': if (dna_nth(i) == it->base) { i++; match_length++; } else { match_length = -1; return; } break; case '!': i += it->n; cout << "skipping " << it->n << endl; match_length += it->n; if (i > dna.size()) { match_length = -1; return; } break; case '?': if (it->dna.empty()) goto match_found; { int len = it->dna.size(), trial_pos = i; while (1) { trial_pos = dna.find(it->dna[0], trial_pos); if (trial_pos >= dna.size()) { match_length = -1; return; } if (it->dna.compare(dna.substr(trial_pos, len)) == 0) { match_length += trial_pos - i + len; i = trial_pos + len; break; } trial_pos++; } } match_found: break; case '(': c.push_front(i); cout << "open" << endl; break; case ')': { // TODO: c empty (?) assert(c.size()); assert(i>=c[0]); if (i >= dna.size()) i = dna.size(); cout << "close from " << c[0] << " to " << i << endl; e.push_back(dna.substr(c[0], i-c[0])); c.pop_front(); } break; default: assert(0); } } // cout << "before dropping: "; dna_print_firsts(); cout << endl; dna_drop(i); // cout << "after dropping: "; dna_print_firsts(); cout << endl; replace(t, e); } void print_environment(environment e) { for (int i = 0; i < e.size(); i++) { cout << "e[" << i << "] = "; bases_print_firsts(e[i]); cout << endl; } } int num_iter = 0; void execute() { //for (int i = 0; i < 10; i++) { while (1) { if (num_iter % 100 == 0) { cout << endl << "iteration: " << num_iter << endl; } cout << endl << "iteration: " << num_iter << endl; num_iter++; // cout << "dna = "; // dna_print_firsts(); // cout << " (" << dna.size() << " bases)" << endl; pattern p = get_pattern(); template_ t = get_template(); cout << endl << "execute..."; cout << "pattern "; print_pattern(p); cout << endl; cout << "template "; print_template(t); cout << endl; cout << "remaining DNA "; dna_print_firsts(); cout << endl; matchreplace(p, t); cout << "after matchreplace "; dna_print_firsts(); cout << endl; // if (match_length >= 0) { // cout << "successful match of length " << match_length << endl; // } else { // cout << "failed match" << endl; // } // print_environment(e); // cout << "len(rna) = " << rna.size() / 7.0 << endl; } } string rnafile; void finish() { cout << "finished" << endl; ofstream of(rnafile.c_str()); for (bases::iterator it = rna.mutable_begin(); it != rna.mutable_end(); it++) { of << *it; } of.close(); exit(0); } void load(ifstream &from) { char c; while (!from.eof()) { from.get(c); if (from.eof()) break; dna.push_back(c); } from.close(); cout << "read " << dna.size() << " bytes" << endl; } int main(int argc, char **argv) { if (argc != 3) { cout << "execute " << endl; } else { rnafile = argv[2]; ifstream prefix(argv[1]); load(prefix); ifstream dna("endo.dna"); load(dna); execute(); } return 0; }