Cassiopee  1.0
Suffix indexer and search tool
Cassiopee.h
1 #include <iostream>
2 #include <fstream>
3 #include <list>
4 
5 #include "tree/tree.hh"
6 
7 #include "CassiopeeConfig.h"
8 
9 #include <boost/archive/text_oarchive.hpp>
10 #include <boost/archive/text_iarchive.hpp>
11 #include <boost/archive/binary_oarchive.hpp>
12 #include <boost/archive/binary_iarchive.hpp>
13 // Provide an implementation of serialize for std::list
14 #include <boost/serialization/list.hpp>
15 
16 using namespace std;
17 
18 
19 
20 
// Plain record for one match: a position (pos) plus three integer counters
// (in, del, subst).
// NOTE(review): the Doxygen comments were stripped from this listing; the
// counter meanings given below are inferred from their names - confirm
// against the original header.
24 class Match {
25 public:
26 
 // Two matches compare equal when they have the same pos and the same
 // in + del sum. subst is not compared, and in/del are only compared as a
 // total, so (in=1, del=0) equals (in=0, del=1) at the same position.
30  bool operator==(const Match& p) const {
31  return pos == p.pos && in+del == p.in + p.del;
32  }
33 
 // presumably the number of insertions in this match - TODO confirm
37  int in;
 // presumably the number of deletions in this match - TODO confirm
41  int del;
 // presumably the number of substitutions in this match - TODO confirm
45  int subst;
46 
47 
 // Position of the match; the first operand of operator==.
48  long pos;
49 
 // Default constructor; defined outside this listing, so the initial
 // values of the fields cannot be stated from here.
50  Match();
51 
52 
53 };
54 
// Tree node holding one character, a list of positions and two long
// bookkeeping fields (next_pos, next_length). The node is serializable with
// Boost.Serialization through the private serialize() member.
// NOTE(review): the Doxygen comments were stripped from this listing; field
// semantics beyond what the code shows are hedged below.
64 class TreeNode {
65 public:
 // Character stored in this node.
69  char c;
70 
71 
 // Positions attached to this node. NOTE(review): presumably the sequence
 // positions at which this node's path occurs - confirm.
75  list<long> positions;
76 
 // NOTE(review): presumably the start of a reduced/compressed continuation
 // of this branch - confirm against the implementation.
82  long next_pos;
83 
 // Restored declaration: serialize() archives next_length and the listing's
 // cross-references place `long next_length` at line 87, but the line itself
 // was missing from the listing.
 // NOTE(review): presumably the length that goes with next_pos - confirm.
87  long next_length;
88 
89 
 // Builds a node from a character.
95  TreeNode(char nc);
96 
 // Builds a node from a character and a position.
103  TreeNode(char nc, long pos);
 // Default constructor (needed so Boost.Serialization can load nodes).
104  TreeNode();
105 
106 
107 private:
 // Gives Boost.Serialization access to the private serialize() member.
108  friend class boost::serialization::access;
 // Saves or loads the node through `ar`. The fields are archived in the
 // fixed order c, next_pos, next_length, positions; that order defines the
 // on-disk layout and must not change. The class version is unused.
109  template<class Archive>
110  void serialize(Archive & ar, const unsigned int /*version*/)
111  {
112  ar & c;
113  ar & next_pos;
114  ar & next_length;
115  ar & positions;
116  }
117 
118 };
119 
120 
121 
// NOTE(review): this #ifndef/#define/#endif pair encloses only the
// commented-out operator<< below. As far as this listing shows, every class
// declaration in the header sits outside the guard, so the guard does not
// protect them against multiple inclusion. Also, identifiers containing a
// double underscore (such as __CASSIOPEE_H_) are reserved for the
// implementation in C++.
122 #ifndef __CASSIOPEE_H_
123 #define __CASSIOPEE_H_
124 //inline std::ostream& operator<<(std::ostream &strm, const TreeNode &a) {
125 // return strm << "TreeNode(" << a.c << ")";
126 //}
127 #endif
128 
129 
// Indexer class. Restored header: the listing dropped line 135, leaving an
// orphan `public:` at file scope; the class name is fixed by the
// constructor/destructor declared below.
// NOTE(review): any base-class list on the original header line cannot be
// recovered from this listing - confirm against the original header.
// NOTE(review): the Doxygen comments were stripped; member descriptions that
// go beyond the signatures are hedged.
135 class CassieIndexer {
136 public:
137 
138 
 // Builds an indexer from a C-string path.
 // NOTE(review): presumably the path of the sequence file - confirm.
144  CassieIndexer(const char* path);
145 
146  ~CassieIndexer();
147 
 // NOTE(review): presumably writes the index to disk - confirm.
151  void save();
152 
 // NOTE(review): presumably reads a previously saved index - confirm.
156  void load();
157 
158 
 // NOTE(review): presumably emits a graph of the tree - confirm.
162  void graph();
163 
 // Same as graph(), taking an integer depth.
169  void graph(int depth);
170 
 // Returns a string for the given position.
177  string getSuffix(long pos);
178 
 // NOTE(review): presumably builds the index - confirm.
182  void index();
183 
 // Returns a pointer to a tree<TreeNode>.
 // NOTE(review): presumably the private member `tr` - confirm.
187  tree<TreeNode>* getTree();
188 
 // List of Match objects held by value.
192  list<Match> matches;
193 
 // Restored declaration: the listing's cross-references place
 // `bool do_reduction` at line 197, but the line was missing.
 // NOTE(review): presumably toggles tree reduction - confirm.
197  bool do_reduction;
198 
 // Returns a single character for the given position.
202  char getCharAtSuffix(long pos);
203 
 // NOTE(review): presumably inserts the suffix starting at pos - confirm.
209  void filltree(long pos);
210 
214  long max_depth;
215 
 // Restored declaration: the listing's cross-references place
 // `long max_index_depth` at line 220, but the line was missing.
220  long max_index_depth;
221 
222  long seq_length;
223 
 // NOTE(review): presumably reports the private flag loaded_from_file - confirm.
227  bool index_loaded_from_file();
228 
229 private:
 // Nodes held in a flat std::list. NOTE(review): presumably the form used
 // by save()/load() - confirm.
230  list<TreeNode> serialized_nodes;
231 
232  bool loaded_from_file;
233 
 // Non-owning C-string pointer; the pointed-to buffer must outlive its use.
234  const char* filename;
235  ifstream seqstream;
236  tree<TreeNode> tr;
237 
 // const non-static member: must be set in every constructor's initializer
 // list, and it disables the implicitly generated copy assignment.
238  const long MAX_SUFFIX;
239  long suffix_position;
 // Raw char buffer. NOTE(review): ownership/lifetime is not visible here.
240  char* suffix;
241 
 // Helper taking a tree iterator, a counter, an output file stream and a
 // maximum depth; returns a long.
245  long graphNode(tree<TreeNode>::iterator node, long counter, ofstream& myfile, int maxdepth);
246 
 // Returns a raw char pointer for the given position.
250  char* loadSuffix(long pos);
251 
252 
256  void reset_suffix();
257 
258 
259 
 // Two overloads: one starting from a given tree iterator, one without.
267  void fillTreeWithSuffix(tree<TreeNode>::iterator sib, long suffix_pos, long pos);
268  void fillTreeWithSuffix(long suffix_pos, long pos);
269 
270 
271 
272 };
273 
274 
// NOTE(review): the class header that opens this body (listing line 278) is
// missing from this listing and its name is not visible here; restore it
// from the original header.
279 public:
 // Static helper taking two strings by const value, named `in` and `out`.
 // NOTE(review): presumably input and output file paths for a FASTA
 // conversion - confirm against the implementation.
283  static void transform_fasta(const string in, const string out);
284 };
285 
// Static-only helper for comparing two characters; it exposes one public
// predicate and keeps a private helper plus a set of private character tables.
// NOTE(review): the table names (K, M, R, Y, S, W, B, V, H, D, N) look like
// IUPAC nucleotide ambiguity codes - confirm against the definitions.
289 class Ambiguous {
290 public:
 // Returns whether characters a and b are considered equal.
 // NOTE(review): the exact matching rules live in the implementation file.
298  static bool isequal(char a, char b);
299 private:
 // Helper taking a character, a character array and that array's length.
 // NOTE(review): presumably tests whether `a` occurs in b[0..len) - confirm.
308  static bool ismatchequal(char a, const char b[], int len);
 // Character tables, one per code letter; declared here, defined elsewhere.
309  static const char K_MATCH[];
310  static const char M_MATCH[];
311  static const char R_MATCH[];
312  static const char Y_MATCH[];
313  static const char S_MATCH[];
314  static const char W_MATCH[];
315  static const char B_MATCH[];
316  static const char V_MATCH[];
317  static const char H_MATCH[];
318  static const char D_MATCH[];
319  static const char N_MATCH[];
320 };
321 
322 
// NOTE(review): the declaration line that opens this body (listing line 323)
// is missing from this listing and its name is not visible here; restore it
// from the original header.
324 {
 // Strict-weak-ordering comparator over Match pointers: returns true when
 // struct1->pos is less than struct2->pos. Both pointers are dereferenced
 // without a null check.
325  inline bool operator() (const Match* struct1, const Match* struct2)
326  {
327  return (struct1->pos < struct2->pos);
328  }
329 };
330 
// Search class working on a CassieIndexer. Restored header: the listing
// dropped line 335, leaving an orphan `public:` at file scope; the class name
// is fixed by the constructor/destructor declared below.
// NOTE(review): any base-class list on the original header line cannot be
// recovered from this listing - confirm against the original header.
// NOTE(review): the Doxygen comments were stripped; member descriptions that
// go beyond the signatures are hedged.
335 class CassieSearch {
336 
337 public:
 // Builds a searcher from a pointer to an indexer.
 // NOTE(review): presumably stored in the private `indexer` member without
 // taking ownership - confirm.
343  CassieSearch(CassieIndexer* index_ref);
344 
345  ~CassieSearch();
346 
 // NOTE(review): presumably removes duplicate entries from `matches` - confirm.
350  void removeDuplicates();
351 
 // Restored declaration: the listing's cross-references place
 // `Match * match_limits` at line 355, but the line was missing.
355  Match* match_limits;
356 
 // Matches held as raw pointers. NOTE(review): who deletes them is not
 // visible from this header.
360  list<Match*> matches;
361 
 // NOTE(review): presumably enables ambiguity-aware comparison - confirm.
366  bool ambiguity;
367 
371  int nmax;
372 
 // Integer mode selector. NOTE(review): the valid values are not visible here.
380  int mode;
381 
 // Searches for `suffix`; takes an extra boolean named `clear`.
 // NOTE(review): presumably clears previous matches first when true - confirm.
388  void search(string suffix, bool clear);
389 
390 
 // Searches for a single string.
397  void search(string suffix);
398 
 // Searches for several strings passed as a bare array. The signature
 // carries no length, so the end of the array must be conveyed some other
 // way. NOTE(review): check the implementation for the convention used.
404  void search(string suffixes[]);
405 
 // Returns whether characters a and b are considered equal.
409  bool isequal(char a,char b);
410 
411 
 // Restored declarations: the listing's cross-references place
 // `int max_indel` at line 415 and `int max_subst` at line 419, but both
 // lines were missing.
 // NOTE(review): presumably the allowed number of indels / substitutions - confirm.
415  int max_indel;
419  int max_subst;
420 
421  long pattern_length;
422 
 // NOTE(review): presumably orders `matches` - confirm.
426  void sort();
427 
428 private:
429 
 // Equality predicate over Match pointers: dereferences both and compares
 // them with Match::operator==. Neither pointer is checked for null.
433  static bool same_match (Match* first, Match* second)
434  { return ( *first == *second ); }
435 
 // Raw pointer to the indexer this searcher works on.
436  CassieIndexer* indexer;
437 
 // Helper taking a tree iterator and three const int counters.
446  void getMatchesFromNode(tree<TreeNode>::iterator sib, const int nbSubst, const int nbIn, const int nbDel);
447 
448 
449 
 // Search helper starting from `root`; carries four running counters.
461  void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, int nbSubst, int nbIn, int nbDel, int nbN);
462 
 // Overload that additionally takes a start_node iterator.
474  void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, const tree<TreeNode>::iterator start_node, int nbSubst, int nbIn, int nbDel, int nbN);
475 
 // Search helper that returns a bool.
 // NOTE(review): presumably handles reduced branches of the tree - confirm.
480  bool searchAtreduction(const string suffix, const tree<TreeNode>::iterator sib, long counter, long tree_reducted_pos, int nbSubst, int nbIn, int nbDel, int nbN);
481 
482 
483 };
484 
485 
486 
Definition: Cassiopee.h:323
long max_index_depth
Definition: Cassiopee.h:220
bool do_reduction
Definition: Cassiopee.h:197
char c
Definition: Cassiopee.h:69
int max_indel
Definition: Cassiopee.h:415
Definition: Cassiopee.h:24
int subst
Definition: Cassiopee.h:45
Match * match_limits
Definition: Cassiopee.h:355
int in
Definition: Cassiopee.h:37
list< Match * > matches
Definition: Cassiopee.h:360
int max_subst
Definition: Cassiopee.h:419
list< Match > matches
Definition: Cassiopee.h:192
bool ambiguity
Definition: Cassiopee.h:366
int nmax
Definition: Cassiopee.h:371
int del
Definition: Cassiopee.h:41
bool operator==(const Match &p) const
Definition: Cassiopee.h:30
Definition: Cassiopee.h:335
Definition: Cassiopee.h:135
long next_length
Definition: Cassiopee.h:87
long max_depth
Definition: Cassiopee.h:214
long next_pos
Definition: Cassiopee.h:82
int mode
Definition: Cassiopee.h:380
Definition: Cassiopee.h:278
Definition: Cassiopee.h:289
list< long > positions
Definition: Cassiopee.h:75
Definition: Cassiopee.h:64