libStatGen Software  1
SmithWaterman.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdint.h>
21 #include "SmithWaterman.h"
22 
23 // put TEST below here, so that makedepend will see the .h, so that we
24 // can get a clean dependency for SmithWaterman.o, so that we can at least
25 // compile the header when we change it.
26 
27 #if defined(TEST)
28 
29 #include <getopt.h>
30 #include "Generic.h"
31 
32 // g++ -g -o testSW -DTEST SmithWaterman.cpp
33 //
34 // Smith-Waterman - test code uses a 256x256 array of int16
35 //
36 int swat(
37  bool showAllCases,
38  const char *A,
39  const char *qualities,
40  const char *B,
41  int direction,
42  const char *expectedCigarString,
43  int expectedSumQ
44 )
45 {
46  int allowedInsertDelete = 1024;
47  int errors = 0;
48 
49  // read length 256
50  // reference length 1024
51  SmithWaterman<256, 1024, uint16_t, const char *, const char *, const char *, uint32_t, uint32_t > sw(&A, &qualities, &B, strlen(A), strlen(B), allowedInsertDelete, direction);
52 
53  //
54  // now we align the read:
55  //
56  sw.populateH();
57  sw.populateAlignment();
58 
59  int sumQ = sw.getSumQ();
60 
61  CigarRoller cigar;
62  cigar.clear();
63  sw.rollCigar(cigar);
64 
65  const char *cigarStr = cigar.getString();
66 
67  //
68  // now we pretty print the results
69  //
70 
71  bool badCigar = false, badQuality = false;
72 
73  if (strcmp(cigarStr, expectedCigarString)!=0)
74  {
75  badCigar = true;
76  errors ++;
77  }
78 
79  if (sumQ != expectedSumQ)
80  {
81  badQuality = true;
82  errors ++;
83  }
84 
85 
86 
87  if (showAllCases || errors>0)
88  {
89  cout << "=============" << endl;
90  cout << " Read: " << A << endl;
91  cout << " Reference: " << B << endl;
92  cout << " Direction: " << direction << endl;
93  cout << "Max Cell: " << sw.maxCostValue << " located at " << sw.maxCostPosition << endl;
94  cout << "M: " << sw.m << " N: " << sw.n << endl;
95  cout << "Cigar String: " << cigarStr ;
96  if (badCigar)
97  cout << " (EXPECTED: " << expectedCigarString << ")";
98  cout << endl;
99  cout << " sumQ:" << sumQ;
100  if (badQuality)
101  cout << " (EXPECTED: " << expectedSumQ << ")";
102  cout << endl;
103 
104  if (strlen(B) < 100 || showAllCases)
105  sw.printH(false);
106 
107  for (vector<pair<int,int> >::iterator i = sw.alignment.begin(); i != sw.alignment.end(); i++) cout << *i << endl;
108 
109  cout << "=============" << endl << endl;
110  }
111 
112  delete cigarStr;
113 
114  return errors;
115 }
116 
117 
118 // test with Sequence 1 = ACACACTA
119 // Sequence 2 = AGCACACA
120 int main(int argc, const char **argv)
121 {
122  int errors = 0;
123 
124  bool showAllCasesFlag = false;
125  int opt;
126 
127  while ((opt = getopt(argc, (char **) argv, "v")) != -1)
128  {
129  switch (opt)
130  {
131  case 'v':
132  showAllCasesFlag = true;
133  break;
134  default:
135  cerr << "usage: testSW [-v]" << std::endl;
136  exit(1);
137  }
138  }
139 
140 
141  // CIGAR explanation - for backward SW runs, the corresponding
142  // CIGAR string is generated from the back of the string to the
143  // front. Recall that the soft clipping is only done at the
144  // "end" of the string, taking direction into account.
145 
146  // forwards - simple
147  errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", 1, "3M1S", 0);
148 
149  // backwards - simple
150  errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", -1, "4M", 12);
151 
152  // backwards - soft left clip
153  errors += swat(showAllCasesFlag, "1234", "\"#$-", "0234", -1, "1S3M", 0);
154 
155  // delete in read (arg 1) - forward
156  errors += swat(showAllCasesFlag, "123467890", "\"#$%^&*()-", "1234567890", +1, "4M1D5M", 50);
157 
158  // insert in read (arg 1) - forward
159  errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "1234567890", +1, "5M1I4M", 50);
160 
161  // delete in read (arg 1) - backward
162  errors += swat(showAllCasesFlag, "X123467890", "#\"#$%^&*()-", "1234567890", -1, "1S4M1D5M", 50);
163 
164  // insert in read (arg 1) - backward
165  errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "0123456789", -1, "4M1I5M", 50);
166 
167  // insert in read (arg 1) - backward
168  errors += swat(showAllCasesFlag, "X1223456789", "00000000000", "00123456789", -1, "1S1M1I8M", 50);
169 
170  // insert in read (arg 1) - backward
171  errors += swat(showAllCasesFlag, "XY1223456789", "000000000000", "000123456789", -1, "2S1M1I8M", 50);
172 
173  // forward - soft right clip of 2 bases - sumQ should be 0
174  errors += swat(showAllCasesFlag, "123456700", "\"#$%^&*()-", "123456789", +1, "7M2S", 0);
175 
176  // insert in read (arg 1) - forward w/2 mismatches at end
177  errors += swat(showAllCasesFlag, "1023456700", "\"#$%^&*()-", "123456789", +1, "1M1I6M2S", 50);
178 
179  // insert in read (arg 1) - forward w/2 mismatches at end
180  errors += swat(showAllCasesFlag, "CTCCACCTCCCGGTT", "111111111111111", "TCCACCTCCCAGGTT", -1, "1S10M1D4M", 50);
181 
182  //
183  errors += swat(showAllCasesFlag, "1234", "0000", "12345", +1, "4M", 0);
184 
185  //
186  errors += swat(showAllCasesFlag, "1234X", "00000", "12345", +1, "4M1S", 0);
187 
188  //
189  errors += swat(showAllCasesFlag, "4321", "0000", "7654321", -1, "4M", 0);
190 
191  //
192  errors += swat(showAllCasesFlag, "X4321", "00000", "7654321", -1, "1S4M", 0);
193 
194  //
195  errors += swat(showAllCasesFlag, "X432A10", "0000000", "76543210", -1, "1S3M1I2M", 50);
196 
197  //
198  errors += swat(showAllCasesFlag, "1345", "0000", "12345", -1, "1M1D3M", 50);
199 
200  errors += swat(showAllCasesFlag, "45689", "00000", "1234567890", -1, "3M1D2M", 50);
201 
202 // errors += swat(showAllCasesFlag, "AATAATTTTTTATATACAGATCGCTGTAGAGTGTAGTTATAGTATGATTCCAACTTTTATTTCTTTCATGACTAATTATATGTATACATGTGCCATGTTGGTGTGCTG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "TCCAATGTAGGGCTGTTATAAACAGTGTTGATACATATGTTTTTGTATAAGTCTTTGTTGAATACATGCTTTCATTTTTGTAGGGTATATGTCCAGGAATTAAATTTTTGCATTATTGGGGAAGTTCAAACGTAGATCAGTAGATGTTCCCAAATGATTTTCAGGATATGTATCCATGTAAATTCCTACCAGCAATGCAGGAGAATTCCAATTGCCCATGTTCTAATCAGAATATTGTTATATCCTAAGACTAATTTTAAATATTCTGATGGGTGTAGAGTGGAGGCATAGTATGATTTCAACTTGTATTTCTTTCATGACTAATTATCTTCTATGTTAATTGTTATTTTGTATGTTTATTGCAAAGTGCCTATCCAGAATTTTTGTCTATAATTTTGTTGTGCTGTCTCTTGCTTTATGAATTTTATAGGATTCTTAATATTATAATTGAGTTATCTTTCTTTTTTATTATTATTATTATACTTTAAGTTTTAGGGTATATGTGCACAACGTGCAAGTTTGTCACATATGTATACATGTGCCATGTTGGTGTGCTGCACCCATTAACTCATCATTTAGCATTAGGTATATCTCCTAATGCTATCCCTTCCTCCTCCCCCCACCCCACAACAGTCCCCGGTGTGTGATGTTCCCCTGCCTTTGTCCTCTTTCTTATACTTGCATGAGCAATCTCCTCAAACTGATACTTGCCTTTTTTGTCCTTGGTGTGGTTTGGCTCTGTGTTCCCACCCAAATCTTCATAATACCCATGTGCCAAGGGTGGGACTGGGTGGAGGTAATTGGGTCATGGGGATGGTTTCCCTCATACTATTATGATAGTGAGTGTTTTCACGAGACCTGATGGTTTTATAACTGTGTGGCATTTCCCTTGCTTCCACTCACTCCATCCTGCCACCCTGTGAAGAAGGTGCCTGCTTCTCCTTTGGTTACTGCTATGATTGTAAGTTTCCTGAGGCCTCCCCAGCAACGCAAAACTGTGAATCAATTAAACCTTTTTCCTTTATAAATTACTAAGTCTTGGGTATTTCTTCATAGTGTTGTGAGCATAGACTAAAACAGTAAGTTGTTACCAGGAGTGGGGTACTGCTGTAAGATAACTGAGAATGTGAAAGTGACTTAGGAACTAGGTAATGAGCAGAGGTTGGAACAGTTTAAAAGGCTCAGAAGAAGACAGAAAGATGTGGGAAAGTTTGGA", -1, "77M200D31M", 50);
203 
204  errors += swat(showAllCasesFlag, "TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACGTCATCGGTCAGAAATTGGG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "CCGAGATTGTGCCATTGCACTCCTGCCTGGGTAACAGAGTCAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGATTAGGTTTTATAGATGGAAAATTCACAGCTCTCTCCAGATCAGAAATCTCCAAGAGTAAATTAGTGTCTTAAAGGGGTTGTAATAACTTTCCTATGTGACTAAGTGCATTATTAATCAATTTTTCTATGATCAAGTACTCCTTTACATACCTGCTAATACAATTTTTGATATGAAATCAGTCCTAGAGGGAATCAATGTAAGATACAGACTTGATGAGTGCTTGCAGTTTTTTATTGACAATCTGAAGAATGACTTGACTCTAAATTGCAGCTCAAGGCTTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACAGGAATCAAATCTGTCTAGCCTCTCTTTTTGGCAAGGTTAAGAACAATTCCACTTCATCCTAATCCCAATGATTCCTGCCGACCCTCTTCCAAAAACTATTTAAAGACATGTTCTTCAAAGTTATATTTGTCTTTCCTTCAGGGAGAAAAAGAATACCAATCACTTATAATATGGAAACTAGCAGAAATGGGTCACATAAGTCATCTGTCAGAAATTGGGAAAATAGAGTAGGTCAGTCTTTCCAGTCATGGTACTTTTACCTTCAATCA", -1, "88M200D20M", 50);
205 
206  // prefix TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATAC
207  // suffix GTCATCTGTCAGAAATTGGGA
208  cout << endl << "Total Errors found: " << errors << endl;
209 }
210 #endif
void clear()
Clear this object so that it has no Cigar Operations.
const char * getString()
Get the string reprentation of the Cigar operations in this object, caller must delete the returned v...
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object...
Definition: CigarRoller.h:66