libStatGen Software  1
BaseAsciiMap.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "BaseAsciiMap.h"
19 
20 //
21 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
22 // both base and color space.
23 // class 0 -> 'A' (Adenine - 0x41 and 0x61)
24 // class 1 -> 'C' (Cytosine - 0x43 and 0x63)
25 // class 2 -> 'G' (Guanine - 0x47 and 0x67)
26 // class 3 -> 'T' (Thymine - 0x54 and 0x74)
27 // class 4 -> 'N' (Unknown - read error or incomplete data - 0x4E and 0x6E)
28 // class 5 -> not a valid DNA base pair character
29 //
30 // Note: The +1 array size is for the terminating NUL character
31 //
32 // NB: This table also maps 0, 1, 2, and 3 to the corresponding integers,
33 // and '.' to class 4. This allows ABI SOLiD reads to be converted
34 // to integers via ReadIndexer::Word2Integer.
35 //
36 unsigned char BaseAsciiMap::baseColor2int[256+1] =
37  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F
38  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F
39  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005" // 0x20-0x2F
40  "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F
41  "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x40-0x4F
42  "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F
43  "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x60-0x6F
44  "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F
45 // not used, but included for completeness:
46  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F
47  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F
48  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF
49  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF
50  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF
51  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF
52  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF
53  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF
54  ;
55 
56 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
57 // just base space (ACTGNactgn).
58 unsigned char BaseAsciiMap::base2int[256+1] =
59  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F
60  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F
61  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x20-0x2F
62  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F
63  "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x40-0x4F
64  "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F
65  "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x60-0x6F
66  "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F
67 // not used, but included for completeness:
68  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F
69  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F
70  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF
71  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF
72  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF
73  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF
74  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF
75  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF
76  ;
77 
78 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
79 // just color space (0123).
80 unsigned char BaseAsciiMap::color2int[256+1] =
81  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F
82  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F
83  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005" // 0x20-0x2F
84  "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F
85  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x40-0x4F
86  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F
87  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x60-0x6F
88  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F
89 // not used, but included for completeness:
90  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F
91  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F
92  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF
93  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF
94  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF
95  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF
96  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF
97  "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF
98  ;
99 
100 
101 //
102 // This is obviously for base space use only:
103 //
104 const char BaseAsciiMap::int2base[] = "ACGTNMXXXXXXXXXX";
105 //
106 // convert int to color space value
107 //
108 const char BaseAsciiMap::int2colorSpace[] = "0123NXXXXXXXXXXX";
109 
110 /// This table maps 5' base space to the 3' complement base space
111 /// values, as well as 5' color space values to the corresponding
112 /// 3' complement color space values.
113 ///
114 /// In both cases, invalids are mapped to 'N', which isn't accurate
115 /// for ABI SOLiD, but internally it shouldn't matter (on output it
116 /// will).
117 unsigned char BaseAsciiMap::base2complement[256+1 /* for NUL char */] =
118  "NNNNNNNNNNNNNNNN" // 0x00-0x0F
119  "NNNNNNNNNNNNNNNN" // 0x10-0x1F
120  "NNNNNNNNNNNNNNNN" // 0x20-0x2F
121  "0123NNNNNNNNNNNN" // 0x30-0x3F
122  "NTNGNNNCNNNNNNNN" // 0x40-0x4F
123  "NNNNANNNNNNNNNNN" // 0x50-0x5F
124  "NTNGNNNCNNNNNNNN" // 0x60-0x6F
125  "NNNNANNNNNNNNNNN" // 0x70-0x7F
126 // not used, but included for completeness:
127  "NNNNNNNNNNNNNNNN" // 0x80-0x8F
128  "NNNNNNNNNNNNNNNN" // 0x90-0x9F
129  "NNNNNNNNNNNNNNNN" // 0xA0-0xAF
130  "NNNNNNNNNNNNNNNN" // 0xB0-0xBF
131  "NNNNNNNNNNNNNNNN" // 0xC0-0xCF
132  "NNNNNNNNNNNNNNNN" // 0xD0-0xDF
133  "NNNNNNNNNNNNNNNN" // 0xE0-0xEF
134  "NNNNNNNNNNNNNNNN" // 0xF0-0xFF
135  ;
136 
137 BaseAsciiMap::BaseAsciiMap()
138  : myNumPrimerBases(1)
139 {
140  myBase2IntMapPtr = NULL;
141 }
142 
143 BaseAsciiMap::~BaseAsciiMap()
144 {
145 }
static const char int2base[]
Convert from int representation to the base.
Definition: BaseAsciiMap.h:38
static unsigned char base2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just base space (ACTGNactgn)...
Definition: BaseAsciiMap.h:61
static unsigned char baseColor2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for both base and color space...
Definition: BaseAsciiMap.h:56
static unsigned char color2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just color space (0123)...
Definition: BaseAsciiMap.h:65
static unsigned char base2complement[]
This table maps 5' base space to the 3' complement base space values, as well as 5' color space value...
Definition: BaseAsciiMap.h:41
static const char int2colorSpace[]
Convert from int representation to colorspace representation.
Definition: BaseAsciiMap.h:40