libStatGen Software  1
SamValidation.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __SAM_VALIDATION_H__
19 #define __SAM_VALIDATION_H__
20 
21 #include "SamFile.h"
22 #include <list>
23 
24 // On windows, ERROR and WARNING are pre-defined macros, so undefine them.
25 #ifdef WARNING
26 #undef WARNING
27 #endif
28 #ifdef ERROR
29 #undef ERROR
30 #endif
31 
/// The SamValidationError class describes a validation error that occurred,
/// containing the error type, severity, and textual error message.
class SamValidationError
{
public:
    /// Severity of the error.
    enum Severity
    {
        WARNING, ///< Warning is used if it is just an invalid value.
        ERROR    ///< Error is used if parsing could not succeed.
    };

    /// Type of the error.
    /// TODO: NOT ALL INVALID TYPES HAVE BEEN ADDED SINCE NOT ALL VALIDATION
    /// IS COMPLETE YET
    enum Type
    {
        INVALID_QNAME,  ///< Invalid read/query name
        INVALID_REF_ID, ///< Invalid reference id
        INVALID_RNAME,  ///< Invalid reference name
        INVALID_POS,    ///< Invalid position
        INVALID_MAPQ,   ///< Invalid mapping quality
        INVALID_CIGAR,  ///< Invalid CIGAR
        INVALID_MRNM,   ///< Invalid mate/next fragment reference name
        INVALID_QUAL,   ///< Invalid base quality
        INVALID_TAG     ///< Invalid tag
    };

    /// Get the string representing the specified type of validation error.
    /// \param type validation error type to look up.
    /// \return string representing the specified type.
    static const char* getTypeString(Type type);

    /// Constructor that sets the type, severity, and message for the
    /// validation error.
    /// \param type type of this validation error.
    /// \param severity severity of this validation error.
    /// \param Message textual description of this validation error.
    SamValidationError(Type type, Severity severity, std::string Message);

    /// Return the type enum of this validation error object.
    Type getType() const;

    /// Return the severity enum of this validation error object.
    Severity getSeverity() const;

    /// Return the error message of this validation error object.
    const char* getMessage() const;

    /// Return the string representing this object's type of validation error.
    const char* getTypeString() const;

    /// Return the string representing this object's severity of validation
    /// error.
    const char* getSeverityString() const;

    /// Get the error string representing this object's error.
    /// \param errorString string that receives the error text.
    void getErrorString(std::string& errorString) const;

    /// Print a formatted output of the error to cerr.
    void printError() const;

private:
    // NOTE(review): this declaration was missing from the extracted listing;
    // presumably it is the private default constructor that prevents
    // construction without a type/severity/message - confirm against the
    // upstream header.
    SamValidationError();

    // String tables defined in the implementation file; presumably indexed
    // by the Type and Severity enum values - confirm against the .cpp.
    static const char* enumTypeString[];
    static const char* enumSeverityString[];

    Type myType;           // Type of this validation error.
    Severity mySeverity;   // Severity of this validation error.
    std::string myMessage; // Textual message for this validation error.

};
100 
101 
102 /// stream output for validation failure information
103 inline std::ostream &operator << (std::ostream &stream,
104  const SamValidationError &error)
105 {
106  std::string errorMessage;
107  error.getErrorString(errorMessage);
108  stream << errorMessage;
109  return stream;
110 }
111 
112 
113 /// The SamValidationErrors class is a container class that holds
114 /// SamValidationError Objects, allowing a validation method to return all
115 /// of the invalid errors rather than just one.
117 {
118 public:
119  /// Constructor.
121  /// Destructor
123 
124  /// Remove all the errors from the container.
125  void clear();
126 
127  /// Add the specified error to this container.
128  void addError(SamValidationError::Type newType,
129  SamValidationError::Severity newSeverity,
130  const char* newMessage);
131 
132  /// Return the number of validation errors contained in this object.
133  unsigned int numErrors();
134 
135  /// Return a pointer to the next error without removing it from the
136  /// container, and returning null once all errors have been retrieved
137  /// until resetErrorIter is called.
138  const SamValidationError* getNextError();
139 
140  /// Reset the iterator to the begining of the errors.
141  void resetErrorIter();
142 
143  /// Append the error messages contained in this container to the passed
144  /// in string.
145  void getErrorString(std::string& errorString) const;
146 
147 private:
148  std::list<const SamValidationError*> myValidationErrors;
149  std::list<const SamValidationError*>::const_iterator myErrorIter;
150 };
151 
152 
153 /// stream output for all validation failures information
154 inline std::ostream& operator << (std::ostream& stream,
155  const SamValidationErrors& errors)
156 {
157  std::string errorString = "";
158  errors.getErrorString(errorString);
159  stream << errorString;
160  return stream;
161 }
162 
163 
164 /// The SamValidator class contains static methods for validating the SAM/BAM
165 /// Record and each of its fields. The generic isValid method performs all of
166 /// the other validations. The SamValidator methods return whether or not what
167 /// is being validated is valid. True means it is valid, false means it is not.
168 /// The specifics of the invalid value(s) are contained in the
169 /// SamValidationErrors object that is passed in (by reference) to the method.
170 /// The specific errors can be pulled out of that object.
171 /// TODO: VALIDATION METHODS STILL NEED TO BE ADDED, and isValid does not yet
172 /// validate all fields!!!
174 {
175 public:
176 
177  /// Validates whether or not the specified SamRecord is valid, calling
178  /// all of the other validations.
179  /// TODO: more validation needs to be added.
180  /// \param samHeader header associated with the record to be validated.
181  /// \param samRecord record to be validated.
182  /// \param validationErrors status to append any errors too.
183  /// \return true if it is valid, false and appends to SamValidationErrors
184  /// if it is not
185  static bool isValid(SamFileHeader& samHeader, SamRecord& samRecord,
186  SamValidationErrors& validationErrors);
187 
188  /// Determines whether or not the specified qname is valid.
189  /// Validation for QNAME is:
190  /// a) length of the qname string is the same as the read name length
191  /// b) length is between 1 and 254.
192  /// c) [ \t\n\r] are not allowed in the name.
193  /// \param qname the read/query name.
194  /// \param qnameLen length of the read including the null (result of
195  /// SamRecord::getReadNameLength().
196  /// \param validationErrors status to append any errors too.
197  /// \return true if it is valid, false and appends to SamValidationErrors
198  /// if it is not
199  static bool isValidQname(const char* qname, uint8_t qnameLen,
200  SamValidationErrors& validationErrors);
201 
202  /// Determines whether or not the flag is valid.
203  /// TODO: currently no validation is done on the flag.
204  /// \param flag flag to be validated.
205  /// \param validationErrors status to append any errors too.
206  /// \return true if it is valid, false and appends to SamValidationErrors
207  /// if it is not
208  static bool isValidFlag(uint16_t flag,
209  SamValidationErrors& validationErrors);
210 
211  /// Validate the reference name including validating against the header.
212  /// 1) Cross validate the rname and the header.
213  /// 2) perform the validation in the method that doesn't take the header.
214  /// \param samHeader header associated with the rname to be validated.
215  /// \param rname reference name to be validated.
216  /// \param validationErrors status to append any errors too.
217  /// \return true if it is valid, false and appends to SamValidationErrors
218  /// if it is not
219  static bool isValidRname(SamFileHeader& samHeader,
220  const char* rname,
221  SamValidationErrors& validationErrors);
222  /// Validate the rname without validating against the header.
223  /// Validation for RNAME is:
224  /// a) cannot be 0 length.
225  /// b) [ \t\n\r@=] are not allowed in the name.
226  /// \param rname reference name to be validated.
227  /// \param validationErrors status to append any errors too.
228  /// \return true if it is valid, false and appends to SamValidationErrors
229  /// if it is not
230  static bool isValidRname(const char* rname,
231  SamValidationErrors& validationErrors);
232 
233  /// Validate whether or not the specified reference id is valid.
234  /// Validation for rID is:
235  /// a) must be between -1 and the number of refInfo.
236  /// -1 is allowed, and otherwise it must properly index into the array.
237  /// \param refID reference id to be validated.
238  /// \param refInfo sam reference information containing the mapping
239  /// from reference id to reference name for this refID.
240  /// \param validationErrors status to append any errors too.
241  /// \return true if it is valid, false and appends to SamValidationErrors
242  /// if it is not
243  static bool isValidRefID(int32_t refID, const SamReferenceInfo& refInfo,
244  SamValidationErrors& validationErrors);
245 
246  /// Validate the refeference position.
247  /// Validation for pos is:
248  /// a) must be between 0 and (2^29)-1.
249  /// \param pos position to be validated.
250  /// \param validationErrors status to append any errors too.
251  /// \return true if it is valid, false and appends to SamValidationErrors
252  /// if it is not
253  static bool isValid1BasedPos(int32_t pos,
254  SamValidationErrors& validationErrors);
255 
256  /// Validate the mapping quality.
257  /// TODO: currently no validation is done on the mapping quality.
258  /// \param mapQuality mapping quality to be validated.
259  /// \param validationErrors status to append any errors too.
260  /// \return true if it is valid, false and appends to SamValidationErrors
261  /// if it is not
262  static bool isValidMapQuality(uint8_t mapQuality,
263  SamValidationErrors& validationErrors);
264 
265  /// Validate the sequence, but not against the cigar or quality string.
266  /// Validation against cigar is done in isValidCigar.
267  /// Validation against the quality string is done in isValidQuality.
268  /// TODO: currently no validation is done in this method.
269  /// \param samRecord record whose sequence should be validated.
270  /// \param validationErrors status to append any errors too.
271  /// \return true if it is valid, false and appends to SamValidationErrors
272  /// if it is not
273  static bool isValidSequence(SamRecord& samRecord,
274  SamValidationErrors& validationErrors);
275 
276  /// Validate the cigar. Cigar validation depends on sequence.
277  /// Validation for CIGAR is:
278  /// a) cannot be 0 length.
279  /// if not "*", validate the following:
280  /// b) must have an integer length for each operator (if not "*"). TODO
281  /// c) all operators must be valid (if not "*"). TODO
282  /// d) evaluates to the same read length as the sequence string.
283  /// \param samRecord record whose cigar should be validated.
284  /// \param validationErrors status to append any errors too.
285  /// \return true if it is valid, false and appends to SamValidationErrors
286  /// if it is not
287  static bool isValidCigar(SamRecord& samRecord,
288  SamValidationErrors& validationErrors);
289 
290  /// Validate the cigar. Cigar validation depends on sequence.
291  /// Validation for CIGAR is:
292  /// a) cannot be 0 length.
293  /// if not "*", validate the following:
294  /// b) must have an integer length for each operator (if not "*"). TODO
295  /// c) all operators must be valid (if not "*"). TODO
296  /// d) evaluates to the same read length as the sequence string.
297  /// \param cigar cigar string to be validated.
298  /// \param sequence sequence to check the cigar against.
299  /// \param validationErrors status to append any errors too.
300  /// \return true if it is valid, false and appends to SamValidationErrors
301  /// if it is not
302  static bool isValidCigar(const char* cigar, const char* sequence,
303  SamValidationErrors& validationErrors);
304 
305  /// Validate the cigar. Cigar validation depends on sequence.
306  /// Validation for CIGAR is:
307  /// a) cannot be 0 length.
308  /// if not "*", validate the following:
309  /// b) TODO: must have an integer length for each operator (if not "*").
310  /// c) TODO: all operators must be valid (if not "*").
311  /// d) evaluates to the same read length as the sequence string.
312  /// \param cigar cigar string to be validated.
313  /// \param seqLen sequence length to check the cigar against.
314  /// \param validationErrors status to append any errors too.
315  /// \return true if it is valid, false and appends to SamValidationErrors
316  /// if it is not
317  static bool isValidCigar(const char* cigar,
318  int seqLen,
319  SamValidationErrors& validationErrors);
320 
321  /// TODO: validate the mate/next fragment's reference name.
322  /// \return true if it is valid, false and appends to SamValidationErrors
323  /// if it is not
324  static bool isValidMrnm();
325 
326  /// TODO: validate the mate/next fragment's position.
327  /// \return true if it is valid, false and appends to SamValidationErrors
328  /// if it is not
329  static bool isValidMpos();
330 
331  /// TODO: validate the insertion size/observed template length.
332  /// \return true if it is valid, false and appends to SamValidationErrors
333  /// if it is not
334  static bool isValidIsize();
335 
336  /// TODO, validate the sequence.
337  /// \return true if it is valid, false and appends to SamValidationErrors
338  /// if it is not
339  static bool isValidSeq();
340 
341  /// Validate the base quality.
342  /// Quality validation depends on sequence.
343  /// Validation for quality is:
344  /// a) quality & sequence are the same length if both are specified.
345  /// TODO: more validation.
346  /// \param samRecord record whose quality should be validated.
347  /// \param validationErrors status to append any errors too.
348  /// \return true if it is valid, false and appends to SamValidationErrors
349  /// if it is not
350  static bool isValidQuality(SamRecord& samRecord,
351  SamValidationErrors& validationErrors);
352 
353  /// Validate the base quality.
354  /// Quality validation depends on sequence.
355  /// Validation for quality is:
356  /// a) quality & sequence are the same length if both are specified.
357  /// TODO: more validation.
358  /// \param quality quality string to be validated.
359  /// \param seqLen sequence length to check the quality against.
360  /// \param validationErrors status to append any errors too.
361  /// \return true if it is valid, false and appends to SamValidationErrors
362  /// if it is not
363  static bool isValidQuality(const char* quality, const char* sequence,
364  SamValidationErrors& validationErrors);
365 
366  /// Validate the base quality.
367  /// Quality validation depends on sequence.
368  /// Validation for quality is:
369  /// a) quality & sequence are the same length if both are specified.
370  /// TODO: more validation.
371  /// \param quality quality string to be validated.
372  /// \param seqLen sequence length to check the quality against.
373  /// \param validationErrors status to append any errors too.
374  /// \return true if it is valid, false and appends to SamValidationErrors
375  /// if it is not
376  bool static isValidQuality(const char* quality,
377  int seqLength,
378  SamValidationErrors& validationErrors);
379 
380  /// Validate the tags.
381  /// Validation for tags is:
382  /// a) check that the "MD" tag is correct if it is present.
383  /// TODO: more validation.
384  /// \param samRecord record whose tags should be validated.
385  /// \param validationErrors status to append any errors too.
386  /// \return true if it is valid, false and appends to SamValidationErrors
387  /// if it is not
388  static bool isValidTags(SamRecord& samRecord,
389  SamValidationErrors& validationErrors);
390 
391  /// TODO validate the tag vtype
392  /// \return true if it is valid, false and appends to SamValidationErrors
393  /// if it is not
394  static bool isValidVtype();
395 
396  /// TODO validate the tag vtype
397  /// \return true if it is valid, false and appends to SamValidationErrors
398  /// if it is not
399  static bool isValidValue();
400 };
401 
402 
403 #endif
The SamValidationError class describes a validation error that occurred, containing the error type...
Definition: SamValidation.h:34
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
Invalid mapping quality.
Definition: SamValidation.h:53
Severity getSeverity() const
Return the severity enum of this validation error object.
const char * getMessage() const
Return the error message of this validation error object.
const char * getTypeString() const
Return the string representing this object's type of validation error.
Invalid reference name.
Definition: SamValidation.h:51
Type
Type of the error.
Definition: SamValidation.h:47
Invalid base quality.
Definition: SamValidation.h:56
Invalid mate/next fragment reference name.
Definition: SamValidation.h:55
void getErrorString(std::string &errorString) const
Get the error string representing this object's error.
const char * getSeverityString() const
Return the string representing this object's severity of validation error.
Error is used if parsing could not succeed.
Definition: SamValidation.h:41
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736
Class for tracking the reference information mapping between the reference ids and the reference name...
Type getType() const
Return the type enum of this validation error object.
The SamValidator class contains static methods for validating the SAM/BAM Record and each of its fiel...
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:34
Severity
Severity of the error.
Definition: SamValidation.h:38
Warning is used if it is just an invalid value.
Definition: SamValidation.h:40
The SamValidationErrors class is a container class that holds SamValidationError Objects, allowing a validation method to return all of the invalid errors rather than just one.
Invalid read/query name.
Definition: SamValidation.h:49
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record...
Definition: SamRecord.h:51
void printError() const
Print a formatted output of the error to cerr.