00001 // *************************************************************************** 00002 // BamConstants.h (c) 2011 Derek Barnett 00003 // Marth Lab, Department of Biology, Boston College 00004 // --------------------------------------------------------------------------- 00005 // Last modified: 16 October 2011 (DB) 00006 // --------------------------------------------------------------------------- 00007 // Provides basic constants for handling BAM files. 00008 // *************************************************************************** 00009 00010 #ifndef BAM_CONSTANTS_H 00011 #define BAM_CONSTANTS_H 00012 00013 #include "api/api_global.h" 00014 #include <cassert> 00015 #include <string> 00016 00021 namespace BamTools { 00022 namespace Constants { 00023 00024 const uint8_t BAM_SIZEOF_INT = 4; 00025 00026 // header magic number 00027 const char* const BAM_HEADER_MAGIC = "BAM\1"; 00028 const uint8_t BAM_HEADER_MAGIC_LENGTH = 4; 00029 00030 // BAM alignment core size 00031 const uint8_t BAM_CORE_SIZE = 32; 00032 const uint8_t BAM_CORE_BUFFER_SIZE = 8; 00033 00034 // BAM alignment flags 00035 const int BAM_ALIGNMENT_PAIRED = 0x0001; 00036 const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002; 00037 const int BAM_ALIGNMENT_UNMAPPED = 0x0004; 00038 const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008; 00039 const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010; 00040 const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020; 00041 const int BAM_ALIGNMENT_READ_1 = 0x0040; 00042 const int BAM_ALIGNMENT_READ_2 = 0x0080; 00043 const int BAM_ALIGNMENT_SECONDARY = 0x0100; 00044 const int BAM_ALIGNMENT_QC_FAILED = 0x0200; 00045 const int BAM_ALIGNMENT_DUPLICATE = 0x0400; 00046 00047 // CIGAR constants 00048 const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X"; 00049 const uint8_t BAM_CIGAR_MATCH = 0; 00050 const uint8_t BAM_CIGAR_INS = 1; 00051 const uint8_t BAM_CIGAR_DEL = 2; 00052 const uint8_t BAM_CIGAR_REFSKIP = 3; 00053 const uint8_t BAM_CIGAR_SOFTCLIP = 4; 00054 const uint8_t BAM_CIGAR_HARDCLIP = 5; 00055 const uint8_t BAM_CIGAR_PAD = 6; 00056 const uint8_t BAM_CIGAR_SEQMATCH = 7; 00057 const uint8_t BAM_CIGAR_MISMATCH = 8; 00058 00059 const char BAM_CIGAR_MATCH_CHAR = 'M'; 00060 const char BAM_CIGAR_INS_CHAR = 'I'; 00061 const char BAM_CIGAR_DEL_CHAR = 'D'; 00062 const char BAM_CIGAR_REFSKIP_CHAR = 'N'; 00063 const char BAM_CIGAR_SOFTCLIP_CHAR = 'S'; 00064 const char BAM_CIGAR_HARDCLIP_CHAR = 'H'; 00065 const char BAM_CIGAR_PAD_CHAR = 'P'; 00066 const char BAM_CIGAR_SEQMATCH_CHAR = '='; 00067 const char BAM_CIGAR_MISMATCH_CHAR = 'X'; 00068 00069 const int BAM_CIGAR_SHIFT = 4; 00070 const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1); 00071 00072 // BAM tag types & sizes 00073 const char BAM_TAG_TYPE_ASCII = 'A'; 00074 const char BAM_TAG_TYPE_INT8 = 'c'; 00075 const char BAM_TAG_TYPE_UINT8 = 'C'; 00076 const char BAM_TAG_TYPE_INT16 = 's'; 00077 const char BAM_TAG_TYPE_UINT16 = 'S'; 00078 const char BAM_TAG_TYPE_INT32 = 'i'; 00079 const char BAM_TAG_TYPE_UINT32 = 'I'; 00080 const char BAM_TAG_TYPE_FLOAT = 'f'; 00081 const char BAM_TAG_TYPE_STRING = 'Z'; 00082 const char BAM_TAG_TYPE_HEX = 'H'; 00083 const char BAM_TAG_TYPE_ARRAY = 'B'; 00084 00085 const uint8_t BAM_TAG_TAGSIZE = 2; 00086 const uint8_t BAM_TAG_TYPESIZE = 1; 00087 const uint8_t BAM_TAG_ARRAYBASE_SIZE = 8; 00088 00089 // DNA bases 00090 const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN"; 00091 const uint8_t BAM_BASECODE_EQUAL = 0; 00092 const uint8_t BAM_BASECODE_A = 1; 00093 const uint8_t BAM_BASECODE_C = 2; 00094 const uint8_t BAM_BASECODE_M = 3; 00095 const uint8_t BAM_BASECODE_G = 4; 00096 const uint8_t BAM_BASECODE_R = 5; 00097 const uint8_t BAM_BASECODE_S = 6; 00098 const uint8_t BAM_BASECODE_V = 7; 00099 const uint8_t BAM_BASECODE_T = 8; 00100 const uint8_t BAM_BASECODE_W = 9; 00101 const uint8_t BAM_BASECODE_Y = 10; 00102 const uint8_t BAM_BASECODE_H = 11; 00103 const uint8_t BAM_BASECODE_K = 12; 00104 const uint8_t BAM_BASECODE_D = 13; 00105 const uint8_t BAM_BASECODE_B = 14; 00106 const uint8_t BAM_BASECODE_N = 15; 00107 00108 const char BAM_DNA_EQUAL = '='; 00109 const char BAM_DNA_A = 'A'; 00110 const char BAM_DNA_C = 'C'; 00111 const char BAM_DNA_M = 'M'; 00112 const char BAM_DNA_G = 'G'; 00113 const char BAM_DNA_R = 'R'; 00114 const char BAM_DNA_S = 'S'; 00115 const char BAM_DNA_V = 'V'; 00116 const char BAM_DNA_T = 'T'; 00117 const char BAM_DNA_W = 'W'; 00118 const char BAM_DNA_Y = 'Y'; 00119 const char BAM_DNA_H = 'H'; 00120 const char BAM_DNA_K = 'K'; 00121 const char BAM_DNA_D = 'D'; 00122 const char BAM_DNA_B = 'B'; 00123 const char BAM_DNA_N = 'N'; 00124 const char BAM_DNA_DEL = '-'; 00125 const char BAM_DNA_PAD = '*'; 00126 00127 // zlib & BGZF constants 00128 const char GZIP_ID1 = 31; 00129 const char GZIP_ID2 = 139; 00130 const char CM_DEFLATE = 8; 00131 const char FLG_FEXTRA = 4; 00132 const char OS_UNKNOWN = 255; 00133 const char BGZF_XLEN = 6; 00134 const char BGZF_ID1 = 66; 00135 const char BGZF_ID2 = 67; 00136 const char BGZF_LEN = 2; 00137 00138 const int8_t GZIP_WINDOW_BITS = -15; 00139 const int8_t Z_DEFAULT_MEM_LEVEL = 8; 00140 const uint8_t BGZF_BLOCK_HEADER_LENGTH = 18; 00141 const uint8_t BGZF_BLOCK_FOOTER_LENGTH = 8; 00142 const uint32_t BGZF_MAX_BLOCK_SIZE = 65536; 00143 const uint32_t BGZF_DEFAULT_BLOCK_SIZE = 65536; 00144 00145 } // namespace Constants 00146 00148 // ------------------------- 00149 // tag-type helper structs 00150 // ------------------------- 00151 00152 // fail on any types not specified below 00153 template<typename T> 00154 struct TagTypeHelper { 00155 static bool CanConvertFrom(const char) { assert(false); return false; } 00156 static bool CanConvertTo(const char) { assert(false); return false; } 00157 static char TypeCode(void) { assert(false); return 0; } 00158 }; 00159 00160 template<> 00161 struct TagTypeHelper<uint8_t> { 00162 static bool CanConvertFrom(const char c) { 00163 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00164 c == Constants::BAM_TAG_TYPE_UINT8 ); 00165 } 00166 static bool CanConvertTo(const char c) { 00167 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00168 c == Constants::BAM_TAG_TYPE_UINT8 || 00169 c == Constants::BAM_TAG_TYPE_UINT16 || 00170 c == Constants::BAM_TAG_TYPE_UINT32 ); 00171 } 00172 00173 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT8; } 00174 }; 00175 00176 template<> 00177 struct TagTypeHelper<int8_t> { 00178 static bool CanConvertFrom(const char c) { 00179 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00180 c == Constants::BAM_TAG_TYPE_INT8 ); 00181 } 00182 static bool CanConvertTo(const char c) { 00183 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00184 c == Constants::BAM_TAG_TYPE_INT8 || 00185 c == Constants::BAM_TAG_TYPE_INT16 || 00186 c == Constants::BAM_TAG_TYPE_INT32 ); 00187 } 00188 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT8; } 00189 }; 00190 00191 template<> 00192 struct TagTypeHelper<uint16_t> { 00193 static bool CanConvertFrom(const char c) { 00194 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00195 c == Constants::BAM_TAG_TYPE_UINT8 || 00196 c == Constants::BAM_TAG_TYPE_UINT16 ); 00197 } 00198 static bool CanConvertTo(const char c) { 00199 return ( c == Constants::BAM_TAG_TYPE_UINT16 || 00200 c == Constants::BAM_TAG_TYPE_UINT32); 00201 } 00202 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT16; } 00203 }; 00204 00205 template<> 00206 struct TagTypeHelper<int16_t> { 00207 static bool CanConvertFrom(const char c) { 00208 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00209 c == Constants::BAM_TAG_TYPE_INT8 || 00210 c == Constants::BAM_TAG_TYPE_INT16 ); 00211 } 00212 static bool CanConvertTo(const char c) { 00213 return ( c == Constants::BAM_TAG_TYPE_INT16 || 00214 c == Constants::BAM_TAG_TYPE_INT32); 00215 } 00216 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT16; } 00217 }; 00218 00219 template<> 00220 struct TagTypeHelper<uint32_t> { 00221 static bool CanConvertFrom(const char c) { 00222 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00223 c == Constants::BAM_TAG_TYPE_UINT8 || 00224 c == Constants::BAM_TAG_TYPE_UINT16 || 00225 c == Constants::BAM_TAG_TYPE_UINT32 ); 00226 } 00227 static bool CanConvertTo(const char c) { 00228 return ( c == Constants::BAM_TAG_TYPE_UINT32 ); 00229 } 00230 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT32; } 00231 }; 00232 00233 template<> 00234 struct TagTypeHelper<int32_t> { 00235 static bool CanConvertFrom(const char c) { 00236 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00237 c == Constants::BAM_TAG_TYPE_INT8 || 00238 c == Constants::BAM_TAG_TYPE_INT16 || 00239 c == Constants::BAM_TAG_TYPE_INT32 ); 00240 } 00241 static bool CanConvertTo(const char c) { 00242 return ( c == Constants::BAM_TAG_TYPE_INT32 ); 00243 } 00244 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT32; } 00245 }; 00246 00247 template<> 00248 struct TagTypeHelper<float> { 00249 static bool CanConvertFrom(const char c) { 00250 return ( c == Constants::BAM_TAG_TYPE_ASCII || 00251 c == Constants::BAM_TAG_TYPE_UINT8 || 00252 c == Constants::BAM_TAG_TYPE_INT8 || 00253 c == Constants::BAM_TAG_TYPE_UINT16 || 00254 c == Constants::BAM_TAG_TYPE_INT16 || 00255 c == Constants::BAM_TAG_TYPE_UINT32 || 00256 c == Constants::BAM_TAG_TYPE_INT32 || 00257 c == Constants::BAM_TAG_TYPE_FLOAT); 00258 } 00259 static bool CanConvertTo(const char c) { 00260 return ( c == Constants::BAM_TAG_TYPE_FLOAT ); 00261 } 00262 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_FLOAT; } 00263 }; 00264 00265 template<> 00266 struct TagTypeHelper<std::string> { 00267 static bool CanConvertFrom(const char c) { 00268 return ( c == Constants::BAM_TAG_TYPE_HEX || 00269 c == Constants::BAM_TAG_TYPE_STRING ); 00270 } 00271 static bool CanConvertTo(const char c) { 00272 return ( c == Constants::BAM_TAG_TYPE_HEX || 00273 c == Constants::BAM_TAG_TYPE_STRING ); 00274 } 00275 static char TypeCode(void) { return Constants::BAM_TAG_TYPE_STRING; } 00276 }; 00277 00279 00280 } // namespace BamTools 00281 00282 #endif // BAM_CONSTANTS_H