00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef BAMALIGNMENT_H
00011 #define BAMALIGNMENT_H
00012
00013 #include "api/api_global.h"
00014 #include "api/BamAux.h"
00015 #include "api/BamConstants.h"
00016 #include <cstdlib>
00017 #include <cstring>
00018 #include <string>
00019 #include <vector>
00020
00021 namespace BamTools {
00022
00024
00025 namespace Internal {
00026 class BamReaderPrivate;
00027 class BamWriterPrivate;
00028 }
00030
00031
00032 struct API_EXPORT BamAlignment {
00033
00034
00035 public:
00036 BamAlignment(void);
00037 BamAlignment(const BamAlignment& other);
00038 ~BamAlignment(void);
00039
00040
00041 public:
00042 bool IsDuplicate(void) const;
00043 bool IsFailedQC(void) const;
00044 bool IsFirstMate(void) const;
00045 bool IsMapped(void) const;
00046 bool IsMateMapped(void) const;
00047 bool IsMateReverseStrand(void) const;
00048 bool IsPaired(void) const;
00049 bool IsPrimaryAlignment(void) const;
00050 bool IsProperPair(void) const;
00051 bool IsReverseStrand(void) const;
00052 bool IsSecondMate(void) const;
00053
00054
00055 public:
00056 void SetIsDuplicate(bool ok);
00057 void SetIsFailedQC(bool ok);
00058 void SetIsFirstMate(bool ok);
00059 void SetIsMapped(bool ok);
00060 void SetIsMateMapped(bool ok);
00061 void SetIsMateReverseStrand(bool ok);
00062 void SetIsPaired(bool ok);
00063 void SetIsPrimaryAlignment(bool ok);
00064 void SetIsProperPair(bool ok);
00065 void SetIsReverseStrand(bool ok);
00066 void SetIsSecondMate(bool ok);
00067
00068
00069 public:
00070
00071
00072 template<typename T> bool AddTag(const std::string& tag, const std::string& type, const T& value);
00073 template<typename T> bool AddTag(const std::string& tag, const std::vector<T>& values);
00074
00075
00076 template<typename T> bool EditTag(const std::string& tag, const std::string& type, const T& value);
00077 template<typename T> bool EditTag(const std::string& tag, const std::vector<T>& values);
00078
00079
00080 template<typename T> bool GetTag(const std::string& tag, T& destination) const;
00081 template<typename T> bool GetTag(const std::string& tag, std::vector<T>& destination) const;
00082
00083
00084 bool GetTagType(const std::string& tag, char& type) const;
00085
00086
00087 bool HasTag(const std::string& tag) const;
00088
00089
00090 void RemoveTag(const std::string& tag);
00091
00092
00093 public:
00094
00095 bool BuildCharData(void);
00096
00097
00098 int GetEndPosition(bool usePadded = false, bool closedInterval = false) const;
00099
00100
00101 std::string GetErrorString(void) const;
00102
00103
00104 bool GetSoftClips(std::vector<int>& clipSizes,
00105 std::vector<int>& readPositions,
00106 std::vector<int>& genomePositions,
00107 bool usePadded = false) const;
00108
00109
00110 public:
00111 std::string Name;
00112 int32_t Length;
00113 std::string QueryBases;
00114 std::string AlignedBases;
00115 std::string Qualities;
00116 std::string TagData;
00117 int32_t RefID;
00118 int32_t Position;
00119 uint16_t Bin;
00120 uint16_t MapQuality;
00121 uint32_t AlignmentFlag;
00122 std::vector<CigarOp> CigarData;
00123 int32_t MateRefID;
00124 int32_t MatePosition;
00125 int32_t InsertSize;
00126 std::string Filename;
00127
00129
00130 private:
00131 bool FindTag(const std::string& tag,
00132 char*& pTagData,
00133 const unsigned int& tagDataLength,
00134 unsigned int& numBytesParsed) const;
00135 bool IsValidSize(const std::string& tag, const std::string& type) const;
00136 void SetErrorString(const std::string& where, const std::string& what) const;
00137 bool SkipToNextTag(const char storageType,
00138 char*& pTagData,
00139 unsigned int& numBytesParsed) const;
00140
00141
00142 private:
00143
00144 struct BamAlignmentSupportData {
00145
00146
00147 std::string AllCharData;
00148 uint32_t BlockLength;
00149 uint32_t NumCigarOperations;
00150 uint32_t QueryNameLength;
00151 uint32_t QuerySequenceLength;
00152 bool HasCoreOnly;
00153
00154
00155 BamAlignmentSupportData(void)
00156 : BlockLength(0)
00157 , NumCigarOperations(0)
00158 , QueryNameLength(0)
00159 , QuerySequenceLength(0)
00160 , HasCoreOnly(false)
00161 { }
00162 };
00163 BamAlignmentSupportData SupportData;
00164 friend class Internal::BamReaderPrivate;
00165 friend class Internal::BamWriterPrivate;
00166
00167 mutable std::string ErrorString;
00169 };
00170
00171
00172
00173
00185 template<typename T>
00186 inline bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const T& value) {
00187
00188
00189 if ( SupportData.HasCoreOnly )
00190 BuildCharData();
00191
00192
00193 if ( !IsValidSize(tag, type) ) {
00194
00195 return false;
00196 }
00197
00198
00199 if ( !TagTypeHelper<T>::CanConvertTo(type.at(0)) ) {
00200
00201 return false;
00202 }
00203
00204
00205 char* pTagData = (char*)TagData.data();
00206 const unsigned int tagDataLength = TagData.size();
00207 unsigned int numBytesParsed = 0;
00208
00209
00210
00211 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
00212
00213 return false;
00214 }
00215
00216
00217 union { T value; char valueBuffer[sizeof(T)]; } un;
00218 un.value = value;
00219
00220
00221 const std::string newTag = tag + type;
00222 const size_t newTagDataLength = tagDataLength + newTag.size() + sizeof(T);
00223 RaiiBuffer originalTagData(newTagDataLength);
00224 memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1);
00225
00226
00227 strcat(originalTagData.Buffer + tagDataLength, newTag.data());
00228 memcpy(originalTagData.Buffer + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T));
00229
00230
00231 const char* newTagData = (const char*)originalTagData.Buffer;
00232 TagData.assign(newTagData, newTagDataLength);
00233 return true;
00234 }
00235
00236 template<>
00237 inline bool BamAlignment::AddTag<std::string>(const std::string& tag,
00238 const std::string& type,
00239 const std::string& value)
00240 {
00241
00242 if ( SupportData.HasCoreOnly )
00243 BuildCharData();
00244
00245
00246 if ( !IsValidSize(tag, type) ) {
00247
00248 return false;
00249 }
00250
00251
00252 if ( !TagTypeHelper<std::string>::CanConvertTo(type.at(0)) ) {
00253
00254 return false;
00255 }
00256
00257
00258 char* pTagData = (char*)TagData.data();
00259 const unsigned int tagDataLength = TagData.size();
00260 unsigned int numBytesParsed = 0;
00261
00262
00263
00264 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
00265
00266 return false;
00267 }
00268
00269
00270 const std::string newTag = tag + type + value;
00271 const size_t newTagDataLength = tagDataLength + newTag.size() + 1;
00272 RaiiBuffer originalTagData(newTagDataLength);
00273 memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1);
00274
00275
00276 strcat(originalTagData.Buffer + tagDataLength, newTag.data());
00277
00278
00279 const char* newTagData = (const char*)originalTagData.Buffer;
00280 TagData.assign(newTagData, newTagDataLength);
00281 return true;
00282 }
00283
00294 template<typename T>
00295 inline bool BamAlignment::AddTag(const std::string& tag, const std::vector<T>& values) {
00296
00297
00298 if ( SupportData.HasCoreOnly )
00299 BuildCharData();
00300
00301
00302 if ( tag.size() != Constants::BAM_TAG_TAGSIZE )
00303 return false;
00304
00305
00306 char* pTagData = (char*)TagData.data();
00307 const unsigned int tagDataLength = TagData.size();
00308 unsigned int numBytesParsed = 0;
00309
00310
00311
00312 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
00313
00314 return false;
00315 }
00316
00317
00318 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
00319 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
00320 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
00321 newTagBase[3] = TagTypeHelper<T>::TypeCode();
00322
00323
00324 const int32_t numElements = values.size();
00325 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
00326
00327
00328 const size_t newTagDataLength = tagDataLength +
00329 Constants::BAM_TAG_ARRAYBASE_SIZE +
00330 numElements*sizeof(T);
00331 RaiiBuffer originalTagData(newTagDataLength);
00332 memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength+1);
00333
00334
00335 strcat(originalTagData.Buffer + tagDataLength, (const char*)newTagBase);
00336
00337
00338 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
00339 for ( int i = 0 ; i < numElements; ++i ) {
00340 const T& value = values.at(i);
00341 memcpy(originalTagData.Buffer + elementsBeginOffset + i*sizeof(T), &value, sizeof(T));
00342 }
00343
00344
00345 const char* newTagData = (const char*)originalTagData.Buffer;
00346 TagData.assign(newTagData, newTagDataLength);
00347 return true;
00348 }
00349
00364 template<typename T>
00365 inline bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const T& value) {
00366
00367
00368 if ( SupportData.HasCoreOnly )
00369 BuildCharData();
00370
00371
00372 if ( HasTag(tag) )
00373 RemoveTag(tag);
00374 return AddTag(tag, type, value);
00375 }
00376
00388 template<typename T>
00389 inline bool BamAlignment::EditTag(const std::string& tag, const std::vector<T>& values) {
00390
00391
00392 if ( SupportData.HasCoreOnly )
00393 BuildCharData();
00394
00395
00396 if ( HasTag(tag) )
00397 RemoveTag(tag);
00398 return AddTag(tag, values);
00399 }
00400
00401
00409 template<typename T>
00410 inline bool BamAlignment::GetTag(const std::string& tag, T& destination) const {
00411
00412
00413 if ( SupportData.HasCoreOnly ) {
00414
00415 return false;
00416 }
00417
00418
00419 if ( TagData.empty() ) {
00420
00421 return false;
00422 }
00423
00424
00425 char* pTagData = (char*)TagData.data();
00426 const unsigned int tagDataLength = TagData.size();
00427 unsigned int numBytesParsed = 0;
00428
00429
00430 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
00431
00432 return false;
00433 }
00434
00435
00436 const char type = *(pTagData - 1);
00437 if ( !TagTypeHelper<T>::CanConvertFrom(type) ) {
00438
00439 return false;
00440 }
00441
00442
00443 int destinationLength = 0;
00444 switch ( type ) {
00445
00446
00447 case (Constants::BAM_TAG_TYPE_ASCII) :
00448 case (Constants::BAM_TAG_TYPE_INT8) :
00449 case (Constants::BAM_TAG_TYPE_UINT8) :
00450 destinationLength = 1;
00451 break;
00452
00453
00454 case (Constants::BAM_TAG_TYPE_INT16) :
00455 case (Constants::BAM_TAG_TYPE_UINT16) :
00456 destinationLength = 2;
00457 break;
00458
00459
00460 case (Constants::BAM_TAG_TYPE_INT32) :
00461 case (Constants::BAM_TAG_TYPE_UINT32) :
00462 case (Constants::BAM_TAG_TYPE_FLOAT) :
00463 destinationLength = 4;
00464 break;
00465
00466
00467 case (Constants::BAM_TAG_TYPE_STRING) :
00468 case (Constants::BAM_TAG_TYPE_HEX) :
00469 case (Constants::BAM_TAG_TYPE_ARRAY) :
00470 SetErrorString("BamAlignment::GetTag",
00471 "cannot store variable length tag data into a numeric destination");
00472 return false;
00473
00474
00475 default:
00476 const std::string message = std::string("invalid tag type: ") + type;
00477 SetErrorString("BamAlignment::GetTag", message);
00478 return false;
00479 }
00480
00481
00482 destination = 0;
00483 memcpy(&destination, pTagData, destinationLength);
00484
00485
00486 return true;
00487 }
00488
00489 template<>
00490 inline bool BamAlignment::GetTag<std::string>(const std::string& tag,
00491 std::string& destination) const
00492 {
00493
00494 if ( SupportData.HasCoreOnly ) {
00495
00496 return false;
00497 }
00498
00499
00500 if ( TagData.empty() ) {
00501
00502 return false;
00503 }
00504
00505
00506 char* pTagData = (char*)TagData.data();
00507 const unsigned int tagDataLength = TagData.size();
00508 unsigned int numBytesParsed = 0;
00509
00510
00511 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
00512
00513 return false;
00514 }
00515
00516
00517 const unsigned int dataLength = strlen(pTagData);
00518 destination.clear();
00519 destination.resize(dataLength);
00520 memcpy( (char*)destination.data(), pTagData, dataLength );
00521
00522
00523 return true;
00524 }
00525
00533 template<typename T>
00534 inline bool BamAlignment::GetTag(const std::string& tag, std::vector<T>& destination) const {
00535
00536
00537 if ( SupportData.HasCoreOnly ) {
00538
00539 return false;
00540 }
00541
00542
00543 if ( TagData.empty() ) {
00544
00545 return false;
00546 }
00547
00548
00549 char* pTagData = (char*)TagData.data();
00550 const unsigned int tagDataLength = TagData.size();
00551 unsigned int numBytesParsed = 0;
00552
00553
00554 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
00555
00556 return false;
00557 }
00558
00559
00560 const char tagType = *(pTagData - 1);
00561 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
00562 SetErrorString("BamAlignment::GetTag", "cannot store a non-array tag in array destination");
00563 return false;
00564 }
00565
00566
00567 const char elementType = *pTagData;
00568 if ( !TagTypeHelper<T>::CanConvertFrom(elementType) ) {
00569
00570 return false;
00571 }
00572 ++pTagData;
00573
00574
00575 int elementLength = 0;
00576 switch ( elementType ) {
00577 case (Constants::BAM_TAG_TYPE_ASCII) :
00578 case (Constants::BAM_TAG_TYPE_INT8) :
00579 case (Constants::BAM_TAG_TYPE_UINT8) :
00580 elementLength = sizeof(uint8_t);
00581 break;
00582
00583 case (Constants::BAM_TAG_TYPE_INT16) :
00584 case (Constants::BAM_TAG_TYPE_UINT16) :
00585 elementLength = sizeof(uint16_t);
00586 break;
00587
00588 case (Constants::BAM_TAG_TYPE_INT32) :
00589 case (Constants::BAM_TAG_TYPE_UINT32) :
00590 case (Constants::BAM_TAG_TYPE_FLOAT) :
00591 elementLength = sizeof(uint32_t);
00592 break;
00593
00594
00595 case (Constants::BAM_TAG_TYPE_STRING) :
00596 case (Constants::BAM_TAG_TYPE_HEX) :
00597 case (Constants::BAM_TAG_TYPE_ARRAY) :
00598 SetErrorString("BamAlignment::GetTag",
00599 "invalid array data, variable-length elements are not allowed");
00600 return false;
00601
00602
00603 default:
00604 const std::string message = std::string("invalid array element type: ") + elementType;
00605 SetErrorString("BamAlignment::GetTag", message);
00606 return false;
00607 }
00608
00609
00610 int32_t numElements;
00611 memcpy(&numElements, pTagData, sizeof(int32_t));
00612 pTagData += 4;
00613 destination.clear();
00614 destination.reserve(numElements);
00615
00616
00617 T value;
00618 for ( int i = 0 ; i < numElements; ++i ) {
00619 memcpy(&value, pTagData, sizeof(T));
00620 pTagData += sizeof(T);
00621 destination.push_back(value);
00622 }
00623
00624
00625 return true;
00626 }
00627
00628 typedef std::vector<BamAlignment> BamAlignmentVector;
00629
00630 }
00631
00632 #endif // BAMALIGNMENT_H