""" Read ID3 tags from a file.
    Ned Batchelder, http://nedbatchelder.com/code/modules/id3reader.html

    This code is in the public domain.
"""

__version__ = '1.53.20070415'
# History at the end of the file.

# ID3 specs: http://www.id3.org/develop.html

import struct
import sys
import zlib

# These are the text encodings, indexed by the first byte of a text value.
_encodings = ['iso8859-1', 'utf-16', 'utf-16be', 'utf-8']

# Simple pseudo-id's, mapped to their various representations.
# Use these ids with getValue, and you don't need to know what
# version of ID3 the file contains.
_simpleDataMapping = {
    'album':        ('TALB', 'TAL', 'v1album', 'TOAL'),
    'performer':    ('TPE1', 'TP1', 'v1performer', 'TOPE'),
    'title':        ('TIT2', 'TT2', 'v1title'),
    'track':        ('TRCK', 'TRK', 'v1track'),
    'year':         ('TYER', 'TYE', 'v1year'),
    'genre':        ('TCON', 'TCO', 'v1genre'),
    'comment':      ('COMM', 'COM', 'v1comment'),
}

# Provide booleans for older Pythons.
try:
    True, False
except NameError:
    # Bound through globals() instead of "True, False = ...": a direct
    # assignment is rejected at compile time by interpreters where these
    # names are keywords, which would make the whole module unimportable
    # there even though this branch never runs.
    globals()['True'] = (1 == 1)
    globals()['False'] = (1 == 0)

# The type used to turn arbitrary values into text: `unicode` where that
# builtin exists, plain `str` otherwise.
try:
    _textType = unicode
except NameError:
    _textType = str

# Tracing
# _t is the switch tested by the (normally commented-out) "if _t:" guards.
_t = False

def _trace(msg):
    """ Print a trace message to standard output. """
    # Parenthesized so the line parses both as a print statement and as a
    # call to the print function; the output is the same for one argument.
    print(msg)

# Coverage
# _c is the switch tested by the (normally commented-out) "if _c:" guards;
# _features maps a feature name to the number of times it was reported.
_c = False
_features = {}

def _coverage(feat):
    """ Record one more occurrence of the feature named `feat`. """
    #if _t: _trace('feature '+feat)
    _features[feat] = _features.get(feat, 0) + 1

def _safestr(s):
    """ Get a good string for printing, that won't throw exceptions,
        no matter what's in it.

        `s` may be any object.  The result is a native `str` holding the
        text of `s` if that text can be encoded in the interpreter's
        default encoding, or else '?: ' followed by repr(s).
    """
    codec = sys.getdefaultencoding()
    try:
        encoded = _textType(s).encode(codec)
    except UnicodeError:
        return '?: ' + repr(s)
    if isinstance(encoded, str):
        # Byte strings are the native str here: return them as before.
        return encoded
    # Otherwise encode() produced a separate bytes object; hand back text
    # so that callers formatting the result with %s don't get b'...'.
    return encoded.decode(codec)

# Can I just say that I think the whole concept of genres is bogus,
# since they are so subjective?  And the idea of letting someone else pick
# one of these things and then have it affect the categorization of my music
# is extra bogus.  And the list itself is absurd. Polsk Punk?
_genres = [ # 0-19 'Blues', 'Classic Rock', 'Country', 'Dance', 'Disco', 'Funk', 'Grunge', 'Hip - Hop', 'Jazz', 'Metal', 'New Age', 'Oldies', 'Other', 'Pop', 'R&B', 'Rap', 'Reggae', 'Rock', 'Techno', 'Industrial', # 20-39 'Alternative', 'Ska', 'Death Metal', 'Pranks', 'Soundtrack', 'Euro - Techno', 'Ambient', 'Trip - Hop', 'Vocal', 'Jazz + Funk', 'Fusion', 'Trance', 'Classical', 'Instrumental', 'Acid', 'House', 'Game', 'Sound Clip', 'Gospel', 'Noise', # 40-59 'Alt Rock', 'Bass', 'Soul', 'Punk', 'Space', 'Meditative', 'Instrumental Pop', 'Instrumental Rock', 'Ethnic', 'Gothic', 'Darkwave', 'Techno - Industrial', 'Electronic', 'Pop - Folk', 'Eurodance', 'Dream', 'Southern Rock', 'Comedy', 'Cult', 'Gangsta Rap', # 60-79 'Top 40', 'Christian Rap', 'Pop / Funk', 'Jungle', 'Native American', 'Cabaret', 'New Wave', 'Psychedelic', 'Rave', 'Showtunes', 'Trailer', 'Lo - Fi', 'Tribal', 'Acid Punk', 'Acid Jazz', 'Polka', 'Retro', 'Musical', 'Rock & Roll', 'Hard Rock', # 80-99 'Folk', 'Folk / Rock', 'National Folk', 'Swing', 'Fast - Fusion', 'Bebob', 'Latin', 'Revival', 'Celtic', 'Bluegrass', 'Avantgarde', 'Gothic Rock', 'Progressive Rock', 'Psychedelic Rock', 'Symphonic Rock', 'Slow Rock', 'Big Band', 'Chorus', 'Easy Listening', 'Acoustic', # 100-119 'Humour', 'Speech', 'Chanson', 'Opera', 'Chamber Music', 'Sonata', 'Symphony', 'Booty Bass', 'Primus', 'Porn Groove', 'Satire', 'Slow Jam', 'Club', 'Tango', 'Samba', 'Folklore', 'Ballad', 'Power Ballad', 'Rhythmic Soul', 'Freestyle', # 120-139 'Duet', 'Punk Rock', 'Drum Solo', 'A Cappella', 'Euro - House', 'Dance Hall', 'Goa', 'Drum & Bass', 'Club - House', 'Hardcore', 'Terror', 'Indie', 'BritPop', 'Negerpunk', 'Polsk Punk', 'Beat', 'Christian Gangsta Rap', 'Heavy Metal', 'Black Metal', 'Crossover', # 140-147 'Contemporary Christian', 'Christian Rock', 'Merengue', 'Salsa', 'Thrash Metal', 'Anime', 'JPop', 'Synthpop' ] class Id3Error(Exception): """ An exception caused by id3reader properly handling a bad ID3 tag. 
""" pass class _Header: """ Represent the ID3 header in a tag. """ def __init__(self): self.majorVersion = 0 self.revision = 0 self.flags = 0 self.size = 0 self.bUnsynchronized = False self.bExperimental = False self.bFooter = False def __str__(self): return str(self.__dict__) class _Frame: """ Represent an ID3 frame in a tag. """ def __init__(self): self.id = '' self.size = 0 self.flags = 0 self.rawData = '' self.bTagAlterPreserve = False self.bFileAlterPreserve = False self.bReadOnly = False self.bCompressed = False self.bEncrypted = False self.bInGroup = False def __str__(self): return str(self.__dict__) def __repr__(self): return str(self.__dict__) def _interpret(self): """ Examine self.rawData and create a self.value from it. """ if len(self.rawData) == 0: # This is counter to the spec, but seems harmless enough. #if _c: _coverage('zero data') return if self.bCompressed: # Decompress the compressed data. self.rawData = zlib.decompress(self.rawData) if self.id[0] == 'T': # Text fields start with T encoding = ord(self.rawData[0]) if 0 0 and pos self.bytesLeft: #if _c: _coverage('long!') raise Id3Error, 'Long read (%s): (%d > %d)' % (desc, num, self.bytesLeft) bytes = self.file.read(num) self.bytesLeft -= num if len(bytes) 0: frame = self._readFrame() if frame: frame._interpret() self.frames[frame.id] = frame self.allFrames.append(frame) else: #if _c: _coverage('padding') break def _interpretFlags(self): """ Interpret ID3v2.x flags. """ if self.header.flags & 0x80: self.header.bUnsynchronized = True #if _c: _coverage('unsynctag') if self.header.majorVersion == 2: if self.header.flags & 0x40: #if _c: _coverage('compressed') # "Since no compression scheme has been decided yet, # the ID3 decoder (for now) should just ignore the entire # tag if the compression bit is set." 
self.header.bCompressed = True if self.header.majorVersion >= 3: if self.header.flags & 0x40: #if _c: _coverage('extheader') if self.header.majorVersion == 3: self._readExtHeader = self._readExtHeader_rev3 else: self._readExtHeader = self._readExtHeader_rev4 if self.header.flags & 0x20: #if _c: _coverage('experimental') self.header.bExperimental = True if self.header.majorVersion >= 4: if self.header.flags & 0x10: #if _c: _coverage('footer') self.header.bFooter = True def _readExtHeader_rev3(self): """ Read the ID3v2.3 extended header. """ # We don't interpret this yet, just eat the bytes. size = self._getInteger(self._readBytes(4, 'rev3ehlen')) self._readBytes(size, 'rev3ehdata') def _readExtHeader_rev4(self): """ Read the ID3v2.4 extended header. """ # We don't interpret this yet, just eat the bytes. size = self._getSyncSafeInt(self._readBytes(4, 'rev4ehlen')) self._readBytes(size-4, 'rev4ehdata') def _readId3v1(self): """ Read the ID3v1 tag. spec: http://www.id3.org/id3v1.html """ self.file.seek(-128, 2) tag = self.file.read(128) if len(tag) != 128: return if tag[0:3] != 'TAG': return self.header = _Header() self.header.majorVersion = 1 self.header.revision = 0 self._addV1Frame('v1title', tag[3:33]) self._addV1Frame('v1performer', tag[33:63]) self._addV1Frame('v1album', tag[63:93]) self._addV1Frame('v1year', tag[93:97]) self._addV1Frame('v1comment', tag[97:127]) self._addV1Frame('v1genre', tag[127]) if tag[125] == '\0' and tag[126] != '\0': #if _c: _coverage('id3v1.1') self.header.revision = 1 self._addV1Frame('v1track', str(ord(tag[126]))) else: #if _c: _coverage('id3v1.0') pass return _validIdChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' def _isValidId(self, id): """ Determine if the id bytes make a valid ID3 id. """ for c in id: if not c in self._validIdChars: #if _c: _coverage('bad id') return False #if _c: _coverage('id '+id) return True def _readFrame_rev2(self): """ Read a frame for ID3v2.2: three-byte ids and lengths. 
spec: http://www.id3.org/id3v2-00.txt """ if self.bytesLeft 30: fr.rawData = fr.rawData[:30] pprint.pprint(self.allFrames) for fr in self.allFrames: if hasattr(fr, 'value'): print '%s: %s' % (fr.id, _safestr(fr.value)) else: print '%s= %s' % (fr.id, _safestr(fr.rawData)) for label in _simpleDataMapping.keys(): v = self.getValue(label) if v: print 'Label %s: %s' % (label, _safestr(v)) def dumpCoverage(self): feats = _features.keys() feats.sort() for feat in feats: print "Feature %-12s: %d" % (feat, _features[feat]) if __name__ == '__main__': if len(sys.argv)