/* id3.c

   ID3 Reader Version 1.1 (c) 2009-2010 by Malte Marwedel
   http://www.marwedels.de/malte

   Terms of use: GPL Version 2 or later
   Example for reading id3 information from mp3 files

   Supports:
     Versions:
       ID3 V1.0
       ID3 V2.2
       ID3 V2.3
       ID3 V2.4
     Formats:
       ISO-8859-1
       UTF-16 with BOM
       UTF-16 without BOM

   Partly supports:
     Flags:
       Jumps over extended header
     Formats:
       UTF-8 is converted to ISO-8859-1 (if chars can be represented there)

   Not supported:
     Unsynchronisation
     Extended Tag in ID3 V1.x

Should work on all embedded systems which support file operations similar to the
unix fread and fseek functions.
fseek must support: seek +x bytes from current position
                    go to position x (only if you want to use the same opened
                    file for mp3 playback afterwards)
                    go to position FILE_END - x (only if ID3 V1.0 support
                    is wanted)
malloc and free are used too, but these could be replaced by static buffers.

Changelog:
  Version 1.1: Allowing umlauts in ISO-8859-1. Use version 1.0 if you only want
               ASCII in the output.

compile for pc: gcc -o id3 id3.c -Wall -Wextra -DTEST_PC
run on pc: ./id3 <your mp3 base dir>*.mp3
*/


#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>


#ifdef TEST_PC
#include <unistd.h>

#else
//add your avr specific includes here

#endif

//Output field ids handed to state_id3_set().
//The order must match the field order of an ID3 V1 tag (title, artist, album),
//because the V1 reader passes its field counter (0, 1, 2) directly as id.
#ifndef ID3_TITLE
#define ID3_TITLE 0
#endif

#ifndef ID3_ARTIST
#define ID3_ARTIST 1
#endif

#ifndef ID3_ALBUM
#define ID3_ALBUM 2
#endif

#ifdef TEST_PC

//for testing on pc only:
//map the names the reader uses for table data onto plain C equivalents
#define prog_char char
#define memcmp_P memcmp

//the file currently being parsed; opened and closed by main()
FILE * stream_file;

extern void id3_extract(void);


/* Allocates 'size' bytes of zero-initialised memory (pc test build).
   Never returns NULL: prints "Out of mem" and terminates the program with
   exit code 2 if the allocation fails.
   The caller releases the memory with free().
*/
void * s_cmalloc(unsigned short size) {
  //calloc() may return NULL for a zero byte request even if memory is left.
  //Always ask for at least one byte, so NULL really means out of memory.
  size_t bytes = (size > 0) ? (size_t)size : 1;
  void * p = calloc(1, bytes);
  if (p == NULL) {
    printf("Out of mem\n");
    exit(2);
  }
  return p;
}

/* Pc replacement for the reader's table access helper: returns the byte
   stored at 'addr'.
   The pointer is const qualified, so read-only tables can be passed without
   casting the qualifier away; non-const pointers still convert implicitly.
*/
uint8_t pgm_read_byte(const void * addr) {
  return *(const uint8_t *)addr;
}

void state_id3_set(char * text, uint8_t id) {
98
  if (id == ID3_TITLE) {
99
    printf("title: %s\n", text);
100
  }
101
  if (id == ID3_ALBUM) {
102
    printf("album: %s\n", text);
103
  }
104
  if (id == ID3_ARTIST) {
105
    printf("artist: %s\n", text);
106
  }
107
}
108
109
int main(int argc, char ** argv) {
110
  if (argc < 2) {
111
    printf("Give one or more mp3 filenames as param\n");
112
    printf("Returns 0 if parsing done, 1: if no file could be openend, 2: if malloc failed\n");
113
    return 1;
114
  }
115
  int i;
116
  for (i = 1; i < argc; i++) {
117
    stream_file = fopen(argv[i], "rb");
118
    if (stream_file != NULL) {
119
      printf("%s:\n", argv[i]);
120
      id3_extract();
121
      fclose(stream_file);
122
    } else {
123
      printf("could not open file '%s'\n", argv[i]);
124
      return 1;
125
    }
126
  }
127
  return 0;
128
}
129
130
#endif
131
132
//==== the id3 reader =================
//you might want to put the #define ID3_ in a header file later

//the already opened file the reader works on; defined outside of the reader
extern FILE * stream_file;

/*
See ID3 format at: http://www.id3.org/id3v2.3.0
*/

//Tag markers, compared with memcmp_P() (no '\0' terminator stored):
//"ID3" is expected in the first three bytes of the file (ID3 V2 header),
//"TAG" is expected 128 bytes before the end of the file (ID3 V1 tag).
static prog_char id3_start[3] = {'I','D','3'};
static prog_char id3_tag[3] = {'T','A','G'};

//first half: id3.2 second half id3.3 and id3.4
146
#define ID3_COMPARES 8
147
//4 byte string, 1 byte ouptut index
148
static prog_char id3_table[ID3_COMPARES][5] = {
149
{'T','A','L',0, ID3_ALBUM},
150
{'T','T','2',0, ID3_TITLE},
151
{'T','P','1',0, ID3_ARTIST},
152
{'T','P','2',0, ID3_ARTIST},
153
{'T','A','L','B', ID3_ALBUM},
154
{'T','I','T','2', ID3_TITLE},
155
{'T','P','E','1', ID3_ARTIST},
156
{'T','P','E','2', ID3_ARTIST}};
157
158
static char * id3_v1text(void) {
159
  char * text = s_cmalloc(sizeof(char)*31); //the text is always 30 chars long
160
  fread(text, sizeof(char), 30, stream_file);
161
  uint8_t rp, wp = 0;
162
  for (rp = 0; rp <= 30; rp++) { //fix files with invalid chars
163
    uint8_t t1 = text[rp];
164
    if (t1 >= 0x20) {
165
      text[wp] = t1;
166
      wp++;
167
    }
168
    if (t1 == '\0')
169
      break;
170
  }
171
  text[wp] = '\0';
172
  return text;
173
}
174
175
static char * id3_text(uint16_t framesize) {
176
  //id 3v2 requires the framesize to be at least 1
177
  uint16_t l = 128;
178
  if (l > framesize)
179
    l = framesize;
180
  char * text = s_cmalloc(sizeof(char)*l);
181
  fread(text, sizeof(char), l, stream_file);
182
  /* text encoding defined by text[0]:
183
    $00 – ISO-8859-1 (ASCII).
184
    $01 – UTF-16 with BOM.
185
    $02 – UTF-16BE
186
    $03 – UTF-8: handled as ASCII here, so no special chars may appear
187
  */
188
  uint16_t rp, wp = 0;
189
  if ((text[0] == 1) || (text[0] == 2)) { //utf16, convert
190
    //printf("utf-16\n");
191
    uint16_t rp = 1;
192
    uint8_t bom = 0; //byte order, useful first
193
    if (text[0] == 1) {
194
      rp = 3;
195
      if ((unsigned char)text[1] == 0xfe) {//may be either 0xfe or 0xff
196
        bom = 1; //byte order, useful last
197
        //printf("Info: Rare case with utf16 byte order inverted\n"); //untested, I never found such a file
198
      }
199
    }
200
    while (rp < l) {
201
      uint8_t t1 = text[rp+bom];
202
      uint8_t t2 = text[rp+1-bom];
203
      if ((t1 >= 0x20) && (!t2)) { //if useful char
204
        text[wp] = t1;
205
        wp++;
206
      }
207
      rp += 2;
208
      if ((!t1) & (!t2)) {
209
        break;
210
      }
211
    }
212
    text[wp] = '\0';
213
  } else if (text[0] == 3) { //2 indicate utf-8
214
    //printf("utf-8\n");
215
    rp = 1;
216
    while (rp < l) {
217
      uint8_t t1 = text[rp];
218
      if ((t1 >= 0x20) && (t1 < 0x80)) { //filter all normal ascii chars
219
        text[wp] = t1;
220
        wp++;
221
      } else if (rp+1 < l) { //check if ISO-8859-1 subset
222
        rp++;
223
        uint8_t t2 = text[rp];
224
        /* useful chars: t1=0xC2: t2 = 0xA0 ... 0xBF
225
                         t1=0xC3: t2 = 0x80 ... 0xBF */
226
        if ((t1 == 0xC2) && ((t2 & 0xE0) == 0xA0)) {
227
          text[wp] = t2;
228
          wp++;
229
        } else
230
        if ((t1 == 0xC3) && ((t2 & 0xC0) == 0x80)) { //2 upper bits fixed
231
          text[wp] = t2+0x40;
232
          wp++;
233
        }
234
      }
235
      rp++;
236
    }
237
  } else { //0 indicate ISO-8859-1
238
    //printf("iso-8859-1\n");
239
    rp = 1;
240
    while (rp < l) {
241
      uint8_t t1 = text[rp];
242
      if (t1 >= 0x20) { //filter all useful chars
243
        text[wp] = t1;
244
        wp++;
245
      }
246
      rp++;
247
    }
248
  }
249
  if (wp >= l) //only happens if utf-16 with wrong format occured
250
    wp = l-1;
251
  text[wp] = '\0'; //just make sure its \0 terminated
252
  if (framesize > l) {
253
    fseek(stream_file, framesize-l,  SEEK_CUR);
254
  }
255
  return text;
256
}
257
258
//supports only ID3 version 2.x
259
void id3_extract(void) {
260
  uint8_t id3temp[10];
261
  fread(id3temp, sizeof(uint8_t), 10, stream_file);
262
  uint8_t subversion = id3temp[3];
263
  if ((memcmp_P(id3temp, id3_start, 3) == 0) && (subversion <= 4)) {
264
    //printf("Info: ID3 version 2.%i\n", subversion);
265
    uint32_t headersize = 0;
266
    uint8_t i;
267
    for (i = 0; i < 4; i++) {
268
      headersize <<= 7;
269
      headersize += id3temp[i+6] & 0x7F;
270
    }
271
    if (id3temp[5] & 0x80) {
272
      //printf("Warning: Unsynchronisation must be used, not supported yet, could result in errors in very rare cases\n");
273
    }
274
    if (id3temp[5] & 0x40) { //only very few mp3s use this for a CRC check sum
275
      //printf("Warning: Extended header support is incomplete\n");
276
      fread(id3temp, sizeof(uint8_t), 10, stream_file);
277
      fseek(stream_file, id3temp[3]-6, SEEK_CUR); //in version 2.3, the size is either 10 or 6 and nothing else
278
    }
279
    //printf("Header size: %u\n", headersize); //should be %lu on the AVR platform
280
    //decode information
281
    uint8_t frameheadersize = 10;
282
    uint8_t arrayoffset = ID3_COMPARES/2;
283
    uint8_t cmplen = 4;
284
    if (subversion < 3) {
285
      frameheadersize = 6;
286
      id3temp[9] = 0x00; //handle things as compression, group or discharged is 0 later
287
      arrayoffset = 0;
288
      cmplen = 3;
289
    }
290
    uint32_t parsed = 0;
291
    while (parsed < headersize) { //usually does one loop for every frame
292
      if (fread(id3temp, sizeof(uint8_t), frameheadersize, stream_file) != frameheadersize) {
293
        //printf("Error: File end!\n");
294
        break; //ouch! file end
295
      }
296
      if (id3temp[0] == 0) { //must be ascii to be valid
297
        //either wrong file format, or padding reached
298
        break;
299
      }
300
      //calculate frame size
301
      uint32_t framesize = 0;
302
      uint8_t i;
303
      for (i = 0; i < 4; i++) {
304
        if ((subversion <= 2) && (i < 3)) { //id3 v 2.2: has only 3 bytes frame size
305
          framesize <<= 8;
306
          framesize += id3temp[i+3];
307
        }
308
        if (subversion == 3) { //id3 v 2.3
309
          framesize <<= 8;
310
          framesize += id3temp[i+4];
311
        }
312
        if (subversion == 4) { //id3 v 2.4
313
          framesize <<= 7;
314
          framesize += id3temp[i+4] & 0x7F;
315
        }
316
        //printf("adding %i\n", id3temp[i+4]);
317
      }
318
      //printf("Framesize %i\n", framesize);
319
      //if compression, or group or discharged, or too large, must be larger than 0
320
      if ((id3temp[9] & 0xE0) || (framesize > 10000) || (!framesize)) {
321
        fseek(stream_file, framesize,  SEEK_CUR);
322
        //printf("Not fitting: %i\n", framesize);
323
      } else {
324
        char * text = NULL;
325
        for (i = 0; i < ID3_COMPARES/2; i++) {
326
          uint8_t type = pgm_read_byte(&id3_table[arrayoffset+i][4]);
327
          if (memcmp_P(id3temp, id3_table[arrayoffset+i], cmplen) == 0) {
328
            text = id3_text(framesize);
329
            state_id3_set(text, type);
330
            free(text);
331
            break;
332
          }
333
        }
334
        if (text == NULL) { //no fitting tag found
335
          fseek(stream_file, framesize,  SEEK_CUR);
336
        }
337
      }
338
      parsed += frameheadersize;
339
      parsed += framesize;
340
    }
341
    //make sure, mp3 playing starts at the right position, even if the frames did contain invalid data or padding was used
342
    fseek(stream_file, headersize+10,  SEEK_SET); //not needed if file is re-opened for playing anyway
343
  } else { //no id3 v2 header, look for id3 v1
344
    fseek(stream_file, -128, SEEK_END);
345
    fread(id3temp, sizeof(uint8_t), 3, stream_file);
346
    if (memcmp_P(id3temp, id3_tag, 3) == 0) { //if there is an id3 tag
347
      //printf("Info: ID3 version 1\n");
348
      uint8_t i;
349
      for (i = 0; i < 3; i++) {
350
        char * text = id3_v1text();
351
        if (strlen(text) > 0)
352
          state_id3_set(text, i);
353
        free(text);
354
      }
355
    }
356
    //make sure, mp3 playing starts from the beginning
357
    fseek(stream_file, 0, SEEK_SET); //not needed if file is re-opened for playing anyway
358
  }
359
}