Audio-Scan

 view release on metacpan or  search on metacpan

src/id3.c  view on Meta::CPAN

    }

    // ehsize may be invalid, tested with v2.3-ext-header-invalid.mp3 & v2.4-ext-header-invalid.mp3
    if (ehsize > id3->size_remain - 4) {
      warn("Error: Invalid ID3 extended header size (%s)\n", id3->file);
      ret = 0;
      goto out;
    }

    DEBUG_TRACE("  Skipping extended header, size %d\n", ehsize);

    if ( !_check_buf(id3->infile, id3->buf, ehsize, ID3_BLOCK_SIZE) ) {
      ret = 0;
      goto out;
    }
    buffer_consume(id3->buf, ehsize);

    id3->size_remain -= ehsize + 4;
  }

  // Parse frames
  while (id3->size_remain > 0) {
    //DEBUG_TRACE("    remain: %d\n", id3->size_remain);
    if ( !_id3_parse_v2_frame(id3) ) {
      break;
    }
  }

  if (id3->version_major < 4) {
    // map old year/date/time (TYER/TDAT/TIME) frames to TDRC
    // tested in v2.3-xsop.mp3
    _id3_convert_tdrc(id3);
  }

  // Set id3_version info element, which contains all tag versions found
  {
    SV *version = newSVpvf( "ID3v2.%d.%d", id3->version_major, id3->version_minor );

    if ( my_hv_exists(id3->info, "id3_version") ) {
      SV **entry = my_hv_fetch(id3->info, "id3_version");
      if (entry != NULL) {
        sv_catpv( version, ", " );
        sv_catsv( version, *entry );
      }
    }

    my_hv_store( id3->info, "id3_version", version );
  }

out:
  return ret;
}

/*
 * Parse a single ID3v2 frame at the current position of id3->buf.
 *
 * Reads the frame header (6 bytes for v2.2, 10 bytes for v2.3/v2.4), maps
 * old or malformed frame ids through the compat table, handles the per-frame
 * flags (grouping identity, encryption, compression, data length indicator,
 * unsynchronisation) and hands the payload to _id3_parse_v2_frame_data().
 * id3->size_remain is reduced by the tag bytes that were accounted for.
 *
 * Returns 1 when the caller may continue with the next frame, 0 when frame
 * parsing must stop (padding reached, not enough data buffered, malformed
 * frame, or the frame data parser reported an error).
 */
int
_id3_parse_v2_frame(id3info *id3)
{
  int ret = 1;
  char id[5];
  uint16_t flags = 0;
  uint32_t size  = 0;
  uint32_t decoded_size = 0;
  uint32_t unsync_extra = 0;
  id3_frametype const *frametype;

  // Holds the file buffer while id3->buf temporarily points at 'decompressed'.
  // Stays 0 until that swap has actually happened.
  Buffer *tmp_buf = 0;

  // If the frame is compressed, it will be decompressed here
  Buffer *decompressed = 0;

  // tag_data_safe flag is used if skipping artwork and artwork is not raw image data (needs unsync)
  id3->tag_data_safe = 1;

  if ( !_check_buf(id3->infile, id3->buf, 10, ID3_BLOCK_SIZE) ) {
    ret = 0;
    goto out;
  }

  if (id3->version_major == 2) {
    // v2.2
    id3_compat const *compat;

    // Read 3-letter id
    buffer_get(id3->buf, &id, 3);
    id[3] = 0;

    if (id[0] == 0) {
      // padding
      DEBUG_TRACE("  Found start of padding, aborting\n");
      ret = 0;
      goto out;
    }

    size = buffer_get_int24(id3->buf);

    DEBUG_TRACE("  %s, size %d\n", id, size);

    // map 3-char id to 4-char id
    compat = _id3_compat_lookup((char *)&id, 3);
    if (compat && compat->equiv) {
      strncpy(id, compat->equiv, 4);
      id[4] = 0;

      DEBUG_TRACE("    compat -> %s\n", id);
    }
    else {
      // no compat mapping (obsolete), prepend 'Y' to id
      id[4] = 0;
      id[3] = id[2];
      id[2] = id[1];
      id[1] = id[0];
      id[0] = 'Y';

      DEBUG_TRACE("    obsolete/unknown -> %s\n", id);
    }

    // 3-byte id + 3-byte size
    id3->size_remain -= 6;

    if (size > id3->size_remain) {
      DEBUG_TRACE("    frame size too big, aborting\n");
      ret = 0;
      goto out;
    }
  }
  else {
    // Read 4-letter id
    buffer_get(id3->buf, &id, 4);
    id[4] = 0;

    if (id[0] == 0) {
      // padding
      DEBUG_TRACE("  Found start of padding, aborting\n");
      ret = 0;
      goto out;
    }

    id3->size_remain -= 4;

    if (id3->version_major == 3) {
      // v2.3
      id3_compat const *compat;

      size  = buffer_get_int(id3->buf);
      flags = buffer_get_short(id3->buf);

      DEBUG_TRACE("  %s, frame flags %x, size %d\n", id, flags, size);

      // map to v2.4 id
      if (id[3] == ' ') {
        // iTunes writes bad frame IDs such as 'TSA ', these should be run through compat
        // as 3-char frames
        compat = _id3_compat_lookup((char *)&id, 3);
      }
      else {
        compat = _id3_compat_lookup((char *)&id, 4);
      }
      if (compat && compat->equiv) {
        strncpy(id, compat->equiv, 4);
        id[4] = 0;

        DEBUG_TRACE("    compat -> %s\n", id);
      }

      // 4-byte size + 2-byte flags
      id3->size_remain -= 6;

      if (size > id3->size_remain) {
        DEBUG_TRACE("    frame size too big, aborting\n");
        ret = 0;
        goto out;
      }

      // The optional fields below are counted in the frame size.  'size' is
      // unsigned, so each one is checked before it is subtracted; otherwise a
      // too-small frame would wrap 'size' (and size_remain) to a huge value.

      if (flags & ID3_FRAME_FLAG_V23_COMPRESSION) {
        // tested with v2.3-compressed-frame.mp3
        if (size < 4) {
          DEBUG_TRACE("    frame too small for decompressed size field, aborting\n");
          ret = 0;
          goto out;
        }

        decoded_size = buffer_get_int(id3->buf);
        id3->size_remain -= 4;
        size -= 4;
      }

      if (flags & ID3_FRAME_FLAG_V23_ENCRYPTION) {
        // tested with v2.3-encrypted-frame.mp3
        if (size < 1) {
          DEBUG_TRACE("    frame too small for encryption method byte, aborting\n");
          ret = 0;
          goto out;
        }

#ifdef AUDIO_SCAN_DEBUG
        DEBUG_TRACE("    encrypted, method %d\n", buffer_get_char(id3->buf));
#else
        buffer_consume(id3->buf, 1);
#endif

        id3->size_remain--;
        size--;

        // Encrypted payloads cannot be interpreted, skip the rest of the frame
        DEBUG_TRACE("    skipping encrypted frame\n");
        _id3_skip(id3, size);
        id3->size_remain -= size;
        goto out;
      }

      if (flags & ID3_FRAME_FLAG_V23_GROUPINGIDENTITY) {
        // tested with v2.3-group-id.mp3
        if (size < 1) {
          DEBUG_TRACE("    frame too small for group id byte, aborting\n");
          ret = 0;
          goto out;
        }

#ifdef AUDIO_SCAN_DEBUG
        DEBUG_TRACE("    group_id %d\n", buffer_get_char(id3->buf));
#else
        buffer_consume(id3->buf, 1);
#endif

        id3->size_remain--;
        size--;
      }

      // Perform decompression if necessary after all optional extra bytes have been read
      // XXX need test for compressed + unsync
      if (flags & ID3_FRAME_FLAG_V23_COMPRESSION && decoded_size) {
        unsigned long tmp_size;

        // The whole compressed payload must be buffered before inflating it
        if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
          ret = 0;
          goto out;
        }

        DEBUG_TRACE("    decompressing, decoded_size %d\n", decoded_size);

        Newz(0, decompressed, sizeof(Buffer), Buffer);
        buffer_init(decompressed, decoded_size);

        tmp_size = decoded_size;
        if (
          uncompress(buffer_ptr(decompressed), &tmp_size, buffer_ptr(id3->buf), size) != Z_OK
          ||
          tmp_size != decoded_size
        ) {
          // Fall back to parsing the raw frame bytes
          DEBUG_TRACE("    unable to decompress frame\n");
          buffer_free(decompressed);
          Safefree(decompressed);
          decompressed = 0;
        }
        else {
          // Hack buffer so it knows we've added data directly
          decompressed->end = decoded_size;
        }
      }
    }
    else {
      // v2.4

      // iTunes writes non-syncsafe length integers, check for this here
      if ( _varint(buffer_ptr(id3->buf), 4) & 0x80 ) {
        size = buffer_get_int(id3->buf);
        DEBUG_TRACE("    found non-syncsafe iTunes size for %s, size adjusted to %d\n", id, size);
      }
      else {
        size = buffer_get_syncsafe(id3->buf, 4);
      }

      flags = buffer_get_short(id3->buf);

      // 4-byte size + 2-byte flags
      id3->size_remain -= 6;

      DEBUG_TRACE("  %s, frame flags %x, size %d\n", id, flags, size);

      if (size > id3->size_remain) {
        DEBUG_TRACE("    frame size too big, aborting\n");
        ret = 0;
        goto out;
      }

      // iTunes writes bad frame IDs such as 'TSA ', these should be run through compat
      // as 3-char frames
      if (id[3] == ' ') {
        id3_compat const *compat;
        compat = _id3_compat_lookup((char *)&id, 3);
        if (compat && compat->equiv) {
          strncpy(id, compat->equiv, 4);
          id[4] = 0;

          DEBUG_TRACE("    bad iTunes v2.4 tag, compat -> %s\n", id);
        }
      }

      // As for v2.3, every optional field is checked against the unsigned
      // 'size' before being subtracted from it, to avoid wrap-around.

      if (flags & ID3_FRAME_FLAG_V24_GROUPINGIDENTITY) {
        // tested with v2.4-group-id.mp3
        if (size < 1) {
          DEBUG_TRACE("    frame too small for group id byte, aborting\n");
          ret = 0;
          goto out;
        }

#ifdef AUDIO_SCAN_DEBUG
        DEBUG_TRACE("    group_id %d\n", buffer_get_char(id3->buf));
#else
        buffer_consume(id3->buf, 1);
#endif
        id3->size_remain--;
        size--;
      }

      if (flags & ID3_FRAME_FLAG_V24_ENCRYPTION) {
        // tested with v2.4-encrypted-frame.mp3
        if (size < 1) {
          DEBUG_TRACE("    frame too small for encryption method byte, aborting\n");
          ret = 0;
          goto out;
        }

#ifdef AUDIO_SCAN_DEBUG
        DEBUG_TRACE("    encrypted, method %d\n", buffer_get_char(id3->buf));
#else
        buffer_consume(id3->buf, 1);
#endif

        id3->size_remain--;
        size--;

        // Encrypted payloads cannot be interpreted, skip the rest of the frame
        DEBUG_TRACE("    skipping encrypted frame\n");
        _id3_skip(id3, size);
        id3->size_remain -= size;
        goto out;
      }

      if (flags & ID3_FRAME_FLAG_V24_DATALENGTHINDICATOR) {
        if (size < 4) {
          DEBUG_TRACE("    frame too small for data length indicator, aborting\n");
          ret = 0;
          goto out;
        }

        decoded_size = buffer_get_syncsafe(id3->buf, 4);
        id3->size_remain -= 4;
        size -= 4;

        DEBUG_TRACE("    data length indicator, size %d\n", decoded_size);
      }

      if (flags & ID3_FRAME_FLAG_V24_UNSYNCHRONISATION) {
        // Special case, do not unsync an APIC frame if not reading artwork,
        // FF's are not likely to appear in the part we care about anyway
        if ( !strcmp(id, "APIC") && _env_true("AUDIO_SCAN_NO_ARTWORK") ) {
          DEBUG_TRACE("    Would un-synchronize APIC frame, but ignoring because of AUDIO_SCAN_NO_ARTWORK\n");

          // Reset decoded_size to 0 since we aren't actually decoding.
          // XXX this would break if we have a compressed + unsync APIC frame but not very likely in the real world
          decoded_size = 0;

          id3->tag_data_safe = 0;
        }
        else {
          // tested with v2.4-unsync.mp3
          if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
            ret = 0;
            goto out;
          }

          // The frame is de-unsynchronised in place; the result replaces any
          // data length indicator value read above
          decoded_size = _id3_deunsync( buffer_ptr(id3->buf), size );

          // Bytes removed by the unsync pass, consumed after the frame is parsed
          unsync_extra = size - decoded_size;

          DEBUG_TRACE("    Un-synchronized frame, new_size %d\n", decoded_size);
        }
      }

      if (flags & ID3_FRAME_FLAG_V24_COMPRESSION) {
        // tested with v2.4-compressed-frame.mp3
        // XXX need test for compressed + unsync
        unsigned long tmp_size;

        // The whole compressed payload must be buffered before inflating it
        if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
          ret = 0;
          goto out;
        }

        DEBUG_TRACE("    decompressing\n");

        Newz(0, decompressed, sizeof(Buffer), Buffer);
        buffer_init(decompressed, decoded_size);

        tmp_size = decoded_size;
        if (
          uncompress(buffer_ptr(decompressed), &tmp_size, buffer_ptr(id3->buf), size) != Z_OK
          ||
          tmp_size != decoded_size
        ) {
          // Fall back to parsing the raw frame bytes
          DEBUG_TRACE("    unable to decompress frame\n");
          buffer_free(decompressed);
          Safefree(decompressed);
          decompressed = 0;
        }
        else {
          // Hack buffer so it knows we've added data directly
          decompressed->end = decoded_size;
        }
      }
    }
  }

  // Special case, completely skip XHD3 frame (mp3HD) as it will be large
  // Also skip NCON, a large tag written by MusicMatch
  if ( !strcmp(id, "XHD3") || !strcmp(id, "NCON") ) {
    DEBUG_TRACE("    skipping large binary %s frame\n", id);
    _id3_skip(id3, size);
    id3->size_remain -= size;
    goto out;
  }

  // Frames without a table entry get a generic type chosen by the first id letter
  frametype = _id3_frametype_lookup(id, 4);
  if (frametype == 0) {
    switch ( id[0] ) {
    case 'T':
      frametype = &id3_frametype_text;
      break;

    case 'W':
      frametype = &id3_frametype_url;
      break;

    case 'X':
    case 'Y':
    case 'Z':
      frametype = &id3_frametype_experimental;
      break;

    default:
      frametype = &id3_frametype_unknown;
      break;
    }
  }

#ifdef AUDIO_SCAN_DEBUG
  {
    int i;
    DEBUG_TRACE("    nfields %d:", frametype->nfields);
    for (i = 0; i < frametype->nfields; ++i) {
      DEBUG_TRACE(" %d", frametype->fields[i]);
    }
    DEBUG_TRACE("\n");
  }
#endif

  // If frame was compressed, temporarily set the id3 buffer to use the decompressed buffer
  if (decompressed) {
    tmp_buf  = id3->buf;
    id3->buf = decompressed;
  }

  // Parse using the decoded length when the frame was unsynced/decompressed
  if ( !_id3_parse_v2_frame_data(id3, (char *)&id, decoded_size ? decoded_size : size, frametype) ) {
    DEBUG_TRACE("    error parsing frame, aborting\n");
    ret = 0;
    goto out;
  }

  // Account for the on-disk frame size, clamping at zero
  if (id3->size_remain > size) {
    id3->size_remain -= size;
  }
  else {
    id3->size_remain = 0;
  }

  // Consume extra bytes if we had to unsync this frame
  // NOTE(review): for a compressed + unsync frame id3->buf still points at the
  // decompressed buffer here, see the XXX notes above - confirm intended
  if (unsync_extra) {
    DEBUG_TRACE("    consuming extra bytes after unsync: %d\n", unsync_extra);
    buffer_consume(id3->buf, unsync_extra);
  }

out:
  if (decompressed) {
    // tmp_buf is only set once id3->buf has been swapped to the decompressed
    // buffer. A compressed XHD3/NCON frame reaches this point without the
    // swap: id3->buf is still the file buffer and the frame was already
    // handed to _id3_skip(), so it must neither be replaced by a null
    // pointer nor consumed again.
    if (tmp_buf) {
      // Reset id3 buffer and consume rest of compressed frame
      id3->buf = tmp_buf;
      buffer_consume(id3->buf, size);
    }

    buffer_free(decompressed);
    Safefree(decompressed);
  }

  return ret;
}

int
_id3_parse_v2_frame_data(id3info *id3, char const *id, uint32_t size, id3_frametype const *frametype)
{
  int ret = 1;
  uint32_t read = 0;
  int8_t encoding = -1;

  uint8_t buffer_art = ( !strcmp(id, "APIC") ) ? 1 : 0;
  uint8_t skip_art   = ( buffer_art && _env_true("AUDIO_SCAN_NO_ARTWORK") ) ? 1 : 0;

  // Bug 16703, a completely empty frame is against the rules, skip it
  if (!size)
    return 1;

  if (skip_art) {
    // Only buffer enough for the APIC header fields, this is only a rough guess
    // because the description could technically be very long
    if ( !_check_buf(id3->infile, id3->buf, 128, ID3_BLOCK_SIZE) ) {
      return 0;
    }
    DEBUG_TRACE("    partial read due to AUDIO_SCAN_NO_ARTWORK\n");
  }
  else {
    // Use a special buffering mode for binary artwork, to avoid
    // using 2x the memory of the APIC frame (once for buffer, once for SV)
    if (buffer_art) {
      // Buffer enough for encoding/MIME/picture type/description
      if ( !_check_buf(id3->infile, id3->buf, 128, ID3_BLOCK_SIZE) ) {
        return 0;



( run in 0.598 second using v1.01-cache-2.11-cpan-22024b96cdf )