Skip to content

Commit

Permalink
Respect MIME type aliases
Browse files Browse the repository at this point in the history
* Fixes incorrect `audio/x-wav` subtype of `audio/vnd.wav` to be an
  alias of `audio/vnd.wave`.
  • Loading branch information
jeremy committed Mar 6, 2024
1 parent e5f1dbd commit 16a9ef7
Show file tree
Hide file tree
Showing 14 changed files with 253 additions and 16 deletions.
37 changes: 32 additions & 5 deletions lib/marcel/magic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,34 @@ def initialize(type)
# Option keys:
# * <i>:extensions</i>: String list or single string of file extensions
# * <i>:parents</i>: String list or single string of parent mime types
# * <i>:aliases</i>: String list or single string of aliased mime types
# * <i>:magic</i>: Mime magic specification
# * <i>:comment</i>: Comment string
def self.add(type, options)
extensions = [options[:extensions]].flatten.compact
extensions.each {|ext| EXTENSIONS[ext] = type }
TYPE_EXTS[type] = extensions

TYPE_ALIASES.delete(type)
[options[:aliases]].flatten.compact.each do |aliased|
TYPE_ALIASES[aliased] = type
end

parents = [options[:parents]].flatten.compact
TYPE_PARENTS[type] = parents unless parents.empty?
extensions.each {|ext| EXTENSIONS[ext] = type }

MAGIC.unshift [type, options[:magic]] if options[:magic]
end

# Removes a mime type from the dictionary. You might want to do this if
# Removes a mime type from the dictionary. You might want to do this if
# you're seeing impossible conflicts (for instance, application/x-gmc-link).
# * <i>type</i>: The mime type to remove. All associated extensions and magic are removed too.
# * <i>type</i>: The mime type to remove.
def self.remove(type)
EXTENSIONS.delete_if {|ext, t| t == type }
MAGIC.delete_if {|t, m| t == type }
EXTENSIONS.delete_if { |ext, t| t == type }
MAGIC.delete_if { |t, m| t == type }
TYPE_EXTS.delete(type)
TYPE_PARENTS.delete(type)
TYPE_ALIASES.delete_if { |aliased, canonical| aliased == type || canonical == type }
end

# Returns true if type is a text format
Expand All @@ -64,11 +73,24 @@ def extensions
TYPE_EXTS[type] || []
end

def canonical
if to = TYPE_ALIASES[type]
self.class.new(to)
else
self
end
end

# Get mime comment
def comment
nil # deprecated
end

# Lookup canonical mime type by mime type string
def self.by_type(type)
new(type.downcase).canonical if type
end

# Lookup mime type by file extension
def self.by_extension(ext)
ext = ext.to_s.downcase
Expand Down Expand Up @@ -111,9 +133,14 @@ def hash
alias == eql?

def self.child?(child, parent)
child, parent = canonical(child), canonical(parent)
child == parent || TYPE_PARENTS[child]&.any? {|p| child?(p, parent) }
end

def self.canonical(aliased_type)
by_type(aliased_type)&.type
end

def self.magic_match(io, method)
return magic_match(StringIO.new(io.to_s), method) unless io.respond_to?(:read)

Expand Down
13 changes: 9 additions & 4 deletions lib/marcel/mime_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@ class MimeType
BINARY = "application/octet-stream"

class << self
def extend(type, extensions: [], parents: [], magic: nil)
def extend(type, extensions: [], aliases: [], parents: [], magic: nil)
if canonical = Marcel::TYPE_ALIASES[type]
warn "#{type} was already aliased to #{canonical}"
end

extensions = (Array(extensions) + Array(Marcel::TYPE_EXTS[type])).uniq
existing_aliases = Marcel::TYPE_ALIASES.select { |_, t| t == type }.keys
aliases = (Array(aliases) + existing_aliases).uniq
parents = (Array(parents) + Array(Marcel::TYPE_PARENTS[type])).uniq
Magic.add(type, extensions: extensions, magic: magic, parents: parents)
Magic.add(type, extensions: extensions, magic: magic, aliases: aliases, parents: parents)
end

# Returns the most appropriate content type for the given file.
Expand All @@ -32,7 +38,6 @@ def for(pathname_or_io = nil, name: nil, extension: nil, declared_type: nil)
end

private

def for_data(pathname_or_io)
if pathname_or_io
with_io(pathname_or_io) do |io|
Expand Down Expand Up @@ -60,7 +65,7 @@ def for_extension(extension)
end

def for_declared_type(declared_type)
type = parse_media_type(declared_type)
type = Marcel::Magic.canonical(parse_media_type(declared_type))

# application/octet-stream is treated as an undeclared/missing type,
# allowing the type to be inferred from the filename. If there's no
Expand Down
6 changes: 3 additions & 3 deletions lib/marcel/mime_type/definitions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@
Marcel::MimeType.extend "application/vnd.apple.numbers", extensions: %w( numbers ), parents: "application/zip"
Marcel::MimeType.extend "application/vnd.apple.keynote", extensions: %w( key ), parents: "application/zip"

# Upstream aliases to application/x-x509-cert. Override with a ;format=pem subtype.
Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem"

Marcel::MimeType.extend "audio/aac", extensions: %w( aac ), parents: "audio/x-aac"
Marcel::MimeType.extend("audio/ogg", extensions: %w( ogg oga ), magic: [[0, 'OggS', [[29, 'vorbis']]]])

Marcel::MimeType.extend "image/vnd.dwg", magic: [[0, "AC10"]]

Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem"

Marcel::MimeType.extend "image/avif", magic: [[4, "ftypavif"]], extensions: %w( avif )
Marcel::MimeType.extend "image/heif", magic: [[4, "ftypmif1"]], extensions: %w( heif )
Marcel::MimeType.extend "image/heic", magic: [[4, "ftypheic"]], extensions: %w( heic )
Expand All @@ -49,7 +50,6 @@
Marcel::MimeType.extend "video/mp4", magic: [[4, "ftypisom"], [4, "ftypM4V "]], extensions: %w( mp4 m4v )

Marcel::MimeType.extend "audio/flac", magic: [[0, 'fLaC']], extensions: %w( flac ), parents: "audio/x-flac"
Marcel::MimeType.extend "audio/x-wav", magic: [[0, 'RIFF', [[8, 'WAVE']]]], extensions: %w( wav ), parents: "audio/vnd.wav"
Marcel::MimeType.extend "audio/mpc", magic: [[0, "MPCKSH"]], extensions: %w( mpc )

Marcel::MimeType.extend "font/ttf", magic: [[0, "\x00\x01\x00\x00"]], extensions: %w( ttf ttc )
Expand Down
143 changes: 143 additions & 0 deletions lib/marcel/tables.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2148,6 +2148,149 @@ module Marcel
'video/x-sgi-movie' => %w(movie),
'x-conference/x-cooltalk' => %w(ice), # Cooltalk Audio
}
TYPE_ALIASES = {
'application/bat' => 'application/x-bat',
'application/x-coreldraw' => 'application/coreldraw',
'application/x-cdr' => 'application/coreldraw',
'application/cdr' => 'application/coreldraw',
'image/x-cdr' => 'application/coreldraw',
'image/cdr' => 'application/coreldraw',
'application/x-setupscript' => 'application/inf',
'application/x-wine-extension-inf' => 'application/inf',
'application/x-javascript' => 'application/javascript',
'text/javascript' => 'application/javascript',
'application/x-java-vm' => 'application/java-vm',
'application/x-java' => 'application/java-vm',
'application/mac-binhex' => 'application/mac-binhex40',
'application/binhex' => 'application/mac-binhex40',
'application/vnd.ms-word' => 'application/msword',
'application/x-ogg' => 'audio/vorbis',
'application/msonenote' => 'application/onenote',
'application/x-pdf' => 'application/pdf',
'application/pgp' => 'application/pgp-encrypted',
'text/rss' => 'application/rss+xml',
'text/rtf' => 'application/rtf',
'application/smil' => 'application/smil+xml',
'application/x-kchart' => 'application/vnd.kde.kchart',
'application/x-kpresenter' => 'application/vnd.kde.kpresenter',
'application/x-kspread' => 'application/vnd.kde.kspread',
'application/x-kword' => 'application/vnd.kde.kword',
'application/x-koan' => 'application/vnd.koan',
'application/x-123' => 'application/vnd.lotus-1-2-3',
'application/x-mif' => 'application/vnd.mif',
'application/x-frame' => 'application/vnd.mif',
'application/msexcel' => 'application/vnd.ms-excel',
'application/mspowerpoint' => 'application/vnd.ms-powerpoint',
'application/ms-tnef' => 'application/vnd.ms-tnef',
'application/oxps' => 'application/vnd.ms-xpsdocument',
'application/x-vnd.oasis.opendocument.chart' => 'application/vnd.oasis.opendocument.chart',
'application/x-vnd.oasis.opendocument.chart-template' => 'application/vnd.oasis.opendocument.chart-template',
'application/vnd.oasis.opendocument.database' => 'application/vnd.oasis.opendocument.base',
'application/x-vnd.oasis.opendocument.formula' => 'application/vnd.oasis.opendocument.formula',
'application/x-vnd.oasis.opendocument.formula-template' => 'application/vnd.oasis.opendocument.formula-template',
'application/x-vnd.oasis.opendocument.graphics' => 'application/vnd.oasis.opendocument.graphics',
'application/x-vnd.oasis.opendocument.graphics-template' => 'application/vnd.oasis.opendocument.graphics-template',
'application/x-vnd.oasis.opendocument.image' => 'application/vnd.oasis.opendocument.image',
'application/x-vnd.oasis.opendocument.image-template' => 'application/vnd.oasis.opendocument.image-template',
'application/x-vnd.oasis.opendocument.presentation' => 'application/vnd.oasis.opendocument.presentation',
'application/x-vnd.oasis.opendocument.presentation-template' => 'application/vnd.oasis.opendocument.presentation-template',
'application/x-vnd.oasis.opendocument.spreadsheet' => 'application/vnd.oasis.opendocument.spreadsheet',
'application/x-vnd.oasis.opendocument.spreadsheet-template' => 'application/vnd.oasis.opendocument.spreadsheet-template',
'application/x-vnd.oasis.opendocument.text' => 'application/vnd.oasis.opendocument.text',
'application/x-vnd.oasis.opendocument.text-master' => 'application/vnd.oasis.opendocument.text-master',
'application/x-vnd.oasis.opendocument.text-template' => 'application/vnd.oasis.opendocument.text-template',
'application/x-vnd.oasis.opendocument.text-web' => 'application/vnd.oasis.opendocument.text-web',
'application/x-vnd.sun.xml.writer' => 'application/vnd.sun.xml.writer',
'application/vnd.ms-visio' => 'application/vnd.visio',
'image/x-targa' => 'image/x-tga',
'application/x-unix-archive' => 'application/x-archive',
'application/x-arj-compressed' => 'application/x-arj',
'application/x-dbm' => 'application/x-berkeley-db',
'application/vnd.debian.binary-package' => 'application/x-debian-package',
'application/x-Gnumeric-spreadsheet' => 'application/x-gnumeric',
'application/x-gzip' => 'application/gzip',
'application/x-gunzip' => 'application/gzip',
'application/gzipped' => 'application/gzip',
'application/gzip-compressed' => 'application/gzip',
'application/x-gzip-compressed' => 'application/gzip',
'gzip/document' => 'application/gzip',
'application/x-windows-installer' => 'application/x-ms-installer',
'application/x-msi' => 'application/x-ms-installer',
'application/x-rar' => 'application/x-rar-compressed',
'text/x-tex' => 'application/x-tex',
'text/x-texinfo' => 'application/x-texinfo',
'application/x-x509-ca-cert' => 'application/x-x509-cert',
'application/x-x509-user-cert' => 'application/x-x509-cert',
'text/xml' => 'application/xml',
'application/x-xml' => 'application/xml',
'text/x-dtd' => 'application/xml-dtd',
'text/xml-external-parsed-entity' => 'application/xml-external-parsed-entity',
'text/xsl' => 'application/xslt+xml',
'application/x-zip-compressed' => 'application/zip',
'application/x-deflate' => 'application/zlib',
'audio/x-m4a' => 'audio/mp4',
'audio/x-mp4a' => 'audio/mp4',
'audio/x-mpeg' => 'audio/mpeg',
'audio/x-ogg-flac' => 'audio/x-oggflac',
'audio/x-ogg-pcm' => 'audio/x-oggpcm',
'application/x-speex' => 'audio/speex',
'audio/aiff' => 'audio/x-aiff',
'audio/x-realaudio' => 'audio/x-pn-realaudio',
'audio/x-wav' => 'audio/vnd.wave',
'audio/wave' => 'audio/vnd.wave',
'audio/wav' => 'audio/vnd.wave',
'image/x-bmp' => 'image/bmp',
'image/x-ms-bmp' => 'image/bmp',
'image/x-emf' => 'image/emf',
'application/x-emf' => 'image/emf',
'application/x-ms-emz' => 'image/x-emf-compressed',
'image/hevc' => 'image/heic',
'image/hevc-sequence' => 'image/heic-sequence',
'video/jpm' => 'image/jpm',
'image/ntf' => 'image/nitf',
'image/x-psd' => 'image/vnd.adobe.photoshop',
'application/photoshop' => 'image/vnd.adobe.photoshop',
'image/x-dwg' => 'image/vnd.dwg',
'application/acad' => 'image/vnd.dwg',
'application/x-acad' => 'image/vnd.dwg',
'application/autocad_dwg' => 'image/vnd.dwg',
'application/dwg' => 'image/vnd.dwg',
'application/x-dwg' => 'image/vnd.dwg',
'application/x-autocad' => 'image/vnd.dwg',
'drawing/dwg' => 'image/vnd.dwg',
'image/x-icon' => 'image/vnd.microsoft.icon',
'image/x-dcx' => 'image/vnd.zbrush.dcx',
'image/x-pcx' => 'image/vnd.zbrush.pcx',
'image/x-pc-paintbrush' => 'image/vnd.zbrush.pcx',
'image/x-wmf' => 'image/wmf',
'application/x-msmetafile' => 'image/wmf',
'image/x-jb2' => 'image/x-jbig2',
'image/xcf' => 'image/x-xcf',
'application/x-mimearchive' => 'multipart/related',
'message/rfc2557' => 'multipart/related',
'drawing/x-dwf' => 'model/vnd.dwf',
'text/x-asm' => 'text/x-assembly',
'application/x-troff' => 'text/troff',
'application/x-troff-man' => 'text/troff',
'application/x-troff-me' => 'text/troff',
'application/x-troff-ms' => 'text/troff',
'text/x-c' => 'text/x-csrc',
'text/x-java' => 'text/x-java-source',
'text/x-properties' => 'text/x-java-properties',
'text/properties' => 'text/x-java-properties',
'application/x-httpd-jsp' => 'text/x-jsp',
'application/matlab-mat' => 'application/x-matlab-data',
'application/x-tcl' => 'text/x-tcl',
'video/x-daala' => 'video/daala',
'video/x-theora' => 'video/theora',
'video/x-ogg-uvs' => 'video/x-ogguvs',
'video/x-ogg-yuv' => 'video/x-oggyuv',
'video/x-ogg-rgb' => 'video/x-oggrgb',
'video/avi' => 'video/x-msvideo',
'video/msvideo' => 'video/x-msvideo',
'application/font-woff' => 'font/woff',
'application/font-woff2' => 'font/woff2',
}
TYPE_PARENTS = {
'application/bizagi-modeler' => %w(application/zip),
'application/dash+xml' => %w(application/xml),
Expand Down
7 changes: 7 additions & 0 deletions script/generate_tables.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def get_matches(mime, parent)

extensions = {}
types = {}
aliases = {}
magics = []

ARGV.each do |path|
Expand All @@ -137,6 +138,7 @@ def get_matches(mime, parent)
(doc/'mime-info/mime-type').each do |mime|
comments = Hash[*(mime/'_comment').map {|comment| [comment['xml:lang'], comment.inner_text] }.flatten]
type = mime['type']
(mime/'alias').each { |x| aliases[x['type']] = type }
subclass = (mime/'sub-class-of').map{|x| x['type']}
exts = (mime/'glob').map{|x| x['pattern'] =~ /^\*\.([^\[\]]+)$/ ? $1.downcase : nil }.compact
(mime/'magic').each do |magic|
Expand Down Expand Up @@ -222,6 +224,11 @@ def get_matches(mime, parent)
puts " '#{key}' => %w(#{exts}),#{comment}"
end
puts " }"
puts " TYPE_ALIASES = {"
aliases.each do |aliased, type|
puts " '#{aliased}' => '#{type}',"
end
puts " }"
puts " TYPE_PARENTS = {"
types.keys.sort.each do |key|
parents = types[key][1].sort.join(' ')
Expand Down
5 changes: 5 additions & 0 deletions test/declared_type_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,9 @@ class Marcel::MimeType::DeclaredTypeTest < Marcel::TestCase
test "ignores charset declarations" do
assert_equal "text/html", Marcel::MimeType.for(declared_type: "text/html; charset=utf-8")
end

test "resolves declared type to a canonical MIME type" do
aliased, canonical = Marcel::TYPE_ALIASES.first
assert_equal canonical, Marcel::MimeType.for(declared_type: aliased)
end
end
File renamed without changes.
File renamed without changes.
5 changes: 5 additions & 0 deletions test/illustrator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
require 'rack'

class Marcel::MimeType::IllustratorTest < Marcel::TestCase
test ".ai uploaded as application/illustrator" do
file = files("name/application/illustrator/illustrator.ai")
assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/illustrator")
end

test ".ai uploaded as application/postscript" do
file = files("name/application/illustrator/illustrator.ai")
assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/postscript")
Expand Down
8 changes: 7 additions & 1 deletion test/magic_and_declared_type_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@

class Marcel::MimeType::MagicAndDeclaredTypeTest < Marcel::TestCase
each_content_type_fixture('name') do |file, name, content_type|
test "correctly returns #{content_type} for #{name} given both file and declared type" do
test "detects #{content_type} given magic bytes from #{name} and declared type" do
assert_equal content_type, Marcel::MimeType.for(file, declared_type: content_type)
end

ALIASED[content_type].each do |aliased|
test "detects #{content_type} given magic bytes from #{name} and aliased type #{aliased}" do
assert_equal content_type, Marcel::MimeType.for(file, declared_type: aliased)
end
end
end
end
2 changes: 1 addition & 1 deletion test/magic_and_name_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class Marcel::MimeType::MagicAndNameTest < Marcel::TestCase
# the file contents and the name. In some cases, the file contents will point to a
# generic type, while the name will choose a more specific subclass
each_content_type_fixture('name') do |file, name, content_type|
test "correctly returns #{content_type} for #{name} given both file and name" do
test "detects #{content_type} given filename #{name} and its magic bytes" do
assert_equal content_type, Marcel::MimeType.for(file, name: name)
end
end
Expand Down
Loading

0 comments on commit 16a9ef7

Please sign in to comment.