Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance #40

Merged
merged 3 commits into from
Jul 25, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 10 additions & 21 deletions lib/mimemagic.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
require 'mimemagic/tables'
require 'mimemagic/version'

require 'stringio'

# Mime type detection
class MimeMagic
attr_reader :type, :mediatype, :subtype
Expand Down Expand Up @@ -110,41 +112,28 @@ def self.magic_match(io, method)
if io.respond_to?(:seek) && io.respond_to?(:read)
io.binmode
io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
MAGIC.send(method) { |type, matches| magic_match_io(io, matches) }
buffer = "".force_encoding(Encoding::BINARY)
MAGIC.send(method) { |type, matches| magic_match_io(io, matches, buffer) }
else
str = io.respond_to?(:read) ? io.read : io.to_s
str = str.force_encoding(Encoding::BINARY) if str.respond_to?(:force_encoding)
MAGIC.send(method) { |type, matches| magic_match_str(str, matches) }
magic_match(StringIO.new(str), method)
end
end

def self.magic_match_io(io, matches)
def self.magic_match_io(io, matches, buffer)
matches.any? do |offset, value, children|
match =
if Range === offset
io.seek(offset.begin)
x = io.read(offset.end - offset.begin + value.bytesize)
x = io.read(offset.end - offset.begin + value.bytesize, buffer)
x && x.include?(value)
else
io.seek(offset)
io.read(value.bytesize) == value
end
match && (!children || magic_match_io(io, children))
end
end

def self.magic_match_str(str, matches)
matches.any? do |offset, value, children|
match =
if Range === offset
x = str[offset.begin, offset.end - offset.begin + value.bytesize]
x && x.include?(value)
else
str[offset, value.bytesize] == value
io.read(value.bytesize, buffer) == value
end
match && (!children || magic_match_str(str, children))
match && (!children || magic_match_io(io, children, buffer))
end
end

private_class_method :magic_match, :magic_match_io, :magic_match_str
private_class_method :magic_match, :magic_match_io
end
54 changes: 27 additions & 27 deletions lib/mimemagic/tables.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,30 @@ class MimeMagic
# @private
# :nodoc:
MAGIC = [
['image/jpeg', [[0, "\377\330\377"], [0, "\377\330"]]],
['image/png', [[0, "\211PNG"]]],
['image/gif', [[0, 'GIF8']]],
['image/tiff', [[0, "MM\000*"], [0, "II*\000"]]],
['image/bmp', [[0, 'BM', [[14, "\f"], [14, '@'], [14, '(']]]]],
['image/vnd.adobe.photoshop', []],
['image/webp', [[0, 'RIFF', [[8, 'WEBP']]]]],
['image/svg+xml', [[0..256, '<!DOCTYPE svg'], [0..256, '<svg']]],
['video/x-msvideo', [[0, 'RIFF', [[8, 'AVI ']]], [0, 'AVF0', [[8, 'AVI ']]]]],
['video/mp4', [[4, 'ftypisom'], [4, 'ftypmp42'], [4, 'ftypMSNV'], [4, 'ftypM4V '], [4, 'ftypf4v ']]],
['video/quicktime', [[12, 'mdat'], [4, 'mdat'], [4, 'moov'], [4, 'ftypqt']]],
['video/mpeg', [[0, "G?\377\020"], [0, "\000\000\001\263"], [0, "\000\000\001\272"]]],
['video/ogg', [[0, 'OggS']]],
['video/webm', [[0, "\032E\337\243", [[5..65, "B\202", [[8..75, 'webm']]]]]]],
['video/x-flv', [[0, 'FLV']]],
['audio/mpeg', [[0, "\377\373"], [0, 'ID3']]],
['audio/x-wav', [[8, 'WAVE'], [8, 'WAV ']]],
['audio/aac', [[0, 'ADIF']]],
['audio/flac', [[0, 'fLaC']]],
['audio/mp4', [[4, 'ftypM4A']]],
['audio/ogg', [[0, 'OggS']]],
['application/pdf', [[0..1024, '%PDF-']]],
['application/msword', [[0, "1\276\000\000"], [0, 'PO^Q`'], [0, "\3767\000#"], [0, "\333\245-\000\000\000"], [2112, 'MSWordDoc'], [2108, 'MSWordDoc'], [2112, 'Microsoft Word document data'], [546, 'bjbj'], [546, 'jbjb']]],
['application/vnd.ms-excel', [[2080, 'Microsoft Excel 5.0 Worksheet']]],
['application/vnd.stardivision.writer', [[2089, 'StarWriter']]],
['application/x-docbook+xml', [[0, '<?xml', [[0..100, '-//OASIS//DTD DocBook XML'], [0..100, '-//KDE//DTD DocBook XML']]]]],
['image/x-eps', [[0, '%!', [[15, 'EPS']]], [0, "\004%!", [[16, 'EPS']]], [0, "\305\320\323\306"]]],
Expand All @@ -1515,7 +1539,6 @@ class MimeMagic
['audio/x-opus+ogg', [[0, 'OggS', [[28, 'OpusHead']]]]],
['audio/x-speex+ogg', [[0, 'OggS', [[28, 'Speex ']]]]],
['audio/x-vorbis+ogg', [[0, 'OggS', [[28, "\001vorbis"]]]]],
['image/svg+xml', [[0..256, '<!DOCTYPE svg'], [0..256, '<svg']]],
['image/x-kodak-kdc', [[242, 'EASTMAN KODAK COMPANY']]],
['image/x-niff', [[0, 'IIN1']]],
['text/x-qml', [[0..256, 'import Qt ']]],
Expand All @@ -1525,7 +1548,6 @@ class MimeMagic
['application/rss+xml', [[0..256, '<rss '], [0..256, '<RSS ']]],
['application/vnd.apple.mpegurl', [[0, '#EXTM3U', [[0..128, '#EXT-X-TARGETDURATION'], [0..128, '#EXT-X-STREAM-INF']]]]],
['text/x-opml+xml', [[0..256, '<opml ']]],
['application/msword', [[0, "1\276\000\000"], [0, 'PO^Q`'], [0, "\3767\000#"], [0, "\333\245-\000\000\000"], [2112, 'MSWordDoc'], [2108, 'MSWordDoc'], [2112, 'Microsoft Word document data'], [546, 'bjbj'], [546, 'jbjb']]],
['application/vnd.ms-cab-compressed', [[0, "MSCF\000\000\000\000"]]],
['application/vnd.ms-wpl', [[0..256, '<?wpl']]],
['application/x-7z-compressed', [[0, "7z\274\257'\034"]]],
Expand Down Expand Up @@ -1576,7 +1598,6 @@ class MimeMagic
['application/mxf', [[0..256, "\006\016+4\002\005\001\001\r\001\002\001\001\002"]]],
['application/ogg', [[0, 'OggS']]],
['application/owl+xml', [[0..256, '<Ontology']]],
['application/pdf', [[0..1024, '%PDF-']]],
['application/pgp-encrypted', [[0, '-----BEGIN PGP MESSAGE-----']]],
['application/pgp-keys', [[0, '-----BEGIN PGP PUBLIC KEY BLOCK-----'], [0, '-----BEGIN PGP PRIVATE KEY BLOCK-----'], [0, "\225\001"], [0, "\225\000"], [0, "\231\000"], [0, "\231\001"]]],
['application/pgp-signature', [[0, '-----BEGIN PGP SIGNATURE-----']]],
Expand All @@ -1591,7 +1612,6 @@ class MimeMagic
['application/vnd.lotus-wordpro', [[0, 'WordPro']]],
['application/vnd.ms-access', [[0, "\000\001\000\000Standard Jet DB"]]],
['application/vnd.ms-asf', [[0, "0&\262u"], [0, '[Reference]']]],
['application/vnd.ms-excel', [[2080, 'Microsoft Excel 5.0 Worksheet']]],
['application/vnd.ms-tnef', [[0, "x\237>\""]]],
['application/vnd.oasis.opendocument.chart', [[0, "PK\003\004", [[30, 'mimetype', [[38, 'application/vnd.oasis.opendocument.chart']]]]]]],
['application/vnd.oasis.opendocument.chart-template', [[0, "PK\003\004", [[30, 'mimetype', [[38, 'application/vnd.oasis.opendocument.chart-template']]]]]]],
Expand Down Expand Up @@ -1731,14 +1751,9 @@ class MimeMagic
['application/xspf+xml', [[0..64, "<playlist version=\"1"], [0..64, "<playlist version='1"]]],
['audio/AMR', [[0, "#!AMR\n"], [0, "#!AMR_MC1.0\n"]]],
['audio/AMR-WB', [[0, "#!AMR-WB\n"], [0, "#!AMR-WB_MC1.0\n"]]],
['audio/aac', [[0, 'ADIF']]],
['audio/ac3', [[0, "\vw"]]],
['audio/annodex', [[0, 'OggS', [[28, "fishead\000", [[56..512, "CMML\000\000\000\000"]]]]]]],
['audio/flac', [[0, 'fLaC']]],
['audio/midi', [[0, 'MThd']]],
['audio/mp4', [[4, 'ftypM4A']]],
['audio/mpeg', [[0, "\377\373"], [0, 'ID3']]],
['audio/ogg', [[0, 'OggS']]],
['audio/prs.sid', [[0, 'PSID']]],
['audio/vnd.dts', [[0, "\177\376\200\001"], [0, "\200\001\177\376"], [0, "\037\377\350\000"], [0, "\350\000\037\377"]]],
['audio/x-adpcm', [[0, '.snd', [[12, "\000\000\000\027"]]], [0, ".sd\000", [[12, "\001\000\000\000"], [12, "\002\000\000\000"], [12, "\003\000\000\000"], [12, "\004\000\000\000"], [12, "\005\000\000\000"], [12, "\006\000\000\000"], [12, "\a\000\000\000"], [12, "\027\000\000\000"]]]]],
Expand All @@ -1758,29 +1773,21 @@ class MimeMagic
['audio/x-speex', [[0, 'Speex']]],
['audio/x-stm', [[20, "!Scream!\032"], [20, "!SCREAM!\032"], [20, "BMOD2STM\032"]]],
['audio/x-tta', [[0, 'TTA1']]],
['audio/x-wav', [[8, 'WAVE'], [8, 'WAV ']]],
['audio/x-wavpack', [[0, 'wvpk']]],
['audio/x-wavpack-correction', [[0, 'wvpk']]],
['audio/x-xi', [[0, 'Extended Instrument:']]],
['audio/x-xm', [[0, 'Extended Module:']]],
['audio/x-xmf', [[0, 'XMF_'], [0, "XMF_2.00\000\000\000\002"]]],
['image/bmp', [[0, 'BM', [[14, "\f"], [14, '@'], [14, '(']]]]],
['image/dpx', [[0, 'SDPX']]],
['image/fits', [[0, 'SIMPLE =']]],
['image/gif', [[0, 'GIF8']]],
['image/jp2', [[0, "\377O\377Q\000"], [3, "\fjP "], [20, 'jp2']]],
['image/jpeg', [[0, "\377\330\377"], [0, "\377\330"]]],
['image/openraster', [[0, "PK\003\004", [[30, 'mimetype', [[38, 'image/openraster']]]]]]],
['image/png', [[0, "\211PNG"]]],
['image/tiff', [[0, "MM\000*"], [0, "II*\000"]]],
['image/vnd.adobe.photoshop', []],
['image/vnd.djvu', [[0, 'AT&TFORM', [[12, 'DJVU']]], [0, 'FORM', [[8, 'DJVU']]]]],
['image/vnd.djvu+multipage', [[0, 'AT&TFORM', [[12, 'DJVM']]], [0, 'FORM', [[8, 'DJVM']]]]],
['image/vnd.dxf', [[0..64, "\nHEADER\n"], [0..64, "\r\nHEADER\r\n"]]],
['image/vnd.microsoft.icon', [[0, "\000\000\001\000", [[5, "\000"]]]]],
['image/vnd.ms-modi', [[0, "EP*\000"]]],
['image/vnd.zbrush.pcx', [[0, "\n", [[1, "\000"], [1, "\002"], [1, "\003"], [1, "\005"]]]]],
['image/webp', [[0, 'RIFF', [[8, 'WEBP']]]]],
['image/x-applix-graphics', [[0, '*BEGIN', [[7, 'GRAPHICS']]]]],
['image/x-canon-crw', [[0, "II\032\000\000\000HEAPCCDR"]]],
['image/x-dds', [[0, 'DDS']]],
Expand All @@ -1795,8 +1802,8 @@ class MimeMagic
['image/x-olympus-orf', [[0, "IIRO\b\000\000\000"]]],
['image/x-panasonic-raw', [[0, "IIU\000\b\000\000\000"]]],
['image/x-panasonic-raw2', [[0, "IIU\000\030\000\000\000"]]],
['image/x-pict', [[10, "\000\021", [[12, "\002\377", [[14, "\f\000", [[16, "\377\376"]]]]]]]]],
['image/x-pict', [[522, "\000\021", [[524, "\002\377", [[526, "\f\000", [[528, "\377\376"]]]]]]]]],
['image/x-pict', [[10, "\000\021", [[12, "\002\377", [[14, "\f\000", [[16, "\377\376"]]]]]]]]],
['image/x-portable-bitmap', [[0, 'P1', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]], [0, 'P4', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]]]],
['image/x-portable-graymap', [[0, 'P2', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]], [0, 'P5', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]]]],
['image/x-portable-pixmap', [[0, 'P3', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]], [0, 'P6', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]]]],
Expand Down Expand Up @@ -1835,10 +1842,10 @@ class MimeMagic
['text/x-makefile', [[0, '#!/usr/bin/make'], [0, '#! /usr/bin/make']]],
['text/x-matlab', [[0, 'function']]],
['text/x-microdvd', [[0, '{1}'], [0, '{0}'], [0..6, '}{']]],
['text/x-modelica', [[0, 'function']]],
['text/x-modelica', [[0, 'class']]],
['text/x-modelica', [[0, 'model']]],
['text/x-modelica', [[0, 'record']]],
['text/x-modelica', [[0, 'model']]],
['text/x-modelica', [[0, 'function']]],
['text/x-mpsub', [[0..256, 'FORMAT=']]],
['text/x-mrml', [[0, '<mrml ']]],
['text/x-ms-regedit', [[0, 'REGEDIT'], [0, 'Windows Registry Editor Version 5.00'], [0, "\377\376W\000i\000n\000d\000o\000w\000s\000 \000R\000e\000g\000i\000s\000t\000r\000y\000 \000E\000d\000i\000t\000o\000r\000"]]],
Expand All @@ -1856,16 +1863,9 @@ class MimeMagic
['video/annodex', [[0, 'OggS', [[28, "fishead\000", [[56..512, "CMML\000\000\000\000"]]]]]]],
['video/dv', []],
['video/mp2t', []],
['video/mp4', [[4, 'ftypisom'], [4, 'ftypmp42'], [4, 'ftypMSNV'], [4, 'ftypM4V '], [4, 'ftypf4v ']]],
['video/mpeg', [[0, "G?\377\020"], [0, "\000\000\001\263"], [0, "\000\000\001\272"]]],
['video/ogg', [[0, 'OggS']]],
['video/quicktime', [[12, 'mdat'], [4, 'mdat'], [4, 'moov'], [4, 'ftypqt']]],
['video/vnd.mpegurl', [[0, '#EXTM4U']]],
['video/webm', [[0, "\032E\337\243", [[5..65, "B\202", [[8..75, 'webm']]]]]]],
['video/x-flic', [[0, "\021\257"], [0, "\022\257"]]],
['video/x-flv', [[0, 'FLV']]],
['video/x-mng', [[0, "\212MNG\r\n\032\n"]]],
['video/x-msvideo', [[0, 'RIFF', [[8, 'AVI ']]], [0, 'AVF0', [[8, 'AVI ']]]]],
['video/x-nsv', [[0, 'NSVf']]],
['video/x-sgi-movie', [[0, 'MOVI']]],
['x-epoc/x-sisx-app', [[0, "z\032 \020"]]],
Expand Down
42 changes: 42 additions & 0 deletions script/generate-mime.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,48 @@ def get_matches(parent)

magics = magics.sort {|a,b| [-a[0],a[1]] <=> [-b[0],b[1]] }

# MIME types that the script moves to the front of the generated magic table
# (see the reordering right after this list). The order of this list is the
# order in which those entries are emitted. Types listed here that have no
# magic entry are simply ignored by the reordering step.
common_types = [
"image/jpeg", # .jpg
"image/png", # .png
"image/gif", # .gif
"image/tiff", # .tiff
"image/bmp", # .bmp
"image/vnd.adobe.photoshop", # .psd
"image/webp", # .webp
"image/svg+xml", # .svg

"video/x-msvideo", # .avi
"video/x-ms-wmv", # .wmv
"video/mp4", # .mp4, .m4v
"video/quicktime", # .mov
"video/mpeg", # .mpeg
"video/ogg", # .ogv
"video/webm", # .webm
"video/x-matroska", # .mkv
"video/x-flv", # .flv

"audio/mpeg", # .mp3
"audio/x-wav", # .wav
"audio/aac", # .aac
"audio/flac", # .flac
"audio/mp4", # .m4a
"audio/ogg", # .ogg

"application/pdf", # .pdf
"application/msword", # .doc
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx
"application/vnd.ms-powerpoint", # .ppt, .pps
"application/vnd.openxmlformats-officedocument.presentationml.slideshow", # .ppsx
"application/vnd.ms-excel", # .xls
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # .xlsx
]

# For every common type, pick the first magic entry whose second element
# (the MIME type) matches. A type without any magic entry leaves a nil in
# this list; the nils are dropped when the table is rebuilt below.
common_magics = common_types.map do |wanted|
  magics.find { |entry| entry[1] == wanted }
end

# Put the common entries first. Array#| keeps first occurrences in order, so
# each promoted entry is removed from its original position further down.
magics = common_magics.compact | magics

puts "# -*- coding: binary -*-"
puts "# Generated from #{FILE}"
puts "class MimeMagic"
Expand Down