diff options
Diffstat (limited to 'modules/typesniffer/typesniffer.go')
-rw-r--r-- | modules/typesniffer/typesniffer.go | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index c9fef953c..7887fd42b 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -4,6 +4,7 @@ package typesniffer import ( + "bytes" "fmt" "io" "net/http" @@ -24,8 +25,9 @@ const ( ) var ( - svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`) - svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`) + svgComment = regexp.MustCompile(`(?s)<!--.*?-->`) + svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`) + svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`) ) // SniffedType contains information about a blobs type. @@ -91,10 +93,27 @@ func DetectContentType(data []byte) SniffedType { data = data[:sniffLen] } - if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) || - strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) { - // SVG is unsupported. https://github.com/golang/go/issues/15888 - ct = SvgMimeType + // SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888 + + detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html") + detectByXML := strings.Contains(ct, "text/xml") + if detectByHTML || detectByXML { + dataProcessed := svgComment.ReplaceAll(data, nil) + dataProcessed = bytes.TrimSpace(dataProcessed) + if detectByHTML && svgTagRegex.Match(dataProcessed) || + detectByXML && svgTagInXMLRegex.Match(dataProcessed) { + ct = SvgMimeType + } + } + + if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) { + // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg". + // So remove the "ID3" prefix and detect again, if result is text, then it must be text content. + // This works especially because audio files contain many unprintable/invalid characters like `0x00` + ct2 := http.DetectContentType(data[3:]) + if strings.HasPrefix(ct2, "text/") { + ct = ct2 + } } return SniffedType{ct} |