aboutsummaryrefslogtreecommitdiff
path: root/modules/typesniffer/typesniffer.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/typesniffer/typesniffer.go')
-rw-r--r--modules/typesniffer/typesniffer.go31
1 files changed, 25 insertions, 6 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go
index c9fef953c..7887fd42b 100644
--- a/modules/typesniffer/typesniffer.go
+++ b/modules/typesniffer/typesniffer.go
@@ -4,6 +4,7 @@
package typesniffer
import (
+ "bytes"
"fmt"
"io"
"net/http"
@@ -24,8 +25,9 @@ const (
)
var (
- svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
- svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
+ svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
+ svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
+ svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)
// SniffedType contains information about a blobs type.
@@ -91,10 +93,27 @@ func DetectContentType(data []byte) SniffedType {
data = data[:sniffLen]
}
- if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
- strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
- // SVG is unsupported. https://github.com/golang/go/issues/15888
- ct = SvgMimeType
+ // SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
+
+ detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
+ detectByXML := strings.Contains(ct, "text/xml")
+ if detectByHTML || detectByXML {
+ dataProcessed := svgComment.ReplaceAll(data, nil)
+ dataProcessed = bytes.TrimSpace(dataProcessed)
+ if detectByHTML && svgTagRegex.Match(dataProcessed) ||
+ detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
+ ct = SvgMimeType
+ }
+ }
+
+ if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
+ // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
+ // So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
+ // This works especially because audio files contain many unprintable/invalid characters like `0x00`
+ ct2 := http.DetectContentType(data[3:])
+ if strings.HasPrefix(ct2, "text/") {
+ ct = ct2
+ }
}
return SniffedType{ct}