diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/typesniffer/typesniffer.go | 21 | ||||
-rw-r--r-- | modules/typesniffer/typesniffer_test.go | 25 |
2 files changed, 39 insertions, 7 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index c9fef953c..5b215496b 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -4,6 +4,7 @@ package typesniffer import ( + "bytes" "fmt" "io" "net/http" @@ -24,8 +25,9 @@ const ( ) var ( - svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`) - svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`) + svgComment = regexp.MustCompile(`(?s)<!--.*?-->`) + svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`) + svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`) ) // SniffedType contains information about a blobs type. @@ -91,10 +93,17 @@ func DetectContentType(data []byte) SniffedType { data = data[:sniffLen] } - if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) || - strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) { - // SVG is unsupported. https://github.com/golang/go/issues/15888 - ct = SvgMimeType + // SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888 + + detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html") + detectByXML := strings.Contains(ct, "text/xml") + if detectByHTML || detectByXML { + dataProcessed := svgComment.ReplaceAll(data, nil) + dataProcessed = bytes.TrimSpace(dataProcessed) + if detectByHTML && svgTagRegex.Match(dataProcessed) || + detectByXML && svgTagInXMLRegex.Match(dataProcessed) { + ct = SvgMimeType + } } return SniffedType{ct} diff --git a/modules/typesniffer/typesniffer_test.go b/modules/typesniffer/typesniffer_test.go index dbce94fc3..2bafdffd1 100644 --- a/modules/typesniffer/typesniffer_test.go +++ b/modules/typesniffer/typesniffer_test.go @@ -28,7 +28,6 @@ func TestIsSvgImage(t *testing.T) { assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage()) assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage()) assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage()) - assert.True(t, DetectContentType([]byte("<svg/>")).IsSvgImage()) assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage()) assert.True(t, DetectContentType([]byte(`<!-- Comment --> <svg></svg>`)).IsSvgImage()) @@ -57,6 +56,10 @@ func TestIsSvgImage(t *testing.T) { <!-- Multline Comment --> <svg></svg>`)).IsSvgImage()) + + // the DetectContentType should work for incomplete data, because only beginning bytes are used for detection + assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage()) + assert.False(t, DetectContentType([]byte{}).IsSvgImage()) assert.False(t, DetectContentType([]byte("svg")).IsSvgImage()) assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage()) @@ -68,6 +71,26 @@ func TestIsSvgImage(t *testing.T) { assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> <!-- <svg></svg> inside comment --> <foo></foo>`)).IsSvgImage()) + + assert.False(t, DetectContentType([]byte(` +<!-- comment1 --> +<div> + <!-- comment2 --> + <svg></svg> +</div> +`)).IsSvgImage()) + + assert.False(t, DetectContentType([]byte(` +<!-- comment1 +--> +<div> + <!-- comment2 +--> + <svg></svg> +</div> +`)).IsSvgImage()) + assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage()) + assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage()) } func TestIsPDF(t *testing.T) { |