diff options
| -rw-r--r-- | lib/pleroma/html.ex | 135 | ||||
| -rw-r--r-- | test/emoji/formatter_test.exs | 2 | ||||
| -rw-r--r-- | test/html_test.exs | 44 | ||||
| -rw-r--r-- | test/web/activity_pub/mrf/normalize_markup_test.exs | 10 | ||||
| -rw-r--r-- | test/web/common_api/common_api_test.exs | 4 | 
5 files changed, 98 insertions, 97 deletions
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 937bafed5..fd0495049 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -3,7 +3,6 @@  # SPDX-License-Identifier: AGPL-3.0-only  defmodule Pleroma.HTML do -  alias HtmlSanitizeEx.Scrubber    defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]    defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers @@ -24,9 +23,13 @@ defmodule Pleroma.HTML do      end)    end -  def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber) +  def filter_tags(html, scrubber) do +    {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber) +    content +  end +    def filter_tags(html), do: filter_tags(html, nil) -  def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) +  def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)    def get_cached_scrubbed_html_for_activity(          content, @@ -36,7 +39,6 @@ defmodule Pleroma.HTML do          callback \\ fn x -> x end        ) do      key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}" -      Cachex.fetch!(:scrubber_cache, key, fn _key ->        object = Pleroma.Object.normalize(activity)        ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback) @@ -46,7 +48,7 @@ defmodule Pleroma.HTML do    def get_cached_stripped_html_for_activity(content, activity, key) do      get_cached_scrubbed_html_for_activity(        content, -      HtmlSanitizeEx.Scrubber.StripTags, +      FastSanitize.Sanitizer.StripTags,        activity,        key,        &HtmlEntities.decode/1 @@ -109,13 +111,12 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do    require HtmlSanitizeEx.Scrubber.Meta    alias HtmlSanitizeEx.Scrubber.Meta -  Meta.remove_cdata_sections_before_scrub()    Meta.strip_comments()    # links -  Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes) +  Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes) -  Meta.allow_tag_with_this_attribute_values("a", "class", [ +  Meta.allow_tag_with_this_attribute_values(:a, "class", [      "hashtag",      "u-url",      "mention", @@ -123,29 +124,29 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do      "mention u-url"    ]) -  Meta.allow_tag_with_this_attribute_values("a", "rel", [ +  Meta.allow_tag_with_this_attribute_values(:a, "rel", [      "tag",      "nofollow",      "noopener",      "noreferrer"    ]) -  Meta.allow_tag_with_these_attributes("a", ["name", "title"]) +  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])    # paragraphs and linebreaks -  Meta.allow_tag_with_these_attributes("br", []) -  Meta.allow_tag_with_these_attributes("p", []) +  Meta.allow_tag_with_these_attributes(:br, []) +  Meta.allow_tag_with_these_attributes(:p, [])    # microformats -  Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"]) -  Meta.allow_tag_with_these_attributes("span", []) +  Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"]) +  Meta.allow_tag_with_these_attributes(:span, [])    # allow inline images for custom emoji    if Pleroma.Config.get([:markup, :allow_inline_images]) do      # restrict img tags to http/https only, because of MediaProxy. -    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"]) +    Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) -    Meta.allow_tag_with_these_attributes("img", [ +    Meta.allow_tag_with_these_attributes(:img, [        "width",        "height",        "class", @@ -160,19 +161,19 @@ end  defmodule Pleroma.HTML.Scrubber.Default do    @doc "The default HTML scrubbing policy: no " -  require HtmlSanitizeEx.Scrubber.Meta -  alias HtmlSanitizeEx.Scrubber.Meta +  require FastSanitize.Sanitizer.Meta +  alias FastSanitize.Sanitizer.Meta    # credo:disable-for-previous-line    # No idea how to fix this one⦠   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) -  Meta.remove_cdata_sections_before_scrub() +#  Meta.remove_cdata_sections_before_scrub()    Meta.strip_comments() -  Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes) +  Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes) -  Meta.allow_tag_with_this_attribute_values("a", "class", [ +  Meta.allow_tag_with_this_attribute_values(:a, "class", [      "hashtag",      "u-url",      "mention", @@ -180,7 +181,7 @@ defmodule Pleroma.HTML.Scrubber.Default do      "mention u-url"    ]) -  Meta.allow_tag_with_this_attribute_values("a", "rel", [ +  Meta.allow_tag_with_this_attribute_values(:a, "rel", [      "tag",      "nofollow",      "noopener", @@ -188,37 +189,37 @@ defmodule Pleroma.HTML.Scrubber.Default do      "ugc"    ]) -  Meta.allow_tag_with_these_attributes("a", ["name", "title"]) - -  Meta.allow_tag_with_these_attributes("abbr", ["title"]) - -  Meta.allow_tag_with_these_attributes("b", []) -  Meta.allow_tag_with_these_attributes("blockquote", []) -  Meta.allow_tag_with_these_attributes("br", []) -  Meta.allow_tag_with_these_attributes("code", []) -  Meta.allow_tag_with_these_attributes("del", []) -  Meta.allow_tag_with_these_attributes("em", []) -  Meta.allow_tag_with_these_attributes("i", []) -  Meta.allow_tag_with_these_attributes("li", []) -  Meta.allow_tag_with_these_attributes("ol", []) -  Meta.allow_tag_with_these_attributes("p", []) -  Meta.allow_tag_with_these_attributes("pre", []) -  Meta.allow_tag_with_these_attributes("strong", []) -  Meta.allow_tag_with_these_attributes("sub", []) -  Meta.allow_tag_with_these_attributes("sup", []) -  Meta.allow_tag_with_these_attributes("u", []) -  Meta.allow_tag_with_these_attributes("ul", []) - -  Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"]) -  Meta.allow_tag_with_these_attributes("span", []) +  Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) + +  Meta.allow_tag_with_these_attributes(:abbr, ["title"]) + +  Meta.allow_tag_with_these_attributes(:b, []) +  Meta.allow_tag_with_these_attributes(:blockquote, []) +  Meta.allow_tag_with_these_attributes(:br, []) +  Meta.allow_tag_with_these_attributes(:code, []) +  Meta.allow_tag_with_these_attributes(:del, []) +  Meta.allow_tag_with_these_attributes(:em, []) +  Meta.allow_tag_with_these_attributes(:i, []) +  Meta.allow_tag_with_these_attributes(:li, []) +  Meta.allow_tag_with_these_attributes(:ol, []) +  Meta.allow_tag_with_these_attributes(:p, []) +  Meta.allow_tag_with_these_attributes(:pre, []) +  Meta.allow_tag_with_these_attributes(:strong, []) +  Meta.allow_tag_with_these_attributes(:sub, []) +  Meta.allow_tag_with_these_attributes(:sup, []) +  Meta.allow_tag_with_these_attributes(:u, []) +  Meta.allow_tag_with_these_attributes(:ul, []) + +  Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"]) +  Meta.allow_tag_with_these_attributes(:span, [])    @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])    if @allow_inline_images do      # restrict img tags to http/https only, because of MediaProxy. -    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"]) +    Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) -    Meta.allow_tag_with_these_attributes("img", [ +    Meta.allow_tag_with_these_attributes(:img, [        "width",        "height",        "class", @@ -228,24 +229,24 @@ defmodule Pleroma.HTML.Scrubber.Default do    end    if Pleroma.Config.get([:markup, :allow_tables]) do -    Meta.allow_tag_with_these_attributes("table", []) -    Meta.allow_tag_with_these_attributes("tbody", []) -    Meta.allow_tag_with_these_attributes("td", []) -    Meta.allow_tag_with_these_attributes("th", []) -    Meta.allow_tag_with_these_attributes("thead", []) -    Meta.allow_tag_with_these_attributes("tr", []) +    Meta.allow_tag_with_these_attributes(:table, []) +    Meta.allow_tag_with_these_attributes(:tbody, []) +    Meta.allow_tag_with_these_attributes(:td, []) +    Meta.allow_tag_with_these_attributes(:th, []) +    Meta.allow_tag_with_these_attributes(:thead, []) +    Meta.allow_tag_with_these_attributes(:tr, [])    end    if Pleroma.Config.get([:markup, :allow_headings]) do -    Meta.allow_tag_with_these_attributes("h1", []) -    Meta.allow_tag_with_these_attributes("h2", []) -    Meta.allow_tag_with_these_attributes("h3", []) -    Meta.allow_tag_with_these_attributes("h4", []) -    Meta.allow_tag_with_these_attributes("h5", []) +    Meta.allow_tag_with_these_attributes(:h1, []) +    Meta.allow_tag_with_these_attributes(:h2, []) +    Meta.allow_tag_with_these_attributes(:h3, []) +    Meta.allow_tag_with_these_attributes(:h4, []) +    Meta.allow_tag_with_these_attributes(:h5, [])    end    if Pleroma.Config.get([:markup, :allow_fonts]) do -    Meta.allow_tag_with_these_attributes("font", ["face"]) +    Meta.allow_tag_with_these_attributes(:font, ["face"])    end    Meta.strip_everything_not_covered() @@ -258,7 +259,7 @@ defmodule Pleroma.HTML.Transform.MediaProxy do    def before_scrub(html), do: html -  def scrub_attribute("img", {"src", "http" <> target}) do +  def scrub_attribute(:img, {"src", "http" <> target}) do      media_url =        ("http" <> target)        |> MediaProxy.url() @@ -268,16 +269,16 @@ defmodule Pleroma.HTML.Transform.MediaProxy do    def scrub_attribute(_tag, attribute), do: attribute -  def scrub({"img", attributes, children}) do +  def scrub({:img, attributes, children}) do      attributes =        attributes -      |> Enum.map(fn attr -> scrub_attribute("img", attr) end) +      |> Enum.map(fn attr -> scrub_attribute(:img, attr) end)        |> Enum.reject(&is_nil(&1)) -    {"img", attributes, children} +    {:img, attributes, children}    end -  def scrub({:comment, _children}), do: "" +  def scrub({:comment, _text, _children}), do: ""    def scrub({tag, attributes, children}), do: {tag, attributes, children}    def scrub({_tag, children}), do: children @@ -298,9 +299,9 @@ defmodule Pleroma.HTML.Scrubber.LinksOnly do    Meta.strip_comments()    # links -  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) +  Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes) -  Meta.allow_tag_with_this_attribute_values("a", "rel", [ +  Meta.allow_tag_with_this_attribute_values(:a, "rel", [      "tag",      "nofollow",      "noopener", @@ -309,6 +310,6 @@ defmodule Pleroma.HTML.Scrubber.LinksOnly do      "ugc"    ]) -  Meta.allow_tag_with_these_attributes("a", ["name", "title"]) +  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])    Meta.strip_everything_not_covered()  end diff --git a/test/emoji/formatter_test.exs b/test/emoji/formatter_test.exs index 6d25fc453..3e37ae3f0 100644 --- a/test/emoji/formatter_test.exs +++ b/test/emoji/formatter_test.exs @@ -12,7 +12,7 @@ defmodule Pleroma.Emoji.FormatterTest do        text = "I love :firefox:"        expected_result = -        "I love <img class=\"emoji\" alt=\"firefox\" title=\"firefox\" src=\"/emoji/Firefox.gif\" />" +        "I love <img class=\"emoji\" alt=\"firefox\" title=\"firefox\" src=\"/emoji/Firefox.gif\"/>"        assert Formatter.emojify(text) == expected_result      end diff --git a/test/html_test.exs b/test/html_test.exs index 306ad3b3b..f0869534c 100644 --- a/test/html_test.exs +++ b/test/html_test.exs @@ -21,31 +21,31 @@ defmodule Pleroma.HTMLTest do    """    @html_onerror_sample """ -    <img src="http://example.com/image.jpg" onerror="alert('hacked')"> +  <img src="http://example.com/image.jpg" onerror="alert('hacked')">    """    @html_span_class_sample """ -    <span class="animate-spin">hi</span> +  <span class="animate-spin">hi</span>    """    @html_span_microformats_sample """ -    <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span> +  <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>    """    @html_span_invalid_microformats_sample """ -    <span class="h-card"><a class="u-url mention animate-spin">@<span>foo</span></a></span> +  <span class="h-card"><a class="u-url mention animate-spin">@<span>foo</span></a></span>    """    describe "StripTags scrubber" do      test "works as expected" do        expected = """ -      this is in bold +        this is in bold          this is a paragraph          this is a linebreak -        this is a link with allowed "rel" attribute: example.com -        this is a link with not allowed "rel" attribute: example.com +        this is a link with allowed "rel" attribute: example.com +        this is a link with not allowed "rel" attribute: example.com          this is an image:  -        alert('hacked') +        alert('hacked')        """        assert expected == HTML.strip_tags(@html_sample) @@ -61,13 +61,13 @@ defmodule Pleroma.HTMLTest do    describe "TwitterText scrubber" do      test "normalizes HTML as expected" do        expected = """ -      this is in bold +        this is in bold          <p>this is a paragraph</p> -        this is a linebreak<br /> -        this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a> -        this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a> -        this is an image: <img src="http://example.com/image.jpg" /><br /> -        alert('hacked') +        this is a linebreak<br/> +        this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a> +        this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a> +        this is an image: <img src="http://example.com/image.jpg"/><br/> +        alert('hacked')        """        assert expected == HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.TwitterText) @@ -75,7 +75,7 @@ defmodule Pleroma.HTMLTest do      test "does not allow attribute-based XSS" do        expected = """ -      <img src="http://example.com/image.jpg" /> +      <img src="http://example.com/image.jpg"/>        """        assert expected == HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.TwitterText) @@ -115,13 +115,13 @@ defmodule Pleroma.HTMLTest do    describe "default scrubber" do      test "normalizes HTML as expected" do        expected = """ -      <b>this is in bold</b> +        <b>this is in bold</b>          <p>this is a paragraph</p> -        this is a linebreak<br /> -        this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a> -        this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a> -        this is an image: <img src="http://example.com/image.jpg" /><br /> -        alert('hacked') +        this is a linebreak<br/> +        this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a> +        this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a> +        this is an image: <img src="http://example.com/image.jpg"/><br/> +        alert('hacked')        """        assert expected == HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.Default) @@ -129,7 +129,7 @@ defmodule Pleroma.HTMLTest do      test "does not allow attribute-based XSS" do        expected = """ -      <img src="http://example.com/image.jpg" /> +      <img src="http://example.com/image.jpg"/>        """        assert expected == HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.Default) diff --git a/test/web/activity_pub/mrf/normalize_markup_test.exs b/test/web/activity_pub/mrf/normalize_markup_test.exs index 3916a1f35..0207be56b 100644 --- a/test/web/activity_pub/mrf/normalize_markup_test.exs +++ b/test/web/activity_pub/mrf/normalize_markup_test.exs @@ -20,11 +20,11 @@ defmodule Pleroma.Web.ActivityPub.MRF.NormalizeMarkupTest do      expected = """      <b>this is in bold</b>      <p>this is a paragraph</p> -    this is a linebreak<br /> -    this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a> -    this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a> -    this is an image: <img src="http://example.com/image.jpg" /><br /> -    alert('hacked') +    this is a linebreak<br/> +    this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a> +    this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a> +    this is an image: <img src="http://example.com/image.jpg"/><br/> +    alert('hacked')      """      message = %{"type" => "Create", "object" => %{"content" => @html_sample}} diff --git a/test/web/common_api/common_api_test.exs b/test/web/common_api/common_api_test.exs index 1d2f20617..212b00cbb 100644 --- a/test/web/common_api/common_api_test.exs +++ b/test/web/common_api/common_api_test.exs @@ -140,7 +140,7 @@ defmodule Pleroma.Web.CommonAPITest do        object = Object.normalize(activity) -      assert object.data["content"] == "<p><b>2hu</b></p>alert('xss')" +      assert object.data["content"] == "<p><b>2hu</b></p>alert('xss')"      end      test "it filters out obviously bad tags when accepting a post as Markdown" do @@ -156,7 +156,7 @@ defmodule Pleroma.Web.CommonAPITest do        object = Object.normalize(activity) -      assert object.data["content"] == "<p><b>2hu</b></p>alert('xss')" +      assert object.data["content"] == "<p><b>2hu</b></p>alert('xss')"      end      test "it does not allow replies to direct messages that are not direct messages themselves" do  | 
