diff options
Diffstat (limited to 'priv/scrubbers')
-rw-r--r-- | priv/scrubbers/default.ex | 109 | ||||
-rw-r--r-- | priv/scrubbers/search_indexing.ex | 24 | ||||
-rw-r--r-- | priv/scrubbers/twitter_text.ex | 3 |
3 files changed, 92 insertions, 44 deletions
diff --git a/priv/scrubbers/default.ex b/priv/scrubbers/default.ex index 79fa6dcdf..a75a6465d 100644 --- a/priv/scrubbers/default.ex +++ b/priv/scrubbers/default.ex @@ -33,72 +33,95 @@ defmodule Pleroma.HTML.Scrubber.Default do "ugc" ]) - Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) - - Meta.allow_tag_with_these_attributes(:abbr, ["title"]) - - Meta.allow_tag_with_these_attributes(:b, []) - Meta.allow_tag_with_these_attributes(:blockquote, []) - Meta.allow_tag_with_these_attributes(:br, []) - Meta.allow_tag_with_these_attributes(:code, []) - Meta.allow_tag_with_these_attributes(:del, []) - Meta.allow_tag_with_these_attributes(:em, []) - Meta.allow_tag_with_these_attributes(:hr, []) - Meta.allow_tag_with_these_attributes(:i, []) - Meta.allow_tag_with_these_attributes(:li, []) - Meta.allow_tag_with_these_attributes(:ol, []) - Meta.allow_tag_with_these_attributes(:p, []) - Meta.allow_tag_with_these_attributes(:pre, []) - Meta.allow_tag_with_these_attributes(:strong, []) - Meta.allow_tag_with_these_attributes(:sub, []) - Meta.allow_tag_with_these_attributes(:sup, []) - Meta.allow_tag_with_these_attributes(:ruby, []) - Meta.allow_tag_with_these_attributes(:rb, []) - Meta.allow_tag_with_these_attributes(:rp, []) - Meta.allow_tag_with_these_attributes(:rt, []) - Meta.allow_tag_with_these_attributes(:rtc, []) - Meta.allow_tag_with_these_attributes(:u, []) - Meta.allow_tag_with_these_attributes(:ul, []) - - Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card", "recipients-inline"]) - Meta.allow_tag_with_these_attributes(:span, []) + Meta.allow_tag_with_these_attributes(:a, ["name", "title", "lang"]) + + Meta.allow_tag_with_these_attributes(:abbr, ["title", "lang"]) + Meta.allow_tag_with_these_attributes(:acronym, ["title", "lang"]) + + # sort(1)-ed list + Meta.allow_tag_with_these_attributes(:bdi, []) + Meta.allow_tag_with_these_attributes(:bdo, ["dir"]) + Meta.allow_tag_with_these_attributes(:big, ["lang"]) + Meta.allow_tag_with_these_attributes(:b, ["lang"]) + Meta.allow_tag_with_these_attributes(:blockquote, ["lang"]) + Meta.allow_tag_with_these_attributes(:br, ["lang"]) + Meta.allow_tag_with_these_attributes(:cite, ["lang"]) + Meta.allow_tag_with_these_attributes(:code, ["lang"]) + Meta.allow_tag_with_these_attributes(:del, ["lang"]) + Meta.allow_tag_with_these_attributes(:dfn, ["lang"]) + Meta.allow_tag_with_these_attributes(:em, ["lang"]) + Meta.allow_tag_with_these_attributes(:hr, ["lang"]) + Meta.allow_tag_with_these_attributes(:i, ["lang"]) + Meta.allow_tag_with_these_attributes(:ins, ["lang"]) + Meta.allow_tag_with_these_attributes(:kbd, ["lang"]) + Meta.allow_tag_with_these_attributes(:li, ["lang"]) + Meta.allow_tag_with_these_attributes(:ol, ["lang"]) + Meta.allow_tag_with_these_attributes(:p, ["lang"]) + Meta.allow_tag_with_these_attributes(:pre, ["lang"]) + Meta.allow_tag_with_these_attributes(:q, ["lang"]) + Meta.allow_tag_with_these_attributes(:rb, ["lang"]) + Meta.allow_tag_with_these_attributes(:rp, ["lang"]) + Meta.allow_tag_with_these_attributes(:rtc, ["lang"]) + Meta.allow_tag_with_these_attributes(:rt, ["lang"]) + Meta.allow_tag_with_these_attributes(:ruby, ["lang"]) + Meta.allow_tag_with_these_attributes(:samp, ["lang"]) + Meta.allow_tag_with_these_attributes(:s, ["lang"]) + Meta.allow_tag_with_these_attributes(:small, ["lang"]) + Meta.allow_tag_with_these_attributes(:strong, ["lang"]) + Meta.allow_tag_with_these_attributes(:sub, ["lang"]) + Meta.allow_tag_with_these_attributes(:sup, ["lang"]) + Meta.allow_tag_with_these_attributes(:tt, ["lang"]) + Meta.allow_tag_with_these_attributes(:u, ["lang"]) + Meta.allow_tag_with_these_attributes(:ul, ["lang"]) + Meta.allow_tag_with_these_attributes(:var, ["lang"]) + Meta.allow_tag_with_these_attributes(:wbr, ["lang"]) + + Meta.allow_tag_with_this_attribute_values(:span, "class", [ + "h-card", + "recipients-inline", + "quote-inline" + ]) + + Meta.allow_tag_with_these_attributes(:span, ["lang"]) Meta.allow_tag_with_this_attribute_values(:code, "class", ["inline"]) @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images]) if @allow_inline_images do + Meta.allow_tag_with_this_attribute_values(:img, "class", ["emoji"]) + # restrict img tags to http/https only, because of MediaProxy. Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) Meta.allow_tag_with_these_attributes(:img, [ "width", "height", - "class", "title", - "alt" + "alt", + "lang" ]) end if Pleroma.Config.get([:markup, :allow_tables]) do - Meta.allow_tag_with_these_attributes(:table, []) - Meta.allow_tag_with_these_attributes(:tbody, []) - Meta.allow_tag_with_these_attributes(:td, []) - Meta.allow_tag_with_these_attributes(:th, []) - Meta.allow_tag_with_these_attributes(:thead, []) - Meta.allow_tag_with_these_attributes(:tr, []) + Meta.allow_tag_with_these_attributes(:table, ["lang"]) + Meta.allow_tag_with_these_attributes(:tbody, ["lang"]) + Meta.allow_tag_with_these_attributes(:td, ["lang"]) + Meta.allow_tag_with_these_attributes(:th, ["lang"]) + Meta.allow_tag_with_these_attributes(:thead, ["lang"]) + Meta.allow_tag_with_these_attributes(:tr, ["lang"]) end if Pleroma.Config.get([:markup, :allow_headings]) do - Meta.allow_tag_with_these_attributes(:h1, []) - Meta.allow_tag_with_these_attributes(:h2, []) - Meta.allow_tag_with_these_attributes(:h3, []) - Meta.allow_tag_with_these_attributes(:h4, []) - Meta.allow_tag_with_these_attributes(:h5, []) + Meta.allow_tag_with_these_attributes(:h1, ["lang"]) + Meta.allow_tag_with_these_attributes(:h2, ["lang"]) + Meta.allow_tag_with_these_attributes(:h3, ["lang"]) + Meta.allow_tag_with_these_attributes(:h4, ["lang"]) + Meta.allow_tag_with_these_attributes(:h5, ["lang"]) end if Pleroma.Config.get([:markup, :allow_fonts]) do - Meta.allow_tag_with_these_attributes(:font, ["face"]) + Meta.allow_tag_with_these_attributes(:font, ["face", "lang"]) end Meta.strip_everything_not_covered() diff --git a/priv/scrubbers/search_indexing.ex b/priv/scrubbers/search_indexing.ex new file mode 100644 index 000000000..02756ab79 --- /dev/null +++ b/priv/scrubbers/search_indexing.ex @@ -0,0 +1,24 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.HTML.Scrubber.SearchIndexing do + @moduledoc """ + An HTML scrubbing policy that scrubs things for searching. + """ + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + # Explicitly remove mentions + def scrub({:a, attrs, children}) do + if(Enum.any?(attrs, fn {att, val} -> att == "class" and String.contains?(val, "mention") end), + do: nil, + # Strip the tag itself, leave only children (text, presumably) + else: children + ) + end + + Meta.strip_comments() + Meta.strip_everything_not_covered() +end diff --git a/priv/scrubbers/twitter_text.ex b/priv/scrubbers/twitter_text.ex index a121a8209..6e23b3efb 100644 --- a/priv/scrubbers/twitter_text.ex +++ b/priv/scrubbers/twitter_text.ex @@ -45,13 +45,14 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do # allow inline images for custom emoji if Pleroma.Config.get([:markup, :allow_inline_images]) do + Meta.allow_tag_with_this_attribute_values(:img, "class", ["emoji"]) + # restrict img tags to http/https only, because of MediaProxy. Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) Meta.allow_tag_with_these_attributes(:img, [ "width", "height", - "class", "title", "alt" ]) |