From 3623504e5d7b4dd6dd250151685343109de1e889 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Mon, 18 Jun 2018 12:45:15 +0200 Subject: [Pleroma.Formatter]: Add support for non-HTTP schemes in URIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to the regex in add_links is there just to be sure it’s a legal URI; it can be removed if you want to get more performance. The URI Schemes list is sorted, but with http(s) at the start (in case it might make it faster for common links). Closes: https://git.pleroma.social/pleroma/pleroma/issues/127 --- lib/pleroma/formatter.ex | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex index 0aaf21538..fe3da09ac 100644 --- a/lib/pleroma/formatter.ex +++ b/lib/pleroma/formatter.ex @@ -165,8 +165,29 @@ defmodule Pleroma.Formatter do @emoji end - @link_regex ~r/https?:\/\/[\w\.\/?=\-#\+%&@~'\(\):]+[\w\/]/u + @link_regex ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui + + # IANA got a list https://www.iana.org/assignments/uri-schemes/ but + # Stuff like ipfs isn’t in it + # There is very niche stuff + @uri_schemes [ + "https://", + "http://", + "dat://", + "dweb://", + "gopher://", + "ipfs://", + "ipns://", + "irc:", + "ircs:", + "magnet:", + "mailto:", + "mumble:", + "ssb://", + "xmpp:" + ] + # TODO: make it use something other than @link_regex def html_escape(text) do Regex.split(@link_regex, text, include_captures: true) |> Enum.map_every(2, fn chunk -> @@ -176,11 +197,14 @@ defmodule Pleroma.Formatter do |> Enum.join("") end - @doc "changes http:... links to html links" + @doc "changes scheme:... urls to html links" def add_links({subs, text}) do links = - Regex.scan(@link_regex, text) - |> Enum.map(fn [url] -> {Ecto.UUID.generate(), url} end) + text + |> String.split([" ", "\t", "
"]) + |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes) end) + |> Enum.filter(fn word -> Regex.match?(@link_regex, word) end) + |> Enum.map(fn url -> {Ecto.UUID.generate(), url} end) |> Enum.sort_by(fn {_, url} -> -String.length(url) end) uuid_text = -- cgit v1.2.3 From d5091c3175786e5bcb0449f26cafe1795fd5f5d9 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Mon, 30 Jul 2018 21:59:04 +0200 Subject: Allow additional schemes in the config --- lib/pleroma/formatter.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex index fe3da09ac..e15c08fd6 100644 --- a/lib/pleroma/formatter.ex +++ b/lib/pleroma/formatter.ex @@ -199,10 +199,14 @@ defmodule Pleroma.Formatter do @doc "changes scheme:... urls to html links" def add_links({subs, text}) do + additionnal_schemes = + Application.get_env(:pleroma, :uri_schemes, []) + |> Keyword.get(:additionnal_schemes, []) + links = text |> String.split([" ", "\t", "
"]) - |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes) end) + |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes ++ additionnal_schemes) end) |> Enum.filter(fn word -> Regex.match?(@link_regex, word) end) |> Enum.map(fn url -> {Ecto.UUID.generate(), url} end) |> Enum.sort_by(fn {_, url} -> -String.length(url) end) -- cgit v1.2.3