From 01d396585e428ea1ca7e21868d7303a0bd8ffd6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Mon, 25 Jul 2022 16:20:12 +0200 Subject: Emoji: implement full-qualifier using combinations This implements fully_qualify_emoji/1, which will return the fully-qualified version of an emoji if it knows of one, or return the emoji unmodified if not. This code generates combinations per emoji: for each FE0F, all possible combinations of the character being removed or staying will be generated. This is made as an attempt to find all partially-qualified and unqualified versions of a fully-qualified emoji. I have found *no cases* for which this would be a problem, after browsing the entire emoji list in emoji-test.txt. This is safe, and, sadly, most likely the sanest too. --- lib/pleroma/emoji.ex | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'lib') diff --git a/lib/pleroma/emoji.ex b/lib/pleroma/emoji.ex index 35f0da816..3726ef185 100644 --- a/lib/pleroma/emoji.ex +++ b/lib/pleroma/emoji.ex @@ -137,4 +137,49 @@ defmodule Pleroma.Emoji do end def is_unicode_emoji?(_), do: false + + # FE0F is the emoji variation sequence. It is used for fully-qualifying + # emoji, and that includes emoji combinations. + # This code generates combinations per emoji: for each FE0F, all possible + # combinations of the character being removed or staying will be generated. + # This is made as an attempt to find all partially-qualified and unqualified + # versions of a fully-qualified emoji. + # I have found *no cases* for which this would be a problem, after browsing + # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most + # likely sane too. + emoji_qualification_map = + emojis + |> Enum.filter(&String.contains?(&1, "\uFE0F")) + |> Enum.map(fn emoji -> + combinate = fn x, combinate -> + case x do + [] -> + [[]] + + ["\uFE0F" | tail] -> + combinate.(tail, combinate) + |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) + + [codepoint | tail] -> + combinate.(tail, combinate) + |> Enum.map(fn x -> [codepoint | x] end) + end + end + + unqualified_list = + emoji + |> String.codepoints() + |> combinate.(combinate) + |> Enum.map(&List.to_string/1) + + {emoji, unqualified_list} + end) + + for {qualified, unqualified_list} <- emoji_qualification_map do + for unqualified <- unqualified_list do + def fully_qualify_emoji(unquote(unqualified)), do: unquote(qualified) + end + end + + def fully_qualify_emoji(emoji), do: emoji end -- cgit v1.2.3 From fb3f6e1975fc44414af66377061bf30ceee9f9b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Mon, 25 Jul 2022 16:49:23 +0200 Subject: EmojiReactValidator: use new qualification method --- .../web/activity_pub/object_validators/emoji_react_validator.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex b/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex index 2eb4f6842..0858281e5 100644 --- a/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex @@ -63,8 +63,7 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.EmojiReactValidator do end defp fix_emoji_qualification(%{"content" => emoji} = data) do - # Emoji variation sequence - new_emoji = emoji <> "\uFE0F" + new_emoji = Pleroma.Emoji.fully_qualify_emoji(emoji) cond do Pleroma.Emoji.is_unicode_emoji?(emoji) -> -- cgit v1.2.3 From b99f5d61834ffd86f9e8aeca2b00c704f0a0467e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Tue, 26 Jul 2022 01:38:59 +0200 Subject: Emoji: split qualification variation into a module --- lib/pleroma/emoji.ex | 35 ++------------------------------- lib/pleroma/emoji/combinations.ex | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 lib/pleroma/emoji/combinations.ex (limited to 'lib') diff --git a/lib/pleroma/emoji.ex b/lib/pleroma/emoji.ex index 3726ef185..dd65d56ae 100644 --- a/lib/pleroma/emoji.ex +++ b/lib/pleroma/emoji.ex @@ -9,6 +9,7 @@ defmodule Pleroma.Emoji do """ use GenServer + alias Pleroma.Emoji.Combinations alias Pleroma.Emoji.Loader require Logger @@ -138,42 +139,10 @@ defmodule Pleroma.Emoji do def is_unicode_emoji?(_), do: false - # FE0F is the emoji variation sequence. It is used for fully-qualifying - # emoji, and that includes emoji combinations. - # This code generates combinations per emoji: for each FE0F, all possible - # combinations of the character being removed or staying will be generated. - # This is made as an attempt to find all partially-qualified and unqualified - # versions of a fully-qualified emoji. - # I have found *no cases* for which this would be a problem, after browsing - # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most - # likely sane too. emoji_qualification_map = emojis |> Enum.filter(&String.contains?(&1, "\uFE0F")) - |> Enum.map(fn emoji -> - combinate = fn x, combinate -> - case x do - [] -> - [[]] - - ["\uFE0F" | tail] -> - combinate.(tail, combinate) - |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) - - [codepoint | tail] -> - combinate.(tail, combinate) - |> Enum.map(fn x -> [codepoint | x] end) - end - end - - unqualified_list = - emoji - |> String.codepoints() - |> combinate.(combinate) - |> Enum.map(&List.to_string/1) - - {emoji, unqualified_list} - end) + |> Combinations.variate_emoji_qualification() for {qualified, unqualified_list} <- emoji_qualification_map do for unqualified <- unqualified_list do diff --git a/lib/pleroma/emoji/combinations.ex b/lib/pleroma/emoji/combinations.ex new file mode 100644 index 000000000..c49466406 --- /dev/null +++ b/lib/pleroma/emoji/combinations.ex @@ -0,0 +1,41 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Emoji.Combinations do + # FE0F is the emoji variation sequence. It is used for fully-qualifying + # emoji, and that includes emoji combinations. + # This code generates combinations per emoji: for each FE0F, all possible + # combinations of the character being removed or staying will be generated. + # This is made as an attempt to find all partially-qualified and unqualified + # versions of a fully-qualified emoji. + # I have found *no cases* for which this would be a problem, after browsing + # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most + # likely sane too. + + defp qualification_combinations([]), do: [[]] + + defp qualification_combinations(["\uFE0F" | tail]) do + tail + |> qualification_combinations() + |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) + end + + defp qualification_combinations([codepoint | tail]) do + tail + |> qualification_combinations() + |> Enum.map(fn x -> [codepoint | x] end) + end + + def variate_emoji_qualification(emoji) when is_binary(emoji) do + emoji + |> String.codepoints() + |> qualification_combinations() + |> Enum.map(&List.to_string/1) + end + + def variate_emoji_qualification(emoji) when is_list(emoji) do + emoji + |> Enum.map(fn emoji -> {emoji, variate_emoji_qualification(emoji)} end) + end +end -- cgit v1.2.3 From 7167de592e3523459a1eb65d902085e828f962b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Tue, 26 Jul 2022 23:15:09 +0200 Subject: Emoji: apply recommended tail call changes Behavior matches previous code. Co-authored-by: Tusooa Zhu --- lib/pleroma/emoji/combinations.ex | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/emoji/combinations.ex b/lib/pleroma/emoji/combinations.ex index c49466406..981c73596 100644 --- a/lib/pleroma/emoji/combinations.ex +++ b/lib/pleroma/emoji/combinations.ex @@ -13,18 +13,22 @@ defmodule Pleroma.Emoji.Combinations do # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most # likely sane too. - defp qualification_combinations([]), do: [[]] + defp qualification_combinations(codepoints) do + qualification_combinations([[]], codepoints) + end - defp qualification_combinations(["\uFE0F" | tail]) do - tail - |> qualification_combinations() - |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) + defp qualification_combinations(acc, []), do: acc + + defp qualification_combinations(acc, ["\uFE0F" | tail]) do + acc + |> Enum.flat_map(fn x -> [x, x ++ ["\uFE0F"]] end) + |> qualification_combinations(tail) end - defp qualification_combinations([codepoint | tail]) do - tail - |> qualification_combinations() - |> Enum.map(fn x -> [codepoint | x] end) + defp qualification_combinations(acc, [codepoint | tail]) do + acc + |> Enum.map(&Kernel.++(&1, [codepoint])) + |> qualification_combinations(tail) end def variate_emoji_qualification(emoji) when is_binary(emoji) do -- cgit v1.2.3