From 097fdf6a5d1ecd373c911bda4a1d7ee3c873fa21 Mon Sep 17 00:00:00 2001 From: Mark Felder <feld@FreeBSD.org> Date: Wed, 12 Jun 2019 17:56:51 -0500 Subject: Attempt to use <title> from HTML as a fallback --- lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex index 4a7c5eae0..7da4e7561 100644 --- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -1,12 +1,14 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do with elements = [_ | _] <- get_elements(html, key_name, prefix), + page_title = get_page_title(html), meta_data = Enum.reduce(elements, data, fn el, acc -> attributes = normalize_attributes(el, prefix, key_name, value_name) Map.merge(acc, attributes) - end) do + end) + |> Map.put_new(:title, page_title) do {:ok, meta_data} else _e -> {:error, error_message} @@ -27,4 +29,8 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do %{String.to_atom(data[key_name]) => data[value_name]} end + + defp get_page_title(html) do + Floki.find(html, "title") |> Floki.text() + end end -- cgit v1.2.3 From 97d2b1a45ab12c530dd730518b9d8ca546bbc9f2 Mon Sep 17 00:00:00 2001 From: Mark Felder <feld@FreeBSD.org> Date: Wed, 12 Jun 2019 18:27:35 -0500 Subject: Only run Floki if title is missing from the map --- lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex index 7da4e7561..8c42557aa 100644 --- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -1,15 +1,14 @@ 
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do with elements = [_ | _] <- get_elements(html, key_name, prefix), - page_title = get_page_title(html), meta_data = Enum.reduce(elements, data, fn el, acc -> attributes = normalize_attributes(el, prefix, key_name, value_name) Map.merge(acc, attributes) - end) - |> Map.put_new(:title, page_title) do - {:ok, meta_data} + end) do + rich_meta_data = maybe_use_page_title(meta_data, html) + {:ok, rich_meta_data} else _e -> {:error, error_message} end @@ -30,7 +29,10 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do %{String.to_atom(data[key_name]) => data[value_name]} end - defp get_page_title(html) do - Floki.find(html, "title") |> Floki.text() + defp maybe_use_page_title(meta_data, html) do + if !Map.has_key?(meta_data, :title) do + page_title = Floki.find(html, "title") |> Floki.text() + Map.put_new(meta_data, :title, page_title) + end end end -- cgit v1.2.3 From 7363a0ea8aa5c034e0335e826c081f1166e71f92 Mon Sep 17 00:00:00 2001 From: Mark Felder <feld@FreeBSD.org> Date: Wed, 12 Jun 2019 18:32:28 -0500 Subject: Revert "Only run Floki if title is missing from the map" This reverts commit 97d2b1a45ab12c530dd730518b9d8ca546bbc9f2. 
--- lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex index 8c42557aa..7da4e7561 100644 --- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -1,14 +1,15 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do with elements = [_ | _] <- get_elements(html, key_name, prefix), + page_title = get_page_title(html), meta_data = Enum.reduce(elements, data, fn el, acc -> attributes = normalize_attributes(el, prefix, key_name, value_name) Map.merge(acc, attributes) - end) do - rich_meta_data = maybe_use_page_title(meta_data, html) - {:ok, rich_meta_data} + end) + |> Map.put_new(:title, page_title) do + {:ok, meta_data} else _e -> {:error, error_message} end @@ -29,10 +30,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do %{String.to_atom(data[key_name]) => data[value_name]} end - defp maybe_use_page_title(meta_data, html) do - if !Map.has_key?(meta_data, :title) do - page_title = Floki.find(html, "title") |> Floki.text() - Map.put_new(meta_data, :title, page_title) - end + defp get_page_title(html) do + Floki.find(html, "title") |> Floki.text() end end -- cgit v1.2.3 From a12f8e13c8f3cd176989c28810ff578bf7c09c69 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn <egor@kislitsyn.com> Date: Thu, 13 Jun 2019 15:02:46 +0700 Subject: Improve <title> fallback; Add a test --- .../web/rich_media/parsers/meta_tags_parser.ex | 33 ++++++++++++++-------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex index 7da4e7561..82f1cce29 100644 --- 
a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -1,17 +1,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do - with elements = [_ | _] <- get_elements(html, key_name, prefix), - page_title = get_page_title(html), - meta_data = - Enum.reduce(elements, data, fn el, acc -> - attributes = normalize_attributes(el, prefix, key_name, value_name) - - Map.merge(acc, attributes) - end) - |> Map.put_new(:title, page_title) do - {:ok, meta_data} + meta_data = + html + |> get_elements(key_name, prefix) + |> Enum.reduce(data, fn el, acc -> + attributes = normalize_attributes(el, prefix, key_name, value_name) + + Map.merge(acc, attributes) + end) + |> maybe_put_title(html) + + if Enum.empty?(meta_data) do + {:error, error_message} else - _e -> {:error, error_message} + {:ok, meta_data} end end @@ -30,6 +32,15 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do %{String.to_atom(data[key_name]) => data[value_name]} end + defp maybe_put_title(%{title: _} = meta, _), do: meta + + defp maybe_put_title(meta, html) do + case get_page_title(html) do + "" -> meta + title -> Map.put_new(meta, :title, title) + end + end + defp get_page_title(html) do Floki.find(html, "title") |> Floki.text() end -- cgit v1.2.3