summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG.md | 1
-rw-r--r-- config/config.exs | 1
-rw-r--r-- config/description.exs | 2
-rw-r--r-- lib/pleroma/web/rich_media/parser.ex | 4
-rw-r--r-- lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex | 25
-rw-r--r-- lib/pleroma/web/rich_media/parsers/oembed_parser.ex | 4
-rw-r--r-- lib/pleroma/web/rich_media/parsers/ogp.ex | 11
-rw-r--r-- lib/pleroma/web/rich_media/parsers/twitter_card.ex | 15
-rw-r--r-- test/web/rich_media/parsers/twitter_card_test.exs | 130
9 files changed, 93 insertions, 100 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3f2dd10f..d2629bf84 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed
- MFR policy to set global expiration for all local Create activities
+- OGP rich media parser merged with TwitterCard
<details>
<summary>API Changes</summary>
- **Breaking:** Emoji API: changed methods and renamed routes.
diff --git a/config/config.exs b/config/config.exs
index 7a70164f3..6a7bb9e06 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -387,7 +387,6 @@ config :pleroma, :rich_media,
ignore_tld: ["local", "localdomain", "lan"],
parsers: [
Pleroma.Web.RichMedia.Parsers.TwitterCard,
- Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
],
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
diff --git a/config/description.exs b/config/description.exs
index 2f1eaf5f2..b21d7840c 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -2104,9 +2104,7 @@ config :pleroma, :config_description, [
description:
"List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
suggestions: [
- Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
Pleroma.Web.RichMedia.Parsers.OEmbed,
- Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.TwitterCard
]
},
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index d9b5068b1..ef5ead2da 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -105,8 +105,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
- {:ok, data} -> {:halt, data}
- {:error, _msg} -> {:cont, acc}
+ data when data != %{} -> {:halt, data}
+ _ -> {:cont, acc}
end
end)
end
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
index 2762b5902..3d577e254 100644
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -3,22 +3,15 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
- def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
- meta_data =
- html
- |> get_elements(key_name, prefix)
- |> Enum.reduce(data, fn el, acc ->
- attributes = normalize_attributes(el, prefix, key_name, value_name)
-
- Map.merge(acc, attributes)
- end)
- |> maybe_put_title(html)
-
- if Enum.empty?(meta_data) do
- {:error, error_message}
- else
- {:ok, meta_data}
- end
+ def parse(data, html, prefix, key_name, value_name \\ "content") do
+ html
+ |> get_elements(key_name, prefix)
+ |> Enum.reduce(data, fn el, acc ->
+ attributes = normalize_attributes(el, prefix, key_name, value_name)
+
+ Map.merge(acc, attributes)
+ end)
+ |> maybe_put_title(html)
end
defp get_elements(html, key_name, prefix) do
diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
index db8ccf15d..6bdeac89c 100644
--- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
with elements = [_ | _] <- get_discovery_data(html),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
- {:ok, oembed_data}
+ oembed_data
else
- _e -> {:error, "No OEmbed data found"}
+ _e -> %{}
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex
index 3e9012588..b3b3b059c 100644
--- a/lib/pleroma/web/rich_media/parsers/ogp.ex
+++ b/lib/pleroma/web/rich_media/parsers/ogp.ex
@@ -3,13 +3,8 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
- def parse(html, data) do
- Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
- html,
- data,
- "og",
- "No OGP metadata found",
- "property"
- )
+ @deprecated "OGP parser is deprecated. Use TwitterCard instead."
+ def parse(_html, _data) do
+ %{}
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
index 09d4b526e..4a04865d2 100644
--- a/lib/pleroma/web/rich_media/parsers/twitter_card.ex
+++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@@ -5,18 +5,11 @@
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
- @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
+ @spec parse(list(), map()) :: map()
def parse(html, data) do
data
- |> parse_name_attrs(html)
- |> parse_property_attrs(html)
- end
-
- defp parse_name_attrs(data, html) do
- MetaTagsParser.parse(html, data, "twitter", %{}, "name")
- end
-
- defp parse_property_attrs({_, data}, html) do
- MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
+ |> MetaTagsParser.parse(html, "og", "property")
+ |> MetaTagsParser.parse(html, "twitter", "name")
+ |> MetaTagsParser.parse(html, "twitter", "property")
end
end
diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs
index 847623535..219f005a2 100644
--- a/test/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/web/rich_media/parsers/twitter_card_test.exs
@@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
test "returns error when html not contains twitter card" do
- assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
- {:error, "No twitter card metadata found"}
+ assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
end
test "parses twitter card with only name attributes" do
@@ -17,15 +16,21 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
- }}
+ %{
+ "app:id:googleplay" => "com.nytimes.android",
+ "app:name:googleplay" => "NYTimes",
+ "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "site" => nil,
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "type" => "article",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
+ }
end
test "parses twitter card with only property attributes" do
@@ -34,19 +39,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "card" => "summary_large_image",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "image:alt" => "",
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- }}
+ %{
+ "card" => "summary_large_image",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "image:alt" => "",
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "type" => "article"
+ }
end
test "parses twitter card with name & property attributes" do
@@ -55,23 +60,23 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "card" => "summary_large_image",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "image:alt" => "",
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- }}
+ %{
+ "app:id:googleplay" => "com.nytimes.android",
+ "app:name:googleplay" => "NYTimes",
+ "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "card" => "summary_large_image",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "image:alt" => "",
+ "site" => nil,
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "type" => "article"
+ }
end
test "respect only first title tag on the page" do
@@ -84,14 +89,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "site" => "@atlasobscura",
- "title" =>
- "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
- "card" => "summary_large_image",
- "image" => image_path
- }}
+ %{
+ "site" => "@atlasobscura",
+ "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
+ "card" => "summary_large_image",
+ "image" => image_path,
+ "description" =>
+ "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
+ "site_name" => "Atlas Obscura",
+ "type" => "article",
+ "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
+ }
end
test "takes first founded title in html head if there is html markup error" do
@@ -100,14 +108,20 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622"
- }}
+ %{
+ "site" => nil,
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "app:id:googleplay" => "com.nytimes.android",
+ "app:name:googleplay" => "NYTimes",
+ "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "type" => "article",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+ }
end
end