From 2aab4e03c3a2867abd4555dc776eebc8b0dba176 Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Tue, 1 Jan 2019 23:26:40 +0300 Subject: Add OGP parser --- lib/pleroma/web/rich_media/data.ex | 3 +++ lib/pleroma/web/rich_media/parser.ex | 14 ++++++++++++++ lib/pleroma/web/rich_media/parsers/ogp.ex | 30 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 lib/pleroma/web/rich_media/data.ex create mode 100644 lib/pleroma/web/rich_media/parser.ex create mode 100644 lib/pleroma/web/rich_media/parsers/ogp.ex (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/data.ex b/lib/pleroma/web/rich_media/data.ex new file mode 100644 index 000000000..403d1d341 --- /dev/null +++ b/lib/pleroma/web/rich_media/data.ex @@ -0,0 +1,3 @@ +defmodule Pleroma.Web.RichMedia.Data do + defstruct [:title, :type, :image, :url, :description] +end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex new file mode 100644 index 000000000..d9c1684d5 --- /dev/null +++ b/lib/pleroma/web/rich_media/parser.ex @@ -0,0 +1,14 @@ +defmodule Pleroma.Web.RichMedia.Parser do + @parsers [Pleroma.Web.RichMedia.Parsers.OGP] + + def parse(url) do + {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) + + Enum.reduce_while(@parsers, %Pleroma.Web.RichMedia.Data{}, fn parser, acc -> + case parser.parse(html, acc) do + {:ok, data} -> {:halt, data} + {:error, _msg} -> {:cont, acc} + end + end) + end +end diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex new file mode 100644 index 000000000..75084c7ee --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -0,0 +1,30 @@ +defmodule Pleroma.Web.RichMedia.Parsers.OGP do + def parse(html, data) do + with elements = [_ | _] <- get_elements(html), + ogp_data = + Enum.reduce(elements, data, fn el, acc -> + attributes = normalize_attributes(el) + + Map.merge(acc, attributes) + end) do + {:ok, ogp_data} + else + _e -> {:error, "No OGP metadata found"} + end + end + + defp get_elements(html) do + html |> Floki.find("meta[property^='og:']") + end + + defp normalize_attributes(tuple) do + {_tag, attributes, _children} = tuple + + data = + Enum.into(attributes, %{}, fn {name, value} -> + {name, String.trim_leading(value, "og:")} + end) + + %{String.to_atom(data["property"]) => data["content"]} + end +end -- cgit v1.2.3 From 917d48d09bad573260bc816310ee2c75d4db84a8 Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Tue, 1 Jan 2019 23:29:47 +0300 Subject: Better variable name --- lib/pleroma/web/rich_media/parsers/ogp.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex index 75084c7ee..5773a5263 100644 --- a/lib/pleroma/web/rich_media/parsers/ogp.ex +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -17,8 +17,8 @@ defmodule Pleroma.Web.RichMedia.Parsers.OGP do html |> Floki.find("meta[property^='og:']") end - defp normalize_attributes(tuple) do - {_tag, attributes, _children} = tuple + defp normalize_attributes(html_node) do + {_tag, attributes, _children} = html_node data = Enum.into(attributes, %{}, fn {name, value} -> -- cgit v1.2.3 From 48e81d3d40d334bccb8438c61ab6b307ddb1392f Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Wed, 2 Jan 2019 17:02:50 +0300 Subject: Add RichMediaController and tests --- .../web/rich_media/controllers/rich_media_controller.ex | 17 +++++++++++++++++ lib/pleroma/web/rich_media/data.ex | 3 --- lib/pleroma/web/rich_media/parser.ex | 14 +++++++++++++- lib/pleroma/web/router.ex | 6 ++++++ 4 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 lib/pleroma/web/rich_media/controllers/rich_media_controller.ex delete mode 100644 lib/pleroma/web/rich_media/data.ex (limited to 'lib') diff --git a/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex b/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex new file mode 100644 index 000000000..91019961d --- /dev/null +++ b/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Web.RichMedia.RichMediaController do + use Pleroma.Web, :controller + + import Pleroma.Web.ControllerHelper, only: [json_response: 3] + + def parse(conn, %{"url" => url}) do + case Pleroma.Web.RichMedia.Parser.parse(url) do + {:ok, data} -> + conn + |> json_response(200, data) + + {:error, msg} -> + conn + |> json_response(404, msg) + end + end +end diff --git a/lib/pleroma/web/rich_media/data.ex b/lib/pleroma/web/rich_media/data.ex deleted file mode 100644 index 403d1d341..000000000 --- a/lib/pleroma/web/rich_media/data.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule Pleroma.Web.RichMedia.Data do - defstruct [:title, :type, :image, :url, :description] -end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index d9c1684d5..477a38196 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -4,11 +4,23 @@ defmodule Pleroma.Web.RichMedia.Parser do def parse(url) do {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) - Enum.reduce_while(@parsers, %Pleroma.Web.RichMedia.Data{}, fn parser, acc -> + html |> maybe_parse() |> get_parsed_data() + end + + defp maybe_parse(html) do + Enum.reduce_while(@parsers, %{}, fn parser, acc -> case parser.parse(html, acc) do {:ok, data} -> {:halt, data} {:error, _msg} -> {:cont, acc} end end) end + + defp get_parsed_data(data) when data == %{} do + {:error, "No metadata found"} + end + + defp get_parsed_data(data) do + {:ok, data} + end end diff --git a/lib/pleroma/web/router.ex b/lib/pleroma/web/router.ex index 1f929ee21..8df45bf4d 100644 --- a/lib/pleroma/web/router.ex +++ b/lib/pleroma/web/router.ex @@ -232,6 +232,12 @@ defmodule Pleroma.Web.Router do put("/settings", MastodonAPIController, :put_settings) end + scope "/api", Pleroma.Web.RichMedia do + pipe_through(:authenticated_api) + + get("/rich_media/parse", RichMediaController, :parse) + end + scope "/api/v1", Pleroma.Web.MastodonAPI do pipe_through(:api) get("/instance", MastodonAPIController, :masto_instance) -- cgit v1.2.3