diff options
| -rw-r--r-- | lib/pleroma/web/rich_media/controllers/rich_media_controller.ex | 17 | ||||
| -rw-r--r-- | lib/pleroma/web/rich_media/parser.ex | 26 | ||||
| -rw-r--r-- | lib/pleroma/web/rich_media/parsers/ogp.ex | 30 | ||||
| -rw-r--r-- | lib/pleroma/web/router.ex | 6 | ||||
| -rw-r--r-- | mix.exs | 3 | ||||
| -rw-r--r-- | mix.lock | 2 | ||||
| -rw-r--r-- | test/fixtures/rich_media/ogp.html | 9 | ||||
| -rw-r--r-- | test/web/rich_media/controllers/rich_media_controller_test.exs | 54 | ||||
| -rw-r--r-- | test/web/rich_media/parser_test.exs | 33 | 
9 files changed, 179 insertions, 1 deletions
| diff --git a/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex b/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex new file mode 100644 index 000000000..91019961d --- /dev/null +++ b/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Web.RichMedia.RichMediaController do +  use Pleroma.Web, :controller + +  import Pleroma.Web.ControllerHelper, only: [json_response: 3] + +  def parse(conn, %{"url" => url}) do +    case Pleroma.Web.RichMedia.Parser.parse(url) do +      {:ok, data} -> +        conn +        |> json_response(200, data) + +      {:error, msg} -> +        conn +        |> json_response(404, msg) +    end +  end +end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex new file mode 100644 index 000000000..477a38196 --- /dev/null +++ b/lib/pleroma/web/rich_media/parser.ex @@ -0,0 +1,26 @@ +defmodule Pleroma.Web.RichMedia.Parser do +  @parsers [Pleroma.Web.RichMedia.Parsers.OGP] + +  def parse(url) do +    {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) + +    html |> maybe_parse() |> get_parsed_data() +  end + +  defp maybe_parse(html) do +    Enum.reduce_while(@parsers, %{}, fn parser, acc -> +      case parser.parse(html, acc) do +        {:ok, data} -> {:halt, data} +        {:error, _msg} -> {:cont, acc} +      end +    end) +  end + +  defp get_parsed_data(data) when data == %{} do +    {:error, "No metadata found"} +  end + +  defp get_parsed_data(data) do +    {:ok, data} +  end +end diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex new file mode 100644 index 000000000..5773a5263 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -0,0 +1,30 @@ +defmodule Pleroma.Web.RichMedia.Parsers.OGP do +  def parse(html, data) do +    with elements = [_ | _] <- get_elements(html), +         ogp_data = +           Enum.reduce(elements, data, fn el, acc -> +             attributes = normalize_attributes(el) + +             Map.merge(acc, attributes) +           end) do +      {:ok, ogp_data} +    else +      _e -> {:error, "No OGP metadata found"} +    end +  end + +  defp get_elements(html) do +    html |> Floki.find("meta[property^='og:']") +  end + +  defp normalize_attributes(html_node) do +    {_tag, attributes, _children} = html_node + +    data = +      Enum.into(attributes, %{}, fn {name, value} -> +        {name, String.trim_leading(value, "og:")} +      end) + +    %{String.to_atom(data["property"]) => data["content"]} +  end +end diff --git a/lib/pleroma/web/router.ex b/lib/pleroma/web/router.ex index 1f929ee21..8df45bf4d 100644 --- a/lib/pleroma/web/router.ex +++ b/lib/pleroma/web/router.ex @@ -232,6 +232,12 @@ defmodule Pleroma.Web.Router do      put("/settings", MastodonAPIController, :put_settings)    end +  scope "/api", Pleroma.Web.RichMedia do +    pipe_through(:authenticated_api) + +    get("/rich_media/parse", RichMediaController, :parse) +  end +    scope "/api/v1", Pleroma.Web.MastodonAPI do      pipe_through(:api)      get("/instance", MastodonAPIController, :masto_instance) @@ -75,7 +75,8 @@ defmodule Pleroma.Mixfile do        {:web_push_encryption, "~> 0.2.1"},        {:swoosh, "~> 0.20"},        {:gen_smtp, "~> 0.13"}, -      {:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test} +      {:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test}, +      {:floki, "~> 0.20.0"}      ]    end @@ -20,9 +20,11 @@    "ex_aws_s3": {:hex, :ex_aws_s3, "2.0.1", "9e09366e77f25d3d88c5393824e613344631be8db0d1839faca49686e99b6704", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm"},    "ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},    "ex_machina": {:hex, :ex_machina, "2.2.0", "fec496331e04fc2db2a1a24fe317c12c0c4a50d2beb8ebb3531ed1f0d84be0ed", [:mix], [{:ecto, "~> 2.1", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"}, +  "floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},    "gen_smtp": {:hex, :gen_smtp, "0.13.0", "11f08504c4bdd831dc520b8f84a1dce5ce624474a797394e7aafd3c29f5dcd25", [:rebar3], [], "hexpm"},    "gettext": {:hex, :gettext, "0.15.0", "40a2b8ce33a80ced7727e36768499fc9286881c43ebafccae6bab731e2b2b8ce", [:mix], [], "hexpm"},    "hackney": {:hex, :hackney, "1.14.3", "b5f6f5dcc4f1fba340762738759209e21914516df6be440d85772542d4a5e412", [:rebar3], [{:certifi, "2.4.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, +  "html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm"},    "html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},    "httpoison": {:hex, :httpoison, "1.2.0", "2702ed3da5fd7a8130fc34b11965c8cfa21ade2f232c00b42d96d4967c39a3a3", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},    "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, diff --git a/test/fixtures/rich_media/ogp.html b/test/fixtures/rich_media/ogp.html new file mode 100644 index 000000000..c886b5871 --- /dev/null +++ b/test/fixtures/rich_media/ogp.html @@ -0,0 +1,9 @@ +<html prefix="og: http://ogp.me/ns#"> +  <head> +  <title>The Rock (1996)</title> +    <meta property="og:title" content="The Rock" /> +    <meta property="og:type" content="video.movie" /> +    <meta property="og:url" content="http://www.imdb.com/title/tt0117500/" /> +    <meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" /> +  </head> +</html> diff --git a/test/web/rich_media/controllers/rich_media_controller_test.exs b/test/web/rich_media/controllers/rich_media_controller_test.exs new file mode 100644 index 000000000..37c82631f --- /dev/null +++ b/test/web/rich_media/controllers/rich_media_controller_test.exs @@ -0,0 +1,54 @@ +defmodule Pleroma.Web.RichMedia.RichMediaControllerTest do +  use Pleroma.Web.ConnCase +  import Pleroma.Factory + +  setup do +    Tesla.Mock.mock(fn +      %{ +        method: :get, +        url: "http://example.com/ogp" +      } -> +        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} + +      %{method: :get, url: "http://example.com/empty"} -> +        %Tesla.Env{status: 200, body: "hello"} +    end) + +    :ok +  end + +  describe "GET /api/rich_media/parse" do +    setup do +      user = insert(:user) + +      [user: user] +    end + +    test "returns 404 if not metadata found", %{user: user} do +      build_conn() +      |> with_credentials(user.nickname, "test") +      |> get("/api/rich_media/parse", %{"url" => "http://example.com/empty"}) +      |> json_response(404) +    end + +    test "returns OGP metadata", %{user: user} do +      response = +        build_conn() +        |> with_credentials(user.nickname, "test") +        |> get("/api/rich_media/parse", %{"url" => "http://example.com/ogp"}) +        |> json_response(200) + +      assert response == %{ +               "image" => "http://ia.media-imdb.com/images/rock.jpg", +               "title" => "The Rock", +               "type" => "video.movie", +               "url" => "http://www.imdb.com/title/tt0117500/" +             } +    end +  end + +  defp with_credentials(conn, username, password) do +    header_content = "Basic " <> Base.encode64("#{username}:#{password}") +    put_req_header(conn, "authorization", header_content) +  end +end diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs new file mode 100644 index 000000000..caf81e9fa --- /dev/null +++ b/test/web/rich_media/parser_test.exs @@ -0,0 +1,33 @@ +defmodule Pleroma.Web.RichMedia.ParserTest do +  use ExUnit.Case, async: true + +  setup do +    Tesla.Mock.mock(fn +      %{ +        method: :get, +        url: "http://example.com/ogp" +      } -> +        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} + +      %{method: :get, url: "http://example.com/empty"} -> +        %Tesla.Env{status: 200, body: "hello"} +    end) + +    :ok +  end + +  test "returns error when no metadata present" do +    assert {:error, _} = Pleroma.Web.RichMedia.Parser.parse("http://example.com/empty") +  end + +  test "parses ogp" do +    assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") == +             {:ok, +              %{ +                image: "http://ia.media-imdb.com/images/rock.jpg", +                title: "The Rock", +                type: "video.movie", +                url: "http://www.imdb.com/title/tt0117500/" +              }} +  end +end | 
