From dc45ec62c2f5dfcc895854dfbddf6fe9621d3072 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Mon, 14 Jan 2019 20:04:45 +0300 Subject: [#477] User search improvements: tsquery search with field weights, friends & followers boosting. --- lib/pleroma/user.ex | 75 +++++++++++++++++++--- .../web/mastodon_api/mastodon_api_controller.ex | 6 +- .../web/twitter_api/twitter_api_controller.ex | 2 +- 3 files changed, 70 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 681280539..52638b446 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -35,7 +35,7 @@ defmodule Pleroma.User do field(:avatar, :map) field(:local, :boolean, default: true) field(:follower_address, :string) - field(:search_distance, :float, virtual: true) + field(:search_rank, :float, virtual: true) field(:tags, {:array, :string}, default: []) field(:last_refreshed_at, :naive_datetime) has_many(:notifications, Notification) @@ -511,6 +511,12 @@ defmodule Pleroma.User do {:ok, Repo.all(q)} end + def get_followers_ids(user, page \\ nil) do + q = get_followers_query(user, page) + + Repo.all(from(u in q, select: u.id)) + end + def get_friends_query(%User{id: id, following: following}, nil) do from( u in User, @@ -535,6 +541,12 @@ defmodule Pleroma.User do {:ok, Repo.all(q)} end + def get_friends_ids(user, page \\ nil) do + q = get_friends_query(user, page) + + Repo.all(from(u in q, select: u.id)) + end + def get_follow_requests_query(%User{} = user) do from( a in Activity, @@ -666,7 +678,7 @@ defmodule Pleroma.User do Repo.all(query) end - def search(query, resolve \\ false) do + def search(query, resolve \\ false, for_user \\ nil) do # strip the beginning @ off if there is a query query = String.trim_leading(query, "@") @@ -674,16 +686,28 @@ defmodule Pleroma.User do User.get_or_fetch_by_nickname(query) end + processed_query = + query + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> 
Enum.join(" | ") + inner = from( u in User, select_merge: %{ - search_distance: + search_rank: fragment( - "? <-> (? || coalesce(?, ''))", - ^query, - u.nickname, - u.name + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(nickname, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(name, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?) + ) + """, + ^processed_query ) }, where: not is_nil(u.nickname) @@ -692,11 +716,44 @@ defmodule Pleroma.User do q = from( s in subquery(inner), - order_by: s.search_distance, + order_by: [desc: s.search_rank], limit: 20 ) - Repo.all(q) + results = + q + |> Repo.all() + |> Enum.filter(&(&1.search_rank > 0)) + + weighted_results = + if for_user do + friends_ids = get_friends_ids(for_user) + followers_ids = get_followers_ids(for_user) + + Enum.map( + results, + fn u -> + search_rank_coef = + cond do + u.id in friends_ids -> + 1.2 + + u.id in followers_ids -> + 1.1 + + true -> + 1 + end + + Map.put(u, :search_rank, u.search_rank * search_rank_coef) + end + ) + |> Enum.sort_by(&(-&1.search_rank)) + else + results + end + + weighted_results end def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex index a8fe9d708..54367f586 100644 --- a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex +++ b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex @@ -772,7 +772,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do - accounts = User.search(query, params["resolve"] == "true") + accounts = User.search(query, params["resolve"] == "true", user) statuses = status_search(user, query) @@ -796,7 +796,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end def search(%{assigns: %{user: user}} = conn, %{"q" => query} = 
params) do - accounts = User.search(query, params["resolve"] == "true") + accounts = User.search(query, params["resolve"] == "true", user) statuses = status_search(user, query) @@ -817,7 +817,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end def account_search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do - accounts = User.search(query, params["resolve"] == "true") + accounts = User.search(query, params["resolve"] == "true", user) res = AccountView.render("accounts.json", users: accounts, for: user, as: :user) diff --git a/lib/pleroma/web/twitter_api/twitter_api_controller.ex b/lib/pleroma/web/twitter_api/twitter_api_controller.ex index 1c728166c..ede079963 100644 --- a/lib/pleroma/web/twitter_api/twitter_api_controller.ex +++ b/lib/pleroma/web/twitter_api/twitter_api_controller.ex @@ -675,7 +675,7 @@ defmodule Pleroma.Web.TwitterAPI.Controller do end def search_user(%{assigns: %{user: user}} = conn, %{"query" => query}) do - users = User.search(query, true) + users = User.search(query, true, user) conn |> put_view(UserView) -- cgit v1.2.3 From 5b8f9ff8c14b5992e3db7a0c890ca5539e6a0086 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Tue, 15 Jan 2019 13:05:25 +0300 Subject: [#477] User search tests. Normalized search rank in User.search. --- lib/pleroma/user.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 52638b446..2488697bb 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -704,7 +704,8 @@ defmodule Pleroma.User do ts_rank_cd( setweight(to_tsvector('simple', regexp_replace(nickname, '\\W', ' ', 'g')), 'A') || setweight(to_tsvector('simple', regexp_replace(coalesce(name, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?) 
+ to_tsquery('simple', ?), + 32 ) """, ^processed_query -- cgit v1.2.3 From 0bc6d30f7dfe53be588329e48f1255b5eef18a2a Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Wed, 16 Jan 2019 10:44:32 +0300 Subject: [#477] Minor refactoring (user search query). --- lib/pleroma/user.ex | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 2488697bb..8ae36416a 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -702,12 +702,14 @@ defmodule Pleroma.User do fragment( """ ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(nickname, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(name, ''), '\\W', ' ', 'g')), 'B'), + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), to_tsquery('simple', ?), 32 ) """, + u.nickname, + u.name, ^processed_query ) }, -- cgit v1.2.3 From ed8f55ab8eb292903cec8f7699aa6775cc304458 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Fri, 18 Jan 2019 10:35:45 +0300 Subject: [#477] User: FTS and trigram search results mixing (to handle misspelled requests). 
--- lib/pleroma/user.ex | 136 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 79 insertions(+), 57 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 8ae36416a..1d0bf1edf 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -679,48 +679,24 @@ defmodule Pleroma.User do end def search(query, resolve \\ false, for_user \\ nil) do - # strip the beginning @ off if there is a query + # Strip the beginning @ off if there is a query query = String.trim_leading(query, "@") - if resolve do - User.get_or_fetch_by_nickname(query) - end + if resolve, do: User.get_or_fetch_by_nickname(query) - processed_query = - query - |> String.replace(~r/\W+/, " ") - |> String.trim() - |> String.split() - |> Enum.map(&(&1 <> ":*")) - |> Enum.join(" | ") + fts_results = do_search(fts_search_subquery(query), for_user) - inner = - from( - u in User, - select_merge: %{ - search_rank: - fragment( - """ - ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?), - 32 - ) - """, - u.nickname, - u.name, - ^processed_query - ) - }, - where: not is_nil(u.nickname) - ) + trigram_results = do_search(trigram_search_subquery(query), for_user) + + Enum.uniq_by(fts_results ++ trigram_results, & &1.id) + end + defp do_search(subquery, for_user, options \\ []) do q = from( - s in subquery(inner), + s in subquery(subquery), order_by: [desc: s.search_rank], - limit: 20 + limit: ^(options[:limit] || 20) ) results = @@ -728,35 +704,81 @@ defmodule Pleroma.User do |> Repo.all() |> Enum.filter(&(&1.search_rank > 0)) - weighted_results = - if for_user do - friends_ids = get_friends_ids(for_user) - followers_ids = get_followers_ids(for_user) + boost_search_results(results, for_user) + end - Enum.map( - results, - fn u -> - search_rank_coef = - cond do - u.id in friends_ids -> - 1.2 + defp 
fts_search_subquery(query) do + processed_query = + query + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> Enum.join(" | ") + + from( + u in User, + select_merge: %{ + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 + ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: not is_nil(u.nickname) + ) + end + + defp trigram_search_subquery(query) do + from( + u in User, + select_merge: %{ + search_rank: + fragment( + "similarity(?, ? || ' ' || coalesce(?, ''))", + ^query, + u.nickname, + u.name + ) + }, + where: not is_nil(u.nickname) + ) + end - u.id in followers_ids -> - 1.1 + defp boost_search_results(results, nil), do: results - true -> - 1 - end + defp boost_search_results(results, for_user) do + friends_ids = get_friends_ids(for_user) + followers_ids = get_followers_ids(for_user) - Map.put(u, :search_rank, u.search_rank * search_rank_coef) + Enum.map( + results, + fn u -> + search_rank_coef = + cond do + u.id in friends_ids -> + 1.2 + + u.id in followers_ids -> + 1.1 + + true -> + 1 end - ) - |> Enum.sort_by(&(-&1.search_rank)) - else - results - end - weighted_results + Map.put(u, :search_rank, u.search_rank * search_rank_coef) + end + ) + |> Enum.sort_by(&(-&1.search_rank)) end def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do -- cgit v1.2.3 From 79e44042bc08cf69274008e408cac912ae693afe Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Fri, 18 Jan 2019 10:57:42 +0300 Subject: [#477] User trigram index adjustment. 
--- lib/pleroma/user.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 1d0bf1edf..eb4218ebe 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -744,7 +744,7 @@ defmodule Pleroma.User do select_merge: %{ search_rank: fragment( - "similarity(?, ? || ' ' || coalesce(?, ''))", + "similarity(?, trim(? || ' ' || coalesce(?, '')))", ^query, u.nickname, u.name -- cgit v1.2.3 From b108aeee082949e2e534f8bc406fdacb8924803d Mon Sep 17 00:00:00 2001 From: lain Date: Sun, 20 Jan 2019 00:31:17 +0100 Subject: Make use of the indices. Indices in PostgreSQL rely on operators, so they won't be used if you use only functions. --- lib/pleroma/user.ex | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index eb4218ebe..87815e11c 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -734,7 +734,16 @@ defmodule Pleroma.User do ^processed_query ) }, - where: not is_nil(u.nickname) + where: + fragment( + """ + (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) + """, + u.nickname, + u.name, + ^processed_query + ) ) end @@ -750,7 +759,7 @@ defmodule Pleroma.User do u.name ) }, - where: not is_nil(u.nickname) + where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^query) ) end -- cgit v1.2.3 From 5834b08fe77250d1dad0f2f6cd148f2fd8f85c09 Mon Sep 17 00:00:00 2001 From: lain Date: Sun, 20 Jan 2019 10:57:49 +0100 Subject: Set custom similarity limit. 
--- lib/pleroma/user.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 87815e11c..955808e28 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -686,7 +686,11 @@ defmodule Pleroma.User do fts_results = do_search(fts_search_subquery(query), for_user) - trigram_results = do_search(trigram_search_subquery(query), for_user) + {:ok, trigram_results} = + Repo.transaction(fn -> + Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) + do_search(trigram_search_subquery(query), for_user) + end) Enum.uniq_by(fts_results ++ trigram_results, & &1.id) end -- cgit v1.2.3