From 852495fdb08298168c90fbe540b3b79d69c6793e Mon Sep 17 00:00:00 2001
From: Helmut Merz
Date: Fri, 23 Jun 2023 11:31:37 +0200
Subject: [PATCH] provide SQL scripts for populating search index tables from index.ejsl

---
Note: the blob "index" header lines are omitted because the file contents
were revised after the original commit; the patch still applies with
git apply / git am.

 sql/load.sql   | 31 +++++++++++++++++++++++++++++++
 sql/tables.sql | 24 ++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 sql/load.sql
 create mode 100644 sql/tables.sql

diff --git a/sql/load.sql b/sql/load.sql
new file mode 100644
--- /dev/null
+++ b/sql/load.sql
@@ -0,0 +1,31 @@
+-- load.sql
+-- Reload the search index: stage the JSON lines from public/index.ejsl in
+-- hugo_input, then upsert them into hugo_text. Run with psql; \copy reads the
+-- file on the client, relative to psql's working directory.
+
+-- Intentionally unfiltered: the staging table is emptied before every load.
+delete from hugo_input;
+
+\copy hugo_input (data) from 'public/index.ejsl'
+
+-- Upsert on the primary key (site, url) so that re-running the script refreshes
+-- existing pages instead of failing with a duplicate-key error. The generated
+-- columns title_tsv / content_tsv are recomputed automatically.
+insert into hugo_text (site, url, title, content)
+  select
+    data ->> 'site',
+    data ->> 'url',
+    data ->> 'title',
+    data ->> 'content'
+  from hugo_input
+  on conflict (site, url) do update
+    set title = excluded.title,
+        content = excluded.content;
+-- TODO: fill more columns
+
+-- sample query (psql):
+-- select to_tsquery('german', 'prolog') as q \gset
+-- select url, title,
+--     ts_headline('german', content, :'q'::tsquery,
+--                 'MaxFragments=3, MaxWords=6, MinWords=3')
+--   from hugo_text where content_tsv @@ :'q'::tsquery;
diff --git a/sql/tables.sql b/sql/tables.sql
new file mode 100644
--- /dev/null
+++ b/sql/tables.sql
@@ -0,0 +1,24 @@
+-- tables.sql
+-- Schema for the full-text search index that load.sql populates.
+
+-- Staging table: one jsonb document per input line.
+create table if not exists hugo_input (data jsonb);
+
+-- Searchable pages, keyed by (site, url). The *_tsv columns are stored
+-- generated columns (PostgreSQL 12+), so they always match title / content.
+create table if not exists hugo_text (
+  site text,
+  url text,
+  title text,
+  content text,
+  title_tsv tsvector generated always as
+    (to_tsvector('german', coalesce(title, ''))) stored,
+  content_tsv tsvector generated always as
+    (to_tsvector('german', coalesce(content, ''))) stored,
+  primary key (site, url)
+);
+
+-- GIN indexes backing the @@ full-text match operator.
+create index if not exists title_tsv_idx on hugo_text using gin (title_tsv);
+create index if not exists content_tsv_idx on hugo_text using gin (content_tsv);
+