diff --git a/sql/load.sql b/sql/load.sql
new file mode 100644
index 0000000..925385f
--- /dev/null
+++ b/sql/load.sql
@@ -0,0 +1,33 @@
+-- load.sql
+--
+-- Reload hugo_text from the JSON index in public/index.ejsl, staging the
+-- raw documents in hugo_input first. Uses psql meta-commands (\copy), so
+-- it has to be run through psql.
+
+-- Intentionally unfiltered: the staging table is emptied before each load.
+delete from hugo_input;
+
+-- One input line becomes one jsonb value; the path is resolved by psql on
+-- the client side.
+-- NOTE(review): \copy's default text format processes backslash escapes,
+-- so the file is presumably pre-escaped for it -- confirm.
+\copy hugo_input (data) from 'public/index.ejsl'
+
+-- Upsert on the primary key (site, url) so the load can be repeated; the
+-- generated tsvector columns are recomputed from the new title/content.
+-- Rows for pages no longer present in the input are left untouched.
+insert into hugo_text (site, url, title, content)
+  select data ->> 'site', data ->> 'url', data ->> 'title', data ->> 'content'
+    from hugo_input
+  on conflict (site, url) do update
+    set title = excluded.title,
+        content = excluded.content;
+-- TODO: fill more columns
+
+-- sample query (psql):
+--   select to_tsquery('german', 'prolog') as q \gset
+--   select url, title,
+--          ts_headline('german', content, :'q'::tsquery,
+--                      'MaxFragments=3, MaxWords=6, MinWords=3')
+--     from hugo_text
+--    where :'q'::tsquery @@ content_tsv;
diff --git a/sql/tables.sql b/sql/tables.sql
new file mode 100644
index 0000000..8567d99
--- /dev/null
+++ b/sql/tables.sql
@@ -0,0 +1,26 @@
+-- tables.sql
+--
+-- Schema for the full-text index over the site content.
+
+-- Staging table: raw JSON documents, one per row.
+create table hugo_input (data jsonb);
+
+-- One row per page, keyed by (site, url). The *_tsv columns are stored
+-- generated columns, so they always follow title/content; coalesce() maps
+-- a NULL title/content to an empty tsvector instead of NULL.
+create table hugo_text (
+  site text,
+  url text,
+  title text,
+  content text,
+  title_tsv tsvector generated always as
+    (to_tsvector('german', coalesce(title, ''))) stored,
+  content_tsv tsvector generated always as
+    (to_tsvector('german', coalesce(content, ''))) stored,
+  primary key (site, url)
+);
+
+-- GIN indexes backing @@ searches on the generated tsvector columns.
+create index title_tsv_idx on hugo_text using gin (title_tsv);
+create index content_tsv_idx on hugo_text using gin (content_tsv);
+