pgweb cfg

This is an example configuration

Short description:

  1. create new default configuration 'pg'. To be default' it should match server's locale !!!
  2. register dictionaries (synonym pg-specific dict, english ispell. Check paths !
  3. Map tokens and dictionaries for configuration 'pg'
  4. Test configuration

Usage:

psql tt< pgweb.sql
-- Creates configuration 'pg' - default, should match server's locale !!!
-- Change 'ru_RU.UTF-8'

begin;

-- create special (default) configuration 'pg'
update pg_ts_cfg set locale=NULL where locale = 'ru_RU.UTF-8';
insert into pg_ts_cfg values('pg','default','ru_RU.UTF8');

-- register 'pg_dict' dictionary using synonym template
-- postgres    pg
-- pgsql       pg
-- postgresql  pg
insert into pg_ts_dict
(select 'pg_dict',dict_init,
'/usr/local/pgsql-dev/share/contrib/pg_dict.txt',
dict_lexize, 'pg-specific dictionary'
from pg_ts_dict
where dict_name='synonym'
);

-- register ispell dictionary, check paths and stop words
-- I used iconv for english files, since there are some cyrillic stuff
insert into pg_ts_dict
(SELECT 'en_ispell', dict_init,
'DictFile="/usr/local/share/dicts/ispell/utf8/english-utf8.dict",'
 'AffFile="/usr/local/share/dicts/ispell/utf8/english-utf8.aff",'
 'StopFile="/usr/local/share/dicts/ispell/utf8/english-utf8.stop"',
 dict_lexize
 FROM pg_ts_dict
 WHERE dict_name = 'ispell_template'
 );

 -- use the same stop-word list as 'en_ispell' dictionary
UPDATE pg_ts_dict set dict_initoption='/usr/local/share/dicts/english.stop'
where dict_name='en_stem';

-- default token<->dicts mappings
insert into pg_ts_cfgmap  select 'pg', tok_alias, dict_name from public.pg_ts_cf
gmap where ts_name='default';

-- modify mappings for latin words for configuration 'pg'
update pg_ts_cfgmap set dict_name = '{pg_dict,en_ispell,en_stem}'
where tok_alias in ( 'lword', 'lhword', 'lpart_hword' )
and ts_name = 'pg';

-- we won't index/search some tokens
update pg_ts_cfgmap set dict_name = NULL
where tok_alias in ('email', 'url', 'sfloat', 'uri', 'float')
and ts_name = 'pg';

-- testing

select * from ts_debug('
PostgreSQL, the highly scalable, SQL compliant, open source object-relational
database management system, is now undergoing beta testing of the next
version of our software: PostgreSQL 8.2.
');

end;

You shoud get something like:

 ts_name |  tok_type   |          description          |       token       |          dict_name          |              tsvector
---------+-------------+-------------------------------+-------------------+-----------------------------+------------------------------------
 pg      | lword       | Latin word                    | PostgreSQL        | {pg_dict,en_ispell,en_stem} | 'pg'
 pg      | lword       | Latin word                    | the               | {pg_dict,en_ispell,en_stem} |
 pg      | lword       | Latin word                    | highly            | {pg_dict,en_ispell,en_stem} | 'high'
 pg      | lword       | Latin word                    | scalable          | {pg_dict,en_ispell,en_stem} | 'scalable'
 pg      | lword       | Latin word                    | SQL               | {pg_dict,en_ispell,en_stem} | 'sql'
 pg      | lword       | Latin word                    | compliant         | {pg_dict,en_ispell,en_stem} | 'compliant'
 pg      | lword       | Latin word                    | open              | {pg_dict,en_ispell,en_stem} | 'open'
 pg      | lword       | Latin word                    | source            | {pg_dict,en_ispell,en_stem} | 'source'
 pg      | lhword      | Latin hyphenated word         | object-relational | {pg_dict,en_ispell,en_stem} | 'object' 'object-rel' 'relational'
 pg      | lpart_hword | Latin part of hyphenated word | object            | {pg_dict,en_ispell,en_stem} | 'object'
 pg      | lpart_hword | Latin part of hyphenated word | relational        | {pg_dict,en_ispell,en_stem} | 'relational'
 pg      | lword       | Latin word                    | database          | {pg_dict,en_ispell,en_stem} | 'database'
 pg      | lword       | Latin word                    | management        | {pg_dict,en_ispell,en_stem} | 'management'
 pg      | lword       | Latin word                    | system            | {pg_dict,en_ispell,en_stem} | 'system'
 pg      | lword       | Latin word                    | is                | {pg_dict,en_ispell,en_stem} |
 pg      | lword       | Latin word                    | now               | {pg_dict,en_ispell,en_stem} | 'now'
 pg      | lword       | Latin word                    | undergoing        | {pg_dict,en_ispell,en_stem} | 'undergo'
 pg      | lword       | Latin word                    | beta              | {pg_dict,en_ispell,en_stem} | 'beta'
 pg      | lword       | Latin word                    | testing           | {pg_dict,en_ispell,en_stem} | 'test'
 pg      | lword       | Latin word                    | of                | {pg_dict,en_ispell,en_stem} |
 pg      | lword       | Latin word                    | the               | {pg_dict,en_ispell,en_stem} |
 pg      | lword       | Latin word                    | next              | {pg_dict,en_ispell,en_stem} | 'next'
 pg      | lword       | Latin word                    | version           | {pg_dict,en_ispell,en_stem} | 'verse'
 pg      | lword       | Latin word                    | of                | {pg_dict,en_ispell,en_stem} |
 pg      | lword       | Latin word                    | our               | {pg_dict,en_ispell,en_stem} |
 pg      | lword       | Latin word                    | software          | {pg_dict,en_ispell,en_stem} | 'software'
 pg      | lword       | Latin word                    | PostgreSQL        | {pg_dict,en_ispell,en_stem} | 'pg'
 pg      | float       | Decimal notation              | 8.2               |                             |
(28 rows)