utf8/plugins/sphinx/config.proto


7	7
8	8	PLUGINS += sphinx
9	9
10		SPHINX_HOST = localhost
	10	SPHINX_HOST = 127.0.0.1
11	11	SPHINX_PORT = 9306
	12	SPHINX_DATA = /path/to/sphinx/database/
	13	SPHINX_TABLE = indextable
	14	SPHINX_TABLE_STEMMED = indextablestemmed
12	15
13		REWRITE += SPHINX_HOST SPHINX_PORT
	16	REWRITE += SPHINX_HOST SPHINX_PORT SPHINX_DATA SPHINX_TABLE SPHINX_TABLE_STEMMED

utf8/plugins/sphinx/etc/sphinx.conf


	1	#
	2	# Sphinx configuration file sample
	3	#
	4	# WARNING! While this sample file mentions all available options,
	5	# it contains (very) short helper descriptions only. Please refer to
	6	# doc/sphinx.html for details.
	7	#
	8
	9	#############################################################################
	10	## data source definition
	11	#############################################################################
	12
	13	source zvukiru
	14	{
	15	# data source type. mandatory, no default value
	16	# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
	17	type = pgsql
	18
	19	#####################################################################
	20	## SQL settings (for 'mysql' and 'pgsql' types)
	21	#####################################################################
	22
	23	# some straightforward parameters for SQL source types
	24	sql_host = localhost
	25	sql_user = zvuki
	26	sql_pass = sarUchOov
	27	sql_db = zvukirutf
	28	sql_port = 5432 # optional, default is 3306 for mysql and 5432 for pgsql
	29
	30	# UNIX socket name
	31	# optional, default is empty (reuse client library defaults)
	32	# usually '/var/lib/mysql/mysql.sock' on Linux
	33	# usually '/tmp/mysql.sock' on FreeBSD
	34	#
	35	# sql_sock = /tmp/mysql.sock
	36
	37
	38	# MySQL specific client connection flags
	39	# optional, default is 0
	40	#
	41	# mysql_connect_flags = 32 # enable compression
	42
	43	# MySQL specific SSL certificate settings
	44	# optional, defaults are empty
	45	#
	46	# mysql_ssl_cert = /etc/ssl/client-cert.pem
	47	# mysql_ssl_key = /etc/ssl/client-key.pem
	48	# mysql_ssl_ca = /etc/ssl/cacert.pem
	49
	50	# MS SQL specific Windows authentication mode flag
	51	# MUST be in sync with charset_type index-level setting
	52	# optional, default is 0
	53	#
	54	# mssql_winauth = 1 # use currently logged on user credentials
	55
	56
	57	# ODBC specific DSN (data source name)
	58	# mandatory for odbc source type, no default value
	59	#
	60	# odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (.txt; .csv)};
	61	# sql_query = SELECT id, data FROM documents.csv
	62
	63
	64	# ODBC and MS SQL specific, per-column buffer sizes
	65	# optional, default is auto-detect
	66	#
	67	# sql_column_buffers = content=12M, comments=1M
	68
	69
	70	# pre-query, executed before the main fetch query
	71	# multi-value, optional, default is empty list of queries
	72	#
	73	# sql_query_pre = SET NAMES utf8
	74	# sql_query_pre = SET SESSION query_cache_type=OFF
	75
	76
	77	# main document fetch query
	78	# mandatory, integer document ID field MUST be the first selected column
	79	sql_query = \
	80	SELECT id, object_id, object_class, extract(epoch from date_trunc('seconds', mtime)) AS last_edited, is_deleted, name as title, search as content \
	81	FROM search
	82
	83
	84	# joined/payload field fetch query
	85	# joined fields let you avoid (slow) JOIN and GROUP_CONCAT
	86	# payload fields let you attach custom per-keyword values (eg. for ranking)
	87	#
	88	# syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
	89	# joined field QUERY should return 2 columns (docid, text)
	90	# payload field QUERY should return 3 columns (docid, keyword, weight)
	91	#
	92	# REQUIRES that query results are in ascending document ID order!
	93	# multi-value, optional, default is empty list of queries
	94	#
	95	# sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
	96	# sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
	97
	98
	99	# file based field declaration
	100	#
	101	# content of this field is treated as a file name
	102	# and the file gets loaded and indexed in place of a field
	103	#
	104	# max file size is limited by max_file_field_buffer indexer setting
	105	# file IO errors are non-fatal and get reported as warnings
	106	#
	107	# sql_file_field = content_file_path
	108
	109
	110	# range query setup, query that must return min and max ID values
	111	# optional, default is empty
	112	#
	113	# sql_query will need to reference $start and $end boundaries
	114	# if using ranged query:
	115	#
	116	# sql_query = \
	117	# SELECT doc.id, doc.id AS group, doc.title, doc.data \
	118	# FROM documents doc \
	119	# WHERE id>=$start AND id<=$end
	120	#
	121	# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
	122
	123
	124	# range query step
	125	# optional, default is 1024
	126	#
	127	sql_range_step = 1000
	128
	129
	130	# unsigned integer attribute declaration
	131	# multi-value (an arbitrary number of attributes is allowed), optional
	132	# optional bit size can be specified, default is 32
	133	#
	134	# sql_attr_uint = author_id
	135	# sql_attr_uint = forum_id:9 # 9 bits for forum_id
	136	sql_attr_uint = object_id
	137
	138	# boolean attribute declaration
	139	# multi-value (an arbitrary number of attributes is allowed), optional
	140	# equivalent to sql_attr_uint with 1-bit size
	141	#
	142	sql_attr_bool = is_deleted
	143
	144
	145	# bigint attribute declaration
	146	# multi-value (an arbitrary number of attributes is allowed), optional
	147	# declares a signed (unlike uint!) 64-bit attribute
	148	#
	149	# sql_attr_bigint = my_bigint_id
	150
	151
	152	# UNIX timestamp attribute declaration
	153	# multi-value (an arbitrary number of attributes is allowed), optional
	154	# similar to integer, but can also be used in date functions
	155	#
	156	# sql_attr_timestamp = posted_ts
	157	sql_attr_timestamp = last_edited
	158	# sql_attr_timestamp = date_added
	159
	160
	161	# floating point attribute declaration
	162	# multi-value (an arbitrary number of attributes is allowed), optional
	163	# values are stored in single precision, 32-bit IEEE 754 format
	164	#
	165	# sql_attr_float = lat_radians
	166	# sql_attr_float = long_radians
	167
	168
	169	# multi-valued attribute (MVA) attribute declaration
	170	# multi-value (an arbitrary number of attributes is allowed), optional
	171	# MVA values are variable length lists of unsigned 32-bit integers
	172	#
	173	# syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
	174	# ATTR-TYPE is 'uint' or 'timestamp'
	175	# SOURCE-TYPE is 'field', 'query', or 'ranged-query'
	176	# QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
	177	# RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
	178	#
	179	# sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags
	180	# sql_attr_multi = uint tag from ranged-query; \
	181	# SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
	182	# SELECT MIN(docid), MAX(docid) FROM tags
	183
	184
	185	# string attribute declaration
	186	# multi-value (an arbitrary number of these is allowed), optional
	187	# lets you store and retrieve strings
	188	#
	189	sql_attr_string = object_class
	190
	191
	192	# JSON attribute declaration
	193	# multi-value (an arbitrary number of these is allowed), optional
	194	# lets you store a JSON document as an (in-memory) attribute for later use
	195	#
	196	# sql_attr_json = properties
	197
	198
	199	# combined field plus attribute declaration (from a single column)
	200	# stores column as an attribute, but also indexes it as a full-text field
	201	#
	202	# sql_field_string = author
	203
	204
	205	# post-query, executed on sql_query completion
	206	# optional, default is empty
	207	#
	208	# sql_query_post =
	209
	210
	211	# post-index-query, executed on successful indexing completion
	212	# optional, default is empty
	213	# $maxid expands to max document ID actually fetched from DB
	214	#
	215	# sql_query_post_index = REPLACE INTO counters ( id, val ) \
	216	# VALUES ( 'max_indexed_id', $maxid )
	217
	218
	219	# ranged query throttling, in milliseconds
	220	# optional, default is 0 which means no delay
	221	# enforces given delay before each query step
	222	sql_ranged_throttle = 0
	223
	224
	225	# kill-list query, fetches the document IDs for kill-list
	226	# k-list will suppress matches from preceding indexes in the same query
	227	# optional, default is empty
	228	#
	229	# sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
	230
	231
	232	# columns to unpack on indexer side when indexing
	233	# multi-value, optional, default is empty list
	234	#
	235	# unpack_zlib = zlib_column
	236	# unpack_mysqlcompress = compressed_column
	237	# unpack_mysqlcompress = compressed_column_2
	238
	239
	240	# maximum unpacked length allowed in MySQL COMPRESS() unpacker
	241	# optional, default is 16M
	242	#
	243	# unpack_mysqlcompress_maxsize = 16M
	244
	245
	246	# hook command to run when SQL connection succeeds
	247	# optional, default value is empty (do nothing)
	248	#
	249	# hook_connect = bash sql_connect.sh
	250
	251
	252	# hook command to run after (any) SQL range query
	253	# it may print out "minid maxid" (w/o quotes) to override the range
	254	# optional, default value is empty (do nothing)
	255	#
	256	# hook_query_range = bash sql_query_range.sh
	257
	258
	259	# hook command to run on successful indexing completion
	260	# $maxid expands to max document ID actually fetched from DB
	261	# optional, default value is empty (do nothing)
	262	#
	263	# hook_post_index = bash sql_post_index.sh $maxid
	264
	265	#####################################################################
	266	## xmlpipe2 settings
	267	#####################################################################
	268
	269	# type = xmlpipe
	270
	271	# shell command to invoke xmlpipe stream producer
	272	# mandatory
	273	#
	274	# xmlpipe_command = cat /var/db/sphinxsearch/test.xml
	275
	276	# xmlpipe2 field declaration
	277	# multi-value, optional, default is empty
	278	#
	279	# xmlpipe_field = subject
	280	# xmlpipe_field = content
	281
	282
	283	# xmlpipe2 attribute declaration
	284	# multi-value, optional, default is empty
	285	# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
	286	# examples:
	287	#
	288	# xmlpipe_attr_timestamp = published
	289	# xmlpipe_attr_uint = author_id
	290	# xmlpipe_attr_bool = is_enabled
	291	# xmlpipe_attr_float = latitude
	292	# xmlpipe_attr_bigint = guid
	293	# xmlpipe_attr_multi = tags
	294	# xmlpipe_attr_multi_64 = tags64
	295	# xmlpipe_attr_string = title
	296	# xmlpipe_attr_json = extra_data
	297	# xmlpipe_field_string = content
	298
	299
	300	# perform UTF-8 validation, and filter out incorrect codes
	301	# avoids XML parser choking on non-UTF-8 documents
	302	# optional, default is 0
	303	#
	304	# xmlpipe_fixup_utf8 = 1
	305	}
	306
	307
	308	# inherited source example
	309	#
	310	# all the parameters are copied from the parent source,
	311	# and may then be overridden in this source definition
	312	source zvukiruthrottled : zvukiru
	313	{
	314	sql_ranged_throttle = 100
	315	}
	316
	317	#############################################################################
	318	## index definition
	319	#############################################################################
	320
	321	# local index example
	322	#
	323	# this is an index which is stored locally in the filesystem
	324	#
	325	# all indexing-time options (such as morphology and charsets)
	326	# are configured per local index
	327	index zvukiru
	328	{
	329	# index type
	330	# optional, default is 'plain'
	331	# known values are 'plain', 'distributed', and 'rt' (see samples below)
	332	# type = plain
	333
	334	# document source(s) to index
	335	# multi-value, mandatory
	336	# document IDs must be globally unique across all sources
	337	source = zvukiru
	338
	339	# index files path and file name, without extension
	340	# mandatory, path must be writable, extensions will be auto-appended
	341	path = /var/db/sphinxsearch/data/zvukiru
	342
	343	# document attribute values (docinfo) storage mode
	344	# optional, default is 'extern'
	345	# known values are 'none', 'extern' and 'inline'
	346	docinfo = extern
	347
	348	# dictionary type, 'crc' or 'keywords'
	349	# crc is faster to index when no substring/wildcards searches are needed
	350	# crc with substrings might be faster to search but is much slower to index
	351	# (because all substrings are pre-extracted as individual keywords)
	352	# keywords is much faster to index with substrings, and index is much (3-10x) smaller
	353	# keywords supports wildcards, crc does not, and never will
	354	# optional, default is 'keywords'
	355	dict = keywords
	356
	357	# memory locking for cached data (.spa and .spi), to prevent swapping
	358	# optional, default is 0 (do not mlock)
	359	# requires searchd to be run from root
	360	mlock = 0
	361
	362	# a list of morphology preprocessors to apply
	363	# optional, default is empty
	364	#
	365	# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
	366	# 'soundex', and 'metaphone'; additional preprocessors available from
	367	# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
	368	# (see libstemmer_c/libstemmer/modules.txt)
	369	#
	370	morphology = stem_en, stem_ru, soundex
	371	# morphology = libstemmer_german
	372	# morphology = libstemmer_sv
	373	# morphology = none
	374
	375	# minimum word length at which to enable stemming
	376	# optional, default is 1 (stem everything)
	377	#
	378	min_stemming_len = 2
	379
	380
	381	# stopword files list (space separated)
	382	# optional, default is empty
	383	# contents are plain text, charset_table and stemming are both applied
	384	#
	385	# stopwords = /var/db/sphinxsearch/data/stopwords.txt
	386
	387
	388	# wordforms file, in "mapfrom > mapto" plain text format
	389	# optional, default is empty
	390	#
	391	# wordforms = /var/db/sphinxsearch/data/wordforms.txt
	392
	393
	394	# tokenizing exceptions file
	395	# optional, default is empty
	396	#
	397	# plain text, case sensitive, space insensitive in map-from part
	398	# one "Map Several Words => ToASingleOne" entry per line
	399	#
	400	# exceptions = /var/db/sphinxsearch/data/exceptions.txt
	401
	402
	403	# embedded file size limit
	404	# optional, default is 16K
	405	#
	406	# exceptions, wordforms, and stopwords files smaller than this limit
	407	# are stored in the index; otherwise, their paths and sizes are stored
	408	#
	409	# embedded_limit = 16K
	410
	411	# minimum indexed word length
	412	# default is 1 (index everything)
	413	min_word_len = 2
	414
	415
	416	# ignored characters list
	417	# optional, default value is empty
	418	#
	419	# ignore_chars = U+00AD
	420
	421
	422	# minimum word prefix length to index
	423	# optional, default is 0 (do not index prefixes)
	424	#
	425	# min_prefix_len = 0
	426
	427
	428	# minimum word infix length to index
	429	# optional, default is 0 (do not index infixes)
	430	#
	431	# min_infix_len = 0
	432
	433
	434	# maximum substring (prefix or infix) length to index
	435	# optional, default is 0 (do not limit substring length)
	436	#
	437	# max_substring_len = 8
	438
	439
	440	# list of fields to limit prefix/infix indexing to
	441	# optional, default value is empty (index all fields in prefix/infix mode)
	442	#
	443	# prefix_fields = filename
	444	# infix_fields = url, domain
	445
	446
	447	# expand keywords with exact forms and/or stars when searching fit indexes
	448	# search-time only, does not affect indexing, can be 0 or 1
	449	# optional, default is 0 (do not expand keywords)
	450	#
	451	# expand_keywords = 1
	452
	453
	454	# n-gram length to index, for CJK indexing
	455	# only supports 0 and 1 for now, other lengths to be implemented
	456	# optional, default is 0 (disable n-grams)
	457	#
	458	# ngram_len = 1
	459
	460
	461	# n-gram characters list, for CJK indexing
	462	# optional, default is empty
	463	#
	464	# ngram_chars = U+3000..U+2FA1F
	465
	466
	467	# phrase boundary characters list
	468	# optional, default is empty
	469	#
	470	# phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
	471
	472
	473	# phrase boundary word position increment
	474	# optional, default is 0
	475	#
	476	# phrase_boundary_step = 100
	477
	478
	479	# blended characters list
	480	# blended chars are indexed both as separators and valid characters
	481	# for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
	482	# optional, default is empty
	483	#
	484	# blend_chars = +, &, U+23
	485
	486
	487	# blended token indexing mode
	488	# a comma separated list of blended token indexing variants
	489	# known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
	490	# optional, default is trim_none
	491	#
	492	# blend_mode = trim_tail, skip_pure
	493
	494
	495	# whether to strip HTML tags from incoming documents
	496	# known values are 0 (do not strip) and 1 (do strip)
	497	# optional, default is 0
	498	html_strip = 0
	499
	500	# what HTML attributes to index if stripping HTML
	501	# optional, default is empty (do not index anything)
	502	#
	503	# html_index_attrs = img=alt,title; a=title;
	504
	505
	506	# what HTML elements contents to strip
	507	# optional, default is empty (do not strip element contents)
	508	#
	509	# html_remove_elements = style, script
	510
	511
	512	# whether to preopen index data files on startup
	513	# optional, default is 0 (do not preopen), searchd-only
	514	#
	515	# preopen = 1
	516
	517
	518	# whether to enable in-place inversion (2x less disk, 90-95% speed)
	519	# optional, default is 0 (use separate temporary files), indexer-only
	520	#
	521	# inplace_enable = 1
	522
	523
	524	# in-place fine-tuning options
	525	# optional, defaults are listed below
	526	#
	527	# inplace_hit_gap = 0 # preallocated hitlist gap size
	528	# inplace_docinfo_gap = 0 # preallocated docinfo gap size
	529	# inplace_reloc_factor = 0.1 # relocation buffer size within arena
	530	# inplace_write_factor = 0.1 # write buffer size within arena
	531
	532
	533	# whether to index original keywords along with stemmed versions
	534	# enables "=exactform" operator to work
	535	# optional, default is 0
	536	#
	537	# index_exact_words = 1
	538
	539
	540	# position increment on overshort (less than min_word_len) words
	541	# optional, allowed values are 0 and 1, default is 1
	542	#
	543	# overshort_step = 1
	544
	545
	546	# position increment on stopword
	547	# optional, allowed values are 0 and 1, default is 1
	548	#
	549	# stopword_step = 1
	550
	551
	552	# hitless words list
	553	# positions for these keywords will not be stored in the index
	554	# optional, allowed values are 'all', or a list file name
	555	#
	556	# hitless_words = all
	557	# hitless_words = hitless.txt
	558
	559
	560	# detect and index sentence and paragraph boundaries
	561	# required for the SENTENCE and PARAGRAPH operators to work
	562	# optional, allowed values are 0 and 1, default is 0
	563	#
	564	# index_sp = 1
	565
	566
	567	# index zones, delimited by HTML/XML tags
	568	# a comma separated list of tags and wildcards
	569	# required for the ZONE operator to work
	570	# optional, default is empty string (do not index zones)
	571	#
	572	# index_zones = title, h*, th
	573
	574
	575	# index per-document and average per-index field lengths, in tokens
	576	# required for the BM25A(), BM25F() in expression ranker
	577	# optional, default is 0 (do not index field lengths)
	578	#
	579	# index_field_lengths = 1
	580
	581
	582	# regular expressions (regexps) to filter the fields and queries with
	583	# gets applied to data source fields when indexing
	584	# gets applied to search queries when searching
	585	# multi-value, optional, default is empty list of regexps
	586	#
	587	# regexp_filter = \b(\d+)\" => \1inch
	588	# regexp_filter = (blue|red) => color
	589
	590
	591	# list of the words considered frequent with respect to bigram indexing
	592	# optional, default is empty
	593	#
	594	# bigram_freq_words = the, a, i, you, my
	595
	596
	597	# bigram indexing mode
	598	# known values are none, all, first_freq, both_freq
	599	# optional, default is none (do not index bigrams)
	600	#
	601	# bigram_index = both_freq
	602
	603
	604	# snippet document file name prefix
	605	# prepended to file names when generating snippets using load_files option
	606	# WARNING, this is a prefix (not a path), trailing slash matters!
	607	# optional, default is empty
	608	#
	609	# snippets_file_prefix = /mnt/mydocs/server1
	610
	611
	612	# whether to apply stopwords before or after stemming
	613	# optional, default is 0 (apply stopwords after stemming)
	614	#
	615	# stopwords_unstemmed = 0
	616
	617
	618	# path to a global (cluster-wide) keyword IDFs file
	619	# optional, default is empty (use local IDFs)
	620	#
	621	# global_idf = /usr/local/sphinx/var/global.idf
	622	}
	623
	624
	625	# inherited index example
	626	#
	627	# all the parameters are copied from the parent index,
	628	# and may then be overridden in this index definition
	629	index zvukirustemmed : zvukiru
	630	{
	631	path = /var/db/sphinxsearch/data/zvukirustemmed
	632	morphology = stem_en
	633	}
	634
	635
	636	# distributed index example
	637	#
	638	# this is a virtual index which can NOT be directly indexed,
	639	# and only contains references to other local and/or remote indexes
	640	index dist1
	641	{
	642	# 'distributed' index type MUST be specified
	643	type = distributed
	644
	645	# local index to be searched
	646	# there can be many local indexes configured
	647	local = test1
	648	local = test1stemmed
	649
	650	# remote agent
	651	# multiple remote agents may be specified
	652	# syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
	653	# syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
	654	agent = localhost:9313:remote1
	655	agent = localhost:9314:remote2,remote3
	656	# agent = /var/run/searchd.sock:remote4
	657
	658	# remote agent mirrors groups, aka mirrors, aka HA agents
	659	# defines 2 or more interchangeable mirrors for a given index part
	660	#
	661	# agent = server3:9312 | server4:9312 :indexchunk2
	662	# agent = server3:9312:chunk2server3 | server4:9312:chunk2server4
	663	# agent = server3:chunk2server3 | server4:chunk2server4
	664	# agent = server21|server22|server23:chunk2
	665
	666
	667	# blackhole remote agent, for debugging/testing
	668	# network errors and search results will be ignored
	669	#
	670	# agent_blackhole = testbox:9312:testindex1,testindex2
	671
	672
	673	# persistently connected remote agent
	674	# reduces connect() pressure, requires workers = threads
	675	#
	676	# agent_persistent = testbox:9312:testindex1,testindex2
	677
	678
	679	# remote agent connection timeout, milliseconds
	680	# optional, default is 1000 ms, ie. 1 sec
	681	agent_connect_timeout = 1000
	682
	683	# remote agent query timeout, milliseconds
	684	# optional, default is 3000 ms, ie. 3 sec
	685	agent_query_timeout = 3000
	686
	687	# HA mirror agent strategy
	688	# optional, default is random (pick a random mirror)
	689	# known values are random, nodeads, noerrors, roundrobin, nodeadstm, noerrorstm
	690	#
	691	# ha_strategy = nodeads
	692
	693	# path to RLP context file
	694	# optional, default is empty
	695	#
	696	# rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml
	697	}
	698
	699
	700	# realtime index example
	701	#
	702	# you can run INSERT, REPLACE, and DELETE on this index on the fly
	703	# using MySQL protocol (see 'listen' directive below)
	704	index rt
	705	{
	706	# 'rt' index type must be specified to use RT index
	707	type = rt
	708
	709	# index files path and file name, without extension
	710	# mandatory, path must be writable, extensions will be auto-appended
	711	path = /var/db/sphinxsearch/data/rt
	712
	713	# RAM chunk size limit
	714	# RT index will keep at most this much data in RAM, then flush to disk
	715	# optional, default is 128M
	716	#
	717	# rt_mem_limit = 512M
	718
	719	# full-text field declaration
	720	# multi-value, mandatory
	721	rt_field = title
	722	rt_field = content
	723
	724	# unsigned integer attribute declaration
	725	# multi-value (an arbitrary number of attributes is allowed), optional
	726	# declares an unsigned 32-bit attribute
	727	rt_attr_uint = gid
	728
	729	# RT indexes currently support the following attribute types:
	730	# uint, bigint, float, timestamp, string, mva, mva64, json
	731	#
	732	# rt_attr_bigint = guid
	733	# rt_attr_float = gpa
	734	# rt_attr_timestamp = ts_added
	735	# rt_attr_string = author
	736	# rt_attr_multi = tags
	737	# rt_attr_multi_64 = tags64
	738	# rt_attr_json = extra_data
	739	}
	740
	741	#############################################################################
	742	## indexer settings
	743	#############################################################################
	744
	745	indexer
	746	{
	747	# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
	748	# optional, default is 128M, max is 2047M, recommended is 256M to 1024M
	749	mem_limit = 128M
	750
	751	# maximum IO calls per second (for I/O throttling)
	752	# optional, default is 0 (unlimited)
	753	#
	754	# max_iops = 40
	755
	756
	757	# maximum IO call size, bytes (for I/O throttling)
	758	# optional, default is 0 (unlimited)
	759	#
	760	# max_iosize = 1048576
	761
	762
	763	# maximum xmlpipe2 field length, bytes
	764	# optional, default is 2M
	765	#
	766	# max_xmlpipe2_field = 4M
	767
	768
	769	# write buffer size, bytes
	770	# several (currently up to 4) buffers will be allocated
	771	# write buffers are allocated in addition to mem_limit
	772	# optional, default is 1M
	773	#
	774	# write_buffer = 1M
	775
	776
	777	# maximum file field adaptive buffer size
	778	# optional, default is 8M, minimum is 1M
	779	#
	780	# max_file_field_buffer = 32M
	781
	782
	783	# how to handle IO errors in file fields
	784	# known values are 'ignore_field', 'skip_document', and 'fail_index'
	785	# optional, default is 'ignore_field'
	786	#
	787	# on_file_field_error = skip_document
	788
	789
	790	# lemmatizer cache size
	791	# improves the indexing time when the lemmatization is enabled
	792	# optional, default is 256K
	793	#
	794	# lemmatizer_cache = 512M
	795	}
	796
	797	#############################################################################
	798	## searchd settings
	799	#############################################################################
	800
	801	searchd
	802	{
	803	# [hostname:]port[:protocol], or /unix/socket/path to listen on
	804	# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
	805	#
	806	# multi-value, multiple listen points are allowed
	807	# optional, defaults are 9312:sphinx and 9306:mysql41, as below
	808	#
	809	# listen = 127.0.0.1
	810	# listen = 192.168.0.1:9312
	811	# listen = 9312
	812	# listen = /var/run/searchd.sock
	813	listen = 9312
	814	listen = 9306:mysql41
	815
	816	# log file, searchd run info is logged here
	817	# optional, default is 'searchd.log'
	818	log = /var/log/sphinxsearch/searchd.log
	819
	820	# query log file, all search queries are logged here
	821	# optional, default is empty (do not log queries)
	822	query_log = /var/log/sphinxsearch/sphinx-query.log
	823
	824	# client read timeout, seconds
	825	# optional, default is 5
	826	read_timeout = 5
	827
	828	# request timeout, seconds
	829	# optional, default is 5 minutes
	830	client_timeout = 300
	831
	832	# maximum amount of children to fork (concurrent searches to run)
	833	# optional, default is 0 (unlimited)
	834	max_children = 30
	835
	836	# maximum amount of persistent connections from this master to each agent host
	837	# optional, but necessary if you use agent_persistent. It is reasonable to set the value
	838	# as max_children, or less on the agent's hosts.
	839	persistent_connections_limit = 30
	840
	841	# PID file, searchd process ID file name
	842	# mandatory
	843	pid_file = /var/run/sphinxsearch/searchd.pid
	844
	845	# seamless rotate, prevents rotate stalls if precaching huge datasets
	846	# optional, default is 1
	847	seamless_rotate = 1
	848
	849	# whether to forcibly preopen all indexes on startup
	850	# optional, default is 1 (preopen everything)
	851	preopen_indexes = 1
	852
	853	# whether to unlink .old index copies on successful rotation.
	854	# optional, default is 1 (do unlink)
	855	unlink_old = 1
	856
	857	# attribute updates periodic flush timeout, seconds
	858	# updates will be automatically dumped to disk this frequently
	859	# optional, default is 0 (disable periodic flush)
	860	#
	861	# attr_flush_period = 900
	862
	863
	864	# MVA updates pool size
	865	# shared between all instances of searchd, disables attr flushes!
	866	# optional, default size is 1M
	867	mva_updates_pool = 1M
	868
	869	# max allowed network packet size
	870	# limits both query packets from clients, and responses from agents
	871	# optional, default size is 8M
	872	max_packet_size = 8M
	873
	874	# max allowed per-query filter count
	875	# optional, default is 256
	876	max_filters = 256
	877
	878	# max allowed per-filter values count
	879	# optional, default is 4096
	880	max_filter_values = 4096
	881
	882
	883	# socket listen queue length
	884	# optional, default is 5
	885	#
	886	# listen_backlog = 5
	887
	888
	889	# per-keyword read buffer size
	890	# optional, default is 256K
	891	#
	892	# read_buffer = 256K
	893
	894
	895	# unhinted read size (currently used when reading hits)
	896	# optional, default is 32K
	897	#
	898	# read_unhinted = 32K
	899
	900
	901	# max allowed per-batch query count (aka multi-query count)
	902	# optional, default is 32
	903	max_batch_queries = 32
	904
	905
	906	# max common subtree document cache size, per-query
	907	# optional, default is 0 (disable subtree optimization)
	908	#
	909	# subtree_docs_cache = 4M
	910
	911
	912	# max common subtree hit cache size, per-query
	913	# optional, default is 0 (disable subtree optimization)
	914	#
	915	# subtree_hits_cache = 8M
	916
	917
	918	# multi-processing mode (MPM)
	919	# known values are none, fork, prefork, and threads
	920	# threads is required for RT backend to work
	921	# optional, default is threads
	922	workers = threads # for RT to work
	923
	924
	925	# max threads to create for searching local parts of a distributed index
	926	# optional, default is 0, which means disable multi-threaded searching
	927	# should work with all MPMs (ie. does NOT require workers=threads)
	928	#
	929	# dist_threads = 4
	930
	931
	932	# binlog files path; use empty string to disable binlog
	933	# optional, default is build-time configured data directory
	934	#
	935	# binlog_path = # disable logging
	936	# binlog_path = /var/db/sphinxsearch/data # binlog.001 etc will be created there
	937
	938
	939	# binlog flush/sync mode
	940	# 0 means flush and sync every second
	941	# 1 means flush and sync every transaction
	942	# 2 means flush every transaction, sync every second
	943	# optional, default is 2
	944	#
	945	# binlog_flush = 2
	946
	947
	948	# binlog per-file size limit
	949	# optional, default is 128M, 0 means no limit
	950	#
	951	# binlog_max_log_size = 256M
	952
	953
	954	# per-thread stack size, only affects workers=threads mode
	955	# optional, default is 64K
	956	#
	957	# thread_stack = 128K
	958
	959
	960	# per-keyword expansion limit (for dict=keywords prefix searches)
	961	# optional, default is 0 (no limit)
	962	#
	963	# expansion_limit = 1000
	964
	965
	966	# RT RAM chunks flush period
	967	# optional, default is 0 (no periodic flush)
	968	#
	969	# rt_flush_period = 900
	970
	971
	972	# query log file format
	973	# optional, known values are plain and sphinxql, default is plain
	974	#
	975	# query_log_format = sphinxql
	976
	977
	978	# version string returned to MySQL network protocol clients
	979	# optional, default is empty (use Sphinx version)
	980	#
	981	# mysql_version_string = 5.0.37
	982
	983
	984	# default server-wide collation
	985	# optional, default is libc_ci
	986	#
	987	# collation_server = utf8_general_ci
	988
	989
	990	# server-wide locale for libc based collations
	991	# optional, default is C
	992	#
	993	# collation_libc_locale = ru_RU.UTF-8
	994
	995
	996	# threaded server watchdog (only used in workers=threads mode)
	997	# optional, values are 0 and 1, default is 1 (watchdog on)
	998	#
	999	# watchdog = 1
	1000
	1001
	1002	# costs for max_predicted_time model, in (imaginary) nanoseconds
	1003	# optional, default is "doc=64, hit=48, skip=2048, match=64"
	1004	#
	1005	# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
	1006
	1007
	1008	# current SphinxQL state (uservars etc) serialization path
	1009	# optional, default is none (do not serialize SphinxQL state)
	1010	#
	1011	# sphinxql_state = sphinxvars.sql
	1012
	1013
	1014	# maximum RT merge thread IO calls per second, and per-call IO size
	1015	# useful for throttling (the background) OPTIMIZE INDEX impact
	1016	# optional, default is 0 (unlimited)
	1017	#
	1018	# rt_merge_iops = 40
	1019	# rt_merge_maxiosize = 1M
	1020
	1021
	1022	# interval between agent mirror pings, in milliseconds
	1023	# 0 means disable pings
	1024	# optional, default is 1000
	1025	#
	1026	# ha_ping_interval = 0
	1027
	1028
	1029	# agent mirror statistics window size, in seconds
	1030	# stats older than the window size (karma) are retired
	1031	# that is, they will not affect master choice of agents in any way
	1032	# optional, default is 60 seconds
	1033	#
	1034	# ha_period_karma = 60
	1035
	1036
	1037	# delay between preforked children restarts on rotation, in milliseconds
	1038	# optional, default is 0 (no delay)
	1039	#
	1040	# prefork_rotation_throttle = 100
	1041
	1042
	1043	# a prefix to prepend to the local file names when creating snippets
	1044	# with load_files and/or load_files_scatter options
	1045	# optional, default is empty
	1046	#
	1047	# snippets_file_prefix = /mnt/common/server1/
	1048	}
	1049
	1050	#############################################################################
	1051	## common settings
	1052	#############################################################################
	1053
	1054	common
	1055	{
	1056
	1057	# lemmatizer dictionaries base path
	1058	# optional, default is /usr/local/share (see ./configure --datadir)
	1059	#
	1060	# lemmatizer_base = /usr/local/share/sphinx/dicts
	1061
	1062
	1063	# how to handle syntax errors in JSON attributes
	1064	# known values are 'ignore_attr' and 'fail_index'
	1065	# optional, default is 'ignore_attr'
	1066	#
	1067	# on_json_attr_error = fail_index
	1068
	1069
	1070	# whether to auto-convert numeric values from strings in JSON attributes
	1071	# with auto-conversion, string value with actually numeric data
	1072	# (as in {"key":"12345"}) gets stored as a number, rather than string
	1073	# optional, allowed values are 0 and 1, default is 0 (do not convert)
	1074	#
	1075	# json_autoconv_numbers = 1
	1076
	1077
	1078	# whether and how to auto-convert key names in JSON attributes
	1079	# known value is 'lowercase'
	1080	# optional, default is unspecified (do nothing)
	1081	#
	1082	# json_autoconv_keynames = lowercase
	1083
	1084
	1085	# path to RLP root directory
	1086	# optional, default is /usr/local/share (see ./configure --datadir)
	1087	#
	1088	# rlp_root = /usr/local/share/sphinx/rlp
	1089
	1090
	1091	# path to RLP environment file
	1092	# optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir)
	1093	#
	1094	# rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml
	1095
	1096
	1097	# maximum total size of documents batched before processing them by the RLP
	1098	# optional, default is 51200
	1099	#
	1100	# rlp_max_batch_size = 100k
	1101
	1102
	1103	# maximum number of documents batched before processing them by the RLP
	1104	# optional, default is 50
	1105	#
	1106	# rlp_max_batch_docs = 100
	1107
	1108
	1109	# trusted plugin directory
	1110	# optional, default is empty (disable UDFs)
	1111	#
	1112	# plugin_dir = /usr/local/sphinx/lib
	1113
	1114	}
	1115
	1116	# --eof--

utf8/plugins/sphinx/lib/sphinx/Keeper.pm


4	4	use warnings 'all';
5	5	use base qw(Contenido::Keeper);
6	6	use Contenido::Globals;
	7	use Data::Dumper;
7	8
8	9	######################
9	10	# Отправить объект в поиск:
…	…
26	27	my $doc = shift;
27	28	return undef unless ref $doc && $doc->id;
28	29
29		my ($object) = $self->get_documents(
	30	my ($object) = $keeper->get_documents(
30	31	class => 'sphinx::Search',
31	32	object_id => $doc->id,
32	33	object_class => $doc->class,
…	…
35	36	my $data = $doc->get_search_data;
36	37	return undef unless $data;
37	38	unless ( ref $object ) {
38		$object = sphinx::Search->new( $self );
	39	$object = sphinx::Search->new( $keeper );
39	40	$object->status( 1 );
40	41	$object->is_deleted( 0 );
41	42	$object->object_id( $doc->id );
…	…
62	63	}
63	64
64	65
	66	# Search methods
	67	####################################################################
	68	sub search { # search( $text, %opts ): runs a MATCH() query; returns the match count (count => 1) or an array ref of row hashes
	69	my $self = shift;
	70	my $text = shift;
	71	return unless $text; # nothing to match against
	72	my (%opts) = @_; # recognised keys are deleted below; whatever remains becomes a column filter
	73
	74	my $result;
	75	my $db_table = delete $opts{db_table} || $self->state->table_name; # caller override, else the configured table
	76	my @wheres = ("MATCH(?)");
	77	my @values = ($text);
	78	my $count = delete $opts{count}; # true: return only the number of matches
	79	my $limit = delete $opts{limit};
	80	return if $limit && ($limit =~ /\D/ || $limit < 0); # limit must consist of digits only
	81	my $no_limit = delete $opts{no_limit};
	82	unless ( $no_limit ) {
	83	$limit ||= 1000; # default cap unless the caller passes no_limit
	84	}
	85	my $offset = delete $opts{offset};
	86	return if $offset && ($offset =~ /\D/ || $offset < 0); # same validation as for limit
	87	my $return_value = delete $opts{return_value} || 'array_ref'; # removed from %opts; not used further in this sub
	88	my $hash_by = delete $opts{hash_by} || 'object_id'; # removed from %opts; not used further in this sub
	89
	90	while ( my ($key, $val) = each %opts ) { # every remaining option is treated as a column filter
	91	if ( ref $val eq 'ARRAY' ) {
	92	push @wheres, "$key in (".join(',', map { '?' } @$val).")"; # array ref => IN (...) with one placeholder per value
	93	push @values, @$val;
	94	} else {
	95	push @wheres, "$key = ?"; # NOTE: $key is interpolated verbatim; only the values are bound
	96	push @values, $val;
	97	}
	98	}
	99	my $query = "select ".($count ? 'count(*) as cnt' : '*, weight() as weight')." from $db_table where ".join( ' and ', @wheres ); # select list: count(*) alone, or all columns plus weight()
	100	if ( $limit ) {
	101	$query .= " limit $limit ";
	102	}
	103	if ( $offset ) {
	104	$query .= " offset $offset ";
	105	}
	106	warn "SEARCH QUERY: $query\n" if $DEBUG;
	107	warn "SEARCH VALUES: ".Dumper( \@values ) if $DEBUG;
	108	my $sth = $self->SQL->prepare_cached( $query );
	109	$sth->execute( @values );
	110	if ( $count ) {
	111	$result = $sth->fetchrow_arrayref;
	112	$result = $result->[0]; # first column of the count row
	113	} else {
	114	$result = [];
	115	while ( my $row = $sth->fetchrow_hashref ) {
	116	push @$result, $row;
	117	}
	118	}
	119	return $result;
	120	}
	121
	122	sub stemmed { # same as search(), but forces db_table to the configured stemmed table
	123	my ($self, @args) = @_;
	124	my $stemmed_table = $self->state->table_name_stemmed;
	125	return $self->search( @args, db_table => $stemmed_table );
	126	}
	127
	128	# SMART DATABASE CONNECTION ACCESSORS
	129	####################################################################
	130	# returns the database connection, establishing a new one if there was none
	131	sub SQL {
	132	my $self = shift;
	133	return ($self->connect_check() ? $self->{SQL} : undef); # undef when connect_check() reports failure
	134	}
	135
	136	# -------------------------------------------------------------------------------------------------
	137	# Open the database connection; returns 1, dies if db_connect yields no handle
	138	# -------------------------------------------------------------------------------------------------
	139	sub connect {
	140	my $self = shift;
	141	# a connection already exists: nothing to open
	142	if ($self->is_connected) {
	143	} else {
	144	unless ($self->{SQL} = $self->db_connect) {
	145	warn "Не могу соединиться с базой данных";
	146	die;
	147	}
	148	$self->{SQL}->do("SET NAMES '".$self->state->db_client_encoding."'") if ($self->state->db_client_encoding); # only when a client encoding is configured
	149	}
	150
	151	$self->{_connect_ok} = 1; # cache the healthy state for connect_check()
	152	return 1;
	153	}
	154
	155	# connection check that caches the connection state; returns 1 when usable, 0 otherwise
	156	sub connect_check {
	157	my $self = shift;
	158	return 1 if ($self->{_connect_ok}); # cached result: skip the ping
	159	if ($self->is_connected) {
	160	$self->{_connect_ok} = 1;
	161	return 1;
	162	} else {
	163	if ($self->connect) {
	164	return 1;
	165	} else {
	166	# logically this should never be reached, because connect() dies on failure
	167	warn "Connect failed\n";
	168	return 0;
	169	}
	170	}
	171	}
	172
	173	sub db_connect { # opens and returns a new DBI handle (DBI:mysql DSN, no database name); dies if the connection fails
	174	my $self = shift;
	175	my $dbh = DBI->connect('DBI:mysql:host='.$self->{db_host}.';port='.$self->{db_port}.';mysql_enable_utf8=1') # host/port come from $self->{db_host} and $self->{db_port}
	176	|| die "Contenido Error: Не могу соединиться с Sphinx базой данных\n";
	177
	178	# $dbh->{'AutoCommit'} = 1;
	179	# $dbh->{mysql_auto_reconnect} = 1;
	180
	181	return $dbh;
	182	}
	183
	184	sub is_connected { # pings the stored handle; returns 1/0 and mirrors the result into $self->{_connect_ok}
	185	my $self = shift;
	186	if ( ref $self->{SQL} and $self->{SQL}->can('ping') and $self->{SQL}->ping() ) { # a handle exists, supports ping, and the ping succeeds
	187	$self->{_connect_ok} = 1;
	188	return 1;
	189	} else {
	190	$self->{_connect_ok} = 0; # invalidate the cache so connect_check() will reconnect
	191	return 0;
	192	}
	193
	194	# warn 'Check if MySQL DB connected: '.(ref $self && exists $self->{SQL} && ref $self->{SQL} ? 1 : 0 ) if $DEBUG;
	195	# return ( ref($self) && exists $self->{SQL} && ref $self->{SQL} );
	196	}
65	197	1;

utf8/plugins/sphinx/lib/sphinx/State.pm.proto


12	12	bless $self, $class;
13	13
14	14	# configured
15		$self->{debug} = (lc('') eq 'yes');
16		$self->{project} = '';
17		$self->{contenido_notab} = 0;
	15	$self->{debug} = (lc('@DEBUG@') eq 'yes');
	16	$self->{project} = '@PROJECT@';
	17	$self->{contenido_notab} = 1;
18	18	$self->{tab_name} = 'sphinx';
19	19
20	20	# зашитая конфигурация плагина
21		$self->{db_type} = 'none'; ### For REAL database use 'remote'
22		$self->{db_keepalive} = 0;
23		$self->{db_host} = '';
	21	$self->{db_type} = 'remote'; ### For REAL database use 'remote'
	22	$self->{db_keepalive} = 0;
	23	$self->{db_host} = '@SPHINX_HOST@';
24	24	$self->{db_name} = '';
25	25	$self->{db_user} = '';
26		$self->{db_password} = '';
27		$self->{db_port} = '';
	26	$self->{db_password} = '';
	27	$self->{db_port} = '@SPHINX_PORT@';
28	28	$self->{store_method} = 'toast';
29	29	$self->{cascade} = 1;
30	30	$self->{db_prepare} = 0;
31	31
32		$self->{memcached_enable} = lc( '' ) eq 'yes' ? 1 : 0;
	32	$self->{memcached_enable} = lc( '@MEMCACHED_ENABLE@' ) eq 'yes' ? 1 : 0;
33	33	$self->{memcached_enable_compress} = 1;
34		$self->{memcached_backend} = '';
35		$self->{memcached_servers} = [qw()];
	34	$self->{memcached_backend} = '@MEMCACHED_BACKEND@';
	35	$self->{memcached_servers} = [qw(@MEMCACHED_SERVERS@)];
36	36	$self->{memcached_busy_lock} = 60;
37		$self->{memcached_delayed} = lc('') eq 'yes' ? 1 : 0;
	37	$self->{memcached_delayed} = lc('@MEMCACHED_DELAYED@') eq 'yes' ? 1 : 0;
38	38
39	39	$self->{serialize_with} = 'json'; ### or 'dumper'
40	40
…	…
44	44	$self->{images_directory} = '/nonexistent';
45	45	$self->{preview} = '0';
46	46
	47	$self->{table_name} = '@SPHINX_TABLE@';
	48	$self->{table_name_stemmed} = '@SPHINX_TABLE_STEMMED@';
	49
47	50	$self->_init_();
48	51	$self;
49	52	}
…	…
90	93	data_directory
91	94	images_directory
92	95	preview
	96
	97	table_name
	98	table_name_stemmed
93	99	);
94	100	}
95	101

Repository List / Contenido @ r603

Revision Navigation

Revision 603

Legend:

utf8/plugins/sphinx/config.proto

utf8/plugins/sphinx/etc/sphinx.conf

utf8/plugins/sphinx/lib/sphinx/Keeper.pm

utf8/plugins/sphinx/lib/sphinx/State.pm.proto