Revision 603
- Date:
- 2016/10/24 15:31:01
- Files:
Legend:
- Added
- Removed
- Modified
-
utf8/plugins/sphinx/config.proto
7 7 8 8 PLUGINS += sphinx 9 9 10 SPHINX_HOST = localhost 10 SPHINX_HOST = 127.0.0.1 11 11 SPHINX_PORT = 9306 12 SPHINX_DATA = /path/to/sphinx/database/ 13 SPHINX_TABLE = indextable 14 SPHINX_TABLE_STEMMED = indextablestemmed 12 15 13 REWRITE += SPHINX_HOST SPHINX_PORT 16 REWRITE += SPHINX_HOST SPHINX_PORT SPHINX_DATA SPHINX_TABLE SPHINX_TABLE_STEMMED -
utf8/plugins/sphinx/etc/sphinx.conf
1 # 2 # Sphinx configuration file sample 3 # 4 # WARNING! While this sample file mentions all available options, 5 # it contains (very) short helper descriptions only. Please refer to 6 # doc/sphinx.html for details. 7 # 8 9 ############################################################################# 10 ## data source definition 11 ############################################################################# 12 13 source zvukiru 14 { 15 # data source type. mandatory, no default value 16 # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc 17 type = pgsql 18 19 ##################################################################### 20 ## SQL settings (for 'mysql' and 'pgsql' types) 21 ##################################################################### 22 23 # some straightforward parameters for SQL source types 24 sql_host = localhost 25 sql_user = zvuki 26 sql_pass = sarUchOov 27 sql_db = zvukirutf 28 sql_port = 5432 # optional, default is 3306 29 30 # UNIX socket name 31 # optional, default is empty (reuse client library defaults) 32 # usually '/var/lib/mysql/mysql.sock' on Linux 33 # usually '/tmp/mysql.sock' on FreeBSD 34 # 35 # sql_sock = /tmp/mysql.sock 36 37 38 # MySQL specific client connection flags 39 # optional, default is 0 40 # 41 # mysql_connect_flags = 32 # enable compression 42 43 # MySQL specific SSL certificate settings 44 # optional, defaults are empty 45 # 46 # mysql_ssl_cert = /etc/ssl/client-cert.pem 47 # mysql_ssl_key = /etc/ssl/client-key.pem 48 # mysql_ssl_ca = /etc/ssl/cacert.pem 49 50 # MS SQL specific Windows authentication mode flag 51 # MUST be in sync with charset_type index-level setting 52 # optional, default is 0 53 # 54 # mssql_winauth = 1 # use currently logged on user credentials 55 56 57 # ODBC specific DSN (data source name) 58 # mandatory for odbc source type, no default value 59 # 60 # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)}; 61 # sql_query = SELECT id, data FROM 
documents.csv 62 63 64 # ODBC and MS SQL specific, per-column buffer sizes 65 # optional, default is auto-detect 66 # 67 # sql_column_buffers = content=12M, comments=1M 68 69 70 # pre-query, executed before the main fetch query 71 # multi-value, optional, default is empty list of queries 72 # 73 # sql_query_pre = SET NAMES utf8 74 # sql_query_pre = SET SESSION query_cache_type=OFF 75 76 77 # main document fetch query 78 # mandatory, integer document ID field MUST be the first selected column 79 sql_query = \ 80 SELECT id, object_id, object_class, extract(epoch from date_trunc('seconds', mtime)) AS last_edited, is_deleted, name as title, search as content \ 81 FROM search 82 83 84 # joined/payload field fetch query 85 # joined fields let you avoid (slow) JOIN and GROUP_CONCAT 86 # payload fields let you attach custom per-keyword values (eg. for ranking) 87 # 88 # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY 89 # joined field QUERY should return 2 columns (docid, text) 90 # payload field QUERY should return 3 columns (docid, keyword, weight) 91 # 92 # REQUIRES that query results are in ascending document ID order! 
93 # multi-value, optional, default is empty list of queries 94 # 95 # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC 96 # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC 97 98 99 # file based field declaration 100 # 101 # content of this field is treated as a file name 102 # and the file gets loaded and indexed in place of a field 103 # 104 # max file size is limited by max_file_field_buffer indexer setting 105 # file IO errors are non-fatal and get reported as warnings 106 # 107 # sql_file_field = content_file_path 108 109 110 # range query setup, query that must return min and max ID values 111 # optional, default is empty 112 # 113 # sql_query will need to reference $start and $end boundaries 114 # if using ranged query: 115 # 116 # sql_query = \ 117 # SELECT doc.id, doc.id AS group, doc.title, doc.data \ 118 # FROM documents doc \ 119 # WHERE id>=$start AND id<=$end 120 # 121 # sql_query_range = SELECT MIN(id),MAX(id) FROM documents 122 123 124 # range query step 125 # optional, default is 1024 126 # 127 sql_range_step = 1000 128 129 130 # unsigned integer attribute declaration 131 # multi-value (an arbitrary number of attributes is allowed), optional 132 # optional bit size can be specified, default is 32 133 # 134 # sql_attr_uint = author_id 135 # sql_attr_uint = forum_id:9 # 9 bits for forum_id 136 sql_attr_uint = object_id 137 138 # boolean attribute declaration 139 # multi-value (an arbitrary number of attributes is allowed), optional 140 # equivalent to sql_attr_uint with 1-bit size 141 # 142 sql_attr_bool = is_deleted 143 144 145 # bigint attribute declaration 146 # multi-value (an arbitrary number of attributes is allowed), optional 147 # declares a signed (unlike uint!) 
64-bit attribute 148 # 149 # sql_attr_bigint = my_bigint_id 150 151 152 # UNIX timestamp attribute declaration 153 # multi-value (an arbitrary number of attributes is allowed), optional 154 # similar to integer, but can also be used in date functions 155 # 156 # sql_attr_timestamp = posted_ts 157 sql_attr_timestamp = last_edited 158 # sql_attr_timestamp = date_added 159 160 161 # floating point attribute declaration 162 # multi-value (an arbitrary number of attributes is allowed), optional 163 # values are stored in single precision, 32-bit IEEE 754 format 164 # 165 # sql_attr_float = lat_radians 166 # sql_attr_float = long_radians 167 168 169 # multi-valued attribute (MVA) attribute declaration 170 # multi-value (an arbitrary number of attributes is allowed), optional 171 # MVA values are variable length lists of unsigned 32-bit integers 172 # 173 # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY] 174 # ATTR-TYPE is 'uint' or 'timestamp' 175 # SOURCE-TYPE is 'field', 'query', or 'ranged-query' 176 # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs 177 # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range' 178 # 179 # sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags 180 # sql_attr_multi = uint tag from ranged-query; \ 181 # SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \ 182 # SELECT MIN(docid), MAX(docid) FROM tags 183 184 185 # string attribute declaration 186 # multi-value (an arbitrary number of these is allowed), optional 187 # lets you store and retrieve strings 188 # 189 sql_attr_string = object_class 190 191 192 # JSON attribute declaration 193 # multi-value (an arbitrary number of these is allowed), optional 194 # lets you store a JSON document as an (in-memory) attribute for later use 195 # 196 # sql_attr_json = properties 197 198 199 # combined field plus attribute declaration (from a single column) 200 # stores column as an attribute, but also 
indexes it as a full-text field 201 # 202 # sql_field_string = author 203 204 205 # post-query, executed on sql_query completion 206 # optional, default is empty 207 # 208 # sql_query_post = 209 210 211 # post-index-query, executed on successful indexing completion 212 # optional, default is empty 213 # $maxid expands to max document ID actually fetched from DB 214 # 215 # sql_query_post_index = REPLACE INTO counters ( id, val ) \ 216 # VALUES ( 'max_indexed_id', $maxid ) 217 218 219 # ranged query throttling, in milliseconds 220 # optional, default is 0 which means no delay 221 # enforces given delay before each query step 222 sql_ranged_throttle = 0 223 224 225 # kill-list query, fetches the document IDs for kill-list 226 # k-list will suppress matches from preceding indexes in the same query 227 # optional, default is empty 228 # 229 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex 230 231 232 # columns to unpack on indexer side when indexing 233 # multi-value, optional, default is empty list 234 # 235 # unpack_zlib = zlib_column 236 # unpack_mysqlcompress = compressed_column 237 # unpack_mysqlcompress = compressed_column_2 238 239 240 # maximum unpacked length allowed in MySQL COMPRESS() unpacker 241 # optional, default is 16M 242 # 243 # unpack_mysqlcompress_maxsize = 16M 244 245 246 # hook command to run when SQL connection succeeds 247 # optional, default value is empty (do nothing) 248 # 249 # hook_connect = bash sql_connect.sh 250 251 252 # hook command to run after (any) SQL range query 253 # it may print out "minid maxid" (w/o quotes) to override the range 254 # optional, default value is empty (do nothing) 255 # 256 # hook_query_range = bash sql_query_range.sh 257 258 259 # hook command to run on successful indexing completion 260 # $maxid expands to max document ID actually fetched from DB 261 # optional, default value is empty (do nothing) 262 # 263 # hook_post_index = bash sql_post_index.sh $maxid 264 265 
##################################################################### 266 ## xmlpipe2 settings 267 ##################################################################### 268 269 # type = xmlpipe 270 271 # shell command to invoke xmlpipe stream producer 272 # mandatory 273 # 274 # xmlpipe_command = cat /var/db/sphinxsearch/test.xml 275 276 # xmlpipe2 field declaration 277 # multi-value, optional, default is empty 278 # 279 # xmlpipe_field = subject 280 # xmlpipe_field = content 281 282 283 # xmlpipe2 attribute declaration 284 # multi-value, optional, default is empty 285 # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX 286 # examples: 287 # 288 # xmlpipe_attr_timestamp = published 289 # xmlpipe_attr_uint = author_id 290 # xmlpipe_attr_bool = is_enabled 291 # xmlpipe_attr_float = latitude 292 # xmlpipe_attr_bigint = guid 293 # xmlpipe_attr_multi = tags 294 # xmlpipe_attr_multi_64 = tags64 295 # xmlpipe_attr_string = title 296 # xmlpipe_attr_json = extra_data 297 # xmlpipe_field_string = content 298 299 300 # perform UTF-8 validation, and filter out incorrect codes 301 # avoids XML parser choking on non-UTF-8 documents 302 # optional, default is 0 303 # 304 # xmlpipe_fixup_utf8 = 1 305 } 306 307 308 # inherited source example 309 # 310 # all the parameters are copied from the parent source, 311 # and may then be overridden in this source definition 312 source zvukiruthrottled : zvukiru 313 { 314 sql_ranged_throttle = 100 315 } 316 317 ############################################################################# 318 ## index definition 319 ############################################################################# 320 321 # local index example 322 # 323 # this is an index which is stored locally in the filesystem 324 # 325 # all indexing-time options (such as morphology and charsets) 326 # are configured per local index 327 index zvukiru 328 { 329 # index type 330 # optional, default is 'plain' 331 # known values are 'plain', 'distributed', and 'rt' 
(see samples below) 332 # type = plain 333 334 # document source(s) to index 335 # multi-value, mandatory 336 # document IDs must be globally unique across all sources 337 source = zvukiru 338 339 # index files path and file name, without extension 340 # mandatory, path must be writable, extensions will be auto-appended 341 path = /var/db/sphinxsearch/data/zvukiru 342 343 # document attribute values (docinfo) storage mode 344 # optional, default is 'extern' 345 # known values are 'none', 'extern' and 'inline' 346 docinfo = extern 347 348 # dictionary type, 'crc' or 'keywords' 349 # crc is faster to index when no substring/wildcards searches are needed 350 # crc with substrings might be faster to search but is much slower to index 351 # (because all substrings are pre-extracted as individual keywords) 352 # keywords is much faster to index with substrings, and index is much (3-10x) smaller 353 # keywords supports wildcards, crc does not, and never will 354 # optional, default is 'keywords' 355 dict = keywords 356 357 # memory locking for cached data (.spa and .spi), to prevent swapping 358 # optional, default is 0 (do not mlock) 359 # requires searchd to be run from root 360 mlock = 0 361 362 # a list of morphology preprocessors to apply 363 # optional, default is empty 364 # 365 # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', 366 # 'soundex', and 'metaphone'; additional preprocessors available from 367 # libstemmer are 'libstemmer_XXX', where XXX is algorithm code 368 # (see libstemmer_c/libstemmer/modules.txt) 369 # 370 morphology = stem_en, stem_ru, soundex 371 # morphology = libstemmer_german 372 # morphology = libstemmer_sv 373 # morphology = none 374 375 # minimum word length at which to enable stemming 376 # optional, default is 1 (stem everything) 377 # 378 min_stemming_len = 2 379 380 381 # stopword files list (space separated) 382 # optional, default is empty 383 # contents are plain text, charset_table and stemming are both applied 
384 # 385 # stopwords = /var/db/sphinxsearch/data/stopwords.txt 386 387 388 # wordforms file, in "mapfrom > mapto" plain text format 389 # optional, default is empty 390 # 391 # wordforms = /var/db/sphinxsearch/data/wordforms.txt 392 393 394 # tokenizing exceptions file 395 # optional, default is empty 396 # 397 # plain text, case sensitive, space insensitive in map-from part 398 # one "Map Several Words => ToASingleOne" entry per line 399 # 400 # exceptions = /var/db/sphinxsearch/data/exceptions.txt 401 402 403 # embedded file size limit 404 # optional, default is 16K 405 # 406 # exceptions, wordforms, and stopwords files smaller than this limit 407 # are stored in the index; otherwise, their paths and sizes are stored 408 # 409 # embedded_limit = 16K 410 411 # minimum indexed word length 412 # default is 1 (index everything) 413 min_word_len = 2 414 415 416 # ignored characters list 417 # optional, default value is empty 418 # 419 # ignore_chars = U+00AD 420 421 422 # minimum word prefix length to index 423 # optional, default is 0 (do not index prefixes) 424 # 425 # min_prefix_len = 0 426 427 428 # minimum word infix length to index 429 # optional, default is 0 (do not index infixes) 430 # 431 # min_infix_len = 0 432 433 434 # maximum substring (prefix or infix) length to index 435 # optional, default is 0 (do not limit substring length) 436 # 437 # max_substring_len = 8 438 439 440 # list of fields to limit prefix/infix indexing to 441 # optional, default value is empty (index all fields in prefix/infix mode) 442 # 443 # prefix_fields = filename 444 # infix_fields = url, domain 445 446 447 # expand keywords with exact forms and/or stars when searching fit indexes 448 # search-time only, does not affect indexing, can be 0 or 1 449 # optional, default is 0 (do not expand keywords) 450 # 451 # expand_keywords = 1 452 453 454 # n-gram length to index, for CJK indexing 455 # only supports 0 and 1 for now, other lengths to be implemented 456 # optional, default is 0 
(disable n-grams) 457 # 458 # ngram_len = 1 459 460 461 # n-gram characters list, for CJK indexing 462 # optional, default is empty 463 # 464 # ngram_chars = U+3000..U+2FA1F 465 466 467 # phrase boundary characters list 468 # optional, default is empty 469 # 470 # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis 471 472 473 # phrase boundary word position increment 474 # optional, default is 0 475 # 476 # phrase_boundary_step = 100 477 478 479 # blended characters list 480 # blended chars are indexed both as separators and valid characters 481 # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t") 482 # optional, default is empty 483 # 484 # blend_chars = +, &, U+23 485 486 487 # blended token indexing mode 488 # a comma separated list of blended token indexing variants 489 # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure 490 # optional, default is trim_none 491 # 492 # blend_mode = trim_tail, skip_pure 493 494 495 # whether to strip HTML tags from incoming documents 496 # known values are 0 (do not strip) and 1 (do strip) 497 # optional, default is 0 498 html_strip = 0 499 500 # what HTML attributes to index if stripping HTML 501 # optional, default is empty (do not index anything) 502 # 503 # html_index_attrs = img=alt,title; a=title; 504 505 506 # what HTML elements contents to strip 507 # optional, default is empty (do not strip element contents) 508 # 509 # html_remove_elements = style, script 510 511 512 # whether to preopen index data files on startup 513 # optional, default is 0 (do not preopen), searchd-only 514 # 515 # preopen = 1 516 517 518 # whether to enable in-place inversion (2x less disk, 90-95% speed) 519 # optional, default is 0 (use separate temporary files), indexer-only 520 # 521 # inplace_enable = 1 522 523 524 # in-place fine-tuning options 525 # optional, defaults are listed below 526 # 527 # inplace_hit_gap = 0 # preallocated hitlist gap size 528 # inplace_docinfo_gap = 0 # preallocated 
docinfo gap size 529 # inplace_reloc_factor = 0.1 # relocation buffer size within arena 530 # inplace_write_factor = 0.1 # write buffer size within arena 531 532 533 # whether to index original keywords along with stemmed versions 534 # enables "=exactform" operator to work 535 # optional, default is 0 536 # 537 # index_exact_words = 1 538 539 540 # position increment on overshort (less than min_word_len) words 541 # optional, allowed values are 0 and 1, default is 1 542 # 543 # overshort_step = 1 544 545 546 # position increment on stopword 547 # optional, allowed values are 0 and 1, default is 1 548 # 549 # stopword_step = 1 550 551 552 # hitless words list 553 # positions for these keywords will not be stored in the index 554 # optional, allowed values are 'all', or a list file name 555 # 556 # hitless_words = all 557 # hitless_words = hitless.txt 558 559 560 # detect and index sentence and paragraph boundaries 561 # required for the SENTENCE and PARAGRAPH operators to work 562 # optional, allowed values are 0 and 1, default is 0 563 # 564 # index_sp = 1 565 566 567 # index zones, delimited by HTML/XML tags 568 # a comma separated list of tags and wildcards 569 # required for the ZONE operator to work 570 # optional, default is empty string (do not index zones) 571 # 572 # index_zones = title, h*, th 573 574 575 # index per-document and average per-index field lengths, in tokens 576 # required for the BM25A(), BM25F() in expression ranker 577 # optional, default is 0 (do not index field lengths) 578 # 579 # index_field_lengths = 1 580 581 582 # regular expressions (regexps) to filter the fields and queries with 583 # gets applied to data source fields when indexing 584 # gets applied to search queries when searching 585 # multi-value, optional, default is empty list of regexps 586 # 587 # regexp_filter = \b(\d+)\" => \1inch 588 # regexp_filter = (blue|red) => color 589 590 591 # list of the words considered frequent with respect to bigram indexing 592 # 
optional, default is empty 593 # 594 # bigram_freq_words = the, a, i, you, my 595 596 597 # bigram indexing mode 598 # known values are none, all, first_freq, both_freq 599 # optional, default is none (do not index bigrams) 600 # 601 # bigram_index = both_freq 602 603 604 # snippet document file name prefix 605 # prepended to file names when generating snippets using load_files option 606 # WARNING, this is a prefix (not a path), trailing slash matters! 607 # optional, default is empty 608 # 609 # snippets_file_prefix = /mnt/mydocs/server1 610 611 612 # whether to apply stopwords before or after stemming 613 # optional, default is 0 (apply stopwords after stemming) 614 # 615 # stopwords_unstemmed = 0 616 617 618 # path to a global (cluster-wide) keyword IDFs file 619 # optional, default is empty (use local IDFs) 620 # 621 # global_idf = /usr/local/sphinx/var/global.idf 622 } 623 624 625 # inherited index example 626 # 627 # all the parameters are copied from the parent index, 628 # and may then be overridden in this index definition 629 index zvukirustemmed : zvukiru 630 { 631 path = /var/db/sphinxsearch/data/zvukirustemmed 632 morphology = stem_en 633 } 634 635 636 # distributed index example 637 # 638 # this is a virtual index which can NOT be directly indexed, 639 # and only contains references to other local and/or remote indexes 640 index dist1 641 { 642 # 'distributed' index type MUST be specified 643 type = distributed 644 645 # local index to be searched 646 # there can be many local indexes configured 647 local = test1 648 local = test1stemmed 649 650 # remote agent 651 # multiple remote agents may be specified 652 # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]' 653 # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]' 654 agent = localhost:9313:remote1 655 agent = localhost:9314:remote2,remote3 656 # agent = /var/run/searchd.sock:remote4 657 658 # remote agent mirrors groups, aka mirrors, aka HA agents 659 # 
defines 2 or more interchangeable mirrors for a given index part 660 # 661 # agent = server3:9312 | server4:9312 :indexchunk2 662 # agent = server3:9312:chunk2server3 | server4:9312:chunk2server4 663 # agent = server3:chunk2server3 | server4:chunk2server4 664 # agent = server21|server22|server23:chunk2 665 666 667 # blackhole remote agent, for debugging/testing 668 # network errors and search results will be ignored 669 # 670 # agent_blackhole = testbox:9312:testindex1,testindex2 671 672 673 # persistently connected remote agent 674 # reduces connect() pressure, requires that workers IS threads 675 # 676 # agent_persistent = testbox:9312:testindex1,testindex2 677 678 679 # remote agent connection timeout, milliseconds 680 # optional, default is 1000 ms, ie. 1 sec 681 agent_connect_timeout = 1000 682 683 # remote agent query timeout, milliseconds 684 # optional, default is 3000 ms, ie. 3 sec 685 agent_query_timeout = 3000 686 687 # HA mirror agent strategy 688 # optional, defaults to ??? (random mirror) 689 # known values are nodeads, noerrors, roundrobin, nodeadstm, noerrorstm 690 # 691 # ha_strategy = nodeads 692 693 # path to RLP context file 694 # optional, default is empty 695 # 696 # rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml 697 } 698 699 700 # realtime index example 701 # 702 # you can run INSERT, REPLACE, and DELETE on this index on the fly 703 # using MySQL protocol (see 'listen' directive below) 704 index rt 705 { 706 # 'rt' index type must be specified to use RT index 707 type = rt 708 709 # index files path and file name, without extension 710 # mandatory, path must be writable, extensions will be auto-appended 711 path = /var/db/sphinxsearch/data/rt 712 713 # RAM chunk size limit 714 # RT index will keep at most this much data in RAM, then flush to disk 715 # optional, default is 128M 716 # 717 # rt_mem_limit = 512M 718 719 # full-text field declaration 720 # multi-value, mandatory 721 rt_field = title 722 rt_field = content 723 724 # 
unsigned integer attribute declaration 725 # multi-value (an arbitrary number of attributes is allowed), optional 726 # declares an unsigned 32-bit attribute 727 rt_attr_uint = gid 728 729 # RT indexes currently support the following attribute types: 730 # uint, bigint, float, timestamp, string, mva, mva64, json 731 # 732 # rt_attr_bigint = guid 733 # rt_attr_float = gpa 734 # rt_attr_timestamp = ts_added 735 # rt_attr_string = author 736 # rt_attr_multi = tags 737 # rt_attr_multi_64 = tags64 738 # rt_attr_json = extra_data 739 } 740 741 ############################################################################# 742 ## indexer settings 743 ############################################################################# 744 745 indexer 746 { 747 # memory limit, in bytes, kilobytes (16384K) or megabytes (256M) 748 # optional, default is 128M, max is 2047M, recommended is 256M to 1024M 749 mem_limit = 128M 750 751 # maximum IO calls per second (for I/O throttling) 752 # optional, default is 0 (unlimited) 753 # 754 # max_iops = 40 755 756 757 # maximum IO call size, bytes (for I/O throttling) 758 # optional, default is 0 (unlimited) 759 # 760 # max_iosize = 1048576 761 762 763 # maximum xmlpipe2 field length, bytes 764 # optional, default is 2M 765 # 766 # max_xmlpipe2_field = 4M 767 768 769 # write buffer size, bytes 770 # several (currently up to 4) buffers will be allocated 771 # write buffers are allocated in addition to mem_limit 772 # optional, default is 1M 773 # 774 # write_buffer = 1M 775 776 777 # maximum file field adaptive buffer size 778 # optional, default is 8M, minimum is 1M 779 # 780 # max_file_field_buffer = 32M 781 782 783 # how to handle IO errors in file fields 784 # known values are 'ignore_field', 'skip_document', and 'fail_index' 785 # optional, default is 'ignore_field' 786 # 787 # on_file_field_error = skip_document 788 789 790 # lemmatizer cache size 791 # improves the indexing time when the lemmatization is enabled 792 # optional, default is 
256K 793 # 794 # lemmatizer_cache = 512M 795 } 796 797 ############################################################################# 798 ## searchd settings 799 ############################################################################# 800 801 searchd 802 { 803 # [hostname:]port[:protocol], or /unix/socket/path to listen on 804 # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) 805 # 806 # multi-value, multiple listen points are allowed 807 # optional, defaults are 9312:sphinx and 9306:mysql41, as below 808 # 809 # listen = 127.0.0.1 810 # listen = 192.168.0.1:9312 811 # listen = 9312 812 # listen = /var/run/searchd.sock 813 listen = 9312 814 listen = 9306:mysql41 815 816 # log file, searchd run info is logged here 817 # optional, default is 'searchd.log' 818 log = /var/log/sphinxsearch/searchd.log 819 820 # query log file, all search queries are logged here 821 # optional, default is empty (do not log queries) 822 query_log = /var/log/sphinxsearch/sphinx-query.log 823 824 # client read timeout, seconds 825 # optional, default is 5 826 read_timeout = 5 827 828 # request timeout, seconds 829 # optional, default is 5 minutes 830 client_timeout = 300 831 832 # maximum amount of children to fork (concurrent searches to run) 833 # optional, default is 0 (unlimited) 834 max_children = 30 835 836 # maximum amount of persistent connections from this master to each agent host 837 # optional, but necessary if you use agent_persistent. It is reasonable to set the value 838 # as max_children, or less on the agent's hosts. 
839 persistent_connections_limit = 30 840 841 # PID file, searchd process ID file name 842 # mandatory 843 pid_file = /var/run/sphinxsearch/searchd.pid 844 845 # seamless rotate, prevents rotate stalls if precaching huge datasets 846 # optional, default is 1 847 seamless_rotate = 1 848 849 # whether to forcibly preopen all indexes on startup 850 # optional, default is 1 (preopen everything) 851 preopen_indexes = 1 852 853 # whether to unlink .old index copies on successful rotation. 854 # optional, default is 1 (do unlink) 855 unlink_old = 1 856 857 # attribute updates periodic flush timeout, seconds 858 # updates will be automatically dumped to disk this frequently 859 # optional, default is 0 (disable periodic flush) 860 # 861 # attr_flush_period = 900 862 863 864 # MVA updates pool size 865 # shared between all instances of searchd, disables attr flushes! 866 # optional, default size is 1M 867 mva_updates_pool = 1M 868 869 # max allowed network packet size 870 # limits both query packets from clients, and responses from agents 871 # optional, default size is 8M 872 max_packet_size = 8M 873 874 # max allowed per-query filter count 875 # optional, default is 256 876 max_filters = 256 877 878 # max allowed per-filter values count 879 # optional, default is 4096 880 max_filter_values = 4096 881 882 883 # socket listen queue length 884 # optional, default is 5 885 # 886 # listen_backlog = 5 887 888 889 # per-keyword read buffer size 890 # optional, default is 256K 891 # 892 # read_buffer = 256K 893 894 895 # unhinted read size (currently used when reading hits) 896 # optional, default is 32K 897 # 898 # read_unhinted = 32K 899 900 901 # max allowed per-batch query count (aka multi-query count) 902 # optional, default is 32 903 max_batch_queries = 32 904 905 906 # max common subtree document cache size, per-query 907 # optional, default is 0 (disable subtree optimization) 908 # 909 # subtree_docs_cache = 4M 910 911 912 # max common subtree hit cache size, per-query 913 
# optional, default is 0 (disable subtree optimization) 914 # 915 # subtree_hits_cache = 8M 916 917 918 # multi-processing mode (MPM) 919 # known values are none, fork, prefork, and threads 920 # threads is required for RT backend to work 921 # optional, default is threads 922 workers = threads # for RT to work 923 924 925 # max threads to create for searching local parts of a distributed index 926 # optional, default is 0, which means disable multi-threaded searching 927 # should work with all MPMs (ie. does NOT require workers=threads) 928 # 929 # dist_threads = 4 930 931 932 # binlog files path; use empty string to disable binlog 933 # optional, default is build-time configured data directory 934 # 935 # binlog_path = # disable logging 936 # binlog_path = /var/db/sphinxsearch/data # binlog.001 etc will be created there 937 938 939 # binlog flush/sync mode 940 # 0 means flush and sync every second 941 # 1 means flush and sync every transaction 942 # 2 means flush every transaction, sync every second 943 # optional, default is 2 944 # 945 # binlog_flush = 2 946 947 948 # binlog per-file size limit 949 # optional, default is 128M, 0 means no limit 950 # 951 # binlog_max_log_size = 256M 952 953 954 # per-thread stack size, only affects workers=threads mode 955 # optional, default is 64K 956 # 957 # thread_stack = 128K 958 959 960 # per-keyword expansion limit (for dict=keywords prefix searches) 961 # optional, default is 0 (no limit) 962 # 963 # expansion_limit = 1000 964 965 966 # RT RAM chunks flush period 967 # optional, default is 0 (no periodic flush) 968 # 969 # rt_flush_period = 900 970 971 972 # query log file format 973 # optional, known values are plain and sphinxql, default is plain 974 # 975 # query_log_format = sphinxql 976 977 978 # version string returned to MySQL network protocol clients 979 # optional, default is empty (use Sphinx version) 980 # 981 # mysql_version_string = 5.0.37 982 983 984 # default server-wide collation 985 # optional, default 
is libc_ci 986 # 987 # collation_server = utf8_general_ci 988 989 990 # server-wide locale for libc based collations 991 # optional, default is C 992 # 993 # collation_libc_locale = ru_RU.UTF-8 994 995 996 # threaded server watchdog (only used in workers=threads mode) 997 # optional, values are 0 and 1, default is 1 (watchdog on) 998 # 999 # watchdog = 1 1000 1001 1002 # costs for max_predicted_time model, in (imaginary) nanoseconds 1003 # optional, default is "doc=64, hit=48, skip=2048, match=64" 1004 # 1005 # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 1006 1007 1008 # current SphinxQL state (uservars etc) serialization path 1009 # optional, default is none (do not serialize SphinxQL state) 1010 # 1011 # sphinxql_state = sphinxvars.sql 1012 1013 1014 # maximum RT merge thread IO calls per second, and per-call IO size 1015 # useful for throttling (the background) OPTIMIZE INDEX impact 1016 # optional, default is 0 (unlimited) 1017 # 1018 # rt_merge_iops = 40 1019 # rt_merge_maxiosize = 1M 1020 1021 1022 # interval between agent mirror pings, in milliseconds 1023 # 0 means disable pings 1024 # optional, default is 1000 1025 # 1026 # ha_ping_interval = 0 1027 1028 1029 # agent mirror statistics window size, in seconds 1030 # stats older than the window size (karma) are retired 1031 # that is, they will not affect master choice of agents in any way 1032 # optional, default is 60 seconds 1033 # 1034 # ha_period_karma = 60 1035 1036 1037 # delay between preforked children restarts on rotation, in milliseconds 1038 # optional, default is 0 (no delay) 1039 # 1040 # prefork_rotation_throttle = 100 1041 1042 1043 # a prefix to prepend to the local file names when creating snippets 1044 # with load_files and/or load_files_scatter options 1045 # optional, default is empty 1046 # 1047 # snippets_file_prefix = /mnt/common/server1/ 1048 } 1049 1050 ############################################################################# 1051 ## common settings 1052 
############################################################################# 1053 1054 common 1055 { 1056 1057 # lemmatizer dictionaries base path 1058 # optional, default is /usr/local/share (see ./configure --datadir) 1059 # 1060 # lemmatizer_base = /usr/local/share/sphinx/dicts 1061 1062 1063 # how to handle syntax errors in JSON attributes 1064 # known values are 'ignore_attr' and 'fail_index' 1065 # optional, default is 'ignore_attr' 1066 # 1067 # on_json_attr_error = fail_index 1068 1069 1070 # whether to auto-convert numeric values from strings in JSON attributes 1071 # with auto-conversion, string value with actually numeric data 1072 # (as in {"key":"12345"}) gets stored as a number, rather than string 1073 # optional, allowed values are 0 and 1, default is 0 (do not convert) 1074 # 1075 # json_autoconv_numbers = 1 1076 1077 1078 # whether and how to auto-convert key names in JSON attributes 1079 # known value is 'lowercase' 1080 # optional, default is unspecified (do nothing) 1081 # 1082 # json_autoconv_keynames = lowercase 1083 1084 1085 # path to RLP root directory 1086 # optional, default is /usr/local/share (see ./configure --datadir) 1087 # 1088 # rlp_root = /usr/local/share/sphinx/rlp 1089 1090 1091 # path to RLP environment file 1092 # optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir) 1093 # 1094 # rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml 1095 1096 1097 # maximum total size of documents batched before processing them by the RLP 1098 # optional, default is 51200 1099 # 1100 # rlp_max_batch_size = 100k 1101 1102 1103 # maximum number of documents batched before processing them by the RLP 1104 # optional, default is 50 1105 # 1106 # rlp_max_batch_docs = 100 1107 1108 1109 # trusted plugin directory 1110 # optional, default is empty (disable UDFs) 1111 # 1112 # plugin_dir = /usr/local/sphinx/lib 1113 1114 } 1115 1116 # --eof-- -
utf8/plugins/sphinx/lib/sphinx/Keeper.pm
4 4 use warnings 'all'; 5 5 use base qw(Contenido::Keeper); 6 6 use Contenido::Globals; 7 use Data::Dumper; 7 8 8 9 ###################### 9 10 # Отправить объект в поиск: … … 26 27 my $doc = shift; 27 28 return undef unless ref $doc && $doc->id; 28 29 29 my ($object) = $self->get_documents( 30 my ($object) = $keeper->get_documents( 30 31 class => 'sphinx::Search', 31 32 object_id => $doc->id, 32 33 object_class => $doc->class, … … 35 36 my $data = $doc->get_search_data; 36 37 return undef unless $data; 37 38 unless ( ref $object ) { 38 $object = sphinx::Search->new( $self ); 39 $object = sphinx::Search->new( $keeper ); 39 40 $object->status( 1 ); 40 41 $object->is_deleted( 0 ); 41 42 $object->object_id( $doc->id ); … … 62 63 } 63 64 64 65 66 # Методы поиска 67 #################################################################### 68 sub search { 69 my $self = shift; 70 my $text = shift; 71 return unless $text; 72 my (%opts) = @_; 73 74 my $result; 75 my $db_table = delete $opts{db_table} || $self->state->table_name; 76 my @wheres = ("MATCH(?)"); 77 my @values = ($text); 78 my $count = delete $opts{count}; 79 my $limit = delete $opts{limit}; 80 return if $limit && ($limit =~ /\D/ || $limit < 0); 81 my $no_limit = delete $opts{no_limit}; 82 unless ( $no_limit ) { 83 $limit ||= 1000; 84 } 85 my $offset = delete $opts{offset}; 86 return if $offset && ($offset =~ /\D/ || $offset < 0); 87 my $return_value = delete $opts{return_value} || 'array_ref'; 88 my $hash_by = delete $opts{hash_by} || 'object_id'; 89 90 while ( my ($key, $val) = each %opts ) { 91 if ( ref $val eq 'ARRAY' ) { 92 push @wheres, "$key in (".join(',', map { '?' } @$val).")"; 93 push @values, @$val; 94 } else { 95 push @wheres, "$key = ?"; 96 push @values, $val; 97 } 98 } 99 my $query = "select ".($count ? 'count(*) as cnt' : '*, weight() as weight')." 
from $db_table where ".join( ' and ', @wheres ); 100 if ( $limit ) { 101 $query .= " limit $limit "; 102 } 103 if ( $offset ) { 104 $query .= " offset $offset "; 105 } 106 warn "SEARCH QUERY: $query\n" if $DEBUG; 107 warn "SEARCH VALUES: ".Dumper( \@values ) if $DEBUG; 108 my $sth = $self->SQL->prepare_cached( $query ); 109 $sth->execute( @values ); 110 if ( $count ) { 111 $result = $sth->fetchrow_arrayref; 112 $result = $result->[0]; 113 } else { 114 $result = []; 115 while ( my $row = $sth->fetchrow_hashref ) { 116 push @$result, $row; 117 } 118 } 119 return $result; 120 } 121 122 sub stemmed { 123 my $self = shift; 124 my $db_table = $self->state->table_name_stemmed; 125 return $self->search( @_, db_table => $db_table ); 126 } 127 128 # МЕТОДЫ ДОСТУПА К СОЕДИНЕНИЯМ С БАЗОЙ УМНЫЕ 129 #################################################################### 130 # получение соединения с базой или установка нового если его не было 131 sub SQL { 132 my $self = shift; 133 return ($self->connect_check() ? 
$self->{SQL} : undef); 134 } 135 136 # ------------------------------------------------------------------------------------------------- 137 # Открываем соединение с базой данных 138 # ------------------------------------------------------------------------------------------------- 139 sub connect { 140 my $self = shift; 141 #соединение уже есть 142 if ($self->is_connected) { 143 } else { 144 unless ($self->{SQL} = $self->db_connect) { 145 warn "Не могу соединиться с базой данных"; 146 die; 147 } 148 $self->{SQL}->do("SET NAMES '".$self->state->db_client_encoding."'") if ($self->state->db_client_encoding); 149 } 150 151 $self->{_connect_ok} = 1; 152 return 1; 153 } 154 155 #проверка соединения с базой кеширующая состояние соединения 156 sub connect_check { 157 my $self = shift; 158 return 1 if ($self->{_connect_ok}); 159 if ($self->is_connected) { 160 $self->{_connect_ok} = 1; 161 return 1; 162 } else { 163 if ($self->connect) { 164 return 1; 165 } else { 166 #сюда по логике попадать не должно так как die вылететь должен 167 warn "Connect failed\n"; 168 return 0; 169 } 170 } 171 } 172 173 sub db_connect { 174 my $self = shift; 175 my $dbh = DBI->connect('DBI:mysql:host='.$self->{db_host}.';port='.$self->{db_port}.';mysql_enable_utf8=1') 176 || die "Contenido Error: Не могу соединиться с Sphinx базой данных\n"; 177 178 # $dbh->{'AutoCommit'} = 1; 179 # $dbh->{mysql_auto_reconnect} = 1; 180 181 return $dbh; 182 } 183 184 sub is_connected { 185 my $self = shift; 186 if ( ref $self->{SQL} and $self->{SQL}->can('ping') and $self->{SQL}->ping() ) { 187 $self->{_connect_ok} = 1; 188 return 1; 189 } else { 190 $self->{_connect_ok} = 0; 191 return 0; 192 } 193 194 # warn 'Check if MySQL DB connected: '.(ref $self && exists $self->{SQL} && ref $self->{SQL} ? 1 : 0 ) if $DEBUG; 195 # return ( ref($self) && exists $self->{SQL} && ref $self->{SQL} ); 196 } 65 197 1; -
utf8/plugins/sphinx/lib/sphinx/State.pm.proto
12 12 bless $self, $class; 13 13 14 14 # configured 15 $self->{debug} = (lc('') eq 'yes'); 16 $self->{project} = ''; 17 $self->{contenido_notab} = 0; 15 $self->{debug} = (lc('@DEBUG@') eq 'yes'); 16 $self->{project} = '@PROJECT@'; 17 $self->{contenido_notab} = 1; 18 18 $self->{tab_name} = 'sphinx'; 19 19 20 20 # зашитая конфигурация плагина 21 $self->{db_type} = 'none'; ### For REAL database use 'remote' 22 $self->{db_keepalive} = 0; 23 $self->{db_host} = ''; 21 $self->{db_type} = 'remote'; ### For REAL database use 'remote' 22 $self->{db_keepalive} = 0; 23 $self->{db_host} = '@SPHINX_HOST@'; 24 24 $self->{db_name} = ''; 25 25 $self->{db_user} = ''; 26 $self->{db_password} = ''; 27 $self->{db_port} = ''; 26 $self->{db_password} = ''; 27 $self->{db_port} = '@SPHINX_PORT@'; 28 28 $self->{store_method} = 'toast'; 29 29 $self->{cascade} = 1; 30 30 $self->{db_prepare} = 0; 31 31 32 $self->{memcached_enable} = lc( '' ) eq 'yes' ? 1 : 0; 32 $self->{memcached_enable} = lc( '@MEMCACHED_ENABLE@' ) eq 'yes' ? 1 : 0; 33 33 $self->{memcached_enable_compress} = 1; 34 $self->{memcached_backend} = ''; 35 $self->{memcached_servers} = [qw()]; 34 $self->{memcached_backend} = '@MEMCACHED_BACKEND@'; 35 $self->{memcached_servers} = [qw(@MEMCACHED_SERVERS@)]; 36 36 $self->{memcached_busy_lock} = 60; 37 $self->{memcached_delayed} = lc('') eq 'yes' ? 1 : 0; 37 $self->{memcached_delayed} = lc('@MEMCACHED_DELAYED@') eq 'yes' ? 1 : 0; 38 38 39 39 $self->{serialize_with} = 'json'; ### or 'dumper' 40 40 … … 44 44 $self->{images_directory} = '/nonexistent'; 45 45 $self->{preview} = '0'; 46 46 47 $self->{table_name} = '@SPHINX_TABLE@'; 48 $self->{table_name_stemmed} = '@SPHINX_TABLE_STEMMED@'; 49 47 50 $self->_init_(); 48 51 $self; 49 52 } … … 90 93 data_directory 91 94 images_directory 92 95 preview 96 97 table_name 98 table_name_stemmed 93 99 ); 94 100 } 95 101