sphinx实践

 

安装参考地址:http://briansnelson.com/How_to_install_Sphinx_on_Centos_Server

 

yum install sphinx

如果失败的话使用下面的方式安装

wget http://sphinxsearch.com/files/sphinx-2.2.9-1.rhel6.x86_64.rpm

yum localinstall sphinx-2.2.9-1.rhel6.x86_64.rpm

 

安装完了出现下面的内容

Sphinx installed!

Now create a full-text index, start the search daemon, and you're all set.

 

To manage indexes:

    editor /etc/sphinx/sphinx.conf

 

To rebuild all disk indexes:

    sudo -u sphinx indexer --all --rotate

 

To start/stop search daemon:

    service searchd start/stop

 

To query search daemon using MySQL client:

    mysql -h 0 -P 9306

    mysql> SELECT * FROM test1 WHERE MATCH('test');

 

See the manual at /usr/share/doc/sphinx-2.0.9 for details.

 

For commercial support please contact Sphinx Technologies Inc at

http://sphinxsearch.com/contacts.html

 

  Verifying  : postgresql-libs-8.4.20-2.el6_6.x86_64                                                                                                                             1/2 

  Verifying  : sphinx-2.0.9-1.rhel6.x86_64                                                                                                                                       2/2 

 

Installed:

  sphinx.x86_64 0:2.0.9-1.rhel6                                                                                                                                                      

 

Dependency Installed:

  postgresql-libs.x86_64 0:8.4.20-2.el6_6                                                                                                                                            

 

Complete!

 

安装完了之后, 现在要创建一个全文索引, 并且开启搜索的守护进程, 然后就一切就绪了

 

启动服务

service searchd start

or

/etc/init.d/searchd start

 

设置开机自启动

chkconfig searchd on

chkconfig --list searchd

 

配置实例, 先新建一个mysql的数据表

blog.sphinx_article

 

CREATE TABLE `sphinx_article` (

`id` int(11) UNSIGNED NOT NULL AUTO_INCREMENT,

`title` varchar(255) NULL,

`cat_id` tinyint(3) UNSIGNED NULL,

`member_id` int(11) UNSIGNED NULL,

`content` longtext NULL,

`created` int(11) UNSIGNED NULL,

PRIMARY KEY (`id`)

) ENGINE=InnoDB;

 

配置文件

vim /etc/sphinx/sphinx.conf

~~~

#

# Minimal Sphinx configuration sample (clean, simple, functional)

#

 

##索引源##

source article_src

{

        type                    = mysql ##数据源类型

 

        sql_host                = rdsnnamnbnnamnbprivate.mysql.rds.aliyuncs.com ##mysql主机

        sql_user                = maxwelldu ##mysql 用户名

        sql_pass                = yu13jiu14 ##mysql密码

        sql_db                  = blog  ##mysql数据库名

        sql_port                = 3306  ##mysql端口 

        sql_query_pre           = SET NAMES UTF8 ##mysql检索编码

 

        sql_query               = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article ##获取数据的sql

 

        ##过滤或条件查询的属性##

        sql_attr_uint           = cat_id

        sql_attr_uint           = member_id

        sql_attr_timestamp      = created

 

        sql_query_info          = SELECT * FROM sphinx_article WHERE id=$id

}

 

##索引

index article

{

        source                  = article_src ##声明索引源

        path                    = /var/lib/sphinx/article ##索引文件存放路径及索引的文件名

        docinfo                 = extern ##文档信息存储方式

        mlock                   = 0 ##缓存数据内存锁定

        morphology              = none ##形态学(对中文无效)

        min_word_len            = 1 ##索引的词最小长度

        charset_type            = utf-8 ##数据编码

        min_prefix_len          = 0

        min_infix_len           = 1

        ngram_len               = 1

}

 

 

index testrt

{

        type                    = rt

        rt_mem_limit            = 32M

 

        path                    = /var/lib/sphinx/testrt

        charset_type            = utf-8

 

        rt_field                = title

        rt_field                = content

        rt_attr_uint            = gid

}

 

 

indexer

{

        mem_limit               = 128M

}

 

searchd

{

        listen                  = 9312

        listen                  = 9306:mysql41

        log                     = /var/log/sphinx/searchd.log

        query_log               = /var/log/sphinx/query.log

        read_timeout            = 5

        max_children            = 30

        pid_file                = /var/run/sphinx/searchd.pid

        max_matches             = 1000

        seamless_rotate         = 1

        preopen_indexes         = 1

        unlink_old              = 1

        workers                 = threads # for RT to work

        binlog_path             = /var/lib/sphinx/

}

     

~~~

 

建立索引文件

indexer -c /etc/sphinx/sphinx.conf article

CLI上测试

search -c /etc/sphinx/sphinx.conf asdf

php测试

<?php

$keyword = $_GET['keyword'];

$s = new SphinxClient;

$s->setServer("localhost", 9312);

$s->setMatchMode(SPH_MATCH_EXTENDED);

$s->setMaxQueryTime(3);

 

$result = $s->query($keyword, "article");

 

echo '<pre>';

print_r($result);

echo '</pre>';

?>

 

访问测试地址: http://123.56.135.230/sphinx.php?keyword=dfsa

 

如果还没有安装sphinx client php扩展的话先安装扩展

 

参考地址: http://linux008.blog.51cto.com/2837805/622171

安装sphinx php扩展

cd /usr/local/src/

wget http://sphinxsearch.com/files/sphinx-2.2.9-release.tar.gz

tar zxf sphinx-2.2.9-release.tar.gz

cd sphinx-2.2.9-release/api/libsphinxclient/

 

 

安装sphinx扩展

cd /usr/local/src

wget http://sphinxsearch.com/files/sphinx-2.2.9-release.tar.gz

tar zxf sphinx-2.2.9-release.tar.gz 

cd sphinx-2.2.9-release/api/libsphinxclient/

vim sphinxclient.c

./configure --prefix=/usr/local/sphinxclient

make

make install

cd /tmp/

wget http://pecl.php.net/get/sphinx-1.0.4.tgz

tar zxf sphinx-1.0.4.tgz 

cd sphinx-1.0.4

/alidata/server/php/bin/phpize 

./configure --with-php-config=/alidata/server/php/bin/php-config --with-sphinx=/usr/local/sphinxclient

make && make install

 

报错信息:

/tmp/sphinx-1.0.4/sphinx.c: In function ‘php_sphinx_client_read_property’:

/tmp/sphinx-1.0.4/sphinx.c:105: error: too few arguments to function ‘std_hnd->read_property’

/tmp/sphinx-1.0.4/sphinx.c: In function ‘zim_SphinxClient_setRankingMode’:

/tmp/sphinx-1.0.4/sphinx.c:767: error: too few arguments to function ‘sphinx_set_ranking_mode’

/tmp/sphinx-1.0.4/sphinx.c: In function ‘zm_startup_sphinx’:

/tmp/sphinx-1.0.4/sphinx.c:1786: warning: assignment from incompatible pointer type

make: *** [sphinx.lo] Error 1

 

报错解决方案参考地址: http://www.cnblogs.com/yun007/p/3802363.html 

retval = std_hnd->read_property(object, member, type TSRMLS_CC);

将这个函数最后添加一个参数NULL

retval = std_hnd->read_property(object, member, type TSRMLS_CC, NULL);

 

res = sphinx_set_ranking_mode(c->sphinx, (int)ranker);

最后加一个参数NULL

res = sphinx_set_ranking_mode(c->sphinx, (int)ranker, NULL);

 

make && make install

 

扩展安装好了之后修改php.ini, 然后重启php-fpm即可

vim /alidata/server/php/etc/php.ini

最后添加一行

extension=sphinx.so

 

重启

/etc/init.d/php-fpm restart

 

测试

 http://123.56.135.230/sphinx.php?keyword=dfsa

 

到现在为止还不能搜索中文, 参考地址: http://www.cnblogs.com/gaoxu387/archive/2012/12/05/2803183.html

cd /tmp

wget http://www.coreseek.cn/uploads/csft/3.2/coreseek-3.2.14.tar.gz

tar zxf coreseek-3.2.14.tar.gz

cd coreseek-3.2.14

 

安装mmseg中文分词

cd mmseg-3.2.14

./bootstrap    #输出的warning信息可以忽略,如果出现error则需要解决

./configure --prefix=/usr/local/mmseg3

make && make install

cd ..

##安装coreseek

cd csft-3.2.14

sh buildconf.sh    #输出的warning信息可以忽略,如果出现error则需要解决

./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql    ##如果提示mysql问题,可以查看MySQL数据源安装说明

make  ## 报错, 错误参考地址: http://blog.csdn.net/carzyer/article/details/6566351

vim src/Makefile

LIBS = -lodbc -lm -lz -lexpat -L/usr/local/lib -lrt  -lpthread

修改为

LIBS = -lodbc -lm -lz -lexpat -liconv  -L/usr/local/lib -lrt  -lpthread

 

make install

cd ..

 

配置mysql数据源

cd /usr/local/coreseek

cp /etc/sphinx/sphinx.conf etc/csft.conf

vim etc/csft.conf

~~~

#

# Minimal Sphinx configuration sample (clean, simple, functional)

#

 

##索引源##

source article_src

{

        type                    = mysql ##数据源类型

 

        sql_host                = rdsnnamnbnnamnbprivate.mysql.rds.aliyuncs.com ##mysql主机

        sql_user                = maxwelldu ##mysql 用户名

        sql_pass                = yu13jiu14 ##mysql密码

        sql_db                  = blog  ##mysql数据库名

        sql_port                = 3306  ##mysql端口 

        sql_query_pre           = SET NAMES UTF8 ##mysql检索编码

 

        sql_query               = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article ##获取数据的sql

 

        ##过滤或条件查询的属性##

        sql_attr_uint           = cat_id

        sql_attr_uint           = member_id

        sql_attr_timestamp      = created

 

        sql_query_info          = SELECT * FROM sphinx_article WHERE id=$id

        sql_query_info_pre      = SET NAMES utf8

}

 

##索引

index article

{

        source                  = article_src ##声明索引源

        path                    = /usr/local/coreseek/var/data/article ##索引文件存放路径及索引的文件名

        docinfo                 = extern ##文档信息存储方式

        mlock                   = 0 ##缓存数据内存锁定

        morphology              = none ##形态学(对中文无效)

        min_word_len            = 1 ##索引的词最小长度

        charset_type            = zh_cn.utf-8 ##数据编码

        charset_dictpath        = /usr/local/mmseg3/etc/

}

 

indexer

{

        mem_limit               = 128M

}

 

 

searchd

{

        listen                  = 9312

        read_timeout            = 5

        max_children            = 30

        max_matches             = 1000

        seamless_rotate         = 0

        preopen_indexes         = 0

        unlink_old              = 1

 

        pid_file                = /usr/local/coreseek/var/log/searchd_article.pid

        log                     = /usr/local/coreseek/var/log/searchd_article.log

        query_log               = /usr/local/coreseek/var/log/query_article.log

}

~~~

 

创建索引和测试全文检索

测试indexer索引

/usr/local/coreseek/bin/indexer -c etc/csft.conf 

 

测试sphinx索引文件

/usr/local/coreseek/bin/indexer -c etc/csft.conf --all

 

使用search程序测试全文检索(注意是search不是searchd)

/usr/local/coreseek/bin/search -c etc/csft.conf -a asdf北京

 

停止sphinxsearchd服务, 开启coreseeksearchd服务

service searchd stop

 

关闭sphinx的自启动服务

chkconfig searchd off

 

开启coreseek的自启动服务

echo "/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf" >> /etc/rc.d/rc.local

 

使用PHP API调用coreseek

复制api/sphinxapi.php到项目中, 然后写测试程序, (需要先将sphinxphp扩展关闭, 修改php.ini, sphinx.so注释, 然后重启php-fpm)

 

 

现在实现了中文检索, 但是有新的数据还不能够检索到, 下面就开始增量索引实践, 参考地址: http://blog.csdn.net/kongdeqian1988/article/details/38303343

 

先建立一张增量索引记录表pre_coreseek(id,maxid)

CREATE TABLE `pre_coreseek` (

  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,

  `maxid` int(11) unsigned NOT NULL,

  PRIMARY KEY (`id`)

) ENGINE=InnoDB DEFAULT CHARSET=utf8;

 

修改配置

vim /usr/local/coreseek/etc/csft.conf

~~~

#

# Minimal Sphinx configuration sample (clean, simple, functional)

#

 

##索引源##

source article_src

{

        type                    = mysql ##数据源类型

 

        sql_host                = rdsnnamnbnnamnbprivate.mysql.rds.aliyuncs.com ##mysql主机

        sql_user                = maxwelldu ##mysql 用户名

        sql_pass                = yu13jiu14 ##mysql密码

        sql_db                  = blog  ##mysql数据库名

        sql_port                = 3306  ##mysql端口 

        sql_query_pre           = SET NAMES UTF8 ##mysql检索编码

        sql_query_pre           = REPLACE INTO pre_coreseek SELECT 1,MAX(id) from sphinx_article

 

        sql_query               = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article WHERE id <= (SELECT maxid FROM pre_coreseek WHERE id=1) ##获取数据的sql

 

        ##过滤或条件查询的属性##

        sql_attr_uint           = cat_id

        sql_attr_uint           = member_id

        sql_attr_timestamp      = created

 

        sql_query_info          = SELECT * FROM sphinx_article WHERE id=$id

        sql_query_info_pre      = SET NAMES utf8

}

 

source increment : article_src

{

        sql_query_pre           = SET NAMES utf8

        sql_query               = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article WHERE id > (SELECT maxid FROM pre_coreseek WHERE id=1) ##获取数据的sql

}

 

##索引

index article

{

        source                  = article_src ##声明索引源

        path                    = /usr/local/coreseek/var/data/article ##索引文件存放路径及索引的文件名

        docinfo                 = extern ##文档信息存储方式

        mlock                   = 0 ##缓存数据内存锁定

        morphology              = none ##形态学(对中文无效)

        min_word_len            = 1 ##索引的词最小长度

        charset_type            = zh_cn.utf-8 ##数据编码

        charset_dictpath        = /usr/local/mmseg3/etc/

}

 

index increment : article

{

        source                  = increment

        path                    = /usr/local/coreseek/var/data/increment

}

 

indexer

{

        mem_limit               = 128M

}

indexer

{

        mem_limit               = 128M

}

 

 

searchd

{

        listen                  = 9312

        read_timeout            = 5

        max_children            = 30

        max_matches             = 1000

        seamless_rotate         = 0

        preopen_indexes         = 0

        unlink_old              = 1

 

        pid_file                = /usr/local/coreseek/var/log/searchd_article.pid

        log                     = /usr/local/coreseek/var/log/searchd_article.log

        query_log               = /usr/local/coreseek/var/log/query_article.log

}

~~~

 

停止searchd服务

/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf --stop

 

第一次启动建立全索引

/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf  --all

 

启动searchd 后台模式,启动前一定先建立好全索引,不然启动失败或增量索引部分会索引不到

 

/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf   ###启动

 

添加一些数据 , 尝试检索, 发现索引不到

 

增量索引

/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf increment --rotate

 

合并索引

/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --merge article increment --merge-dst-range deleted 0 0 --rotate

 

到现在为止可以检索到了

 

可以写个定时cron每隔30分钟作一次增量索引

 

可以写个定时cron每隔1天作一次全索引以确保数据的一致性

 

做实验的时候每隔一分钟做一次增量索引

*/1 * * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf increment --rotate

*/1 * * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --merge article increment --merge-dst-range deleted 0 0 --rotate

 

每天做一次全索引, 确保数据的一致性

 

0 0 * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf  --all --rotate

你可能感兴趣的:(sphinx)