Brew: Installing Sphinx search with mysql support on Mac OS X

I assume that you already have Homebrew installed on your system.

Install MySQL

brew install mysql
  • start mysql server
mysql.server start
  • stop mysql server
mysql.server stop

Install Sphinx search with mysql support

By default, Homebrew builds Sphinx without MySQL support. To enable it, install Sphinx with the following command:

brew install sphinx --mysql
  •  start sphinx
searchd -c /usr/local/etc/sphinx.conf
  • stop sphinx
searchd -c /usr/local/etc/sphinx.conf --stop
  • manage indexes
indexer -c /usr/local/etc/sphinx.conf --rotate yourindex
indexer -c /usr/local/etc/sphinx.conf --rotate yourindex_delta
  • rotate your index in a cron
# Rebuild the main index every day at midnight.
# stderr is sent to the same place as stdout (2>&1) so both are discarded.
0 0 * * * /usr/local/bin/indexer -c /usr/local/etc/sphinx.conf --rotate yourindex >/dev/null 2>&1
# Rebuild the delta index every 2 hours.
0 */2 * * * /usr/local/bin/indexer -c /usr/local/etc/sphinx.conf --rotate yourindex_delta >/dev/null 2>&1

It rotates your main index every day at midnight and your delta every 2 hours.

When you first start searchd, your indexes don’t exist yet, so you will see an error like this:

listening on 127.0.0.1:9312
 precaching index 'yourindex'
 WARNING: index 'yourindex': preload: failed to open /usr/local/var/data/yourindex.sph: No such file or directory; NOT SERVING
 precaching index 'yourindex_delta'
 WARNING: index 'yourindex_delta': preload: failed to open /usr/local/var/data/yourindex_delta.sph: No such file or directory; NOT SERVING
 FATAL: no valid indexes to serve

Running the indexer commands shown above builds the missing index files and fixes this error.

 Sphinx search with Tiny Tiny RSS configuration example

Tiny Tiny RSS is an open source web-based news feed (RSS/Atom) reader and aggregator, designed to allow you to read news from any location, while feeling as close to a real desktop application as possible.

  • sphinx.conf
#############################################################################
## data source definition
#############################################################################

# Main data source: all Tiny Tiny RSS entries joined with their per-user
# rows (ttrss_user_entries) and their feed (ttrss_feeds).
source ttrss
{
    # --- database connection (MariaDB/MySQL) ---
    type                = mysql
    # Database host name, typically localhost.
    sql_host            = localhost
    # The ttrss database user you defined.
    sql_user            = ttrss
    # Password of that ttrss database user.
    sql_pass            = **********
    # Name of the ttrss MariaDB/MySQL database.
    sql_db              = ttrss
    # Optional; 3306 is the MariaDB/MySQL default.
    sql_port            = 3306

    # Executed before the main query: use UTF-8 on the connection.
    sql_query_pre       = SET NAMES utf8

    # Main document query. The first column (int_id AS id) is the document id.
    sql_query           = \
        SELECT int_id AS id, ref_id, UNIX_TIMESTAMP(updated) AS updated, \
            ttrss_entries.title AS title, link, content, \
            ttrss_feeds.title AS feed_title, \
            marked, published, unread, \
            author, ttrss_user_entries.owner_uid \
        FROM ttrss_entries, ttrss_user_entries, ttrss_feeds \
        WHERE ref_id = ttrss_entries.id AND feed_id = ttrss_feeds.id;

    # Columns stored as unsigned integer attributes.
    sql_attr_uint       = owner_uid
    sql_attr_uint       = ref_id

    sql_ranged_throttle = 0

    # Query that looks up a single entry by its document id ($id).
    sql_query_info      = \
        SELECT * FROM ttrss_entries, \
            ttrss_user_entries WHERE ref_id = id AND int_id=$id
}

# Delta data source: inherits every setting from "ttrss" and overrides the
# main query so that only entries updated in the last 24 hours are fetched.
source ttrss_delta : ttrss
{
    # Same columns as the parent query, restricted to the last 24 hours.
    sql_query           = \
        SELECT int_id AS id, ref_id, UNIX_TIMESTAMP(updated) AS updated, \
            ttrss_entries.title AS title, link, content, \
            ttrss_feeds.title AS feed_title, \
            marked, published, unread, \
            author, ttrss_user_entries.owner_uid \
        FROM ttrss_entries, ttrss_user_entries, ttrss_feeds \
        WHERE ref_id = ttrss_entries.id AND feed_id = ttrss_feeds.id \
            AND ttrss_entries.updated > NOW() - INTERVAL 24 HOUR;

    # Kill-list: document ids of the entries updated in the same 24-hour window.
    sql_query_killlist  = \
        SELECT int_id FROM ttrss_entries, ttrss_user_entries \
        WHERE ref_id = ttrss_entries.id AND updated > NOW() - INTERVAL 24 HOUR;
}

# Main full-text index, built from the "ttrss" source.
index ttrss
{
    source          = ttrss
    # Index file location is distribution dependent; validate it for your system.
    path            = /usr/local/var/data/ttrss
    docinfo         = extern
    mlock           = 0

    # Text processing.
    morphology      = none
    min_word_len    = 1
    charset_type    = utf-8
    html_strip      = 1

    # Prefix matching on the listed fields, for prefixes of 3+ characters.
    min_prefix_len  = 3
    prefix_fields   = title, content, feed_title, author
    enable_star     = 1
}

# Delta index: inherits the settings of "ttrss" and overrides source and path.
index ttrss_delta : ttrss
{
    source  = ttrss_delta
    # Index file location is distribution dependent; validate it for your system.
    path    = /usr/local/var/data/ttrss_delta
}


# Settings for the indexer program.
indexer
{
    # Memory limit for indexing.
    mem_limit = 32M
}

#############################################################################
## searchd settings
#############################################################################

# Settings for the searchd daemon.
searchd
{
    # Log and pid file locations (under the /usr/local prefix).
    log               = /usr/local/var/log/searchd.log
    query_log         = /usr/local/var/log/query.log
    read_timeout      = 5
    client_timeout    = 300
    max_children      = 30
    pid_file          = /usr/local/var/run/searchd.pid
    max_matches       = 1000

    # Index rotation behaviour.
    seamless_rotate   = 1
    preopen_indexes   = 1
    unlink_old        = 1

    # Resource limits.
    mva_updates_pool  = 1M
    max_packet_size   = 8M
    max_filters       = 256
    max_filter_values = 4096

    # Address searchd listens on.
    listen            = localhost
}
# --eof--

 

 
  • I guess you’ve missed `--merge` somewhere in the “manage indexes” section.

  • `NOW() - INTERVAL 24 HOUR` will fail if your cron job dies or the server is rebooted: you can lose everything in the delta index that was added to the DB after the server/cron downtime and before the next cron job started. So you need to store the latest `--merge` timestamp somewhere in your DB and use that timestamp in both the delta index query and the kill-list query.