lucene&solr 基础使用-蒲公英云

lucene和solr：官网下载地址http://lucene.apache.org/

一般网站用solr比较多，但是想要了解solr的用法，我们需要首先了解一下lucene的使用方法。因为solr底层使用lucene开发的。

lucene

全文检索：
将非结构化数据中的一部分信息提取出来，重新组织，使其变得有一定结构，然后对此有一定结构的数据进行搜索，从而达到搜索相对快的目的。这部分从
非结构化数据中提取出的，然后重新组织的信息，我们称之为索引。
这种先建立索引，再对索引进行搜索的过程就叫做全文检索。

lucene实现全文检索的流程：
创建索引：
获取文档：通过io操作获取磁盘文档。
创建文档：
文档对象：每一个字段（属性）就是一个域
分析文档：
将一段原始内容分析后得到语汇单元。
每一个单词是一个Term，不同域中的相同单词，不是同一个Term。Term是一个map结构。
创建索引：
在索引库中保存了两个部分。1、索引 2、文档对象

查询索引：  
    用户查询入口  
    创建查询  
    执行查询  
    渲染结果

lucene全文检索流程

watermark_type_ZmFuZ3poZW5naGVpdGk_shadow_10_text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L1ROVF9E_size_16_color_FFFFFF_t_70

要是用lucene，首先要在项目中导入常用的jar包。

watermark_type_ZmFuZ3poZW5naGVpdGk_shadow_10_text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L1ROVF9E_size_16_color_FFFFFF_t_70 1

Lucene 入门创建索引查询索引

/**
 * Lucene 入门 创建索引 查询索引
 * 
 * @author lx
 *
 */
public class FirstLucene {
    // 创建索引
    @Test
    public void testIndex() throws Exception {
        // 第一步：创建一个java工程，并导入jar包。
        // 第二步：创建一个indexwriter对象。
        Directory directory = FSDirectory.open(new File("D:\\temp\\index"));
        // Directory directory = new RAMDirectory();//保存索引到内存中 （内存索引库）
//        Analyzer analyzer = new StandardAnalyzer();// 官方推荐
        Analyzer analyzer = new IKAnalyzer();// 官方推荐
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        IndexWriter indexWriter = new IndexWriter(directory, config);
        // 1）指定索引库的存放位置Directory对象
        // 2）指定一个分析器，对文档内容进行分析。
        // 第三步：创建field对象，将field添加到document对象中。
        File f = new File("D:\\Lucene&solr\\searchsource");
        File[] listFiles = f.listFiles();
        for (File file : listFiles) {
            // 第三步：创建document对象。
            Document document = new Document();
            // 文件名称
            String file_name = file.getName();
            Field fileNameField = new TextField("fileName", file_name, Store.YES);
            // 文件大小
            long file_size = FileUtils.sizeOf(file);
            Field fileSizeField = new LongField("fileSize", file_size, Store.YES);
            // 文件路径
            String file_path = file.getPath();
            Field filePathField = new StoredField("filePath", file_path);
            // 文件内容
            String file_content = FileUtils.readFileToString(file);
            Field fileContentField = new TextField("fileContent", file_content, Store.NO);
            document.add(fileNameField);
            document.add(fileSizeField);
            document.add(filePathField);
            document.add(fileContentField);
            // 第四步：使用indexwriter对象将document对象写入索引库，此过程进行索引创建。并将索引和document对象写入索引库。
            indexWriter.addDocument(document);
        }
        // 第五步：关闭IndexWriter对象。
        indexWriter.close();
    }
    // 搜索索引
    @Test
    public void testSearch() throws Exception {
        // 第一步：创建一个Directory对象，也就是索引库存放的位置。
        Directory directory = FSDirectory.open(new File("D:\\temp\\index"));// 磁盘
        // 第二步：创建一个indexReader对象，需要指定Directory对象。
        IndexReader indexReader = DirectoryReader.open(directory);
        // 第三步：创建一个indexsearcher对象，需要指定IndexReader对象
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // 第四步：创建一个TermQuery对象，指定查询的域和查询的关键词。
        Query query = new TermQuery(new Term("fileName", "lucene"));
        // 第五步：执行查询。
        TopDocs topDocs = indexSearcher.search(query, 10);
        // 第六步：返回查询结果。遍历查询结果并输出。
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            // 文件名称
            String fileName = document.get("fileName");
            System.out.println(fileName);
            // 文件内容
            String fileContent = document.get("fileContent");
            System.out.println(fileContent);
            // 文件大小
            String fileSize = document.get("fileSize");
            System.out.println(fileSize);
            // 文件路径
            String filePath = document.get("filePath");
            System.out.println(filePath);
            System.out.println("------------");
        }
        // 第七步：关闭IndexReader对象
        indexReader.close();
    }
    // 查看标准分析器的分词效果
    @Test
    public void testTokenStream() throws Exception {
        // 创建一个标准分析器对象
//        Analyzer analyzer = new StandardAnalyzer();
//        Analyzer analyzer = new CJKAnalyzer();
//        Analyzer analyzer = new SmartChineseAnalyzer();
        Analyzer analyzer = new IKAnalyzer();
        // 获得tokenStream对象
        // 第一个参数：域名，可以随便给一个
        // 第二个参数：要分析的文本内容
//        TokenStream tokenStream = analyzer.tokenStream("test",
//                "The Spring Framework provides a comprehensive programming and configuration model.");
        TokenStream tokenStream = analyzer.tokenStream("test",
                "高富帅可以用二维表结构来逻辑表达实现的数据");
        // 添加一个引用，可以获得每个关键词
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // 添加一个偏移量的引用，记录了关键词的开始位置以及结束位置
        OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
        // 将指针调整到列表的头部
        tokenStream.reset();
        // 遍历关键词列表，通过incrementToken方法判断列表是否结束
        while (tokenStream.incrementToken()) {
            // 关键词的起始位置
            System.out.println("start->" + offsetAttribute.startOffset());
            // 取关键词
            System.out.println(charTermAttribute);
            // 结束位置
            System.out.println("end->" + offsetAttribute.endOffset());
        }
        tokenStream.close();
    }
}

lucene索引维护，其实也就是索引的增、删、改、查。

/**
 * 索引维护
 * 添加  入门程序
 * 删除
 * 修改
 * 查询  入门程序 精准查询 
 * @author lx
 *
 */
public class LuceneManager {
    //
    public IndexWriter getIndexWriter() throws Exception{
        // 第一步：创建一个java工程，并导入jar包。
        // 第二步：创建一个indexwriter对象。
        Directory directory = FSDirectory.open(new File("D:\\temp\\index"));
        // Directory directory = new RAMDirectory();//保存索引到内存中 （内存索引库）
        Analyzer analyzer = new StandardAnalyzer();// 官方推荐
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        return new IndexWriter(directory, config);
    }
    //全删除
    @Test
    public void testAllDelete() throws Exception {
        IndexWriter indexWriter = getIndexWriter();
        indexWriter.deleteAll();
        indexWriter.close();
    }
    //根据条件删除
    @Test
    public void testDelete() throws Exception {
        IndexWriter indexWriter = getIndexWriter();
        Query query = new TermQuery(new Term("fileName","apache"));
        indexWriter.deleteDocuments(query);
        indexWriter.close();
    }
    //修改
    @Test
    public void testUpdate() throws Exception {
        IndexWriter indexWriter = getIndexWriter();
        Document doc = new Document();
        doc.add(new TextField("fileN", "测试文件名",Store.YES));
        doc.add(new TextField("fileC", "测试文件内容",Store.YES));
        indexWriter.updateDocument(new Term("fileName","lucene"), doc, new IKAnalyzer());
        indexWriter.close();
    }
    //IndexReader  IndexSearcher
    public IndexSearcher getIndexSearcher() throws Exception{
        // 第一步：创建一个Directory对象，也就是索引库存放的位置。
        Directory directory = FSDirectory.open(new File("D:\\temp\\index"));// 磁盘
        // 第二步：创建一个indexReader对象，需要指定Directory对象。
        IndexReader indexReader = DirectoryReader.open(directory);
        // 第三步：创建一个indexsearcher对象，需要指定IndexReader对象
        return new IndexSearcher(indexReader);
    }
    //执行查询的结果
    public void printResult(IndexSearcher indexSearcher,Query query)throws Exception{
        // 第五步：执行查询。
        TopDocs topDocs = indexSearcher.search(query, 10);
        // 第六步：返回查询结果。遍历查询结果并输出。
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            // 文件名称
            String fileName = document.get("fileName");
            System.out.println(fileName);
            // 文件内容
            String fileContent = document.get("fileContent");
            System.out.println(fileContent);
            // 文件大小
            String fileSize = document.get("fileSize");
            System.out.println(fileSize);
            // 文件路径
            String filePath = document.get("filePath");
            System.out.println(filePath);
            System.out.println("------------");
        }
    }
    //查询所有
    @Test
    public void testMatchAllDocsQuery() throws Exception {
        IndexSearcher indexSearcher = getIndexSearcher();
        Query query = new MatchAllDocsQuery();
        System.out.println(query);
        printResult(indexSearcher, query);
        //关闭资源
        indexSearcher.getIndexReader().close();
    }
    //根据数值范围查询
    @Test
    public void testNumericRangeQuery() throws Exception {
        IndexSearcher indexSearcher = getIndexSearcher();
        Query query = NumericRangeQuery.newLongRange("fileSize", 47L, 200L, false, true);
        System.out.println(query);
        printResult(indexSearcher, query);
        //关闭资源
        indexSearcher.getIndexReader().close();
    }
    //可以组合查询条件
    @Test
    public void testBooleanQuery() throws Exception {
        IndexSearcher indexSearcher = getIndexSearcher();
        BooleanQuery booleanQuery = new BooleanQuery();
        Query query1 = new TermQuery(new Term("fileName","apache"));
        Query query2 = new TermQuery(new Term("fileName","lucene"));
        //  select * from user where id =1 or name = 'safdsa'
        booleanQuery.add(query1, Occur.MUST);
        booleanQuery.add(query2, Occur.SHOULD);
        System.out.println(booleanQuery);
        printResult(indexSearcher, booleanQuery);
        //关闭资源
        indexSearcher.getIndexReader().close();
    }
    //条件解释的对象查询
    @Test
    public void testQueryParser() throws Exception {
        IndexSearcher indexSearcher = getIndexSearcher();
        //参数1： 默认查询的域  
        //参数2：采用的分析器
        QueryParser queryParser = new QueryParser("fileName",new IKAnalyzer());
        // *:*   域：值
        Query query = queryParser.parse("fileName:lucene is apache OR fileContent:lucene is apache");
        printResult(indexSearcher, query);
        //关闭资源
        indexSearcher.getIndexReader().close();
    }
    //条件解析的对象查询   多个默念域
    @Test
    public void testMultiFieldQueryParser() throws Exception {
        IndexSearcher indexSearcher = getIndexSearcher();
        String[] fields = {"fileName","fileContent"};
        //参数1： 默认查询的域  
        //参数2：采用的分析器
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields,new IKAnalyzer());
        // *:*   域：值
        Query query = queryParser.parse("lucene is apache");
        printResult(indexSearcher, query);
        //关闭资源
        indexSearcher.getIndexReader().close();
    }
}

以上就是lucene的基本使用方法。

solr

solr实现全文检索的流程：

配置solr服务器环境。  
配置中文分析器：  
    Schema.xml：  
        FieldType：域类型定义  
        Field：域定义  
        uniqueKey：组件  
        copyField：复制域，用于当查询一个条件的时候，查询多个域。（只发起一次请求）  
        dynamicField：动态域，用于添加一个可以动态改天域名的域，这样就可以方便我们添加域名。  
    安装ik分词器  
设置业务系统field（域）  
配置批量导入处理器

使用solr其实就是在lucene的基础上多了一些配置方法，让我们使用起来更加方便。所以，solr相应的配置起来会比较繁琐，相应的教程可以在网上搜到。

在配置好了相应的环境之后，我们可以通过自己的使用，看到solr自带的web界面的Query选项中的一些参数。熟悉一下都有什么用。

话不多说，solr的使用直接上代码吧。

public class JdDaoImpl implements JdDao {
    // 索引库
    @Autowired
    private SolrServer solrServer;
    // 单机版的SolrServer
    // String baseURL = "127.0.0.1:8080/solr/使用的core一般默认为core1"
    // SolrServer 是一个抽象类，所以不能直接new它，需要知道子类的实现
    // SolrServer solrServer = new HttpSolrServer（baseURL）
     通过上面四个条件查询对象商品结果集
    public List<ProductModel> selectProductModelListByQuery(String queryString, String catalog_name,
            String price,String sort) throws Exception {
        // 查询 关键词  过滤条件
        // 价格排序 分页 开始行 每页数 高亮 默认域 只查询指定域
        SolrQuery solrQuery = new SolrQuery();
        // 关键词
        solrQuery.setQuery(queryString);
        // 过滤条件 
        if(null != catalog_name && !"".equals(catalog_name)){
            solrQuery.set("fq", "product_catalog_name:" + catalog_name);
        }
        if(null != price && !"".equals(price)){
            //0-9   50-*
            String[] p = price.split("-");
            solrQuery.set("fq", "product_price:[" + p[0] + " TO " + p[1] + "]");
        }
        // 价格排序
        if("1".equals(sort)){
            solrQuery.addSort("product_price", ORDER.desc);
        }else{
            solrQuery.addSort("product_price", ORDER.asc);
        }
        // 分页
        solrQuery.setStart(0);
        solrQuery.setRows(16);
        // 默认域
        solrQuery.set("df", "product_keywords");
        // 只查询指定域
        solrQuery.set("fl", "id,product_name,product_price,product_picture");
        // 高亮
        // 打开开关
        solrQuery.setHighlight(true);
        // 指定高亮域
        solrQuery.addHighlightField("product_name");
        // 前缀
        solrQuery.setHighlightSimplePre("<span style='color:red'>");
        solrQuery.setHighlightSimplePost("</span>");
        // 后缀
        // 执行查询
        QueryResponse response = solrServer.query(solrQuery);
        // 文档结果集
        SolrDocumentList docs = response.getResults();
        Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
        // Map K id V Map
        // Map K 域名 V List
        // List list.get(0)
        // 总条数
        long numFound = docs.getNumFound();
        List<ProductModel> productModels = new ArrayList<ProductModel>();
        for (SolrDocument doc : docs) {
            ProductModel productModel = new ProductModel();
            productModel.setPid((String) doc.get("id"));
            productModel.setPrice((Float) doc.get("product_price"));
            productModel.setPicture((String) doc.get("product_picture"));
            Map<String, List<String>> map = highlighting.get((String) doc.get("id"));
            List<String> list = map.get("product_name");
            productModel.setName(list.get(0));
            productModels.add(productModel);
        }
        return productModels;
    }
}