### 1. Index creation workflow

![](https://box.kancloud.cn/d2ed55648edec1acfd08107161dda914_554x289.png)

### 2. Adding the dependencies

Using Lucene requires the following dependencies:

~~~
lucene-core                Lucene core library
lucene-queryparser         query parser
lucene-analyzers-common    default analyzers
ikanalyzer                 IK analyzer (Chinese word segmentation)
lucene-highlighter         hit highlighting
~~~

In a Maven project they are declared as follows:

~~~
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.igeekhome</groupId>
    <artifactId>lucene</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- JUnit unit testing -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- Lucene core library -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Lucene query parser -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Lucene default analyzers -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Lucene highlighting -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- IK analyzer -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
    </dependencies>
</project>
~~~

~~~
// Demo01: creating, querying, updating and deleting an index
package com.igeekhome.lucene;

import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;

public class Demo01 {

    /* Create the index */
    @Test
    public void createIndex() throws IOException {
        // 1. Create a document object and add fields to it
        //    StringField is not analyzed; TextField is analyzed
        //    Parameters: name = field name, value = field value, Store = whether to store the value
        //Document document = new Document();
        //document.add(new IntField("did", 2, Field.Store.YES));
        //document.add(new TextField("dtitle", "id为2的string类型新的字段", Field.Store.YES));
        /*Document document02 = new Document();
        document02.add(new StringField("did", "2", Field.Store.YES));
        document02.add(new TextField("dtitle", "极客营,专业IT培训100年啊!", Field.Store.YES));*/

        // 2. Create the index writer
        // 2.1 Directory of the index (an absolute path such as D:\indexDir, or a relative path)
        Directory d = FSDirectory.open(new File("indexDir"));
        // 2.2 Index writer configuration; IKAnalyzer is a Chinese analyzer
        //     Parameters: 1. Lucene version  2. analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
        IndexWriter indexWriter = new IndexWriter(d, config);

        // 3. Write documents into the index
        for (int i = 20; i < 25; i++) {
            Document document = new Document();
            document.add(new IntField("did", i, Field.Store.YES));
            document.add(new TextField("dtitle", "id为" + i + "的新的内容", Field.Store.YES));
            indexWriter.addDocument(document);
        }
        //indexWriter.addDocument(document);
        //indexWriter.addDocument(document02);

        // 4. Commit
        indexWriter.commit();
        // 5. Close resources
        indexWriter.close();
    }
    /**
     * Query the index
     * 1. Basic query via the query parser
     */
    @Test
    public void searchIndex() throws IOException, ParseException {
        // Create the index directory object
        Directory d = FSDirectory.open(new File("indexDir"));
        // Create the index reader
        IndexReader reader = DirectoryReader.open(d);
        // Create the index searcher
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        // Create the query parser; parameters: 1. default search field  2. analyzer
        QueryParser parser = new QueryParser("dtitle", new IKAnalyzer());
        // Parse the query string
        Query query = parser.parse("绮");
        // Execute the query; parameters: 1. query  2. maximum number of hits
        TopDocs topDocs = indexSearcher.search(query, 10);
        System.out.println("Total hits: " + topDocs.totalHits);
        // Array of scored documents
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int docId = scoreDoc.doc; // internal Lucene document id
            // Fetch the document by its id
            Document document = reader.document(docId);
            // Read stored field values by field name
            System.out.println("ID:" + document.get("did") + ";Title:" + document.get("dtitle"));
        }
    }

    /**
     * 2. Term query (a term is a single, indivisible token produced by analysis)
     */
    @Test
    public void searchIndex_Term() throws IOException, ParseException {
        Directory d = FSDirectory.open(new File("indexDir"));
        IndexReader reader = DirectoryReader.open(d);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        // Build a term query; parameters: 1. field  2. term text
        Query query = new TermQuery(new Term("dtitle", "张雨绮"));
        TopDocs topDocs = indexSearcher.search(query, 10);
        System.out.println("Total hits: " + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int docId = scoreDoc.doc;
            Document document = reader.document(docId);
            System.out.println("ID:" + document.get("did") + ";Title:" + document.get("dtitle"));
        }
    }

    /**
     * Helper that executes a query and prints the hits
     */
    public void searchIndex_Basic(Query query) throws IOException, ParseException {
        Directory d = FSDirectory.open(new File("indexDir"));
        IndexReader reader = DirectoryReader.open(d);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        TopDocs topDocs = indexSearcher.search(query, 20);
        System.out.println("Total hits: " + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int docId = scoreDoc.doc;
            Document document = reader.document(docId);
            System.out.println("ID:" + document.get("did") + ";Title:" + document.get("dtitle"));
        }
    }

    /**
     * 3. Wildcard query
     *    * matches any number of characters
     *    ? matches a single character
     */
    @Test
    public void searchIndex_WildCard() throws IOException, ParseException {
        // Build the wildcard query
        Query query = new WildcardQuery(new Term("dtitle", "??绮"));
        searchIndex_Basic(query);
    }

    /**
     * 4. Fuzzy query
     *    Edit distance: the number of character edits needed to turn the wrong string into the right one.
     *    The default maximum edit distance is 2; it can also be set explicitly.
     */
    @Test
    public void searchIndex_Fuzzy() throws IOException, ParseException {
        // Build the fuzzy query with a maximum edit distance of 1
        Query query = new FuzzyQuery(new Term("dtitle", "长江器号"), 1);
        searchIndex_Basic(query);
    }

    /**
     * 5. Numeric range query
     */
    @Test
    public void searchIndex_NumberRange() throws IOException, ParseException {
        // Query documents whose id is between 2 and 4
        // Parameters: 1. field  2. min  3. max  4. include min  5. include max
        Query query = NumericRangeQuery.newIntRange("did", 2, 4, true, true);
        searchIndex_Basic(query);
    }

    /**
     * 6. Boolean (compound) query
     *    It has no condition of its own; it combines other queries.
     */
    @Test
    public void searchIndex_Boolean() throws IOException, ParseException {
        Query query01 = NumericRangeQuery.newIntRange("did", 2, 10, true, true);
        Query query02 = NumericRangeQuery.newIntRange("did", 5, 16, true, true);
        BooleanQuery booleanQuery = new BooleanQuery();
        // add() parameters: 1. query  2. clause type
        //   Occur.MUST     - must match (intersection)
        //   Occur.SHOULD   - may match (union)
        //   Occur.MUST_NOT - must not match (difference)
        /*booleanQuery.add(query01, BooleanClause.Occur.MUST);
        booleanQuery.add(query02, BooleanClause.Occur.MUST);    // intersection*/
        /*booleanQuery.add(query01, BooleanClause.Occur.SHOULD);
        booleanQuery.add(query02, BooleanClause.Occur.SHOULD);  // union*/
        booleanQuery.add(query01, BooleanClause.Occur.MUST);
        booleanQuery.add(query02, BooleanClause.Occur.MUST_NOT); // difference
        searchIndex_Basic(booleanQuery);
    }

    /*
     * Update the index
     */
    @Test
    public void updateIndex() throws IOException {
        // The replacement document
        Document document = new Document();
        document.add(new IntField("did", 1, Field.Store.YES));
        document.add(new TextField("dtitle", "这是id为1的新的字段", Field.Store.YES));

        Directory d = FSDirectory.open(new File("indexDir"));
        // Index writer configuration; parameters: 1. Lucene version  2. analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
        IndexWriter indexWriter = new IndexWriter(d, config);
        // Parameters: 1. term identifying the documents to replace  2. the new document
        // Note: "did" is an IntField whose indexed terms are numerically encoded, so a plain
        // Term("did", "1") may not match it; see deleteIndex() for deleting by a numeric range instead.
        indexWriter.updateDocument(new Term("did", "1"), document);
        indexWriter.commit();
        indexWriter.close();
    }

    /*
     * Delete from the index
     */
    @Test
    public void deleteIndex() throws IOException {
        Directory d = FSDirectory.open(new File("indexDir"));
        // Index writer configuration; parameters: 1. Lucene version  2. analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
        IndexWriter indexWriter = new IndexWriter(d, config);
        // Delete by term: documents whose dtitle contains the term "新"
        //indexWriter.deleteDocuments(new Term("dtitle", "新"));
        // Delete the document whose id is 2
        //indexWriter.deleteDocuments(new Term("did", "2"));
        indexWriter.deleteDocuments(NumericRangeQuery.newIntRange("did", 2, 2, true, true));
        indexWriter.commit();
        indexWriter.close();
    }
}
~~~

~~~
// Demo02: highlighting, sorting, paging and boosting
package com.igeekhome.lucene;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;

public class Demo02 {

    /* Highlighting */
    @Test
    public void highlighter() throws IOException, ParseException, InvalidTokenOffsetsException {
        // Create the index directory object
        Directory d = FSDirectory.open(new File("indexDir"));
        // Create the index reader
        IndexReader reader = DirectoryReader.open(d);
        // Create the index searcher
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        // Create the query parser; parameters: 1. default search field  2. analyzer
        QueryParser parser = new QueryParser("dtitle", new IKAnalyzer());
        // Parse the query string
        Query query = parser.parse("新");
        // Highlight formatter with the prefix/suffix wrapped around each hit
        Formatter formatter = new SimpleHTMLFormatter("<em style='color:red'>", "</em>");
        // Scorer that ranks fragments of the "dtitle" field against the query
        Scorer scorer = new QueryScorer(query, "dtitle");
        // Create the highlighter
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // Execute the query; parameters: 1. query  2. maximum number of hits
        TopDocs topDocs = indexSearcher.search(query, 10);
        System.out.println("Total hits: " + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int docId = scoreDoc.doc; // internal Lucene document id
            Document document = reader.document(docId);
            String title = document.get("dtitle");
            // Get the highlighted fragment of the field value
            String h_title = highlighter.getBestFragment(new IKAnalyzer(), "dtitle", title);
            System.out.println("ID:" + document.get("did") + ";Title:" + h_title);
        }
    }

    /* Sorting */
    @Test
    public void sort() throws IOException, ParseException, InvalidTokenOffsetsException {
        Directory d = FSDirectory.open(new File("indexDir"));
        IndexReader reader = DirectoryReader.open(d);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        // Create the query parser; parameters: 1. default search field  2. analyzer
        QueryParser parser = new QueryParser("dtitle", new IKAnalyzer());
        Query query = parser.parse("新");
        // SortField parameters: 1. field to sort by  2. field type  3. ascending by default, pass true for descending
        Sort sort = new Sort(new SortField("did", SortField.Type.INT, true));
        // Execute the query; parameters: 1. query  2. maximum number of hits  3. sort
        TopDocs topDocs = indexSearcher.search(query, 10, sort);
        System.out.println("Total hits: " + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int docId = scoreDoc.doc;
            Document document = reader.document(docId);
            String title = document.get("dtitle");
            System.out.println("ID:" + document.get("did") + ";Title:" + title);
        }
    }

    /* Paging */
    @Test
    public void page() throws IOException, ParseException, InvalidTokenOffsetsException {
        int page = 4;  // current page number
        int nums = 10; // hits per page
        int start = (page - 1) * nums;
        int end = start + nums;

        Directory d = FSDirectory.open(new File("indexDir"));
        IndexReader reader = DirectoryReader.open(d);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("dtitle", new IKAnalyzer());
        Query query = parser.parse("内容");
        Sort sort = new Sort(new SortField("did", SortField.Type.INT, true));
        // Fetch hits up to the end of the requested page, then print only this page's slice
        TopDocs topDocs = indexSearcher.search(query, end, sort);
        System.out.println("Total hits: " + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = start; i < end; i++) {
            if (i >= scoreDocs.length) {
                break;
            }
            int docId = scoreDocs[i].doc;
            Document document = reader.document(docId);
            String title = document.get("dtitle");
            System.out.println("ID:" + document.get("did") + ";Title:" + title);
        }
    }

    /* Boosting */
    @Test
    public void boost() throws IOException, ParseException, InvalidTokenOffsetsException {
        Directory d = FSDirectory.open(new File("indexDir"));
        IndexReader reader = DirectoryReader.open(d);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("dtitle", new IKAnalyzer());
        Query query = parser.parse("新");
        // Execute the query; parameters: 1. query  2. maximum number of hits
        TopDocs topDocs = indexSearcher.search(query, 10);
        System.out.println("Total hits: " + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int docId = scoreDoc.doc;
            Document document = reader.document(docId);
            String title = document.get("dtitle");
            System.out.println("ID:" + document.get("did") + ";Title:" + title);
        }
    }

    /* Create the index with a boosted field */
    @Test
    public void createIndex() throws IOException {
        // Directory of the index (an absolute path such as D:\indexDir, or a relative path)
        Directory d = FSDirectory.open(new File("indexDir"));
        // Index writer configuration; parameters: 1. Lucene version  2. analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
        IndexWriter indexWriter = new IndexWriter(d, config);

        Document document = new Document();
        document.add(new IntField("did", 100, Field.Store.YES));
        // Set a boost so this document ranks higher in the results; every field's default boost is 1
        TextField textField = new TextField("dtitle", "id为100的新的内容", Field.Store.YES);
        textField.setBoost(2); // default boost is 1
        document.add(textField);
        indexWriter.addDocument(document);

        // Commit and close resources
        indexWriter.commit();
        indexWriter.close();
    }
}
~~~
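The term, wildcard and highlighting results above all depend on exactly which tokens the IK analyzer produced at index time; for example, `new TermQuery(new Term("dtitle","张雨绮"))` only matches if "张雨绮" was emitted as a single token. Below is a minimal sketch for inspecting those tokens, assuming the same Lucene 4.10.2 and ikanalyzer 2012_u6 dependencies from the pom above; the `AnalyzerDemo` class and its `printTokens` helper are only illustrative and not part of the original demos.

~~~
// AnalyzerDemo (illustrative): print the tokens the IK analyzer produces for a piece of text
package com.igeekhome.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;

public class AnalyzerDemo {

    public static void main(String[] args) throws IOException {
        // Tokenize a sample title the way createIndex() would when it writes the "dtitle" field
        printTokens(new IKAnalyzer(), "dtitle", "id为21的新的内容");
    }

    // Run the analyzer over the text and print each token it emits
    public static void printTokens(Analyzer analyzer, String field, String text) throws IOException {
        TokenStream stream = analyzer.tokenStream(field, text);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }
}
~~~

Inspecting the tokens this way shows which terms actually exist in the index, which is the quickest way to tell why a given TermQuery or WildcardQuery does or does not match.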