繁体中文
设为首页
加入收藏
当前位置:JSP技术首页 >> 资料/其它 >> A simple example of Java-based full-text search: Lucene

A simple example of Java-based full-text search: Lucene

2005-04-15 08:00:00  作者:  来源:互联网  浏览次数:0  文字大小:【】【】【
简介:索引源代码: package lucene; /** * Title: * Description: * Copyright: Copyright (c) 2003 * Company: * @author Shirley * @version 1.0 */ import org.apache.lucene.index.*; import org.apache.lucen...

索引源代码:

package lucene;

/**
 * Title:
 * Description:
 * Copyright: Copyright (c) 2003
 * Company:
 * @author Shirley
 * @version 1.0
 */

import org.apache.lucene.index.*;

import org.apache.lucene.analysis.*;

import java.io.*;

import org.apache.lucene.document.*;

public class IndexFiles {

//使用方法:: IndexFiles [索引输出目录] [索引的文件列表] ...

public static void main(String[] arg) throws Exception {

String[] args = new String[2];

//索引后存放索引信息的路径

args[0] = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "index-1";

//待索引文件

args[1] = "E:\\AppWork\\lucene\\rfc2047.txt";

args[2] = "E:\\AppWork\\cyberoffice\\CO\\Sheldon Java Mail.htm";

args[3] = "E:\\AppWork\\lucene\\englishtest.doc";

args[4] = "E:\\AppWork\\cyberoffice\\CO\\xls1.xls";

args[5] = "E:\\AppWork\\cyberoffice\\CO\\ppt1.ppt";

String indexPath = args[0];

IndexWriter writer;

//用指定的语言分析器构造一个新的写索引器(第3个参数表示是否为追加索引)

writer = new IndexWriter(indexPath, new SimpleAnalyzer(), false);

for (int i=1; i

System.out.println("Indexing file " + args[i]);

InputStream is = new FileInputStream(args[i]);

//构造包含2个字段Field的Document对象

//一个是路径path字段,不索引,只存储

//一个是内容body字段,进行全文索引,并存储

Document doc = new Document();

doc.add(Field.UnIndexed("path", args[i]));

doc.add(Field.Text("body", (Reader) new InputStreamReader(is)));

//将文档写入索引

writer.addDocument(doc);

is.close();

};

//关闭写索引器

writer.close();

}

}

搜索源代码:

package lucene;

/**
 * Title:
 * Description:
 * Copyright: Copyright (c) 2003
 * Company:
 * @author Shirley
 * @version 1.0
 */

import org.apache.lucene.search.*;

import org.apache.lucene.queryParser.*;

import org.apache.lucene.analysis.*;

public class Search {

public static void main(String[] arg) throws Exception {

String[] args = new String[2];

//索引后存放索引信息的路径

args[0] = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "index-1";

//搜索關鍵字

args[1] = "sending";

String indexPath = args[0];

String queryString = args[1];

//指向索引目录的搜索器

Searcher searcher = new IndexSearcher(indexPath);

//查询解析器:使用和索引同样的语言分析器

Query query = QueryParser.parse(queryString, "body", new SimpleAnalyzer());

//搜索结果使用Hits存储

Hits hits = searcher.search(query);

//通过hits可以访问到相应字段的数据和查询的匹配度

for (int i=0; i

System.out.println(hits.doc(i).get("path") + "; Score: " + hits.score(i));

};

}

}

注:目前程序只支持英文索引,可以过滤文件类型为.txt .doc .htm .xls .ppt

中文索引及其它类型文件的索引正在研究中......

责任编辑:admin
相关文章