索引源代码:
package lucene;
/**
 * Title:
 *
 * Description:
 *
 * Copyright: Copyright (c) 2003
 *
 * Company:
 *
 * @author Shirley
 * @version 1.0
 */
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.*;
import java.io.*;
import org.apache.lucene.document.*;
public class IndexFiles {
//使用方法:: IndexFiles [索引输出目录] [索引的文件列表] ...
public static void main(String[] arg) throws Exception {
String[] args = new String[2];
//索引后存放索引信息的路径
args[0] = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "index-1";
//待索引文件
args[1] = "E:\\AppWork\\lucene\\rfc2047.txt";
args[2] = "E:\\AppWork\\cyberoffice\\CO\\Sheldon Java Mail.htm";
args[3] = "E:\\AppWork\\lucene\\englishtest.doc";
args[4] = "E:\\AppWork\\cyberoffice\\CO\\xls1.xls";
args[5] = "E:\\AppWork\\cyberoffice\\CO\\ppt1.ppt";
String indexPath = args[0];
IndexWriter writer;
//用指定的语言分析器构造一个新的写索引器(第3个参数表示是否为追加索引)
writer = new IndexWriter(indexPath, new SimpleAnalyzer(), false);
for (int i=1; i System.out.println("Indexing file " + args[i]); InputStream is = new FileInputStream(args[i]); //构造包含2个字段Field的Document对象 //一个是路径path字段,不索引,只存储 //一个是内容body字段,进行全文索引,并存储 Document doc = new Document(); doc.add(Field.UnIndexed("path", args[i])); doc.add(Field.Text("body", (Reader) new InputStreamReader(is))); //将文档写入索引 writer.addDocument(doc); is.close(); }; //关闭写索引器 writer.close(); } } 搜索源代码: package lucene; /** * Title:
 *
 * Description:
 *
 * Copyright: Copyright (c) 2003
 *
 * Company:
 *
 * @author Shirley
 * @version 1.0
 */
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.analysis.*;
public class Search {
public static void main(String[] arg) throws Exception {
String[] args = new String[2];
//索引后存放索引信息的路径
args[0] = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "index-1";
//搜索關鍵字
args[1] = "sending";
String indexPath = args[0];
String queryString = args[1];
//指向索引目录的搜索器
Searcher searcher = new IndexSearcher(indexPath);
//查询解析器:使用和索引同样的语言分析器
Query query = QueryParser.parse(queryString, "body", new SimpleAnalyzer());
//搜索结果使用Hits存储
Hits hits = searcher.search(query);
//通过hits可以访问到相应字段的数据和查询的匹配度
for (int i=0; i System.out.println(hits.doc(i).get("path") + "; Score: " + hits.score(i)); }; } } 注:目前程序只支持英文索引,可以过滤文件类型为.txt .doc .htm .xls .ppt 中文索引及其它类型文件的索引正在研究中......

