查询
代码语言:csharp
public class LuceneQuery : ILuceneQuery
{
#region Identity
private Logger logger = new Logger(typeof(LuceneQuery));
#endregion Identity
#region QueryIndex
/// <summary>
/// Searches the "title" field of the index located at StaticConstant.IndexPath
/// and converts every returned hit into a Commodity.
/// </summary>
/// <param name="queryString">Query expression handed to the Lucene QueryParser.</param>
/// <returns>One Commodity per hit, in the order the searcher returned them (the search asks for at most 10000 hits).</returns>
public List<Commodity> QueryIndex(string queryString)
{
    Directory indexDirectory = FSDirectory.Open(StaticConstant.IndexPath);
    using (IndexSearcher indexSearcher = new IndexSearcher(indexDirectory))
    {
        // -------------------------------------- search criteria are configured here
        QueryParser titleParser = new QueryParser(Version.LUCENE_30, "title", new PanGuAnalyzer());
        Query parsedQuery = titleParser.Parse(queryString);
        Console.WriteLine(parsedQuery.ToString()); // show the parsed search expression

        TopDocs hits = indexSearcher.Search(parsedQuery, (Filter)null, 10000);

        // Materialise the list inside the using block: stored documents are
        // loaded through the searcher, which is disposed on exit.
        return hits.ScoreDocs
            .Select(hit => DocumentToCommodityInfo(indexSearcher.Doc(hit.Doc)))
            .ToList();
    }
}
/// <summary>
/// Runs a paged search over the "title" field, optionally restricted to a
/// price range and optionally sorted by price.
/// </summary>
/// <param name="queryString">Query expression handed to the Lucene QueryParser.</param>
/// <param name="pageIndex">1-based page number; values below 1 are treated as 1.</param>
/// <param name="pageSize">Number of hits per page.</param>
/// <param name="totalCount">Receives TopDocs.TotalHits for the query; it is 0 if the method fails before the search completes.</param>
/// <param name="priceFilter">
/// Optional price range such as "[10,20]" or "{10,20}". A leading '[' makes the
/// lower bound inclusive and a trailing ']' makes the upper bound inclusive.
/// Null or blank disables the filter.
/// </param>
/// <param name="priceOrderBy">
/// Optional. A value ending in "asc" (case-insensitive) sorts by price ascending,
/// any other non-blank value sorts by price descending. Null or blank leaves the
/// default Sort in place.
/// </param>
/// <returns>The commodities on the requested page; empty when the page lies beyond the retrieved hits.</returns>
/// <exception cref="ArgumentException">priceFilter does not contain two comma-separated numbers.</exception>
public List<Commodity> QueryIndexPage(string queryString, int pageIndex, int pageSize, out int totalCount, string priceFilter, string priceOrderBy)
{
    totalCount = 0;
    IndexSearcher searcher = null;
    try
    {
        List<Commodity> ciList = new List<Commodity>();
        FSDirectory dir = FSDirectory.Open(StaticConstant.IndexPath);
        searcher = new IndexSearcher(dir);
        Analyzer analyzer = new PanGuAnalyzer();
        // -------------------------------------- search criteria are configured here
        QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
        Query query = parser.Parse(queryString);

        pageIndex = Math.Max(1, pageIndex); // pages are numbered from 1
        int startIndex = (pageIndex - 1) * pageSize;
        int endIndex = pageIndex * pageSize;

        NumericRangeFilter<float> numPriceFilter = null;
        if (!string.IsNullOrWhiteSpace(priceFilter))
        {
            bool isContainStart = priceFilter.StartsWith("[", StringComparison.Ordinal);
            bool isContainEnd = priceFilter.EndsWith("]", StringComparison.Ordinal);
            string[] bounds = priceFilter.Replace("[", "").Replace("]", "").Replace("{", "").Replace("}", "").Split(',');
            float start = 0;
            float end = 0;
            // Check the element count first so a filter without a comma is reported
            // as a bad argument instead of an IndexOutOfRangeException. Parse with the
            // invariant culture: ',' is the separator here, so '.' must be the decimal
            // point regardless of the machine's regional settings.
            System.Globalization.NumberStyles numberStyle =
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            if (bounds.Length < 2
                || !float.TryParse(bounds[0], numberStyle, invariant, out start)
                || !float.TryParse(bounds[1], numberStyle, invariant, out end))
            {
                // ArgumentException derives from Exception, so existing catch blocks still match.
                throw new ArgumentException("Wrong priceFilter");
            }
            numPriceFilter = NumericRangeFilter.NewFloatRange("price", start, end, isContainStart, isContainEnd);
        }

        Sort sort = new Sort();
        if (!string.IsNullOrWhiteSpace(priceOrderBy))
        {
            // The third SortField argument means "reverse the natural (ascending) order",
            // so it must be true only when the caller did NOT ask for ascending.
            bool ascending = priceOrderBy.EndsWith("asc", StringComparison.OrdinalIgnoreCase);
            SortField sortField = new SortField("price", SortField.FLOAT, !ascending);
            sort.SetSort(sortField);
        }

        TopDocs docs = searcher.Search(query, numPriceFilter, 10000, sort);
        totalCount = docs.TotalHits;

        // TotalHits can exceed the number of hits actually retrieved (capped at 10000
        // above), so the loop is bounded by the array that is being indexed.
        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int i = startIndex; i < endIndex && i < scoreDocs.Length; i++)
        {
            Document doc = searcher.Doc(scoreDocs[i].Doc);
            ciList.Add(DocumentToCommodityInfo(doc));
        }
        return ciList;
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Dispose();
        }
    }
}
/// <summary>
/// Logs the "productid" and score of each hit in the half-open range
/// [startIndex, endIndex), clipped to the number of hits available.
/// </summary>
/// <param name="docs">Search result whose ScoreDocs are inspected.</param>
/// <param name="startIndex">Index of the first hit to log.</param>
/// <param name="endIndex">Index one past the last hit to log.</param>
/// <param name="searcher">Searcher used to load the stored document for each hit.</param>
private void PrintScores(TopDocs docs, int startIndex, int endIndex, MultiSearcher searcher)
{
    ScoreDoc[] scoreDocs = docs.ScoreDocs;
    for (int i = startIndex; i < endIndex && i < scoreDocs.Length; i++)
    {
        int docId = scoreDocs[i].Doc;
        Document doc = searcher.Doc(docId);
        logger.Info(string.Format("{0}的分值为{1}", doc.Get("productid"), scoreDocs[i].Score));
    }
}
#endregion QueryIndex
#region private
/// <summary>
/// Builds a Commodity from the stored fields of an index document.
/// </summary>
/// <param name="doc">Document whose "id", "title", "productid", "categoryid", "imageurl", "price" and "url" fields are read.</param>
/// <returns>A Commodity populated from those fields.</returns>
private Commodity DocumentToCommodityInfo(Document doc)
{
    return new Commodity()
    {
        Id = int.Parse(doc.Get("id")),
        Title = doc.Get("title"),
        ProductId = long.Parse(doc.Get("productid")),
        CategoryId = int.Parse(doc.Get("categoryid")),
        // The field is written to the index as "imageurl"; the previous key
        // "iamgeurl" never matched a stored field.
        ImageUrl = doc.Get("imageurl"),
        Price = decimal.Parse(doc.Get("price")),
        Url = doc.Get("url")
    };
}
#endregion private
}
批量/单个索引的增删改
代码语言:csharp
/// <summary>
/// Multithreading concern: write to several index directories, then merge them.
/// Latency concern: use an asynchronous queue.
/// </summary>
public class LuceneBulid : ILuceneBulid
{
#region Identity
private Logger logger = new Logger(typeof(LuceneBulid));
#endregion Identity
#region 批量BuildIndex 索引合并
/// <summary>
/// Adds every commodity in the list to the index under StaticConstant.IndexPath,
/// or under a sub-directory of it when a suffix is given.
/// </summary>
/// <param name="ciList">Commodities to index; the method does nothing when this is null or empty.</param>
/// <param name="pathSuffix">Sub-directory name appended to the root index path (for example "sa1"); blank means the root directory itself.</param>
/// <param name="isCreate">Passed to the IndexWriter as its create flag: false appends to the existing index, true replaces it.</param>
public void BuildIndex(List<Commodity> ciList, string pathSuffix = "", bool isCreate = false)
{
    if (ciList == null || ciList.Count == 0)
    {
        return;
    }

    IndexWriter writer = null;
    try
    {
        string rootIndexPath = StaticConstant.IndexPath;
        // Path.Combine replaces the former "{0}\{1}" format string, whose "\{" is not a
        // valid C# escape sequence. Leading separators are trimmed from the suffix so
        // that Combine cannot treat it as a rooted path and discard the root.
        string indexPath = string.IsNullOrWhiteSpace(pathSuffix)
            ? rootIndexPath
            : System.IO.Path.Combine(
                rootIndexPath,
                pathSuffix.TrimStart(System.IO.Path.DirectorySeparatorChar, System.IO.Path.AltDirectorySeparatorChar));
        DirectoryInfo dirInfo = Directory.CreateDirectory(indexPath);
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        writer = new IndexWriter(directory, new PanGuAnalyzer(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
        writer.SetMaxBufferedDocs(100); // documents buffered in memory before a new segment is written
        writer.MergeFactor = 100;       // controls how often segments are merged
        writer.UseCompoundFile = true;  // compound files keep the number of index files down
        foreach (Commodity commodity in ciList)
        {
            CreateCIIndex(writer, commodity);
        }
    }
    finally
    {
        if (writer != null)
        {
            // No Optimize() here: optimisation is left to the merge step.
            writer.Close();
        }
    }
}
/// <summary>
/// Recreates the index at StaticConstant.IndexPath and merges the indexes of the
/// given child directories into it, then optimises the result.
/// </summary>
/// <param name="childDirs">Names of sub-directories of the root index path to merge; the method does nothing when this is null or empty.</param>
public void MergeIndex(string[] childDirs)
{
    Console.WriteLine("MergeIndex Start");
    IndexWriter writer = null;
    try
    {
        if (childDirs == null || childDirs.Length == 0) return;
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        string rootPath = StaticConstant.IndexPath;
        DirectoryInfo dirInfo = Directory.CreateDirectory(rootPath);
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); // create = true: the existing root index is replaced
        // Path.Combine replaces the former "{0}\{1}" format string, whose "\{" is not a
        // valid C# escape sequence.
        LuceneIO.Directory[] dirNo = childDirs
            .Select(dir => LuceneIO.FSDirectory.Open(Directory.CreateDirectory(System.IO.Path.Combine(rootPath, dir))))
            .ToArray();
        writer.MergeFactor = 100;      // controls how often segments are merged
        writer.UseCompoundFile = true; // compound files keep the number of index files down
        writer.AddIndexesNoOptimize(dirNo);
        // Optimise only after a successful merge. Doing it in the finally block ran it
        // after failures too, where a second exception could hide the original one.
        writer.Optimize();
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
        Console.WriteLine("MergeIndex End");
    }
}
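// Field.Store.YES: store the original (un-analyzed) field value in the index.
// Field.Store.NO: do not store the value; storing is independent of indexing.
// Field.Store.COMPRESS: store the value compressed; meant for long text or binary data, at a performance cost.
// Field.Index.ANALYZED: run the value through the analyzer and index the resulting tokens.
// Field.Index.ANALYZED_NO_NORMS: analyze and index, but do not store norms (the one-byte normalization factor kept per field), which saves space.
// Field.Index.NOT_ANALYZED: index the whole value as a single term, without analysis.
// Field.Index.NOT_ANALYZED_NO_NORMS: index the whole value as a single term, without analysis and without norms.
// A term vector lists the terms of one field of one document together with how often each occurs in it.
// Field.TermVector.YES: store a term vector for this field in every document.
// Field.TermVector.NO: do not store term vectors.
// Field.TermVector.WITH_POSITIONS: store term vectors with token positions.
// Field.TermVector.WITH_OFFSETS: store term vectors with token offsets.
// Field.TermVector.WITH_POSITIONS_OFFSETS: store term vectors with both positions and offsets.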
#endregion 批量BuildIndex 索引合并
#region 单个/批量索引增删改
/// <summary>
/// Adds a single commodity to the index at StaticConstant.IndexPath.
/// </summary>
/// <param name="ci">Commodity to index; the method does nothing when this is null.</param>
public void InsertIndex(Commodity ci)
{
    IndexWriter writer = null;
    try
    {
        if (ci == null) return;
        string rootIndexPath = StaticConstant.IndexPath;
        DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
        bool isCreate = dirInfo.GetFiles().Length == 0; // an empty directory means a new index has to be created
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
        writer.MergeFactor = 100;      // controls how often segments are merged
        writer.UseCompoundFile = true; // compound files keep the number of index files down
        CreateCIIndex(writer, ci);
    }
    catch (Exception ex)
    {
        logger.Error("InsertIndex异常", ex);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
    }
}
/// <summary>
/// Adds a batch of commodities to the root index by delegating to BuildIndex
/// with an empty path suffix and without recreating the index.
/// </summary>
/// <param name="ciList">Commodities to index.</param>
public void InsertIndexMuti(List<Commodity> ciList)
{
    BuildIndex(ciList, pathSuffix: "", isCreate: false);
}
/// <summary>
/// Deletes from the root index every document whose "productid" term matches
/// one of the given commodities.
/// </summary>
/// <param name="ciList">Commodities whose index entries are removed; the method does nothing when this is null or empty, and null elements are skipped.</param>
public void DeleteIndexMuti(List<Commodity> ciList)
{
    IndexReader reader = null;
    try
    {
        if (ciList == null || ciList.Count == 0) return;
        string rootIndexPath = StaticConstant.IndexPath;
        DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        reader = IndexReader.Open(directory, false); // readOnly = false so that deletions are allowed
        foreach (Commodity ci in ciList)
        {
            // A null element is treated as a no-op, as DeleteIndex does for a null
            // argument, rather than aborting the batch with a NullReferenceException.
            if (ci == null) continue;
            reader.DeleteDocuments(new Term("productid", ci.ProductId.ToString()));
        }
    }
    catch (Exception ex)
    {
        logger.Error("DeleteIndexMuti异常", ex);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        if (reader != null)
        {
            // NOTE(review): pending deletions are expected to be committed when the reader is disposed - confirm.
            reader.Dispose();
        }
    }
}
/// <summary>
/// Deletes from the root index the documents whose "productid" term matches
/// the given commodity.
/// </summary>
/// <param name="ci">Commodity whose index entry is removed; the method does nothing when this is null.</param>
public void DeleteIndex(Commodity ci)
{
    IndexReader reader = null;
    try
    {
        if (ci == null) return;
        string rootIndexPath = StaticConstant.IndexPath;
        DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        reader = IndexReader.Open(directory, false); // readOnly = false so that deletions are allowed
        reader.DeleteDocuments(new Term("productid", ci.ProductId.ToString()));
    }
    catch (Exception ex)
    {
        logger.Error("DeleteIndex异常", ex);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        if (reader != null)
        {
            // NOTE(review): pending deletions are expected to be committed when the reader is disposed - confirm.
            reader.Dispose();
        }
    }
}
/////// <summary>
/////// 更新一条数据的索引
/////// </summary>
//public void UpdateIndex(Commodity ci)
//{
// DeleteIndex(ci);
// InsertIndex(ci);
//}
/// <summary>
/// Updates the index entry of a single commodity: the writer's UpdateDocument is
/// called with the commodity's "productid" term and a freshly built document.
/// </summary>
/// <param name="ci">Commodity to re-index; the method does nothing when this is null.</param>
public void UpdateIndex(Commodity ci)
{
    IndexWriter writer = null;
    try
    {
        if (ci == null) return;
        string rootIndexPath = StaticConstant.IndexPath;
        DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
        bool isCreate = dirInfo.GetFiles().Length == 0; // an empty directory means a new index has to be created
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
        writer.MergeFactor = 100;      // controls how often segments are merged
        writer.UseCompoundFile = true; // compound files keep the number of index files down
        writer.UpdateDocument(new Term("productid", ci.ProductId.ToString()), ParseCItoDoc(ci));
    }
    catch (Exception ex)
    {
        // The message previously said "InsertIndex", which pointed at the wrong method.
        logger.Error("UpdateIndex异常", ex);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
    }
}
/// <summary>
/// Updates the index entries of a batch of commodities with one writer: for each
/// commodity, UpdateDocument is called with its "productid" term and a freshly
/// built document.
/// </summary>
/// <param name="ciList">Commodities to re-index; the method does nothing when this is null or empty, and null elements are skipped.</param>
public void UpdateIndexMuti(List<Commodity> ciList)
{
    IndexWriter writer = null;
    try
    {
        if (ciList == null || ciList.Count == 0) return;
        string rootIndexPath = StaticConstant.IndexPath;
        DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
        bool isCreate = dirInfo.GetFiles().Length == 0; // an empty directory means a new index has to be created
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
        writer.MergeFactor = 50;       // controls how often segments are merged
        writer.UseCompoundFile = true; // compound files keep the number of index files down
        foreach (Commodity ci in ciList)
        {
            // A null element is treated as a no-op, as UpdateIndex does for a null
            // argument, rather than aborting the batch with a NullReferenceException.
            if (ci == null) continue;
            writer.UpdateDocument(new Term("productid", ci.ProductId.ToString()), ParseCItoDoc(ci));
        }
    }
    catch (Exception ex)
    {
        // The message previously said "InsertIndex", which pointed at the wrong method.
        logger.Error("UpdateIndexMuti异常", ex);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
    }
}
#endregion 单个索引增删改
#region PrivateMethod
/// <summary>
/// Builds the per-field analyzer used when writing to the index: a
/// StandardAnalyzer by default, PanGuAnalyzer for "title", and an explicit
/// StandardAnalyzer for "categoryid".
/// </summary>
/// <returns>The configured PerFieldAnalyzerWrapper.</returns>
private PerFieldAnalyzerWrapper CreateAnalyzerWrapper()
{
    PerFieldAnalyzerWrapper perFieldAnalyzer =
        new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_30));

    Analyzer titleAnalyzer = new PanGuAnalyzer();
    perFieldAnalyzer.AddAnalyzer("title", titleAnalyzer);

    Analyzer categoryAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
    perFieldAnalyzer.AddAnalyzer("categoryid", categoryAnalyzer);

    return perFieldAnalyzer;
}
/// <summary>
/// Converts a commodity to a document and adds it through the given writer,
/// logging any failure before passing it on to the caller.
/// </summary>
/// <param name="writer">Open IndexWriter that receives the document.</param>
/// <param name="ci">Commodity to convert and add.</param>
private void CreateCIIndex(IndexWriter writer, Commodity ci)
{
    try
    {
        writer.AddDocument(ParseCItoDoc(ci));
    }
    catch (Exception ex)
    {
        logger.Error("CreateCIIndex异常", ex);
        throw; // rethrow without resetting the stack trace
    }
}
/// <summary>
/// Maps a Commodity onto an index document. Every field is stored; only "title"
/// is analyzed, the other text fields are indexed as-is, and "price" is added as
/// an indexed numeric float field.
/// </summary>
/// <param name="ci">Commodity to convert.</param>
/// <returns>The document holding the commodity's fields.</returns>
private Document ParseCItoDoc(Commodity ci)
{
    string idText = ci.Id.ToString();
    string productIdText = ci.ProductId.ToString();
    string categoryIdText = ci.CategoryId.ToString();
    float priceValue = (float)ci.Price;

    Document result = new Document();

    Field idField = new Field("id", idText, Field.Store.YES, Field.Index.NOT_ANALYZED);
    result.Add(idField);

    // "title" is the only analyzed field (PanGu word segmentation).
    Field titleField = new Field("title", ci.Title, Field.Store.YES, Field.Index.ANALYZED);
    result.Add(titleField);

    Field productIdField = new Field("productid", productIdText, Field.Store.YES, Field.Index.NOT_ANALYZED);
    result.Add(productIdField);

    Field categoryIdField = new Field("categoryid", categoryIdText, Field.Store.YES, Field.Index.NOT_ANALYZED);
    result.Add(categoryIdField);

    Field imageUrlField = new Field("imageurl", ci.ImageUrl, Field.Store.YES, Field.Index.NOT_ANALYZED);
    result.Add(imageUrlField);

    Field urlField = new Field("url", ci.Url, Field.Store.YES, Field.Index.NOT_ANALYZED);
    result.Add(urlField);

    result.Add(new NumericField("price", Field.Store.YES, true).SetFloatValue(priceValue));

    return result;
}
#endregion PrivateMethod
}
分词器封装
代码语言:csharp
public class LuceneAnalyze : ILuceneAnalyze
{
private Logger logger = new Logger(typeof(LuceneAnalyze));
//
#region AnalyzerKey
/// <summary>
/// Splits a search keyword into words by parsing it against the "title" field
/// with PanGuAnalyzer and collecting the term texts of the resulting query.
/// </summary>
/// <param name="keyword">Raw keyword entered by the user.</param>
/// <returns>
/// The term texts of a TermQuery or PhraseQuery, or those of the direct
/// TermQuery/PhraseQuery clauses of a BooleanQuery. An empty array is returned
/// for a null or blank keyword. For any other query type the original keyword
/// is returned as the only element.
/// </returns>
public string[] AnalyzerKey(string keyword)
{
    // A blank keyword has nothing to segment; returning early also keeps null
    // away from the parser.
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return new string[0];
    }

    Analyzer analyzer = new PanGuAnalyzer();
    QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
    Query query = parser.Parse(this.CleanKeyword(keyword));

    TermQuery termQuery = query as TermQuery;
    if (termQuery != null)
    {
        return new string[] { termQuery.Term.Text };
    }

    PhraseQuery phraseQuery = query as PhraseQuery;
    if (phraseQuery != null)
    {
        return phraseQuery.GetTerms().Select(t => t.Text).ToArray();
    }

    BooleanQuery booleanQuery = query as BooleanQuery; // AND / OR combinations
    if (booleanQuery != null)
    {
        List<string> analyzerWords = new List<string>();
        foreach (BooleanClause clause in booleanQuery.GetClauses())
        {
            Query childQuery = clause.Query;
            TermQuery childTerm = childQuery as TermQuery;
            if (childTerm != null)
            {
                analyzerWords.Add(childTerm.Term.Text);
                continue;
            }

            PhraseQuery childPhrase = childQuery as PhraseQuery;
            if (childPhrase != null)
            {
                analyzerWords.AddRange(childPhrase.GetTerms().Select(t => t.Text));
            }
            // Clauses of any other type are ignored.
        }
        return analyzerWords.ToArray();
    }

    // The literal braces are doubled: an unescaped "{ keyword }" is an invalid
    // format item and made string.Format throw FormatException on this path.
    logger.Debug(string.Format("AnalyzerKey在解析keyword={0}的结果为new string[] {{ keyword }} ", keyword));
    return new string[] { keyword };
}
/// <summary>
/// Lower-cases the upper-case AND / OR operator words in a keyword (at the start,
/// at the end, standing alone, or surrounded by spaces) and then escapes the
/// result with QueryParser.Escape.
/// </summary>
/// <param name="keyword">Raw keyword; null is treated as an empty string.</param>
/// <returns>The escaped keyword with its AND / OR words lower-cased.</returns>
private string CleanKeyword(string keyword)
{
    if (string.IsNullOrWhiteSpace(keyword))
    {
        // Nothing to rewrite. Null is coalesced so Escape is never handed a null reference.
        return QueryParser.Escape(keyword ?? string.Empty);
    }

    // One rewrite is applied per pass and the loop repeats until no rule matches,
    // because a rewrite can expose another operator (for example "a AND AND b").
    // Comparisons are ordinal so that the fixed-length cuts below always remove
    // exactly the characters that were matched.
    bool isClean = false;
    while (!isClean)
    {
        keyword = keyword.Trim();
        if (keyword == "AND" || keyword == "OR")
        {
            // A keyword consisting solely of an operator matches none of the
            // space-delimited rules below.
            keyword = keyword.ToLowerInvariant();
        }
        else if (keyword.EndsWith(" AND", StringComparison.Ordinal))
        {
            keyword = string.Format("{0}and", keyword.Remove(keyword.Length - 3, 3));
        }
        else if (keyword.EndsWith(" OR", StringComparison.Ordinal))
        {
            keyword = string.Format("{0}or", keyword.Remove(keyword.Length - 2, 2));
        }
        else if (keyword.StartsWith("AND ", StringComparison.Ordinal))
        {
            keyword = string.Format("and{0}", keyword.Substring(3));
        }
        else if (keyword.StartsWith("OR ", StringComparison.Ordinal))
        {
            keyword = string.Format("or{0}", keyword.Substring(2));
        }
        else if (keyword.Contains(" OR "))
        {
            keyword = keyword.Replace(" OR ", " or ");
        }
        else if (keyword.Contains(" AND "))
        {
            keyword = keyword.Replace(" AND ", " and ");
        }
        else
        {
            isClean = true;
        }
    }

    return QueryParser.Escape(keyword);
}
#endregion AnalyzerKey