using System;
using ;
using ;
using ;
using ;
namespace
{
class SVMModle
{
/// <summary>
/// Dimension-reduction vocabulary (word list). Starts out as an empty list.
/// </summary>
private List<string> reducingKeys = new List<string>();
/// <summary>
/// Constructor: uses a dimension-reduction vocabulary.
/// </summary>
/// <param name="reducingKeys">The dimension-reduction vocabulary to store on this instance.</param>
public SVMModle(List<string> reducingKeys)
{
    // The parameter shadows the field of the same name, so the field must be
    // qualified with "this." for the assignment to reach it.
    this.reducingKeys = reducingKeys;
}
/// <summary>
/// Constructor: does not use a dimension-reduction vocabulary.
/// The vocabulary field keeps its initial empty list.
/// </summary>
public SVMModle()
{
}
/// <summary>
/// 相似度计算
/// </summary>
/// <param name="text1">文档1(分好词的,分词符为非汉字字符)</param>
/// <param name="text2">文档2(分好词的,分词符为非汉字字符)</param>
/// <returns>两篇文章的相似度</returns>
public double Similarity(string text1, string text2)
{
double similarity = , numerator = , denominator1 = , denominator2 = ;
int temp1, temp2;
Dictionary<string, int> dictionary1 = GetDictionary(text1);
Dictionary<string, int> dictionary2 = GetDictionary(text2);
if (( < 1) || ( < 1))//如果任一篇文章中不含有汉字
{
return ;
}
Dictionary<string, int>.KeyCollection keys1 = ;
foreach (string key in keys1)
{
(key, out temp1);
if (!(key, out temp2))
{
temp2 = 0;
}
(key);
numerator += temp1 * temp2;
den
// Source note: "Vector space model document similarity computation (C#)", from Taodou (m.daumloan.com); please credit the source when reposting.