C#/.NET计算文本相似度

看到一个计算文本匹配程度（相似度）的程序，记录一下。

/// <summary>
/// Computes the cosine similarity between the character-frequency vectors of two strings.
/// Each distinct UTF-16 code unit is one dimension; its value is the number of times it
/// occurs in the text. Character order is ignored.
/// </summary>
/// <param name="txt1">First text to compare. Must not be null.</param>
/// <param name="txt2">Second text to compare. Must not be null.</param>
/// <returns>
/// A value in the range [0, 1]: 1 when both texts have proportional character counts
/// (including two empty strings), 0 when they share no characters (including the case
/// where exactly one of them is empty).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="txt1"/> or <paramref name="txt2"/> is null.
/// </exception>
public static double Sim(string txt1, string txt2)
{
    if (txt1 == null)
    {
        throw new ArgumentNullException(nameof(txt1));
    }

    if (txt2 == null)
    {
        throw new ArgumentNullException(nameof(txt2));
    }

    // An empty text has a zero-length vector, which would make the formula 0/0 (NaN).
    // Define the result explicitly: two empty texts are identical, otherwise nothing matches.
    if (txt1.Length == 0 || txt2.Length == 0)
    {
        return txt1.Length == txt2.Length ? 1.0 : 0.0;
    }

    // Count occurrences of every character in a single pass per text.
    Dictionary<char, int> countsA = txt1.GroupBy(c => c).ToDictionary(g => g.Key, g => g.Count());
    Dictionary<char, int> countsB = txt2.GroupBy(c => c).ToDictionary(g => g.Key, g => g.Count());

    // Dot product: only characters present in both texts contribute.
    // Multiply as double so very large counts cannot overflow int.
    double dot = 0;
    foreach (KeyValuePair<char, int> entry in countsA)
    {
        int countInB;
        if (countsB.TryGetValue(entry.Key, out countInB))
        {
            dot += (double)entry.Value * countInB;
        }
    }

    // Squared Euclidean norms of the two frequency vectors.
    double squaredNormA = countsA.Values.Sum(v => (double)v * v);
    double squaredNormB = countsB.Values.Sum(v => (double)v * v);

    // Taking one square root of the product (instead of multiplying two square roots)
    // avoids a rounding step, so identical texts yield exactly 1.0.
    double cos = dot / Math.Sqrt(squaredNormA * squaredNormB);

    // Guard against the result creeping above 1 through floating-point rounding.
    return Math.Min(1.0, cos);
}