JavaScript 实现最长子字符串,LCS

目标:

找出两个 字符串 X = 'CGATAATTGAGA' 和 Y = 'GTTCCTAATA' 的最长公共子序列。用动态规划求解(解不唯一?)。

首先建立一个表 table = table[row,col] = table[12+1,10+1];为什么多一格呢?

先初始化每个单元格的值为0;

然后,遍历行和列:

当X[row] = Y[col]时,table[row,col] = table[row-1,col-1]+1;

当X[row] <> Y[col]时,table[row,col] = Math.max(table[row,col-1],table[row-1,col]);

这样就构建出一个下面的表:

G T T C C T A A T A

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

C [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]

G [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

A [0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]

T [0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3]

A [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4]

A [0, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4]

T [0, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5]

T [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]

G [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]

A [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]

G [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]

A [0, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6]

通过这个表来找出LCS,LCS由表的下标给出,将这些下标放入一个数组L

从表的最后开始回溯:

row = 13, col = 11;

1、table[13,11]对应的行和列的字符相等,将(13,11)放入L,row-=1,col-=1;

2、table[12,10]对应的行和列的字符不相等,比较table[11,10]和table[12,9]

  2.1 table[11,10] > table[12,9],i -= 1;

  2.2 否则 j -= 1;

重复1、2

实现:

/*
  initial a row*col table
  return a table
*/
function initTable(row,col){
    var table = [];
    
    for(var i=0;i<row+1;i++){
        if(!table[i])table[i] = [];
        for(var j=0;j<col+1;j++){
            if(!table[i][j])table[i][j] = [];
            table[i][j] = 0;
        }
    }
    return table;
}
/*
  compute the path index of the table
  return array contain the index of s1 and s2
*/
function computeLCSIndex(s1,s2,table){
    var i = s1.length,
    j = s2.length,
    L1=[];
    
    while(i>0 && j>0){
        //console.log(i,j);
        if(s1[i-1] == s2[j-1]){
            //console.log(i,j);
            L1.push([i,j]);
            j -=1;
            i -=1;
        }else{
            if(!table[i-1]){
                console.log(i);     
                break;
            }

            if(table[i-1][j] > table[i][j-1]){
                i -= 1;
            }else{
                j -= 1;
            }
        }
    }

    return L1;
}
/*
  get LCS from s1,or s2 from LCS index
 */
function LCS(s1,s2,L1){
    //console.log(L1 = L1.reverse());
    L1 = L1.reverse();
    var LCS = "";
    for(var j=0;j<L1.length;j++){
        LCS+=(s2[L1[j][1]-1]);
    }
    //console.log(LCS);
    return LCS;
}

function diff(s1,s2){
    var len1 = s1.length,
    len2 = s2.length,
    table,
    L1;//LCS index of s1
    
    table  = initTable(len1,len2);

    for(k=1;k<len1+1;k++){
        for(l=1;l<len2+1;l++){
            if(s1[k-1] === s2[l-1]){
                //console.log("k:"+k+",l:"+l+table[k-1][l-1]);
                table[k][l] = table[k-1][l-1] + 1;
            }else{
                //console.log("k:"+k,"l:"+l);
                table[k][l] = Math.max(table[k][l-1],table[k-1][l]);
            }
        }
    }
    /*
    table.forEach(function(i){
        console.log(i);
    });
    */

    L1 = computeLCSIndex(s1,s2,table);
    return L1;
}

var debug = true;
if(debug){
    var A = 'CGATAATTGAGA',
    B = 'GTTCCTAATA';
    var L = diff(A,B);
    
    var s = LCS(A,B,L);
    console.log(s);

    var X = ["Cqdf", "G23", "A", "T", "A", "A", "T", "T", "G", "A", "G", "A"],
    Y = ["G2", "Tsfj", "T", "C", "C", "T", "A", "A", "T", "A"];

    var L2 = diff(X,Y);
    var s2 = LCS(X,Y,L2);

    console.log(s2);
}

通常是以单个字符作为比较的“元”,若以多个字符作为比较的元,那么输入的参数为数组即可: