javascript - Javascript - 逐字比较两个句子,并返回与某些条件匹配的单词数

下面这段代码逐词比较两个句子,并返回满足某些条件的匹配单词数:

第一个句子是字符串:


 // First sentence: the word "i" occurs twice in this string.
 let speechResult ="they're were protecting him i knew that i was aware";



如您所见,它有两个i,但第二句话只有一个i:


// Second sentence, stored as the only element of an array: the word "i" occurs once.
let expectSt = ['i was sent to earth to protect you'];



这种情况应当算作一次匹配,而不是两次;只有当第二个句子中也出现两个 i 时,才把 i 算作两次匹配。

6)是:是否匹配结果

这是代码:


// Sentences we should compare word by word
let speechResult = "they're were protecting him i knew that i was aware";
let expectSt = ['i was sent to earth to protect you'];

// Create arrays of words from the sentences above.
// The separator has to be /\s+/ (one or more whitespace characters). The
// pattern /s+/ matches the letter "s" instead and would cut the sentences
// into fragments such as "i wa" and "ent to earth to protect you".
let speechResultWords = speechResult.split(/\s+/);
let expectStWords = expectSt[0].split(/\s+/);



// Here you are.. 


//console.log(speechResultWords)


//console.log(expectStWords)



// Count Matches between two sentences
/**
 * Counts every (spoken word, expected word) pair in which the spoken word
 * contains the expected word as a substring, and logs each such pair.
 * Reads the outer `speechResultWords` and `expectStWords` arrays.
 *
 * Note: the same expected word can be counted for several spoken words,
 * e.g. "i" is contained in "him" as well as in "i".
 *
 * @returns {number} Total number of matching pairs.
 */
function includeWords() {
  let total = 0;

  for (const spoken of speechResultWords) {
    for (const expected of expectStWords) {
      if (!spoken.includes(expected)) {
        continue;
      }
      console.log(spoken + ' includes in ' + expected);
      total += 1;
    }
  }

  return total;
}



// Finally invoke the function to count the matches
let matches = includeWords();

// Report the total on the console.
console.log(`Matched words: ${matches}`);


 

时间: 原作者:

你可以对想要的单词进行计数,并通过检查单词计数来迭代给定的单词。


/**
 * Counts how many words of `seen` match a word of `wanted`, where each
 * occurrence of a wanted word can be matched at most once.
 *
 * Two words match when they are identical, or when both are longer than three
 * characters and one of them is a prefix of the other (for example "protect"
 * and "protecting"). Every match is logged as `seenWord wantedWord`.
 *
 * @param {string} wanted - Reference sentence; its word counts cap the result.
 * @param {string} seen - Sentence whose words are checked against `wanted`.
 * @returns {number} Number of matched words.
 */
function includeWords(wanted, seen) {
  // Split on runs of whitespace (/\s+/; the pattern /s+/ would split on the
  // letter "s") and drop the empty tokens produced by blank or padded input.
  const toWords = (sentence) => sentence.split(/\s+/).filter(Boolean);

  // How many times each wanted word may still be matched.
  const remaining = new Map();
  for (const word of toWords(wanted)) {
    remaining.set(word, (remaining.get(word) || 0) + 1);
  }
  const wantedWords = Array.from(remaining.keys());

  let count = 0;
  for (const word of toWords(seen)) {
    // Exhausted keys are skipped so that another candidate which is still
    // available (e.g. "protecting" once "protect" is used up) can be matched.
    const key = wantedWords.find(
      (candidate) =>
        remaining.get(candidate) > 0 &&
        (word === candidate ||
          (word.length > 3 &&
            candidate.length > 3 &&
            (word.startsWith(candidate) || candidate.startsWith(word))))
    );
    if (key === undefined) {
      continue;
    }

    console.log(word, key);
    count += 1;
    remaining.set(key, remaining.get(key) - 1);
  }

  return count;
}



// The second sentence is written with double quotes so that the apostrophe in
// "they're" does not terminate the string literal.
let matches = includeWords('i was sent to earth to protect you', "they're were protecting him i knew that i was aware");

console.log('Matched words: ' + matches);
/* Stylesheet rule, not JavaScript: removes the height cap from elements with class "as-console-wrapper" and places them at top: 0 (presumably the snippet's console output pane; confirm). */
.as-console-wrapper { max-height: 100% !important; top: 0; }

原作者:

我认为这应该有效:


// First sentence to compare.
let speechResult = "they're were protecting him i knew that i was aware";

// Second sentence, held as the single element of an array.
let expectSt = [
  'i was sent to earth to protect you',
];



/**
 * Counts the distinct words of `speechResult` that also occur as whole words
 * in the first sentence of `expectSt`. Both are read from the outer scope.
 *
 * @returns {number} Number of distinct shared words.
 */
function includeWords() {
  // Split into words on whitespace; splitting on "" yields single characters.
  const toWords = (sentence) => sentence.split(/\s+/).filter(Boolean);

  // Sets drop repeated words, so every distinct word is considered once.
  const spokenWords = new Set(toWords(speechResult));
  const expectedWords = new Set(toWords(expectSt[0]));

  let countMatches = 0;
  // for...of walks the values of the Set. The earlier `for (word in [set])`
  // only visited the array index "0" and assigned to an undeclared `word`,
  // which throws in strict mode.
  for (const word of spokenWords) {
    if (expectedWords.has(word)) {
      countMatches += 1;
    }
  }

  return countMatches;
}



// Run the comparison and keep the returned count.
let matches = includeWords();



原作者:

循环访问字符串,并使用空字符串更新匹配单词的索引,并将匹配项存储在数组中。


let speechResult = "they're were protecting him i knew that i was aware";
let expectSt = ['i was sent to earth to protect you'];

// Create arrays of words from the sentences above. The separator is /\s+/
// (runs of whitespace); the pattern /s+/ would split on the letter "s".
let speechResultWords = speechResult.split(/\s+/);
let expectStWords = expectSt[0].split(/\s+/);

// Matched words, in the order in which the spoken words were processed.
const matches = [];

speechResultWords.forEach(str => {
  // First expected word that is still available and is in a prefix relation
  // with the spoken word (one of the two starts with the other). A prefix
  // match already implies containment, so no separate includes() test is needed.
  const i = expectStWords.findIndex(
    innerStr => innerStr && (str.startsWith(innerStr) || innerStr.startsWith(str))
  );
  if (i === -1) {
    return;
  }

  const innerStr = expectStWords[i];

  // Record the shorter of the two words.
  matches.push(str.length >= innerStr.length ? innerStr : str);

  // Blank the slot in every case so that one expected word is never matched
  // by more than one spoken word (previously the slot stayed available when
  // the spoken word was the shorter one).
  expectStWords[i] = '';
});

console.log(matches.length);

原作者:

通过词干提取(stemming),可以识别出词干相同的单词。

比如,

  • 对于动词:protect, protected, protecting,...
  • 还有复数:ball, balls

  • 提取词干:使用某个词干分析器(例如 PorterStemmer,它有一个 JS 实现),
  • 在"词干空间"上统计出现次数,这很简单,

下例使用PorterStemmer


// Sentence pairs [a, b]; each pair is passed to countWords(a, b).
const examples = [
  ['protecting', 'i'],
  ['protecting', 'protect'],
  ['protect', 'protecting'],
  ['him', 'i'],
  ['i', 'i'],
  // Double quotes: the apostrophe in "they're" would end a single-quoted literal.
  ["they're were protecting him i knew that i was aware", 'i was sent to earth to protect you'],
  ['i i', 'i i i i i'],
];


/**
 * Splits a sentence into whitespace-separated tokens and keeps only the
 * tokens that contain at least one letter, digit or apostrophe.
 * Kept tokens are returned unchanged, so attached punctuation stays on them.
 *
 * @param {string} s - Sentence to tokenize.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(s) {
  // this is not good, get yourself a good tokenizer
  // Split on whitespace runs (/\s+/); /s+/ would cut words at the letter "s".
  return s.split(/\s+/).filter(x => /[a-zA-Z0-9']/.test(x));
}



/**
 * Counts the words of `b` that can be paired with a word of `a` having the
 * same stem. Each word of `a` is paired at most once, so repeated words only
 * count as often as they occur in both sentences.
 *
 * Relies on `tokenize` and on a `stemmer(word)` function being in scope.
 *
 * @param {string} a - Sentence that supplies the available stems.
 * @param {string} b - Sentence whose words are matched against those stems.
 * @returns {number} Number of words of `b` that found a partner in `a`.
 */
function countWords(a, b) {
  const stemsOf = (sentence) => tokenize(sentence).map(t => stemmer(t));

  const stemsA = stemsOf(a);
  const stemsB = stemsOf(b);

  // A Map is used instead of a plain object: with {} a stem such as
  // "constructor" or "toString" would find the inherited Object.prototype
  // member, look truthy and be counted as a match.
  const available = new Map();
  for (const stem of stemsA) {
    available.set(stem, (available.get(stem) || 0) + 1);
  }

  let count = 0;
  for (const stem of stemsB) {
    const left = available.get(stem) || 0;
    if (left > 0) {
      available.set(stem, left - 1);
      count += 1;
    }
  }

  return count;
}


// Print the countWords result for every example pair, numbered from 1.
for (const [i, [a, b]] of examples.entries()) {
  console.log(`ex ${i+1}: ${countWords(a,b)}`);
}
<!-- Loads a Porter stemmer script from the jsDelivr CDN; presumably it provides the global `stemmer` function that countWords calls (confirm). -->
<script src="https://cdn.jsdelivr.net/gh/kristopolous/Porter-Stemmer/PorterStemmer1980.js"></script>

原作者:
...