fork download
  1. import sklearn
  2. from sklearn.feature_extraction.text import TfidfVectorizer
  3. from sklearn.metrics.pairwise import cosine_similarity
  4.  
  5. text1 = 'A smiling costumed woman is holding an umbrella'
  6. text2 = 'A happy woman in a fairy costume holds an umbrella'
  7.  
  8. vectorizer = TfidfVectorizer()
  9. vectors = vectorizer.fit_transform([text1,text2])
  10.  
  11. similarity = cosine_similarity(vectors)
  12. print(similarity)
Success #stdin #stdout 0.5s 73532KB
stdin
Standard input is empty
stdout
[[1.         0.25320945]
 [0.25320945 1.        ]]