"""Train a small skip-gram Word2Vec model on a toy legal corpus.

The script tokenizes five example sentences, trains a gensim ``Word2Vec``
model on them, saves the model to ``legal_word2vec.model`` in the current
working directory, and prints a few similarity queries.
"""

import gensim
import nltk
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize

# ``word_tokenize`` needs the Punkt sentence-tokenizer data.  Older NLTK
# releases load it from the "punkt" package, newer ones from "punkt_tab";
# fetching both keeps the script working on either.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource)

legal_corpus = [
    "The plaintiff filed a motion in court",
    "The defendant was found guilty of negligence",
    "The contract was deemed void due to misrepresentation",
    "The lawyer presented evidence before the judge",
    "The court issued a legal notice to the defendant",
]

# Lower-case before tokenizing so that e.g. "The" and "the" share one vector.
tokenized_corpus = [word_tokenize(doc.lower()) for doc in legal_corpus]

model = Word2Vec(
    sentences=tokenized_corpus,
    vector_size=100,
    window=5,
    min_count=1,  # the corpus is tiny: keep words that occur only once
    sg=1,  # 1 selects the skip-gram training algorithm
    epochs=50,
)

model.save("legal_word2vec.model")
print("Model Trained Successfully")

# Print the nearest neighbours of each query word with their similarity
# scores.  Both query words occur in the corpus, so they are in the vocabulary.
for query in ("plaintiff", "contract"):
    print(f"\nWords similar to '{query}':")
    for word, score in model.wv.most_similar(query):
        print(f"{word}: {score:.4f}")

similarity = model.wv.similarity("plaintiff", "defendant")
print(f"\nSimilarity between 'plaintiff' and 'defendant': {similarity:.4f}")