Update Word2Vec.py #74

Open · wants to merge 1 commit into master
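This commit ports Word2Vec.py from Python 2 / TensorFlow 1.x to Python 3 / TensorFlow 2.x: print statements become print() calls, raw_input becomes input, the dataset file is opened with an explicit encoding='utf-8', and the graph-era APIs (Session, placeholder, global_variables_initializer, GradientDescentOptimizer) move under tf.compat.v1, with tf.random_uniform and tf.truncated_normal renamed to their tf.random equivalents. Eager execution is disabled at import time so the existing session-based training loop keeps working; a minimal runnable sketch of that pattern follows the diff.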
31 changes: 16 additions & 15 deletions Word2Vec.py
@@ -1,4 +1,5 @@
 import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
 import numpy as np
 import re
 from collections import Counter
@@ -25,7 +26,7 @@
 # into one huge string, and then uses a Counter to identify words
 # and the number of occurrences
 def processDataset(filename):
-    openedFile = open(filename, 'r')
+    openedFile = open(filename, 'r', encoding='utf-8')
     allLines = openedFile.readlines()
     myStr = ""
     for line in allLines:
@@ -41,7 +42,7 @@ def createTrainingMatrices(dictionary, corpus):
     yTrain=[]
     for i in range(numTotalWords):
         if i % 100000 == 0:
-            print 'Finished %d/%d total words' % (i, numTotalWords)
+            print ('Finished %d/%d total words' % (i, numTotalWords))
         wordsAfter = allWords[i + 1:i + windowSize + 1]
         wordsBefore = allWords[max(0, i - windowSize):i]
         wordsAdded = wordsAfter + wordsBefore
@@ -61,19 +62,19 @@ def getTrainingBatch():
 if (os.path.isfile('Word2VecXTrain.npy') and os.path.isfile('Word2VecYTrain.npy') and os.path.isfile('wordList.txt')):
     xTrain = np.load('Word2VecXTrain.npy')
     yTrain = np.load('Word2VecYTrain.npy')
-    print 'Finished loading training matrices'
+    print ('Finished loading training matrices')
     with open("wordList.txt", "rb") as fp:
         wordList = pickle.load(fp)
-    print 'Finished loading word list'
+    print ('Finished loading word list')

 else:
     fullCorpus, datasetDictionary = processDataset('conversationData.txt')
-    print 'Finished parsing and cleaning dataset'
+    print ('Finished parsing and cleaning dataset')
     wordList = list(datasetDictionary.keys())
-    createOwnVectors = raw_input('Do you want to create your own vectors through Word2Vec (y/n)?')
+    createOwnVectors = input('Do you want to create your own vectors through Word2Vec (y/n)?')
     if (createOwnVectors == 'y'):
         xTrain, yTrain = createTrainingMatrices(datasetDictionary, fullCorpus)
-        print 'Finished creating training matrices'
+        print ('Finished creating training matrices')
         np.save('Word2VecXTrain.npy', xTrain)
         np.save('Word2VecYTrain.npy', yTrain)
     else:
@@ -90,13 +91,13 @@ def getTrainingBatch():
 numTrainingExamples = len(xTrain)
 vocabSize = len(wordList)

-sess = tf.Session()
-embeddingMatrix = tf.Variable(tf.random_uniform([vocabSize, wordVecDimensions], -1.0, 1.0))
-nceWeights = tf.Variable(tf.truncated_normal([vocabSize, wordVecDimensions], stddev=1.0 / math.sqrt(wordVecDimensions)))
+sess = tf.compat.v1.Session()
+embeddingMatrix = tf.Variable(tf.random.uniform([vocabSize, wordVecDimensions], -1.0, 1.0))
+nceWeights = tf.Variable(tf.random.truncated_normal([vocabSize, wordVecDimensions], stddev=1.0 / math.sqrt(wordVecDimensions)))
 nceBiases = tf.Variable(tf.zeros([vocabSize]))

-inputs = tf.placeholder(tf.int32, shape=[batchSize])
-outputs = tf.placeholder(tf.int32, shape=[batchSize, 1])
+inputs = tf.compat.v1.placeholder(tf.int32, shape=[batchSize])
+outputs = tf.compat.v1.placeholder(tf.int32, shape=[batchSize, 1])

 embed = tf.nn.embedding_lookup(embeddingMatrix, inputs)

@@ -108,14 +109,14 @@ def getTrainingBatch():
                      num_sampled=numNegativeSample,
                      num_classes=vocabSize))

-optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0).minimize(loss)
+optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=1.0).minimize(loss)

-sess.run(tf.global_variables_initializer())
+sess.run(tf.compat.v1.global_variables_initializer())
 for i in range(numIterations):
     trainInputs, trainLabels = getTrainingBatch()
     _, curLoss = sess.run([optimizer, loss], feed_dict={inputs: trainInputs, outputs: trainLabels})
     if (i % 10000 == 0):
         print ('Current loss is:', curLoss)
-print 'Saving the word embedding matrix'
+print ('Saving the word embedding matrix')
 embedMatrix = embeddingMatrix.eval(session=sess)
 np.save('embeddingMatrix.npy', embedMatrix)
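For reference, below is a minimal, self-contained sketch of the graph-mode pattern the patched file ends up using under TensorFlow 2.x. The vocabulary size, vector dimensions, and the toy batch are invented for illustration; only the tf.compat.v1 / tf.random / tf.nn calls mirror the diff.

    import math
    import numpy as np
    import tensorflow as tf

    # Keep TF 1.x graph/session semantics under TF 2.x, as the patch does.
    tf.compat.v1.disable_eager_execution()

    # Toy sizes, invented for illustration only.
    vocabSize = 100
    wordVecDimensions = 8
    batchSize = 4
    numNegativeSample = 2

    # Same variable setup as the patched file, at toy scale.
    embeddingMatrix = tf.Variable(tf.random.uniform([vocabSize, wordVecDimensions], -1.0, 1.0))
    nceWeights = tf.Variable(tf.random.truncated_normal(
        [vocabSize, wordVecDimensions], stddev=1.0 / math.sqrt(wordVecDimensions)))
    nceBiases = tf.Variable(tf.zeros([vocabSize]))

    inputs = tf.compat.v1.placeholder(tf.int32, shape=[batchSize])
    outputs = tf.compat.v1.placeholder(tf.int32, shape=[batchSize, 1])

    # Skip-gram with noise-contrastive estimation: look up the center word's
    # vector, then score it against the true context word plus sampled negatives.
    embed = tf.nn.embedding_lookup(embeddingMatrix, inputs)
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nceWeights,
                                         biases=nceBiases,
                                         labels=outputs,
                                         inputs=embed,
                                         num_sampled=numNegativeSample,
                                         num_classes=vocabSize))
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=1.0).minimize(loss)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        batchInputs = np.array([1, 2, 3, 4], dtype=np.int32)          # toy center words
        batchLabels = np.array([[2], [3], [4], [5]], dtype=np.int32)  # toy context words
        _, curLoss = sess.run([optimizer, loss],
                              feed_dict={inputs: batchInputs, outputs: batchLabels})
        print('Current loss is:', curLoss)

Running a TF 1.x script this way is a stopgap rather than a full port: a deeper rewrite would replace the Session/placeholder plumbing with eager tensors or tf.function, but the compat shim keeps the original training loop intact with minimal edits, which is exactly the trade-off this one-commit change makes.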