Skip to content

Instantly share code, notes, and snippets.

View Venkatstatistics's full-sized avatar

Venkat Venkatstatistics

  • Aryma Labs
  • Bangalore
View GitHub Profile
### Spacy Tutorials ###
## References: https://course.spacy.io/chapter1 ##
## References: https://spacy.io/usage/spacy-101 ##
### Learning to work with the NLP object ###
import spacy
from spacy.lang.en import English

# Blank English pipeline (tokenizer only).
nlp = English()

# Second blank English pipeline with rule-based sentence segmentation
# (spaCy v2 API: create_pipe + add_pipe).
nlpsm = English()
sbd = nlpsm.create_pipe('sentencizer')
nlpsm.add_pipe(sbd)

# Large English word-vector model; it gets its OWN sentencizer instance.
# Fix: the original added the same `sbd` component object to both
# pipelines — each pipeline should own a separate component.
import en_vectors_web_lg
nlplg = en_vectors_web_lg.load()
nlplg.add_pipe(nlplg.create_pipe('sentencizer'))
# -*- coding: utf-8 -*-
# Lowercasing: normalise a list of mixed-case names to all lower case.
texts = ["JOHN", "keLLY", "ArJUN", "SITA"]
lower_words = list(map(str.lower, texts))
lower_words
#Stemming
import nltk
import pandas as pd
# -*- coding: utf-8 -*-
# Train a Word2Vec model on the small 'text8' demo corpus fetched via
# gensim's downloader. Requires network access on first run (the corpus
# is cached locally afterwards by gensim).
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api
# Alternative (larger) corpora / pre-trained models, left disabled:
#corpus = api.load('word2vec-google-news-300')
#corpus = api.load('glove-wiki-gigaword-100')
#model = api.load('glove-wiki-gigaword-100')
corpus = api.load('text8') # download the corpus and return it opened as an iterable
model = Word2Vec(corpus) # train a model from the corpus
import time
# Wall-clock start timestamp, presumably used later to report total run
# time — the matching elapsed-time calculation is not visible here.
start = time.time()
import csv
import sys
import pandas as pd
import numpy as np
from operator import itemgetter
import redis
def dump():
    """Dump every entry of the Redis list 'results' into results.csv.

    Each stored value (bytes) is decoded as UTF-8 and written on its own
    line. Relies on the module-level Redis connection ``r``.

    Fix: the pasted original had all indentation flattened, which is not
    valid Python; the body is reconstructed with proper nesting.
    """
    with open('results.csv', 'w') as f:
        for key in r.lrange('results', 0, -1):
            print(key)  # echo the raw bytes value for progress/debugging
            f.write(key.decode('utf-8'))
            f.write('\n')
if __name__== "__main__":
def process():
import en_vectors_web_lg
nlp = en_vectors_web_lg.load()
topicdf = pd.read_csv("small_Topics.csv", encoding='Latin-1')
topics = topicdf.Topic.tolist()
while True:
big_keyword = r.lpop('big_keywords').decode('utf-8')
if not big_keyword:
# We define a function to read the 200k words. The words are stored in a
# column called 'keyword' in the csv file 'big_Keywords.csv'. The words are
# read one by one and pushed onto the Redis list stored under the key
# "big_keywords".
def read_biglist():
    """Load every keyword from big_Keywords.csv and push each onto the
    Redis list 'big_keywords'.

    Reads the 'keyword' column of the CSV with pandas and LPUSHes each
    value. Relies on the module-level Redis connection ``r``.

    Fix: the pasted original had all indentation flattened, which is not
    valid Python; the body is reconstructed with proper nesting.
    """
    biglist = pd.read_csv("big_Keywords.csv")
    bigwords = biglist.keyword.tolist()
    for token1 in bigwords:
        r.lpush("big_keywords", token1)  # LPUSH puts the new value at the start of the list.
import csv
import sys
import pandas as pd
import numpy as np
from operator import itemgetter
import redis
#by default Redis runs on port 6379, the below is the url
# NOTE(review): REDIS_URL is defined but not passed to Redis() below,
# which connects with explicit host/port/db arguments instead.
REDIS_URL = "redis://localhost:6379/0"
# Module-level Redis client (localhost, database 0) shared by the
# functions in this file that read/write Redis lists.
r = redis.Redis(host='localhost', port=6379, db=0) #the object r is created
#Resume Phrase Matcher code
#importing all required libraries
import PyPDF2
import os
from os import listdir
from os.path import isfile, join
from io import StringIO