# Dump every non-null job description to a text file for later segmentation.
# The file is opened once, outside the loop — the original re-opened it in
# append mode on every single iteration.
# NOTE(review): assumes `df` is a pandas DataFrame defined earlier in the file.
with open('職位表述文本.txt', mode='a', encoding='utf-8') as file:
    for text in df['Job_Description']:
        # print(text)
        if text is not None:
            file.write(str(text))
# jieba.load_userdict('userdict.txt')


def stopwordslist(filepath):
    """Load a stop-word list from *filepath*.

    Parameters
    ----------
    filepath : str
        Path to a UTF-8 text file containing one stop word per line.

    Returns
    -------
    list[str]
        The stop words, with surrounding whitespace stripped.
    """
    # 'with' guarantees the handle is closed; the original leaked it
    # (open(...).readlines() with no close). Iterating the file directly
    # also avoids materializing readlines() first.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]
def seg_sentence(sentence):
    """Segment *sentence* with jieba, dropping stop words and tab characters.

    Parameters
    ----------
    sentence : str
        Raw text to segment; leading/trailing whitespace is stripped first.

    Returns
    -------
    str
        The kept tokens, each followed by a single space (so the result
        carries a trailing space when non-empty), or '' if nothing survives.
    """
    tokens = jieba.cut(sentence.strip())
    # Stop-word file path; a set gives O(1) membership tests instead of the
    # original O(n) scan of a list per token.
    # NOTE(review): reloaded on every call, as in the original — consider
    # hoisting to module level if this is hot.
    stopwords = set(stopwordslist('stop_word.txt'))
    # join-based build replaces the original quadratic `outstr +=` loop while
    # reproducing its exact output (token + ' ' for each kept token).
    return ''.join(w + ' ' for w in tokens if w not in stopwords and w != '\t')
# Segment every line of the raw corpus and write the result, one line each.
# 'with' replaces the manual open/close pair so both files are closed even if
# seg_sentence raises mid-loop (the original leaked them on error).
with open('職位表述文本.txt', 'r', encoding='utf-8') as inputs, \
        open('職位表述文本分詞后_outputs.txt', 'w', encoding='utf-8') as outputs:
    for line in inputs:
        line_seg = seg_sentence(line)  # returns a space-joined string
        outputs.write(line_seg + '\n')
# ---- Word-frequency statistics: extract keywords, count, export txt + xls ----
from collections import Counter

wbk = xlwt.Workbook(encoding='ascii')
sheet = wbk.add_sheet("wordCount")  # Excel sheet name
word_lst = []
key_list = []
orderList = []

# Pull TF-IDF keywords from each line of the raw source document.
# 'with' fixes the leaked input handle from the original bare open().
with open('職位表述文本.txt', encoding='utf-8') as src:
    for line in src:
        item = line.strip('\n\r').split('\t')  # tab-split; keywords from column 0
        word_lst.extend(jieba.analyse.extract_tags(item[0]))  # jieba keyword extraction

# Counter.most_common() replaces the original O(n^2) "sort the values, then
# rescan the entire dict for each value (zeroing matched keys)" loop. Ties
# keep first-seen order, matching the original dict-iteration tie-breaking
# on CPython 3.7+.
word_dict = Counter(word_lst)
# encoding pinned to utf-8 for consistency with every other file this script
# touches (the original used the platform default, which can fail on CJK text).
with open("分詞結果.txt", 'w', encoding='utf-8') as wf2:
    for key, cnt in word_dict.most_common():
        wf2.write(key + ' ' + str(cnt) + '\n')  # "word count" per line
        key_list.append(key)
        orderList.append(cnt)

# Mirror the ranking into the spreadsheet: column 0 = word, column 1 = count.
for i, (key, cnt) in enumerate(zip(key_list, orderList)):
    sheet.write(i, 1, label=cnt)
    sheet.write(i, 0, label=key)
wbk.save('wordCount_all_lyrics.xls')  # persist as wordCount_all_lyrics.xls
def get_word(filepath="情感分析用詞.txt"):
    """Read the sentiment-analysis word list, one word per line.

    Parameters
    ----------
    filepath : str
        UTF-8 text file to read. Defaults to the original hard-coded name,
        so existing callers are unaffected.

    Returns
    -------
    list[str]
        Every line of the file with CR/LF stripped.

    Bug fixed: the original primed ``line`` with one readline() and then read
    again at the top of the loop, silently dropping the FIRST word and
    appending a trailing '' from the final empty read. The explicit f.close()
    inside the with-block was also redundant.
    """
    with open(filepath, encoding='utf-8') as f:
        return [line.strip('\r\n') for line in f]
def get_sentiment(word):
    """Print and return the SnowNLP sentiment score for *word*.

    Parameters
    ----------
    word : str
        Text to score.

    Returns
    -------
    float
        SnowNLP sentiment score. The original computed the score but only
        printed it (returning None); returning it as well is backward
        compatible and lets callers use the value programmatically.
    """
    text = u'{}'.format(word)
    score = SnowNLP(text).sentiments
    print(score)
    return score
if __name__ == '__main__':
    # Score every entry of the sentiment lexicon, one word at a time.
    for entry in get_word():
        get_sentiment(entry)
# text = u''' # 也許 # ''' # s = SnowNLP(text) # print(s.sentiments) # with open('lyric_sentiments.txt', 'a', encoding='utf-8') as fp: # fp.write(str(s.sentiments)+'\n') # print('happy end')