1. 首页
  2. 操作系统
  3. 其他
  4. 使用NLP技术进行京东商品评论情感分析

使用NLP技术进行京东商品评论情感分析

上传者:(未知) 上传时间:2023-03-10 21:32:22 文件类型:ZIP文件 大小:27.95KB 热度:17次
# Collect the paths of the review files that make up the data set.
import os


def collect_review_files(data_root):
    """Return the existing ``neg.txt`` / ``pos.txt`` paths found under *data_root*.

    Every directory below *data_root* is probed for ``neg.txt`` and then
    ``pos.txt``. Only files that actually exist are returned, so nested or
    incomplete directories do not yield paths that would fail when opened.

    Args:
        data_root: Directory to search recursively.

    Returns:
        A list of file paths; empty when *data_root* does not exist or holds
        no matching files.
    """
    found = []
    for root, dirs, _files in os.walk(data_root):
        # Sorting in place also fixes the order in which os.walk descends,
        # making the result independent of the filesystem's listing order.
        dirs.sort()
        for sub_dir in dirs:
            for file_name in ('neg.txt', 'pos.txt'):
                candidate = os.path.join(root, sub_dir, file_name)
                if os.path.isfile(candidate):
                    found.append(candidate)
    return found


all_file_path = collect_review_files('data')

# Pre-process the reviews and tokenize them.
import jieba
import pandas as pd

data = []

for file_path in all_file_path:
    # The parent directory names the product category, while the file name
    # (neg.txt / pos.txt) carries the sentiment polarity of the lines in it.
    category = os.path.basename(os.path.dirname(file_path))
    sentiment = os.path.splitext(os.path.basename(file_path))[0]
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            text = line.strip()
            if not text:
                # Blank lines would otherwise become empty documents.
                continue
            words = jieba.lcut(text)
            data.append([' '.join(words), category, sentiment])

# 'category' is kept for existing consumers; 'sentiment' is the training label.
df = pd.DataFrame(data, columns=['comment', 'category', 'sentiment'])

# Text vectorization
from sklearn.feature_extraction.text import TfidfVectorizer

# The comments are already space-separated by jieba. The default token
# pattern only keeps tokens of two or more characters, which would drop
# single-character words such as 好 / 差, so accept one-character tokens too.
tfidf = TfidfVectorizer(token_pattern=r'(?u)\b\w+\b')
X_tfidf = tfidf.fit_transform(df['comment'])

# Sentiment analysis
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Train on the pos/neg label: using the product category here would make the
# reported accuracy measure category prediction instead of sentiment.
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, df['sentiment'], random_state=42)
lr = LogisticRegression()
lr.fit(X_train, y_train)

print(f"情感分析在测试集上的准确率为:{accuracy_score(y_test, lr.predict(X_test))}")
下载地址
用户评论