init
This commit is contained in:
59
nlp_processor.py
Normal file
59
nlp_processor.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import logging
|
||||
|
||||
import joblib
|
||||
import nltk
|
||||
import numpy as np
|
||||
from nltk import word_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
|
||||
# Fetch the NLTK resources this module relies on: the stop-word corpus
# (read in preprocess_comment) and the 'punkt_tab' tokenizer data.
nltk.download('stopwords')
nltk.download('punkt_tab')

# Load the (model, vectorizer) pair from model.pkl when the module is imported.
# NOTE(review): joblib.load unpickles the file, so model.pkl must come from a
# trusted source.
model, vectorizer = joblib.load('model.pkl')
|
||||
|
||||
|
||||
def model_reload():
    """Re-read ``model.pkl`` and rebind the module-level model objects.

    Prints a short notice to stdout, then replaces the globals ``model`` and
    ``vectorizer`` with the pair unpacked from ``joblib.load('model.pkl')``.
    """
    global model, vectorizer

    print('Reloading the model')
    reloaded = joblib.load('model.pkl')
    model, vectorizer = reloaded
|
||||
|
||||
# Lazily built cache of the Russian stop words; filled by _russian_stop_words().
_RUSSIAN_STOP_WORDS = None


def _russian_stop_words():
    """Return the NLTK Russian stop words as a frozenset, building it once."""
    global _RUSSIAN_STOP_WORDS
    if _RUSSIAN_STOP_WORDS is None:
        # Reading the corpus and building the set is invariant across calls,
        # so do it a single time instead of once per comment.
        _RUSSIAN_STOP_WORDS = frozenset(stopwords.words('russian'))
    return _RUSSIAN_STOP_WORDS


def preprocess_comment(comment):
    """Remove Russian stop words from a comment.

    The comment is split with ``word_tokenize``; tokens whose lowercase form
    is in the NLTK Russian stop-word list are dropped, and the remaining
    tokens (original casing preserved) are joined with single spaces.

    Args:
        comment: Text of the comment to clean.

    Returns:
        The filtered tokens joined by a single space.
    """
    # NOTE(review): word_tokenize is called with its default language
    # (English Punkt model) although the stop words are Russian -- confirm
    # whether language='russian' was intended.
    stop_words = _russian_stop_words()
    kept_tokens = [
        token
        for token in word_tokenize(comment)
        if token.lower() not in stop_words
    ]
    return ' '.join(kept_tokens)
|
||||
|
||||
|
||||
def comment_to_vector(comment):
    """Vectorize a single comment with the module-level ``vectorizer``.

    The comment is wrapped in a one-element list before being passed to
    ``vectorizer.transform``; the result of that call is returned as is.
    """
    return vectorizer.transform([comment])
|
||||
|
||||
|
||||
def _repair_label(label):
    """Undo UTF-8-read-as-Latin-1 mojibake in a class label when possible."""
    try:
        return label.encode('latin1').decode('utf-8')
    except UnicodeError:
        # The label is not mojibake (e.g. it is already proper Unicode), so
        # the round-trip is not applicable; keep the label unchanged.
        return str(label)


# Determine the most likely categories for a comment.
def predict_category(comment, top_n=3):
    """Return the highest-scoring categories for a comment.

    The comment is vectorized with the module-level ``vectorizer`` and scored
    with ``model.decision_function``. The scores are turned into
    pseudo-probabilities with a softmax; these are relative weights, not
    calibrated probabilities.

    Args:
        comment: Text of the comment to classify.
        top_n: Maximum number of categories to return (default 3).

    Returns:
        A list of at most ``top_n`` dicts, ordered by descending weight, each
        of the form ``{'category': <label>, 'weight': <float>}``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError('top_n must be non-negative')

    # Convert the text into a feature vector (a batch of one sample).
    vector = vectorizer.transform([comment])

    # Raw per-class confidence scores from the decision function.
    scores = np.asarray(model.decision_function(vector))
    if scores.ndim == 1:
        # Binary classifiers return one score per sample, where a positive
        # value favours classes_[1]. Expand it to one column per class so the
        # ranking below works the same way as in the multiclass case.
        scores = np.column_stack((-scores, scores))

    # Softmax over the single sample's scores; subtracting the maximum keeps
    # the exponentials numerically stable.
    sample_scores = scores[0]
    exp_scores = np.exp(sample_scores - np.max(sample_scores))
    probabilities = exp_scores / np.sum(exp_scores)

    # Indices of the top_n classes, highest weight first.
    best_indices = np.argsort(probabilities)[::-1][:top_n]

    return [
        {
            'category': _repair_label(model.classes_[i]),
            'weight': float(probabilities[i]),
        }
        for i in best_indices
    ]
|
||||
|
||||
|
||||
# Smoke test: classify a sample comment and print the ranked categories.
# NOTE(review): this runs at import time because there is no
# `if __name__ == "__main__":` guard -- confirm that is intended.
comment = "ремешок часы"  # sample comment
result = predict_category(comment)

for item in result:
    print(f"Категория: {item['category']}, Вес: {item['weight']:.4f}")
|
||||
Reference in New Issue
Block a user