import functools
import logging

import joblib
import nltk
import numpy as np
from nltk import word_tokenize
from nltk.corpus import stopwords

# Fetch the NLTK resources requested by this module (stop-word lists and the
# tokenizer data) before any of the functions below are called.
nltk.download('stopwords')
nltk.download('punkt_tab')

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only ever point this at a model file from a trusted source.
model, vectorizer = joblib.load('model.pkl')


def model_reload():
    """Re-read ``model.pkl`` and rebind the module-level model and vectorizer."""
    global model, vectorizer
    print('Reloading the model')
    model, vectorizer = joblib.load('model.pkl')


@functools.lru_cache(maxsize=None)
def _russian_stop_words():
    """Return the Russian stop-word set, built once on first use."""
    return frozenset(stopwords.words('russian'))


def preprocess_comment(comment):
    """Tokenize *comment* and drop Russian stop words.

    Args:
        comment: Raw comment text.

    Returns:
        The remaining tokens joined by single spaces. Tokens keep their
        original casing; only the stop-word comparison is lower-cased.
    """
    stop_words = _russian_stop_words()
    kept_tokens = [
        token for token in word_tokenize(comment)
        if token.lower() not in stop_words
    ]
    return ' '.join(kept_tokens)


def comment_to_vector(comment):
    """Transform a single comment with the loaded vectorizer.

    Args:
        comment: Comment text.

    Returns:
        Whatever ``vectorizer.transform`` yields for a one-element batch.
    """
    return vectorizer.transform([comment])


def _repair_label(label):
    """Undo UTF-8-read-as-Latin-1 mojibake in a class label.

    Labels that cannot be round-tripped this way (for example, text that is
    already correct Cyrillic) are returned unchanged instead of raising.
    """
    try:
        return label.encode('latin1').decode('utf-8')
    except UnicodeError:
        return label


def predict_category(comment, top_n=3):
    """Return the highest-scoring categories for a comment.

    Args:
        comment: Comment text to classify.
        top_n: How many categories to return (default 3). Fewer are returned
            if the model has fewer classes.

    Returns:
        A list of ``{'category': str, 'weight': float}`` dicts ordered from
        highest to lowest weight. Weights are a softmax over the model's
        decision scores, i.e. pseudo-probabilities rather than calibrated
        probabilities.

    Raises:
        ValueError: If *top_n* is less than 1.
    """
    if top_n < 1:
        raise ValueError('top_n must be at least 1')

    vector = comment_to_vector(comment)

    # NOTE(review): assumes a multi-class model whose decision_function
    # returns an array of shape (1, n_classes) for one sample -- confirm.
    decision_scores = model.decision_function(vector)

    # Softmax; subtracting the maximum first keeps np.exp from overflowing.
    exp_scores = np.exp(decision_scores - np.max(decision_scores))
    probabilities = exp_scores / np.sum(exp_scores)

    sample_weights = probabilities[0]
    top_indices = np.argsort(sample_weights)[::-1][:top_n]
    return [
        {
            'category': _repair_label(model.classes_[i]),
            'weight': float(sample_weights[i]),
        }
        for i in top_indices
    ]


# Smoke test with a sample comment.
# NOTE(review): this runs on import as well as when the file is executed as a
# script; consider moving it under ``if __name__ == "__main__":``.
comment = "ремешок часы"
result = predict_category(comment)
for item in result:
    print(f"Категория: {item['category']}, Вес: {item['weight']:.4f}")