# Load libraries
library(tm)
library(SnowballC)
library(caret)
library(e1071)

# Load and prepare data
sms_data <- read.csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/sms_spam.csv",
                     stringsAsFactors = FALSE)
colnames(sms_data) <- c("Label", "Message")
sms_data$Label <- factor(sms_data$Label, levels = c("ham", "spam"))

# Clean and preprocess text
corpus <- VCorpus(VectorSource(sms_data$Message))
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
corpus <- tm_map(corpus, stemDocument)
corpus <- tm_map(corpus, stripWhitespace)

# Create Document-Term Matrix
dtm <- DocumentTermMatrix(corpus)
dtm_df <- as.data.frame(as.matrix(dtm))
dtm_df$Label <- sms_data$Label

# Split into training and testing sets
set.seed(123)
split_index <- createDataPartition(dtm_df$Label, p = 0.8, list = FALSE)
train_data <- dtm_df[split_index, ]
test_data <- dtm_df[-split_index, ]

# Separate features and labels
x_train <- train_data[, -ncol(train_data)]
y_train <- train_data$Label
x_test <- test_data[, -ncol(test_data)]
y_test <- test_data$Label

# Train Naive Bayes model and predict
nb_model <- naiveBayes(x_train, y_train)
predictions <- predict(nb_model, x_test)

# Evaluate performance
conf_mat <- confusionMatrix(predictions, y_test)
print(conf_mat)
cat("Accuracy:", round(conf_mat$overall["Accuracy"] * 100, 2), "%\n")
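One caveat worth flagging: e1071's naiveBayes() treats numeric predictors as Gaussian, which is a weak fit for sparse word counts. A common refinement is to drop very rare terms and recode counts as presence/absence factors before training. The sketch below is an assumption layered on top of the script above; the removeSparseTerms() threshold, the "Yes"/"No" recoding, and laplace = 1 are illustrative choices, not part of the original pipeline.

# Optional sketch (assumes the objects created above): binarize term counts for Naive Bayes.
# The 0.999 sparsity cutoff and "Yes"/"No" coding are illustrative assumptions.
dtm_small <- removeSparseTerms(dtm, 0.999)   # drop terms absent from >99.9% of messages
to_factor <- function(x) factor(ifelse(x > 0, "Yes", "No"), levels = c("No", "Yes"))
dtm_bin   <- as.data.frame(lapply(as.data.frame(as.matrix(dtm_small)), to_factor))

nb_model_bin    <- naiveBayes(dtm_bin[split_index, ], y_train, laplace = 1)
predictions_bin <- predict(nb_model_bin, dtm_bin[-split_index, ])
confusionMatrix(predictions_bin, y_test)

With categorical features, naiveBayes() estimates per-class presence probabilities instead of fitting a normal distribution to each term count, which typically suits short-message spam filtering better.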