> setwd("C:/Users/userr/Desktop/alkhotimah/Datmin/Latihan pert 3") #session-set working dir-choose dir > zonasiaul=read.csv("zonasiaul.csv" , header=TRUE,sep=";") > zonasiaul <- zonasiaul[!is.na(zonasiaul$likes),] > library(devtools) Loading required package: usethis > install_github("nurandi/katadasaR", force=TRUE) WARNING: Rtools is required to build R packages, but is not currently installed. Please download and install Rtools 3.5 from http://cran.r-project.org/bin/windows/Rtools/. Downloading GitHub repo nurandi/katadasaR@master WARNING: Rtools is required to build R packages, but is not currently installed. Please download and install Rtools 3.5 from http://cran.r-project.org/bin/windows/Rtools/. √ checking for file 'C:\Users\userr\AppData\Local\Temp\RtmpusWneK\remotes1f0854d71024\nurandi-katadasaR-91e463a/DESCRIPTION' (5s) - preparing 'katadasaR': (669ms) √ checking DESCRIPTION meta-information - checking for LF line-endings in source and make files and shell scripts (535ms) - checking for empty or unneeded directories - looking to see if a 'data/datalist' file should be added - building 'katadasaR_0.1.tar.gz' Installing package into ‘C:/Users/userr/Documents/R/win-library/3.6’ (as ‘lib’ is unspecified) * installing *source* package 'katadasaR' ... ** using staged installation ** R ** data *** moving datasets to lazyload DB ** byte-compile and prepare package for lazy loading ** help *** installing help indices converting help for package 'katadasaR' finding HTML links ... done kamus_katadasar html katadasar html ** building package indices ** testing if installed package can be loaded from temporary location *** arch - i386 *** arch - x64 ** testing if installed package can be loaded from final location *** arch - i386 *** arch - x64 ** testing if installed package keeps a record of temporary installation path * DONE (katadasaR) > library(textclean) > library(katadasaR) > library(tokenizers) > library(wordcloud) Loading required package: RColorBrewer > library(dplyr) Attaching package: ‘dplyr’ The following objects are masked from ‘package:stats’: filter, lag The following objects are masked from ‘package:base’: intersect, setdiff, setequal, union > tweets <- zonasiaul[!is.na(zonasiaul$likes),] > tweets <- tweets$text %>% as.character() > tweets <- gsub( "\n"," ",tweets) > tweets <- tweets %>% + replace_html() %>% # replace html with blank + replace_url() # replace URLs with blank > # print original tweet with converted emoji in index [198] > replace_emoji(tweets[198]) [1] "Inilah pentingnya sistem zonasi, agar murid-murid tidak lagi berjalan jauh & berangkat lebih awal saat dini hari menuju ke sekolah " > # print tweet with converted html in index [198] > replace_html(replace_emoji(tweets[198])) [1] "Inilah pentingnya sistem zonasi, agar murid-murid tidak lagi berjalan jauh & berangkat lebih awal saat dini hari menuju ke sekolah " > # perform the replacement task to whole text variable > tweets <- tweets %>% + replace_emoji(.) %>% + replace_html(.) 
> tweets <- tweets %>%
+   replace_tag(pattern = "@([A-Za-z0-9_]+)", replacement = "") %>%   # remove mentions
+   replace_hash(pattern = "#([A-Za-z0-9_]+)", replacement = "")      # remove hashtags
> # spell.lex <- read_excel("colloquial-indonesian-lexicon.xlsx")
> spell.lex <- read.csv("colloquial-indonesian-lexicon.csv")
> # normalize colloquial Indonesian slang to its formal form
> tweets <- replace_internet_slang(tweets, slang = paste0("\\b",
+                                  spell.lex$slang, "\\b"),
+                                  replacement = spell.lex$formal, ignore.case = TRUE)
> tweets <- strip(tweets)
> tweets <- tweets %>%
+   as.data.frame() %>%
+   distinct()
> # number of tweet rows after duplicate texts are removed
> nrow(tweets)
[1] 7678
> # example of katadasaR usage (Indonesian stemmer)
> katadasaR("membenarkan")
[1] "benar"
> tweets <- as.character(tweets$.)
> stemming <- function(x) {
+   paste(lapply(x, katadasaR), collapse = " ")
+ }
> tweets <- lapply(tokenize_words(tweets), stemming)
> # after stemming
> tweets[5]
[[1]]
[1] "terima siswa baru dengan sistem zonasi tanpa lihat nem buat siswa ogah ajar nem tidak pakai untuk cari sekolah hampir pasti lulus siswa jadi malas ajar buat uji dinasehatin guru malah ngelunjak kian"

> library(tokenizers)
> tweets <- tokenize_words(tweets)
> library(stopwords)
> View(tweets)
> head(tweets)
[[1]]
[1] "sistem"  "zonasi"  "sekolah" "itu"     "untung"  "apa"     "rugi"    "apa"

[[2]]
 [1] "cukup"    "sama"     "aku"      "gajadi"   "ke"       "sekolah"  "yang"     "aku"      "mau"      "garagara" "sistem"
[12] "zonasi"

[[3]]
[1] "keluarga"     "jugamalah"    "yang"         "rame"         "omong"        "cara"         "daftar"       "sekolah"
[9] "sampai"       "pakai"        "sistem"       "zonasiheuheu"

[[4]]
 [1] "kalo"      "sistem"    "zonasi"    "tah"       "terus"     "mungkin"   "yang"      "jadi"      "primadona" "nanti"
[11] "adalah"    "sekolah"   "swasta"    "citacita"  "anak"      "adalah"    "masuk"     "sekolah"   "swasta"    "sekolah"
[21] "neger"     "mah"       "cuma"      "sisa"      "dari"      "yang"      "enggak"    "tampung"   "di"        "swasta"

[[5]]
 [1] "terima"      "siswa"       "baru"        "dengan"      "sistem"      "zonasi"      "tanpa"       "lihat"       "nem"
[10] "buat"        "siswa"       "ogah"        "ajar"        "nem"         "tidak"       "pakai"       "untuk"       "cari"
[19] "sekolah"     "hampir"      "pasti"       "lulus"       "siswa"       "jadi"        "malas"       "ajar"        "buat"
[28] "uji"         "dinasehatin" "guru"        "malah"       "ngelunjak"   "kian"

[[6]]
 [1] "bijak"      "pemerintah" "terap"      "sistem"     "zonasi"     "dasar"      "jarak"      "rumah"      "ke"
[10] "sekolah"    "dengan"     "dalih"      "untuk"      "pemerataan" "didik"      "memang"     "benar"      "dapat"
[19] "buat"       "siswa"      "yang"       "milik"      "mampu"      "atas"       "rata"       "rata"       "jadi"
[28] "sebar"      "lebih"      "rata"

> myStopwords <- readLines("stopwords.txt")
> tweetstop <- as.character(tweets)
> tweetstop <- tokenize_words(tweetstop, stopwords = myStopwords)
> head(tweetstop)
[[1]]
[1] "untung"

[[2]]
[1] "gajadi"   "garagara"

[[3]]
[1] "jugamalah"    "omong"        "zonasiheuheu"

[[4]]
[1] "tah"       "primadona" "citacita"  "neger"     "sisa"

[[5]]
[1] "ogah"        "uji"         "dinasehatin" "ngelunjak"   "kian"

[[6]]
[1] "bijak" "terap" "jarak" "dalih" "milik" "sebar"

> tweetss <- as.character(tweetstop)
> library(wordcloud)
> wordcloud(tweetss)
Loading required namespace: tm
There were 50 or more warnings (use warnings() to see the first 50)
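One caveat before scoring: as.character() on a list of token vectors deparses each element, so a multi-token tweet becomes a literal string such as 'c("gajadi", "garagara")'; this is also why wordcloud() raised so many warnings. A cleaner alternative (a sketch, assuming tweetstop is the token list built above) is to paste each tweet's tokens back into one space-separated string:

# collapse each token vector into a single document string
tweetss <- sapply(tweetstop, paste, collapse = " ")
wordcloud(tweetss, max.words = 100, random.order = FALSE)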
> opinion.lexicon.pos = scan("C:/Users/userr/Desktop/alkhotimah/Datmin/dari dede hehe/positif.txt", what = "character", comment.char = ";")
Read 1409 items
> opinion.lexicon.neg = scan("C:/Users/userr/Desktop/alkhotimah/Datmin/dari dede hehe/negatif.txt", what = "character", comment.char = ";")
Read 3774 items
> pos.words = c(opinion.lexicon.pos)
> neg.words = c(opinion.lexicon.neg)
> # define getSentimentScore(), which computes a raw sentiment score with a simple matching algorithm:
> getSentimentScore = function(sentences, pos.words, neg.words, .progress = "none")
+ {
+   require(plyr)
+   require(stringr)
+   scores = laply(sentences, function(sentence, pos.words, neg.words) {
+     # remove digits, punctuation, and special/control characters:
+     sentence = gsub("[[:cntrl:]]", "", gsub("[[:punct:]]", "", gsub("\\d+", "", sentence)))
+     # convert all text to lowercase:
+     sentence = tolower(sentence)
+     # split each sentence on whitespace (space delimiter):
+     words = unlist(str_split(sentence, "\\s+"))
+     # boolean-match every word against the pos & neg opinion lexicons:
+     pos.matches = !is.na(match(words, pos.words))
+     neg.matches = !is.na(match(words, neg.words))
+     # sentiment score = total positive matches - total negative matches:
+     score = sum(pos.matches) - sum(neg.matches)
+     return(score)
+   }, pos.words, neg.words, .progress = .progress)
+   # return a data frame of each sentence with its sentiment score:
+   return(data.frame(text = sentences, score = scores))
+ }
> # apply it to the cleaned tweet data:
> zonasiResult = getSentimentScore(tweetss, pos.words, neg.words)
Loading required package: plyr
------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
------------------------------------------------------------------------------

Attaching package: ‘plyr’

The following objects are masked from ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize

Loading required package: stringr
> # export to CSV:
> write.csv(zonasiResult, file = "zonasiResult.csv")
> hist(zonasiResult$score)
>
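To see how getSentimentScore() behaves before trusting the full run, it can be exercised on a couple of made-up sentences with tiny toy lexicons (hypothetical words, not the actual contents of positif.txt/negatif.txt):

# toy lexicons and sentences; the expected scores are +1 and -2
toy.pos <- c("bagus", "untung")
toy.neg <- c("rugi", "malas")
getSentimentScore(c("sistem zonasi bagus",
                    "siswa jadi malas belajar, rugi"),
                  toy.pos, toy.neg)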