.Rhistory

# Load the KFF Medicare table, then divide every column after the first by the
# last column (on the final iteration the last column is divided by itself).
kff_medicare_data <- read.delim("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/kff_medicare_data.csv")
for(i in 2:ncol(kff_medicare_data)){
kff_medicare_data[,i] = as.numeric(as.character(kff_medicare_data[,i]))/as.numeric(as.character(kff_medicare_data[,ncol(kff_medicare_data)]))
}
# Replace every NA left in the table with 0.
kff_medicare_data[is.na(kff_medicare_data)] <- 0
View(kff_medicare_data)
# Second pass: reload the file and keep the numeric Total column before the
# loop overwrites the data frame's columns.
kff_medicare_data <- read.delim("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/kff_medicare_data.csv")
medicare_rep_total = as.numeric(as.character(kff_medicare_data$Total))
for(i in 2:ncol(kff_medicare_data)){
kff_medicare_data[,i] = as.numeric(as.character(kff_medicare_data[,i]))/as.numeric(as.character(kff_medicare_data[,ncol(kff_medicare_data)]))
}
kff_medicare_data[is.na(kff_medicare_data)] <- 0
medicare_rep_total
# Load the state-level Medicare spending file and check the class of its
# spending column.
Medicare_Spending <- read.delim("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/Total Medicare Spending by State.csv")
View(Medicare_Spending)
class(Medicare_Spending$Total.Medicare.Spending.by.Residence)
# Load the NCHS file and drop rows whose Average.Life.Expectancy is NA.
Age.adjusted_death_rates_1900_2013 <- read.csv("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/NCHS_-_Age-adjusted_death_rates_and_life-expectancy_at_birth___All_Races__Both_Sexes___United_States__1900-2013.csv",  stringsAsFactors=FALSE)
Age.adjusted_death_rates_1900_2013 = Age.adjusted_death_rates_1900_2013[!is.na(Age.adjusted_death_rates_1900_2013$Average.Life.Expectancy), ]
# Three stacked panels, one plot per distinct Race value.
# Line colours: black = "Both Sexes", red = "Female", blue = "Male".
# This first version pins the y-axis from 0 to the overall maximum rounded up
# to the next ten, plus ten.
par(mfrow = c(3,1))
for(i in unique(Age.adjusted_death_rates_1900_2013$Race)){
plot(Average.Life.Expectancy~Year, data = Age.adjusted_death_rates_1900_2013[Age.adjusted_death_rates_1900_2013$Sex== "Both Sexes" & Age.adjusted_death_rates_1900_2013$Race == i, ], ylim = c(0, ceiling(max(Age.adjusted_death_rates_1900_2013$Average.Life.Expectancy)/10)*10+10), type = "l",lwd= 2, col = "black")
points(Average.Life.Expectancy~Year, data = Age.adjusted_death_rates_1900_2013[Age.adjusted_death_rates_1900_2013$Sex== "Female" & Age.adjusted_death_rates_1900_2013$Race == i, ], type = "l",lwd= 2, col = "red")
points(Average.Life.Expectancy~Year, data = Age.adjusted_death_rates_1900_2013[Age.adjusted_death_rates_1900_2013$Sex== "Male" & Age.adjusted_death_rates_1900_2013$Race == i, ], type = "l",lwd= 2, col = "blue")
}
# Second pass: same reload and plots, but without an explicit ylim on the
# first line of each panel.
Age.adjusted_death_rates_1900_2013 <- read.csv("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/NCHS_-_Age-adjusted_death_rates_and_life-expectancy_at_birth___All_Races__Both_Sexes___United_States__1900-2013.csv",  stringsAsFactors=FALSE)
Age.adjusted_death_rates_1900_2013 = Age.adjusted_death_rates_1900_2013[!is.na(Age.adjusted_death_rates_1900_2013$Average.Life.Expectancy), ]
par(mfrow = c(3,1))
for(i in unique(Age.adjusted_death_rates_1900_2013$Race)){
plot(Average.Life.Expectancy~Year, data = Age.adjusted_death_rates_1900_2013[Age.adjusted_death_rates_1900_2013$Sex== "Both Sexes" & Age.adjusted_death_rates_1900_2013$Race == i, ], type = "l",lwd= 2, col = "black")
points(Average.Life.Expectancy~Year, data = Age.adjusted_death_rates_1900_2013[Age.adjusted_death_rates_1900_2013$Sex== "Female" & Age.adjusted_death_rates_1900_2013$Race == i, ], type = "l",lwd= 2, col = "red")
points(Average.Life.Expectancy~Year, data = Age.adjusted_death_rates_1900_2013[Age.adjusted_death_rates_1900_2013$Sex== "Male" & Age.adjusted_death_rates_1900_2013$Race == i, ], type = "l",lwd= 2, col = "blue")
}
View(Age.adjusted_death_rates_1900_2013)
# Three attempts at reading the same file: read.csv with sep="", read.csv with
# its default separator, and finally read.delim.
Healthcare_Expenditures_by_State <- read.csv("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/Health Care Expenditures by State of Residence.csv", sep="")
View(Healthcare_Expenditures_by_State)
Healthcare_Expenditures_by_State <- read.csv("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/Health Care Expenditures by State of Residence.csv")
View(Healthcare_Expenditures_by_State)
Healthcare_Expenditures_by_State <- read.delim("~/Documents/Georgetown/Data-Visualization-PPOL-646-Fall-2016/Health Care Expenditures by State of Residence.csv")
View(Healthcare_Expenditures_by_State)
library(qcc)
?pareto.chart
# Successive attempts at building the label vector; the last one (first column
# as character) is the value that is kept.
states = names(Healthcare_Expenditures_by_State)
states = Healthcare_Expenditures_by_State[,1]
states = as.character(Healthcare_Expenditures_by_State[,1])
# From here on the name refers to the Total.Health.Spending column alone,
# labelled with the values of the first column.
Healthcare_Expenditures_by_State = Healthcare_Expenditures_by_State$Total.Health.Spending
names(Healthcare_Expenditures_by_State) = states
Healthcare_Expenditures_by_State
# NOTE(review): the object is now the extracted column, so the two-index form
# `[-1,]` below presumably fails; the one-index `[-1]` calls further down drop
# the first element instead. Confirm against the data.
pareto.chart(Healthcare_Expenditures_by_State[-1,])
pareto.chart(table(Healthcare_Expenditures_by_State[-1,]))
Healthcare_Expenditures_by_State
pareto.chart(table(Healthcare_Expenditures_by_State[-1]))
pareto.chart((Healthcare_Expenditures_by_State[-1]))
?pareto.chart
# Trying different graphical arguments (cex, cex.lab, srt, cex.xlab) on the
# same chart.
pareto.chart(Healthcare_Expenditures_by_State[-1], cex = .5)
pareto.chart(Healthcare_Expenditures_by_State[-1], cex.lab = .5)
pareto.chart(Healthcare_Expenditures_by_State[-1], cex.lab = .5,srt=45)
pareto.chart(Healthcare_Expenditures_by_State[-1], cex.xlab = .5,srt=45)
# Load the POS annotation sheet and cross-tabulate its columns.
confusion_matrix_POS <- read.csv("~/Documents/Georgetown/NLP-LING-572-Fall-2016/confusion_matrix_POS.csv")
View(confusion_matrix_POS)
table(confusion_matrix_POS)
# NOTE(review): the unquoted name is evaluated as an object; the quoted call on
# the following line is presumably the intended form.
install.packages(irr)
install.packages('irr')
library(irr)
kappa2(confusion_matrix_POS)
# Share of rows where the Mohammad and Arif columns match: first compared
# directly, then compared as character strings.
mean(confusion_matrix_POS$Mohammad == confusion_matrix_POS$Arif)
mean(as.character(confusion_matrix_POS$Mohammad) == as.character(confusion_matrix_POS$Arif))
# The same share multiplied by 105.
mean(as.character(confusion_matrix_POS$Mohammad) == as.character(confusion_matrix_POS$Arif))*105
# Interactive walk-through of the drugs data set: load, inspect, subset,
# recode AGE2, and set explicit factor level orders.
drugs <- read.csv(file.choose(), header=TRUE) ## load the data
dim(drugs) ## check data dimensions (rows, columns)
head(drugs) ## look at the first six rows of data
summary(drugs) ## gives five number summary for continuous variables
table(drugs$HEALTH, useNA="always") ## frequency table of the HEALTH column
table(drugs$AGE2, useNA="always") ## frequency table of the AGE2 column
hist(drugs$AGE2) # quick histogram
## Subsetting data:
example <- subset(drugs, CIGEVER == "No")
drugs <- subset(drugs, AGE2 %in% c(1,2,3,4,5,6,7,8,9,10)) ## subset out older age categories
## Alternative ways of writing the same AGE2 filter, left commented out:
# drugs <- subset(drugs, AGE2 < 11)
# drugs <- subset(drugs, AGE2 <= 10)
# drugs <- subset(drugs, AGE2 %in% 1:10)
drugs$AGE2 <- drugs$AGE2 + 11 ## change age
table(drugs$AGE2) ## check to see if that worked
hist(drugs$AGE2) ## histogram of the above table
table(drugs$MJEVER)
prop.table(table(drugs$MJEVER)) ## Proportional Frequency Table
## Fix the level order of the yes/no columns ("Yes" first) and of IRSEX
## ("Male" first); values outside the listed levels become NA.
drugs$MJEVER <- factor(drugs$MJEVER, levels = c("Yes","No"))
drugs$CIGEVER <- factor(drugs$CIGEVER, levels = c("Yes","No"))
drugs$SNFEVER <- factor(drugs$SNFEVER, levels = c("Yes","No"))
drugs$CIGAREVR <- factor(drugs$CIGAREVR, levels = c("Yes","No"))
drugs$ALCEVER <- factor(drugs$ALCEVER, levels = c("Yes","No"))
drugs$IRSEX <- factor(drugs$IRSEX, levels = c("Male","Female"))
library(dplyr)
glimpse(drugs)
# Step-by-step version: keep rows with CIGEVER == "Yes", group by IRSEX,
# compute mean IREDUC2 and median HEALTH (NA ignored for the median only),
# then sort by descending avg_education.
new_data <- filter(drugs, CIGEVER == "Yes")
new_data <- group_by(new_data, IRSEX)
new_data <- summarize(new_data,
avg_education = mean(IREDUC2),
median_health = median(HEALTH, na.rm=TRUE))
new_data <- arrange(new_data, -avg_education)
# Can chain those operations together with the %>% operator:
new_data2 <- drugs %>%
filter(CIGEVER == "Yes") %>%
group_by(IRSEX) %>%
summarize(avg_education = mean(IREDUC2),
median_health = median(HEALTH, na.rm=TRUE)) %>%
arrange(-avg_education)
## These are exactly the same:
new_data == new_data2
## These chains can be extended to ggplot2 - a powerful graphics package.
# install.packages("ggplot2")
library(ggplot2)
# Same filter, grouped by HEALTH this time, piped straight into a bar chart of
# avg_education per HEALTH value.
drugs %>%
filter(CIGEVER == "Yes") %>%
group_by(HEALTH) %>%
summarize(avg_education = mean(IREDUC2)) %>%
arrange(-avg_education)  %>%
ggplot(aes(x=HEALTH, y=avg_education)) +
geom_bar(stat="identity", fill="#00AEEF") +
theme(panel.background = element_rect(fill = "#9D9FA2")) + ggtitle("Average Education Level by Health Outcomes")
# Scratch checks of the exponential density: with x = 1 and t = 2, x[t-1] is
# the first (only) element of x and is used as the rate.
?rexp
x = 1
t = 2
x[t-1]
dexp(x)
dexp(1, rate = x[t-1])
# Evaluate exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2)
# element-wise over `y`.
#
# y: numeric vector of evaluation points.
# `n` is NOT a parameter: it is a free variable found through lexical scoping,
# so an object named `n` must exist in the enclosing environment when f is
# called.
#
# Returns a numeric vector the same length as `y`.
f <- function(y) {
  half <- (n - 1) / 2
  # -expm1(-y) equals 1 - exp(-y) but does not lose digits to cancellation
  # when y is close to 0.
  exp(-y) * (-expm1(-y))^half * exp(-y * half)
}
# Evaluate the one-argument f over the integers 1..10000; that f reads `n`
# from the workspace rather than taking it as an argument.
aa = f(1:1e4)
# Evaluate exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2)
# element-wise.
#
# y: numeric vector of evaluation points.
# n: numeric; enters only through the exponent (n - 1) / 2.
#
# Returns a numeric vector (usual R recycling of `y` and `n`).
f <- function(y, n) {
  half <- (n - 1) / 2
  # -expm1(-y) equals 1 - exp(-y) but does not lose digits to cancellation
  # when y is close to 0.
  exp(-y) * (-expm1(-y))^half * exp(-y * half)
}
# Evaluate f on 1..10000 with n = 1e4, then on 1..1000 with n = 1e3, and look
# at the values and their logarithms.
aa = f(1:1e4, 1e4)
head(aa)
aa
aa = f(1:1e3, 1e3)
aa
log(aa)
# Log-scale form of exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2):
#   -y + (n - 1) / 2 * log(1 - exp(-y)) - y * (n - 1) / 2
#
# y: numeric vector of evaluation points.
# n: numeric; enters only through (n - 1) / 2.
#
# Returns a numeric vector; y == 0 gives -Inf when n > 1.
log_f <- function(y, n) {
  half <- (n - 1) / 2
  # log(-expm1(-y)) is log(1 - exp(-y)) computed without the cancellation that
  # 1 - exp(-y) suffers for y close to 0.
  -y + half * log(-expm1(-y)) - y * half
}
# Evaluate log_f on 1..10000 with n = 1e4 and plot a kernel density estimate
# of the resulting values.
fs = log_f(1:1e4, 1e4)
head(fs)
?density
plot(density(fs))
# Evaluate exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2)
# element-wise.
#
# y: numeric vector of evaluation points.
# n: numeric; enters only through the exponent (n - 1) / 2.
#
# Returns a numeric vector (usual R recycling of `y` and `n`).
f <- function(y, n) {
  power <- (n - 1) / 2
  # 1 - exp(-y) is written as -expm1(-y) so that small y keeps full precision.
  one_minus_decay <- -expm1(-y)
  exp(-y) * one_minus_decay^power * exp(-y * power)
}
# Evaluate f directly on 1..10000 with n = 1e4 and take logs of the result.
ffs = f(1:1e4, 1e4)
log(ffs)
?exp
f(2,3)
# The two-argument form of log() is the logarithm of 2 in base 3.
log(2,3)
# Reload the POS annotation sheet, cross-tabulate it and take row totals of
# the resulting table.
confusion_matrix_POS <- read.csv("~/Documents/Georgetown/NLP-LING-572-Fall-2016/confusion_matrix_POS.csv")
View(confusion_matrix_POS)
table(confusion_matrix_POS)
cm = table(confusion_matrix_POS)
rowSums(cm)
# Log-scale form of exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2):
#   -y + (n - 1) / 2 * log(1 - exp(-y)) - y * (n - 1) / 2
#
# y: numeric vector of evaluation points.
# n: numeric; enters only through (n - 1) / 2.
#
# Returns a numeric vector; y == 0 gives -Inf when n > 1.
log_f <- function(y, n) {
  half <- (n - 1) / 2
  # log(-expm1(-y)) is log(1 - exp(-y)) computed without the cancellation that
  # 1 - exp(-y) suffers for y close to 0.
  -y + half * log(-expm1(-y)) - y * half
}
# Spot value of log_f at y = 3, n = 101.
log_f(3,101)
# Compact log-scale form: (n - 1) / 2 * log(1 - exp(-x)) - x * (n + 1) / 2.
#
# x: numeric vector of evaluation points.
# n: numeric, default 101.
#
# Returns a numeric vector; x == 0 gives -Inf when n > 1.
logf <- function(x, n = 101) {
  # log(-expm1(-x)) is log(1 - exp(-x)) without cancellation for x near 0.
  (n - 1) / 2 * log(-expm1(-x)) - x * (n + 1) / 2
}
# Spot value of logf at the same point used for log_f, then scatter plots of
# log_f and exp(log_f) over integer grids and over the grid 0.01, ..., 1.
logf(3,101)
?density
plot(1:1e4, log_f(1:1e4, 1e4))
plot(1:1e4, exp(log_f(1:1e4, 1e4)))
plot(1:100, exp(log_f(1:100, 100)))
exp(log_f(1:100, 100))
plot(1:100, exp(log_f(1:100/100, 100)))
# Working out Cohen's kappa by hand for the Mohammad / Arif columns and
# comparing with irr::kappa2. The path here is relative to the working
# directory.
cm = read.csv("Documents/Georgetown/NLP-LING-572-Fall-2016/confusion_matrix_POS.csv")
table(cm)
cm = read.csv("Documents/Georgetown/NLP-LING-572-Fall-2016/confusion_matrix_POS.csv")
table(cm)
sum(cm$Mohammad==cm$Arif)/nrow(cm)
?read.csv
# Reload with strings kept as character so the == comparison is on text.
cm = read.csv("Documents/Georgetown/NLP-LING-572-Fall-2016/confusion_matrix_POS.csv",stringsAsFactors = F)
table(cm)
sum(cm$Mohammad==cm$Arif)/nrow(cm)
rowSums(table(cm))
prod(rowSums(table(cm)))
prod(rowSums(table(cm)))/nrow(cm)
# Observed agreement: share of rows where both columns match.
p_0= sum(cm$Mohammad==cm$Arif)/nrow(cm)
# First attempt at the expected agreement, built from products of all row
# totals and all column totals; it is replaced further down.
p_e = (prod(rowSums(table(cm)))/nrow(cm) + prod(colSums(table(cm)))/nrow(cm))/nrow(cm)
library(irr)
kappa2(cm)
cm = read.csv("Documents/Georgetown/NLP-LING-572-Fall-2016/confusion_matrix_POS.csv",stringsAsFactors = F)
table(cm)
kappa2
library(irr)
# Reference value from the irr package.
k = kappa2(cm)
k$value
aggreement = sum(cm$Mohammad==cm$Arif)
rowSums(cm)
rowSums(table(cm))
# Row totals times column totals; column 15 of the table is dropped in the
# later variants, presumably so both margins have the same length -- confirm
# against the data.
rowSums(table(cm))*colSums(table(cm))
rowSums(table(cm))*colSums(table(cm)[-15])
rowSums(table(cm))*colSums(table(cm)[,-15])
rowSums(table(cm))*colSums(table(cm)[,-15])/nrow(cm)
?diag
as.matrix(cm(table))
as.matrix(table(cm))
diag(as.matrix(table(cm)))
# NOTE(review): E is used on the next line but only assigned on the line after
# it.
E*diag(as.matrix(table(cm)))
E = rowSums(table(cm))*colSums(table(cm)[,-15])/nrow(cm)
E*diag(as.matrix(table(cm)))
sum(E*diag(as.matrix(table(cm))))
sum_E = sum(E*diag(as.matrix(table(cm))))
# Two candidate denominators are tried: ncol(cm) and then nrow(cm).
(aggreement - sum_E)/(ncol(cm)-sum_E)
aggreement
ncol(cm)-sum_E
(aggreement - sum_E)/(nrow(cm)-sum_E)
E
rowSums(table(cm))
colSums(table(cm)[,-15])
rowSums(table(cm))/nrow(cm)
rowSums(table(cm))/nrow(cm)*colSums(table(cm)[,-15])/nrow(cm)
# Variants of p_e that also multiply by the table diagonal; the later
# assignments drop that factor.
p_e = rowSums(table(cm))/nrow(cm)*colSums(table(cm)[,-15])/nrow(cm)*diag(as.matrix(table(cm)))
p_e
diag(as.matrix(table(cm)))
table(cm)
p_e = rowSums(table(cm))/nrow(cm)*colSums(table(cm)[,-15])/nrow(cm)*diag(as.matrix(table(cm[,-15])))
p_e
table(cm[,-15])
table(cm)[,-15]
p_e = rowSums(table(cm))/nrow(cm)*colSums(table(cm)[,-15])/nrow(cm)*diag(as.matrix(table(cm)[,-15]))
p_e
diag(as.matrix(table(cm)[,-15]))
View(cm)
# Per-category products of the two marginal proportions ...
p_e = rowSums(table(cm))/nrow(cm)*colSums(table(cm)[,-15])/nrow(cm)
p_e
sum(p_e)
p_0-p_e
# ... summed into a single expected-agreement value, then
# kappa = (p_0 - p_e) / (1 - p_e).
p_e = sum(rowSums(table(cm))/nrow(cm)*colSums(table(cm)[,-15])/nrow(cm))
p_0-p_e
(p_0-p_e)/(1-p_e)
rowSums(table(cm))/nrow(cm)*colSums(table(cm)
)
# Scratch arithmetic: a few exponentials, reciprocals, and Poisson
# probabilities with mean 2/5.
plot(cars)
exp(-55/72)
exp(-(1+1/2+1/3)*5/12)
exp(-(1^-1+(1/2)^-1+(1/3)^-1)*5/12)
1^-1
.5^-1
(1/3)^-1
?ppoints
?ppois
# ppois is the cumulative probability, dpois the point probability.
ppois(1, 2/5)
ppois(2, 2/5)
ppois(0, 2/5)
dpois(1, 2/5)
dpois(0, 2/5)
dpois(2, 2/5)
dpois(c(0,1,2), 2/5)
# One minus the summed point probabilities of 0, 1 and 2.
1-sum(dpois(c(0,1,2), 2/5))
# NOTE(review): the bare function name below has no arguments; the call on the
# following line is presumably what was meant.
1-dpois
1-dpois(0,2/5)
?dexp
# Single accept/reject ratio computed by hand for the step t = 2.
x <- 1
t <- 2
# Proposal: exponential draw whose rate is the current state x[t - 1].
y <- rexp(1, rate = x[t - 1])
# Ratio = target(y) * q(x | y) / (target(x) * q(y | x)), where the target is
# exp(log_f(., 101)) and q is the exponential proposal density.
# The two denominator factors are multiplied; `**` would raise the first to the
# power of the second.
rho <- exp(log_f(y, 101)) * dexp(x[t - 1], rate = y) /
  (exp(log_f(x[t - 1], 101)) * dexp(y, rate = x[t - 1]))
# Log-scale form of exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2):
#   -y + (n - 1) / 2 * log(1 - exp(-y)) - y * (n - 1) / 2
#
# y: numeric vector of evaluation points.
# n: numeric; enters only through (n - 1) / 2.
#
# Returns a numeric vector; y == 0 gives -Inf when n > 1.
log_f <- function(y, n) {
  half <- (n - 1) / 2
  # log(-expm1(-y)) is log(1 - exp(-y)) computed without the cancellation that
  # 1 - exp(-y) suffers for y close to 0.
  -y + half * log(-expm1(-y)) - y * half
}
# Recompute the accept/reject ratio for the current y and x[t - 1].
# The two denominator factors are multiplied; `**` would raise the first to the
# power of the second.
rho <- exp(log_f(y, 101)) * dexp(x[t - 1], rate = y) /
  (exp(log_f(x[t - 1], 101)) * dexp(y, rate = x[t - 1]))
y
# Metropolis-Hastings chain of length 1e4 started at x[1] = 1.
# Target density (up to a constant): exp(log_f(., 101)).
# Proposal: exponential with rate equal to the current state.
# `accept[t]` records 1 when the proposal at step t was taken, 0 otherwise.
x <- 1
accept <- 0
for (t in 2:1e4) {
  y <- rexp(1, rate = x[t - 1])
  # Ratio = target(y) * q(x | y) / (target(x) * q(y | x)); the denominator
  # factors are multiplied (`*`), not exponentiated (`**`).
  rho <- exp(log_f(y, 101)) * dexp(x[t - 1], rate = y) /
    (exp(log_f(x[t - 1], 101)) * dexp(y, rate = x[t - 1]))
  if (runif(1) < rho) {
    # Store into the chain `x` using the proposal `y`; R is case-sensitive, so
    # `X` and `Y` would be different, undefined objects.
    x[t] <- y
    accept[t] <- 1
  } else {
    x[t] <- x[t - 1]
    accept[t] <- 0
  }
}
# Metropolis-Hastings chain of length 1e4 started at x[1] = 1.
# Target density (up to a constant): exp(log_f(., 101)).
# Proposal: exponential with rate equal to the current state.
# `accept[t]` records 1 when the proposal at step t was taken, 0 otherwise.
x <- 1
accept <- 0
for (t in 2:1e4) {
  y <- rexp(1, rate = x[t - 1])
  # Ratio = target(y) * q(x | y) / (target(x) * q(y | x)); the denominator
  # factors are multiplied (`*`), not exponentiated (`**`).
  rho <- exp(log_f(y, 101)) * dexp(x[t - 1], rate = y) /
    (exp(log_f(x[t - 1], 101)) * dexp(y, rate = x[t - 1]))
  if (runif(1) < rho) {
    x[t] <- y
    accept[t] <- 1
  } else {
    x[t] <- x[t - 1]
    accept[t] <- 0
  }
}
# Acceptance rate of the chain, followed by a manual re-check of the last
# step's ratio and of a single accept/reject draw.
mean(accept)
# The denominator factors are multiplied; `**` would raise the first to the
# power of the second.
rho <- exp(log_f(y, 101)) * dexp(x[t - 1], rate = y) /
  (exp(log_f(x[t - 1], 101)) * dexp(y, rate = x[t - 1]))
rho
runif(1)
runif(1) < rho
y
# Metropolis-Hastings chain started at x[1] = 1.
# Target density (up to a constant): exp(log_f(., 101)).
# Proposal: exponential with rate equal to the current state.
# `accept[t]` records 1 when the proposal at step t was taken, 0 otherwise;
# accept[1] is 0 because the first state is not a proposal.
n_iter <- 1e4
# Allocate both vectors once instead of growing them inside the loop.
x <- numeric(n_iter)
x[1] <- 1
accept <- numeric(n_iter)
for (t in 2:n_iter) {
  y <- rexp(1, rate = x[t - 1])
  # Ratio = target(y) * q(x | y) / (target(x) * q(y | x)).
  rho <- exp(log_f(y, 101)) * dexp(x[t - 1], rate = y) /
    (exp(log_f(x[t - 1], 101)) * dexp(y, rate = x[t - 1]))
  if (runif(1) < rho) {
    x[t] <- y
    accept[t] <- 1
  } else {
    x[t] <- x[t - 1]
    accept[t] <- 0
  }
}
# Share of steps whose proposal was accepted (the first entry of accept is 0).
mean(accept)
?curve
# Log-scale form of exp(-y) * (1 - exp(-y))^((n - 1) / 2) * exp(-y * (n - 1) / 2):
#   -y + (n - 1) / 2 * log(1 - exp(-y)) - y * (n - 1) / 2
#
# y: numeric vector of evaluation points.
# n: numeric, default 101; enters only through (n - 1) / 2.
#
# Returns a numeric vector; y == 0 gives -Inf when n > 1.
log_f <- function(y, n = 101) {
  half <- (n - 1) / 2
  # log(-expm1(-y)) is log(1 - exp(-y)) computed without the cancellation that
  # 1 - exp(-y) suffers for y close to 0.
  -y + half * log(-expm1(-y)) - y * half
}
# Attempts at overlaying exp(log_f) on a kernel density estimate of the
# sampled chain x.
plot(1:100, exp(log_f(1:100/100, 100)))
curve(exp(log_f(x)), 0, 1)
curve(x)
?curve
density(x)
plot(density(x))
curve(exp(log_f(x)), 0, 1)
# NOTE(review): `add` is passed to plot() here; the later lines instead draw
# the density first and pass `add = T` to curve().
plot(density(x), add = T, col = "red")
curve(exp(log_f(x)), 0, 1.2)
plot(density(x), add = T, col = "red")
plot(density(x),col = "red")
curve(exp(log_f(x)), 0, 1.2, add = T)
plot(x,col = "red")
curve(exp(log_f(x)), 0, 1.2, add = T)
plot(density(x),col = "red")
curve(exp(log_f(x)), 0, 1.2, add = T)
?density
x
log(101)
# Looking for a binomial-coefficient helper; combn(100,50) asks for every
# 50-element combination of 1..100 rather than their count.
?ncr
combn(100,50)
?combn
# Binomial coefficient "n choose x".
#
# n: number of items.
# x: number of items chosen.
#
# Uses base R's choose(), which stays finite where the ratio
# factorial(n) / (factorial(x) * factorial(n - x)) overflows to Inf / Inf
# (n above 170). choose() rounds a non-integer `x` to the nearest integer
# with a warning.
#
# Returns a numeric vector.
comb <- function(n, x) {
  choose(n, x)
}
# Searching for the constant that scales exp(log_f) onto the kernel density
# of the sampled chain x.
comb(100,50)
log(comb(100,50))
plot(density(x),col = "red")
curve(exp(log(101)+log(log(comb(100,50)))+log_f(x)), 0, 1.2, add = T)
# NOTE(review): the next line ends with a stray comma; the line after it is the
# same expression without it.
exp(log(101)+log(log(comb(100,50)))+log_f(x)),
exp(log(101)+log(log(comb(100,50)))+log_f(x))
plot(density(x),col = "red")
curve(exp(log(log(comb(101,50)))+log_f(x)), 0, 1.2, add = T)
density(x)
# Candidate constant: n! / (((n - 1) / 2)!)^2, written through logs.
# NOTE(review): within this span `n` is first assigned a few lines below this
# use.
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
plot(density(x),col = "red")
curve(exp(log(101)+log(log(comb(100,50))))*exp(log_f(x)), 0, 1.2, add = T)
density(x)
density(x)/exp(log(101)+log(log(comb(100,50))))
summary(x)
curve(exp(log(101)+log(log(comb(100,50))))*exp(log_f(x)), 0, 1.2)
plot(density(x),col = "red")
curve(exp(log(101)+log(log(comb(100,50))))*exp(log_f(x)), 0, 1.2)
plot(density(x),col = "red")
n =101
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
plot(density(x),col = "red")
n =101
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
# NOTE(review): normCons is already on the natural scale, and exp() is applied
# to it again here; the later curves use normCons (or norm) directly.
curve(exp(normCons)*exp(log(101)+log(log(comb(100,50))))*exp(log_f(x)), 0, 1.2, add = T)
# Compare after discarding the first `burn` draws of the chain.
burn=1e3
n = 101
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
plot(density(x[(burn+1):length(x)]),
main="Density Comparions: Simulated to Exact")
curve(normCons*exp(logf(x)),0.2,1.4,add=TRUE,col="red")
curve(normCons*exp(log_f(x)),0.2,1.4,add=TRUE,col="red")
# Same comparison keeping every draw (burn = 0).
burn=0
n = 101
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
plot(density(x[(burn+1):length(x)]),
main="Density Comparions: Simulated to Exact")
curve(normCons*exp(log_f(x)),0.2,1.4,add=TRUE,col="red")
plot(density(x),col = "red")
n =101
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
curve(exp(normCons)*exp(log_f(x)), 0, 1.2, add = T)
plot(density(x),col = "red")
n =101
normCons <- exp(log(factorial(n))-2*log(factorial((n-1)/2)))
curve(exp(normCons)*exp(log_f(x)), 0.4, 1.2, add = T)
plot(density(x),
main="Density Comparions: Simulated to Exact")
norm = exp(log(factorial(n))-2*log(factorial((n-1)/2)))
curve(norm*exp(logf(x)),0.2,1.4,add=TRUE,col="red")
plot(density(x),
main="Simulated vs Exact")
norm = exp(log(factorial(n))-2*log(factorial((n-1)/2)))
curve(norm*exp(log_f(x)),0.4,1.2,add=T,col="red")
plot(density(x),
main="Simulated vs Exact")
# NOTE(review): the parentheses on the next assignment do not balance; the
# assignment after the following plot() call writes the constant as a plain
# ratio of factorials.
norm = factorial(n))-2*log(factorial((n-1)/2))
plot(density(x),
main="Simulated vs Exact")
norm = factorial(101)/((factorial((n-1)/2))^2)
curve(norm*exp(log_f(x)),0.4,1.2,add=T,col="red")
plot(density(x),
main="Simulated vs Exact")
#Exact Samples
norm = factorial(101)/((factorial((101-1)/2))^2)
curve(norm*exp(log_f(x)),0.4,1.2,add=T,col="red")
# Switch to the NLP course folder and load the POS annotation sheet by its
# bare file name.
setwd("~/Documents/Georgetown/NLP-LING-572-Fall-2016/")
cm = read.csv("confusion_matrix_POS.csv")
# Print the confusion matrix of two annotator columns and return Cohen's
# kappa for them.
#
# p1, p2: names of the two annotator columns.
# data:   data frame holding those columns; defaults to the workspace object
#         `cm`, which is what the two-argument call has always used.
#
# Returns (p_0 - p_e) / (1 - p_e), where p_0 is the share of rows on which the
# two columns agree and p_e is the sum over tags of the product of the two
# marginal proportions.
POS_cm <- function(p1, p2, data = cm) {
  # Compare as text so the result does not depend on factor level sets.
  a <- as.character(data[[p1]])
  b <- as.character(data[[p2]])
  # Tabulate both columns over one shared, sorted tag set: row i and column i
  # then always refer to the same tag, even when one annotator never used it,
  # and no column has to be dropped by position.
  tags <- sort(union(a, b))
  tab <- table(factor(a, levels = tags), factor(b, levels = tags),
               dnn = c(p1, p2))
  print("Confusion Matrix")
  print(tab)
  n_items <- length(a)
  p_0 <- sum(a == b) / n_items
  p_e <- sum((rowSums(tab) / n_items) * (colSums(tab) / n_items))
  print("Cohen's Kappa")
  (p_0 - p_e) / (1 - p_e)
}
# Step through the body of POS_cm by hand for the Mohammad / Arif pair.
p1= "Mohammad"
p2= "Arif"
cm_pi = cm[, c(p1,p2)]
cm_pi$Mohammad
levels(cm_pi$Mohammad)
# NOTE(review): here the second vector is passed as unique()'s second
# argument rather than being combined with the first; the assignment two
# statements below wraps both in c() first.
levels(cm_pi$Mohammad) = unique(levels(cm_pi$Mohammad), levels(cm_pi$Arif))
cm_pi$Mohammad
class(levels(cm_pi$Mohammad))
levels(cm_pi$Mohammad) = unique(c(levels(cm_pi$Mohammad), levels(cm_pi$Arif)))
class(levels(cm_pi$Mohammad))
cm_pi$Mohammad
table(cm_pi$Mohammad)
cm = read.csv("confusion_matrix_POS.csv")
# Print the confusion matrix and raw agreement count of two annotator columns
# and return Cohen's kappa for them.
#
# p1, p2: names of the two annotator columns.
# data:   data frame holding those columns; defaults to the workspace object
#         `cm`, which is what the two-argument call has always used.
#
# Returns (p_0 - p_e) / (1 - p_e), where p_0 is the share of rows on which the
# two columns agree and p_e is the sum over tags of the product of the two
# marginal proportions.
POS_cm <- function(p1, p2, data = cm) {
  # Compare as text so the result does not depend on factor level sets.
  a <- as.character(data[[p1]])
  b <- as.character(data[[p2]])
  # Re-encode both columns over one shared, sorted tag set. Assigning a longer
  # vector through `levels<-` would instead rename the existing levels by
  # position and change what the data says.
  tags <- sort(union(a, b))
  tab <- table(factor(a, levels = tags), factor(b, levels = tags),
               dnn = c(p1, p2))
  print("Confusion Matrix")
  print(tab)
  n_items <- length(a)
  n_agree <- sum(a == b)
  p_0 <- n_agree / n_items
  p_e <- sum((rowSums(tab) / n_items) * (colSums(tab) / n_items))
  print("Agreement")
  print(n_agree)
  print("Cohen's Kappa")
  (p_0 - p_e) / (1 - p_e)
}
# Run the report for the Mohammad / Arif pair.
POS_cm("Mohammad","Arif")
# Print the confusion matrix and raw agreement count of two annotator columns
# and return Cohen's kappa for them.
#
# p1, p2: names of the two annotator columns.
# data:   data frame holding those columns; defaults to the workspace object
#         `cm`, which is what the two-argument call has always used.
#
# Returns (p_0 - p_e) / (1 - p_e), where p_0 is the share of rows on which the
# two columns agree and p_e is the sum over tags of the product of the two
# marginal proportions.
POS_cm <- function(p1, p2, data = cm) {
  # Compare as text so the result does not depend on factor level sets.
  a <- as.character(data[[p1]])
  b <- as.character(data[[p2]])
  # Re-encode both columns over one shared, sorted tag set. Assigning a longer
  # vector through `levels<-` would instead rename the existing levels by
  # position and change what the data says.
  tags <- sort(union(a, b))
  tab <- table(factor(a, levels = tags), factor(b, levels = tags),
               dnn = c(p1, p2))
  print("Confusion Matrix")
  print(tab)
  n_items <- length(a)
  n_agree <- sum(a == b)
  p_0 <- n_agree / n_items
  p_e <- sum((rowSums(tab) / n_items) * (colSums(tab) / n_items))
  print("Agreement")
  print(n_agree)
  print("Cohen's Kappa")
  (p_0 - p_e) / (1 - p_e)
}
POS_cm("Mohammad","Arif")
# Working out which levels of the second column are missing from the first.
# NOTE(review): the parentheses on the next line do not balance; the line
# after it is the balanced form.
which(levels(cm_pi[,p2])) %in% levels(cm_pi[,p1])))
which(levels(cm_pi[,p2]) %in% levels(cm_pi[,p1]))
-which(levels(cm_pi[,p2]) %in% levels(cm_pi[,p1]))
levels(cm_pi[,p2])[-which(levels(cm_pi[,p2]) %in% levels(cm_pi[,p1]))]
# Rebuild cm_pi from cm and repeat the check on the fresh copy.
cm_pi = cm[, c(p1,p2)]
cm_pi
cm_pi[,p1]
levels(cm_pi[,p2])[-which(levels(cm_pi[,p2]) %in% levels(cm_pi[,p1]))]
cm = read.csv("confusion_matrix_POS.csv")
# Print the confusion matrix and raw agreement count of two annotator columns
# and return Cohen's kappa for them.
#
# p1, p2: names of the two annotator columns.
# data:   data frame holding those columns; defaults to the workspace object
#         `cm`, which is what the two-argument call has always used.
#
# Returns (p_0 - p_e) / (1 - p_e), where p_0 is the share of rows on which the
# two columns agree and p_e is the sum over tags of the product of the two
# marginal proportions.
POS_cm <- function(p1, p2, data = cm) {
  # Compare as text so the result does not depend on factor level sets.
  a <- as.character(data[[p1]])
  b <- as.character(data[[p2]])
  # Both columns are re-encoded over the SAME sorted tag set. Appending each
  # column's missing levels to its own level list gives the same set in two
  # different orders, which pairs row and column totals of different tags; and
  # indexing with -which(...) drops every element when nothing matches.
  tags <- sort(union(a, b))
  tab <- table(factor(a, levels = tags), factor(b, levels = tags),
               dnn = c(p1, p2))
  print("Confusion Matrix")
  print(tab)
  n_items <- length(a)
  n_agree <- sum(a == b)
  p_0 <- n_agree / n_items
  p_e <- sum((rowSums(tab) / n_items) * (colSums(tab) / n_items))
  print("Agreement")
  print(n_agree)
  print("Cohen's Kappa")
  (p_0 - p_e) / (1 - p_e)
}
POS_cm("Mohammad","Arif")
?order
table(cm)
table(cm_pi)
# Reordering the confusion table so rows, and then columns, are sorted by
# label. The last line orders columns by colnames(); the line before it uses
# names() for the column order.
aa = table(cm_pi)
rownames(aa)
aa[order(rownames(aa)),]
aa = table(cm_pi)
aa[order(rownames(aa)),order(names(aa))]
aa[order(rownames(aa)),order(colnames(aa))]
# Scratch arithmetic.
# NOTE(review): `e` is not assigned anywhere in this span; the exp() calls
# below are the function form.
e^(-2*5/12)
exp(-2*5/12)
exp(-3*5/12)
exp(-1*5/12)
0.6592406*0.2865048*0.4345982