# Transforming Data ----
# Teaching script: ladder-of-powers and Box-Cox transformations, illustrated
# with the Prestige data set that becomes available once `car` is loaded.

library(car)

# Pull out the two columns used below instead of attach()-ing the data frame,
# so nothing on the search path can be masked silently.
income <- Prestige$income
prestige <- Prestige$prestige

# Skewed Data (One Variable at a Time) ----

plot(density(income))  # positive skew, now what?
# Descend the ladder of powers (square root or log).

# How does log work?
?log
x <- c(1, 2, 4, 9, 100)
log(x)
log(x, base = exp(1))  # natural log again; exp(1) is e, 2.718 only approximates
log10(x)
log(x, base = 10)      # common log
# Changing the base only rescales the result by a constant
# (log_b(x) = log(x) / log(b)), so every base has the same effect on the
# shape of a distribution.
sqrt(x)
x^0.5      # these two are both square roots
x^(1 / 3)  # cube root

# Log to "fix" positive skew.
income2 <- log(income)
plot(density(income2))

# Square root: also a step down the ladder, but milder than the log.
income3 <- sqrt(income)
plot(density(income3))

# Ascend the ladder of powers (x squared, cubed, etc.) to "fix" negative skew.
# Squaring the square root recovers the original income values, so this plot
# shows the same shape as the very first density plot.
income4 <- income3^2
plot(density(income4))

# Not negative, not positive: what to pick?
# Box-Cox: an estimated power near 1 means leave the variable alone,
# an estimated power near 0 means take the log.
pt_income <- powerTransform(income)
summary(pt_income)

# Use the estimated power itself rather than a value retyped by hand from the
# printed summary (0.1793 was previously typed in here), so the script stays
# correct if the data change.
lambda_income <- unname(coef(pt_income))
income5 <- income^lambda_income
plot(density(income5))  # looks nice, no?

# Use qqPlot to check.
qqPlot(income)
qqPlot(income5)

# Linearity ----

mod1 <- lm(prestige ~ income)
summary(mod1)
scatterplot(income, prestige)

mod2 <- lm(prestige ~ log(income))
scatterplot(log(income), prestige)
summary(mod2)

# powerTransform() on a fitted lm estimates a power for the RESPONSE
# (prestige), not for the predictor, so the estimate is applied to prestige.
# For a power on the predictor itself, see car::boxTidwell().
pt_prestige <- powerTransform(mod1)
summary(pt_prestige)
lambda_prestige <- unname(coef(pt_prestige))
prestige2 <- prestige^lambda_prestige
mod3 <- lm(prestige2 ~ income)
summary(mod3)