Lecture 1 Some R Base Plotting

#devtools::install_github("heike/extracat")
library("ggthemes")
library("GGally")

## Loading required package: ggplot2

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library("extracat")
library(hdrcde)

## This is hdrcde 3.4

library(KernSmooth)

## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009

library("ggplot2")
library("gridExtra")
library("vcd")

## Loading required package: grid

# The Titanic data revisited
Titanic1 <- data.frame(Titanic)
head(Titanic1)

##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0

str(Titanic1)

## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...

p <- ggplot(Titanic1, aes(weight=Freq)) + ylab("") + ylim(0,2250)
cs <- p + aes(Class) + geom_bar(fill="blue")
sx <- p + aes(Sex) + geom_bar(fill="green")
ag <- p + aes(Age) + geom_bar(fill="tan2")
su <- p + aes(Survived) + geom_bar(fill="red")
grid.arrange(cs, sx, ag, su, nrow=1, widths=c(3, 2, 2, 2))

doubledecker(Survived ~ Sex, data = Titanic, gp = gpar(fill = c("grey90", "blue")))

doubledecker(Survived ~ Sex, data = Titanic, gp = gpar(fill = c("grey90", "blue")))

doubledecker(Survived ~ Class, data = Titanic, gp = gpar(fill = c("grey90", "blue")))

doubledecker(Survived ~ Sex + Class, data = Titanic, gp = gpar(fill = c("grey90", "blue")))

doubledecker(Survived ~ Class + Age, data = Titanic, gp = gpar(fill = c("grey90", "blue")))

doubledecker(Survived ~ Sex + Class + Age, data = Titanic, gp = gpar(fill = c("grey90", "blue")))

# Movies data
#install.packages("ggplot2movies")
library(ggplot2movies)
data(movies)
#?movies
ggplot(movies, aes(length)) + geom_bar() + ylab("") + xlab("Movie length (minutes)")

ggplot(movies, aes("var", length)) + geom_boxplot() + xlab("") +
       ylab("Movie length (minutes)")  + scale_x_discrete(breaks=NULL) + coord_flip()

ggplot(movies, aes(length)) + ylab("") +  xlim(0,180) +
       geom_histogram(binwidth=1)  +
       xlab("Movie length (minutes)")

## Warning: Removed 392 rows containing non-finite values (stat_bin).

## Warning: Removed 2 rows containing missing values (geom_bar).

ggplot(movies, aes(x = length)) +  xlim(0,240) +
       geom_histogram(binwidth=1)  +
       xlab("Movie length (minutes)") + ylab("")

## Warning: Removed 114 rows containing non-finite values (stat_bin).

## Warning: Removed 2 rows containing missing values (geom_bar).

ggplot(movies, aes(x = length)) +  xlim(0,240) +
       geom_histogram(aes(y=..density..),binwidth=1)  +
       xlab("Movie length (minutes)") + ylab("") +geom_density(colour="blue")

## Warning: Removed 114 rows containing non-finite values (stat_bin).

## Warning: Removed 114 rows containing non-finite values (stat_density).

## Warning: Removed 2 rows containing missing values (geom_bar).

ggplot(movies, aes(votes, rating,alpha=0.005)) + geom_point(size=0.25) + ylim(1,10) + ylab("Rating") + xlab("Votes")

foo<-which(movies$votes>10000)
length(foo)

## [1] 840

ggplot(movies[foo,], aes(votes, rating)) + geom_point() + ylim(1,10) + ylab("Rating") + xlab("Votes")

foo1<-which(movies$rating[foo]==min(movies$rating[foo]))
movies1<-movies[foo,]
movies1[foo1,]

## # A tibble: 1 x 24
##   title  year length   budget rating votes    r1    r2    r3    r4    r5    r6
##   <chr> <int>  <int>    <int>  <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gigli  2003    121 54000000    2.3 11285  74.5   4.5   4.5   4.5   4.5   4.5
## # ... with 12 more variables: r7 <dbl>, r8 <dbl>, r9 <dbl>, r10 <dbl>,
## #   mpaa <chr>, Action <int>, Animation <int>, Comedy <int>, Drama <int>,
## #   Documentary <int>, Romance <int>, Short <int>

foo2<-which(movies$rating[foo]==max(movies$rating[foo]))
movies2<-movies[foo,]
movies2[foo2,]

## # A tibble: 2 x 24
##   title     year length budget rating  votes    r1    r2    r3    r4    r5    r6
##   <chr>    <int>  <int>  <int>  <dbl>  <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Godfath~  1972    175  6  e6    9.1 122755   4.5   4.5   4.5   4.5   4.5   4.5
## 2 Shawsha~  1994    142  2.5e7    9.1 149494   4.5   4.5   4.5   4.5   4.5   4.5
## # ... with 12 more variables: r7 <dbl>, r8 <dbl>, r9 <dbl>, r10 <dbl>,
## #   mpaa <chr>, Action <int>, Animation <int>, Comedy <int>, Drama <int>,
## #   Documentary <int>, Romance <int>, Short <int>

temp<-sort(movies2$rating,decreasing=TRUE,index.return=TRUE)
temp$ix

##   [1] 331 673 452 332 675 144 153 451 453 600 654 723 726   2 147 168 170 239
##  [19] 491 557 604 610 795 262 341 401 431 541 566 599 665 738  30  35  54 122
##  [37] 128 236 286 460 483 506 583 683 686 745 757 765 799  37  53 137 164 347
##  [55] 423 468 473 495 511 515 545 554 603 607 618 661 701 732 772 779 805   5
##  [73]  27  29  33  50 132 175 234 290 348 370 392 416 417 499 507 543 611 619
##  [91] 647 677 685 736 828  44 111 145 190 251 277 282 296 308 345 406 448 474
## [109] 555 569 580 598 663 689 748 822 829  21  68  93 100 166 189 208 215 291
## [127] 314 329 338 349 354 394 442 590 727 734 774 780 793 835   6  31  32  76
## [145]  92 101 133 222 229 231 247 312 327 352 396 458 465 498 510 527 585 679
## [163] 681 695 707 714 759 778 813 837  19  83  91 119 173 227 269 273 365 400
## [181] 420 429 447 497 519 588 605 622 636 650 672 697 712 750 766 773 785 794
## [199] 834   7  23  86  87 107 154 199 200 216 217 245 276 301 321 328 336 340
## [217] 362 391 403 445 461 464 500 525 537 548 559 612 616 630 680 699 703 756
## [235] 776 812  24  60 104 113 115 116 121 130 135 136 176 179 205 212 249 281
## [253] 284 294 307 356 357 359 367 459 469 513 529 551 552 591 625 644 666 783
## [271] 784 818  25  36 108 120 129 134 146 151 156 161 180 201 203 210 211 226
## [289] 233 253 259 265 311 315 369 377 381 476 536 539 606 638 649 676 684 704
## [307] 722 741 817 826 827 830  15  49  55  65  77  95 124 131 162 169 283 319
## [325] 320 386 405 430 434 437 479 501 517 549 579 582 592 597 601 602 648 662
## [343] 669 769  16  58  66  67  79 143 163 178 192 198 206 250 266 268 295 299
## [361] 302 303 330 351 366 379 389 411 463 471 480 508 528 564 584 595 617 623
## [379] 653 655 693 711 758 767 820   8  14  94 125 126 155 165 188 209 230 255
## [397] 261 274 289 306 313 333 343 361 382 385 387 390 397 409 444 446 455 472
## [415] 487 509 560 581 587 594 614 717 746 753 761 775 777 833  74  98 152 196
## [433] 238 246 280 298 309 310 342 363 395 398 413 518 524 530 533 575 577 637
## [451] 754 755 771 802 804  56  89  96 114 149 150 185 194 240 256 257 271 278
## [469] 285 339 371 443 467 481 538 558 628 640 682 708 715 739 744 747 781 792
## [487] 810  43  51  62  69 123 138 204 220 224 252 264 270 317 326 384 424 486
## [505] 490 544 562 563 629 635 658 691 698 713 725 733 743 751 782 803  40 402
## [523] 407 433 438 523 694 716 735 749 763 764 797 806   9  11  38  41  59 109
## [541] 172 219 293 304 305 318 323 325 335 346 350 360 415 462 477 492 567 568
## [559] 571 631 656 702 809 811 825   1  70  75  78 102 140 177 191 223 235 368
## [577] 436 454 484 553 570 586 633 642 671 692 706 729 760 832 839  46  71 103
## [595] 110 158 181 228 243 287 292 399 414 421 488 504 531 532 546 596 608 613
## [613] 667 737 752 801 815 821 823  47  63  82 106 184 197 275 288 422 441 449
## [631] 489 512 521 593 609 627 643 690 720 740 770 789 790 819  20  52  72 127
## [649] 142 186 187 195 221 373 425 428 435 478 516 522 540 561 705 724 728 768
## [667] 816  22  39  42 183 207 244 439 440 482 574 578 624 664 700 719 730 814
## [685] 831  17  61 118 167 171 232 248 358 376 388 494 502 576 620 641 718 742
## [703] 836   3   4  73  90 105 112 139 157 218 258 272 300 316 380 393 475 520
## [721] 565 572 670 791 838  28  48 174 297 410 419 470 485 496 550 556 668 731
## [739]  10  12  88  99 214 225 237 353 645 659 687 786 787  34  85  97 159 260
## [757] 267 344 355 375 418 526 547 615 634 721 788 800  57 141 182 202 263 279
## [775] 322 337 378 404 505 535 626 646 696 412 493 534 589 621 688 798  26 148
## [793] 213 450 456 514 651 674 762 807 840  13  81 193 408 427 432 573 639 660
## [811] 796 808 117 254 372 383 632 657 710 241 426 542 160 374  18 503 457 242
## [829] 364 466 334 652  45 824 678  80  64 709  84 324

movies2[temp$ix,]

## # A tibble: 840 x 24
##    title    year length budget rating  votes    r1    r2    r3    r4    r5    r6
##    <chr>   <int>  <int>  <int>  <dbl>  <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Godfat~  1972    175  6  e6    9.1 122755   4.5   4.5   4.5   4.5   4.5   4.5
##  2 Shawsh~  1994    142  2.5e7    9.1 149494   4.5   4.5   4.5   4.5   4.5   4.5
##  3 Lord o~  2003    251  9.4e7    9   103631   4.5   4.5   4.5   4.5   4.5   4.5
##  4 Godfat~  1974    200  1.3e7    8.9  71363   4.5   4.5   4.5   4.5   4.5   4.5
##  5 Shichi~  1954    206  5  e5    8.9  32141   4.5   4.5   4.5   4.5   4.5   4.5
##  6 Buono,~  1966    180  1.2e6    8.8  30224   4.5   4.5   4.5   4.5   4.5   4.5
##  7 Casabl~  1942    102  9.5e5    8.8  66030   4.5   4.5   4.5   4.5   4.5   4.5
##  8 Lord o~  2001    208  9.3e7    8.8 157608   4.5   4.5   4.5   4.5   4.5   4.5
##  9 Lord o~  2002    223  9.4e7    8.8 114797   4.5   4.5   4.5   4.5   4.5   4.5
## 10 Pulp F~  1994    168  8  e6    8.8 132745   4.5   4.5   4.5   4.5   4.5   4.5
## # ... with 830 more rows, and 12 more variables: r7 <dbl>, r8 <dbl>, r9 <dbl>,
## #   r10 <dbl>, mpaa <chr>, Action <int>, Animation <int>, Comedy <int>,
## #   Drama <int>, Documentary <int>, Romance <int>, Short <int>

movies2[temp$ix,1]

## # A tibble: 840 x 1
##    title                                             
##    <chr>                                             
##  1 Godfather, The                                    
##  2 Shawshank Redemption, The                         
##  3 Lord of the Rings: The Return of the King, The    
##  4 Godfather: Part II, The                           
##  5 Shichinin no samurai                              
##  6 Buono, il brutto, il cattivo, Il                  
##  7 Casablanca                                        
##  8 Lord of the Rings: The Fellowship of the Ring, The
##  9 Lord of the Rings: The Two Towers, The            
## 10 Pulp Fiction                                      
## # ... with 830 more rows

summary(movies$year)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1893    1958    1983    1976    1997    2005

ggplot(movies, aes(length, rating)) + geom_point() + ylim(1,10) + ylab("Rating") + xlab("Length (minutes)")+  xlim(60,240)

## Warning: Removed 10930 rows containing missing values (geom_point).

# Berkeley data
ucba <- as.data.frame(UCBAdmissions)
a <- ggplot(ucba, aes(Dept)) + geom_bar(aes(weight=Freq))
b <- ggplot(ucba, aes(Gender)) + geom_bar(aes(weight=Freq))
c <- ggplot(ucba, aes(Admit)) + geom_bar(aes(weight=Freq))
grid.arrange(a, b, c, nrow=1, widths=c(7,3,3))

ucba <- within(ucba, Accept <- factor(Admit, levels=c("Rejected", "Admitted")))
doubledecker(xtabs(Freq~ Dept + Gender + Accept, data = ucba), gp = gpar(fill = c("grey90", "steelblue")))

#Geyser Data
library(hdrcde)
data(geyser, package="MASS")
ggplot(geyser, aes(duration, waiting)) + geom_point() + ylab("Waiting time (minutes)") + xlab("Duration (minutes)")

ggplot(geyser, aes(duration, waiting)) + geom_point() + geom_density2d() + ylab("Waiting time (minutes)") + xlab("Duration (minutes)")

data(father.son, package="UsingR")
c2 <- ggplot(father.son, aes(sheight)) + 
             geom_histogram(aes(y = ..density..), binwidth=1) +
             geom_density(colour="blue") + xlim(58, 80) + ylim(0, 0.16) +
             xlab("Height (inches)") + ylab("") + ggtitle("Sons")
p2 <- ggplot(father.son, aes(fheight)) + 
             geom_histogram(aes(y = ..density..), binwidth=1) +
             geom_density(colour="blue") + xlim(58, 80) + ylim(0, 0.16) +
             xlab("Height (inches)") + ylab("") +
             ggtitle("Fathers")
grid.arrange(c2, p2, nrow = 1)

## Warning: Removed 2 rows containing missing values (geom_bar).

## Warning: Removed 2 rows containing missing values (geom_bar).

qqnorm(father.son$sheight, main="Sons", xlab="",ylab="", pch=16, ylim=c(55,80))
qqline(father.son$sheight)

qqnorm(father.son$fheight, main="Fathers", xlab="",ylab="", pch=16, ylim=c(55,80))
qqline(father.son$fheight)

ggplot(father.son, aes(fheight, sheight)) + geom_point() +
       geom_smooth(method="lm", colour="red", se=FALSE) +
       geom_abline(slope=1, intercept=0) +xlab("Father's height (inches)")+ylab("Son's height (inches)")

## `geom_smooth()` using formula 'y ~ x'

ggplot(father.son, aes(fheight, sheight)) + geom_point() +
       geom_smooth(method="lm", colour="red", se=TRUE) +
       geom_abline(slope=1, intercept=0) +xlab("Father's height (inches)")+ylab("Son's height (inches)")

## `geom_smooth()` using formula 'y ~ x'

ggplot(father.son, aes(fheight, sheight)) + geom_point() +
       geom_smooth(method="lm", colour="red", se=FALSE) +
       stat_smooth() +xlab("Father's height (inches)")+ylab("Son's height (inches)")

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Iris data
data(iris)
ggpairs(iris, aes(colour=Species, alpha=0.4))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggparcoord(iris, columns=1:4, groupColumn="Species")

a <- ggplot(iris, aes("Boxplot for all", Sepal.Width)) +
            xlab("")  + geom_boxplot() +
            scale_x_discrete(breaks=NULL) 
b <- ggplot(iris, aes(Species, Sepal.Width)) + 
            geom_boxplot() +  xlab("")
grid.arrange(a, b, nrow=1, widths=c(1,2))

a <- ggplot(iris, aes("Boxplot for all", Petal.Width)) +
            xlab("")  + geom_boxplot() +
            scale_x_discrete(breaks=NULL) 
b <- ggplot(iris, aes(Species, Petal.Width)) + 
            geom_boxplot() +  xlab("")
grid.arrange(a, b, nrow=1, widths=c(1,2))

# Body data
data(body, package="gclus")
body1 <- body
names(body1) <- abbreviate(names(body), 2)
names(body1)[c(4:5, 11:13, 19:21)] <-  
       c("CDp", "CD", "Ch", "Ws", "Ab", "Cl", "An", "Wr")
a1 <- ggparcoord(body1, columns=1:24, alphaLines=0.1,groupColumn="Gn") + xlab("") + ylab("")
a2 <- ggparcoord(body1, columns=1:24, scale="uniminmax", alphaLines=0.1) + xlab("") + ylab("")
a3 <- ggparcoord(body1, columns=1:24, scale="globalminmax", alphaLines=0.1) + xlab("") + ylab("")
a4 <- ggparcoord(body1, columns=1:24, scale="center",scaleSummary="median", alphaLines=0.1) +xlab("") + ylab("")
grid.arrange(a1, a2, a3, a4)

a1 <- ggparcoord(body1, columns=1:24, alphaLines=0.1,groupColumn="Gn") + xlab("") + ylab("")
a1

body1$Gn<-as.factor(body1$Gn)
a1 <- ggparcoord(body1, columns=1:24, alphaLines=0.3,groupColumn="Gn") +xlab("") + ylab("")
a1

a1 <- ggparcoord(body1, columns=1:24, alphaLines=0.1,groupColumn="Gn") + xlab("") + ylab("")
a2 <- ggparcoord(body1, columns=1:24, scale="uniminmax",groupColumn="Gn",alphaLines=0.1) + xlab("") + ylab("")
a3 <- ggparcoord(body1, columns=1:24,scale="globalminmax", alphaLines=0.1,groupColumn="Gn") + xlab("") + ylab("")
a4 <- ggparcoord(body1, columns=1:24, scale="center",scaleSummary="median", alphaLines=0.1,groupColumn="Gn") +xlab("") + ylab("")
grid.arrange(a1, a2, a3, a4)

# Coffee data
data(coffee, package="pgmm")
coffee <- within(coffee, Type <- ifelse(Variety==1,"Arabica", "Robusta"))
names(coffee) <- abbreviate(names(coffee), 8)
ggpairs(coffee[,-c(1,2)], aes(colour=Type, alpha=0.4))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

names(coffee)[6]<-"Ph"
ggpairs(coffee[,-c(1,2)], aes(colour=Type, alpha=0.4))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

a <- ggplot(coffee, aes(x=Type)) + geom_bar(aes(fill=Type)) +
            scale_fill_manual(values = c("grey70", "red")) +
            guides(fill=FALSE) + ylab("")

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

b <- ggplot(coffee, aes(x=Fat, y=Caffine, colour=Type)) +
            geom_point(size=2) +
            scale_colour_manual(values = c("grey70", "red"))
c <- ggparcoord(coffee[order(coffee$Type),], columns=3:14,
                groupColumn="Type", scale="uniminmax") +
                xlab("") +  ylab("") +
                theme(legend.position = "none") +
                scale_colour_manual(values = c("grey","red")) +
                theme(axis.ticks.y = element_blank(),
                axis.text.y = element_blank())
grid.arrange(arrangeGrob(a, b, ncol=2, widths=c(1,2)), c, nrow=2)

# Housing data
library(vcd)    
data(housing, package="MASS")
?housing

## starting httpd help server ...

##  done

mosaic(xtabs(Freq ~ Cont + Type + Infl + Sat, data = housing),
       direction = c("h", "v", "v", "h"), 
       gp = gpar(fill = c("grey", "grey","blue")),
       spacing = spacing_highlighting)

par(mar=c(5, 4, 4, 2) + 0.1)
rmb(formula = ~Type+Cont+Infl+Sat, data = housing, cat.ord = 3,
    spine = TRUE, freq.trans = "const")

# Crime data
data(crime.us, package="VGAMdata")
crime1<-crime.us
names(crime1)<-gsub("*Rate","",names(crime1))
names(crime1)[19:20]<-c("Larceny","MVTheft")
ggpairs(crime1[,c(13:16,18:20)],diag=list(continuous="densityDiag"),axisLabels="none")

# Boston data
data(Boston, package="MASS")
par(mar=c(1.1, 1.1, 1.1, 1.1))
palette(rainbow(14, s = 0.6, v = 0.75))
stars(Boston[1:12,], labels=NULL, draw.segments = TRUE)

stars(Boston, labels=NULL, draw.segments = TRUE)

par(mar=c(5, 4, 4, 2) + 0.1)