[R-bloggers] another surmortality graph (and 11 more aRticles)


another surmortality graph

Posted: 27 Apr 2020 11:20 AM PDT

[This article was first published on R – Xi'an's Og, and kindly contributed to R-bloggers].

Another graph showing the recent peak in daily deaths throughout France, as recorded by INSEE and plotted by Baptiste Coulmont from the Paris 8 Sociology Department, and further discussed by Arthur Charpentier on Freakonometrics. Allowing for a few days' lag in reporting, this brings an objective perspective on the impact of the epidemic (and of the quarantine) compared with the other years since 2001, without requiring tests or even surveys. (The huge peak in August 2003 was a heat wave that decimated elderly citizens throughout France.)


W is for Write and Read Data – Fast

Posted: 27 Apr 2020 07:00 AM PDT

[This article was first published on Deeply Trivial, and kindly contributed to R-bloggers].

Once again, I'm dipping outside of the tidyverse, but this package and its functions have been really useful in getting data quickly in (and out) of R.

For work, I have to pull in data from a few different sources, and manipulate and work with them to produce the final dataset that I use for much of my analysis. So that I don't have to repeat all of that joining, recoding, and calculating each time, I created a final merged dataset as a CSV file that I can load when I need to continue my analysis. The problem is that the most recent version of that file, which contains more than 13 million records, was so large that writing it (and subsequently reading it back in) took forever and sometimes timed out.

That's when I discovered the data.table library and its fread and fwrite functions. The tidyverse is great for working with CSV files, but a lot of its memory and loading time goes into formatting. fread and fwrite are leaner and get the job done a bit faster. For regular-sized CSV files (like my reads2019 set), the time difference is pretty minimal. But for a 5GB datafile, it makes a huge difference.

library(tidyverse)
## -- Attaching packages ------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
system.time(reads2019 <- read_csv("~/Downloads/Blogging A to Z/SaraReads2019_allchanges.csv",
                                  col_names = TRUE))
## Parsed with column specification:
## cols(
##   Title = col_character(),
##   Pages = col_double(),
##   date_started = col_character(),
##   date_read = col_character(),
##   Book.ID = col_double(),
##   Author = col_character(),
##   AdditionalAuthors = col_character(),
##   AverageRating = col_double(),
##   OriginalPublicationYear = col_double(),
##   read_time = col_double(),
##   MyRating = col_double(),
##   Gender = col_double(),
##   Fiction = col_double(),
##   Childrens = col_double(),
##   Fantasy = col_double(),
##   SciFi = col_double(),
##   Mystery = col_double(),
##   SelfHelp = col_double()
## )
##    user  system elapsed 
##    0.00    0.10    0.14
rm(reads2019)

library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
system.time(reads2019 <- fread("~/Downloads/Blogging A to Z/SaraReads2019_allchanges.csv"))
##    user  system elapsed 
##       0       0       0

But let's look at how long it took to read my work datafile. Here's the output from system.time.

read_csv:
##    user  system elapsed 
##   61.14   11.72   90.56 

fread:
##    user  system elapsed 
##   57.97   16.40   57.19 

But the real win is in how quickly this package writes CSV data. Using a package called wakefield, I'll randomly generate 10,000,000 records of survey data, then see how long it takes to write the data to a file using both write_csv and fwrite.

library(wakefield)
## Warning: package 'wakefield' was built under R version 3.6.3
## 
## Attaching package: 'wakefield'
## 
## The following objects are masked from 'package:data.table':
## 
##     hour, minute, month, second, year
## 
## The following object is masked from 'package:dplyr':
## 
##     id
set.seed(42)

reallybigshew <- r_data_frame(n = 10000000,
                              id,
                              race,
                              age,
                              smokes,
                              marital,
                              Start = hour,
                              End = hour,
                              iq,
                              height,
                              died)


system.time(write_csv(reallybigshew, "~/Downloads/Blogging A to Z/bigdata1.csv"))
##    user  system elapsed 
##  134.22    2.52  137.80
system.time(fwrite(reallybigshew, "~/Downloads/Blogging A to Z/bigdata2.csv"))
##    user  system elapsed 
##    8.65    0.32    2.77


R is everywhere

Posted: 27 Apr 2020 01:07 AM PDT

[This article was first published on Quantargo Blog, and kindly contributed to R-bloggers].

R is everywhere

  • Learn what R is all about
  • Get an overview of why R is useful
  • Submit your first code exercise

Introduction to R

The most powerful statistical computing language on the planet.

Norman Nie, Founder of SPSS

R is a programming language and environment for working with data. It is loved by statisticians and data scientists for its expressive code syntax and plentiful external libraries and tools, and it works on all major operating systems.

It is the Swiss army knife for data analysis and statistical computing (and you can make some pretty charts, too!). The R language is easily extensible with packages written by a large and growing community of developers around the world. You can find it pretty much anywhere—it is used by academic institutions, start-ups, international corporations and many more.

This is also reflected in its adoption: both downloads and the number of available packages have increased strongly over the years.

In 2020 R celebrates its 20th birthday with the release of version 4.0. And yes, it's free and open source 😀

Quiz: R Facts

Which of the following statements about R are correct?
Start Exercise

Why Use R?

R is a popular language for solving data analysis problems and is also used by people who do not traditionally consider themselves programmers. When creating charts and visualizations with R, you will find that you have far greater creative possibilities than in graphical applications such as Excel.

Here are some of the features R is most famous for:

Visualization: Creating beautiful graphs and visualizations is one of its biggest strengths. The core language already provides a rich set of tools used for plotting charts and for all kinds of graphics. The sky's the limit.

Reproducibility: Unlike spreadsheet software, R code is not coupled to specific datasets and can easily be reused across different projects, even when datasets exceed 1 million rows. Easily build reusable reports and automatically generate new versions as the data changes.

Advanced modelling: R provides the biggest and most powerful code base for data analysis in the world. The richness and depth of available statistical models is unparalleled and growing by the day, thanks to the huge community of open source package developers and contributors.

Automation: R code can also be used to automate reports or to perform data transformations and model computations. It can also be integrated in automated production workflows, cloud computing environments and modern database systems.

Quiz: Using R

What are the main reasons to use R compared to spreadsheet software?
Start Exercise

You R in Good Company

R is the de facto standard for statistical computing at academic institutions and companies around the world. Its great support for literate programming (code that can be combined with human-readable text) enables researchers and data scientists to create publication-ready reports which are easy to reproduce for reviewers.

The language has seen wide adoption across industries; see some examples below:

Information Technology

Pharma: Merck, Genentech (Roche), Novartis, Pfizer

Newspapers: The Economist, The New York Times, Financial Times

Finance

  • Banks: Bank of America, J.P. Morgan, Goldman Sachs, Credit Suisse, UBS, Deutsche Bank
  • Insurers: Lloyd's, Allianz

See also the R Consortium page for further information about industrial partners and initiatives.

Building Blocks

The R language consists of three fundamental building blocks, which we will have a look at in the following chapters:

  • Objects: Everything that exists is an object
  • Functions: Everything that happens is a function call
  • Interfaces: R connects well with many statistical algorithms and libraries

The most important object type in R is the vector. Vectors form the basis for (almost) all R data structures. Being strongly vector-oriented makes R a very expressive and powerful language.

Functions and operators make it easy to work with vectors and compute results.
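As a minimal illustration of this idea (my own example, not part of the original course material), a function or operator applied to a vector works on all of its elements at once:

# a numeric vector of five temperatures
temps <- c(21.3, 19.8, 24.1, 22.7, 20.5)

temps - mean(temps)    # vectorized: deviations from the mean
round(sqrt(temps), 2)  # functions apply element-wise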

One of R's greatest strengths is its flexibility in integrating new algorithms and building interfaces around them. R's package ecosystem lets you choose from thousands of open source models and libraries. The main package repository, called CRAN, hosts these packages and allows you to easily install and use them in your code.
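For example, installing and loading a CRAN package takes two calls (a generic sketch; ggplot2 stands in for any CRAN package):

install.packages("ggplot2")  # download and install from CRAN
library(ggplot2)             # load the package into your session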

Exercise: Submit your first code

This course has code exercises to help you learn and quickly explore new concepts. After entering code in the editor, hit the "Submit" button to execute it. The editor will give you feedback on your submission and display any output below the editor. If you need some additional help, use the "Get Hint" button.

To finish your first exercise, press the "Submit" button.

Start Exercise


Essential list of useful R packages for data scientists

Posted: 26 Apr 2020 11:40 PM PDT

[This article was first published on R – TomazTsql, and kindly contributed to R-bloggers].

I have written a couple of blog posts on R packages (here | here), and this post is a compilation of the most needed packages for data science, statistical work and everyday use of R.

Thousands of R packages are available on CRAN (with all its mirror sites), on GitHub and in individual developers' repositories.

Many useful functions are spread across many different R packages, and the same functionality often appears in several of them, so the choice ultimately boils down to user preference and the work at hand. From the perspective of a statistician and data scientist, I will cover the essential and major packages in sections. By no means is this a definitive list; it is only a personal preference.

1. Loading and importing data

Loading and reading data into the R environment is most likely one of the first steps, if not the most important one. Data is the fuel.

I break this into further subsections: reading data from binary files, from ODBC drivers and from SQL databases.


1.1. Importing from binary files

# Reading from SAS and SPSS
install.packages("Hmisc", dependencies = TRUE)

# Reading from Stata, Systat and Weka
install.packages("foreign", dependencies = TRUE)

# Reading from KNIME
install.packages(c("protr", "foreign"), dependencies = TRUE)

# Reading from Excel
install.packages(c("readxl", "xlsx"), dependencies = TRUE)

# Reading from TXT, CSV
install.packages(c("csv", "readr", "tidyverse"), dependencies = TRUE)

# Reading from JSON
install.packages(c("jsonlite", "rjson", "RJSONIO", "jsonvalidate"), dependencies = TRUE)

# Reading from AVRO
install.packages("sparkavro", dependencies = TRUE)

# Reading from Parquet files
install.packages("arrow", dependencies = TRUE)
devtools::install_github("apache/arrow/r")

# Reading from XML
install.packages("XML", dependencies = TRUE)

1.2. Importing from ODBC

This will cover most of the work with ODBC drivers:

install.packages(c("odbc", "RODBC"), dependencies = TRUE)


1.3. Importing from SQL Databases

Accessing a SQL database through a dedicated package can bring great benefits when pulling data from the database into an R data frame. In addition, I have added some useful R packages that make querying data in R much easier (RSQL) or even let you write SQL statements directly against data frames (sqldf), among other great features.

# Microsoft SQL Server
install.packages(c("mssqlR", "RODBC"), dependencies = TRUE)

# MySQL
install.packages(c("RMySQL", "dbConnect"), dependencies = TRUE)

# PostgreSQL
install.packages(c("postGIStools", "RPostgreSQL"), dependencies = TRUE)

# Oracle
install.packages(c("ODBC"), dependencies = TRUE)

# Amazon Redshift
install.packages(c("RRedshiftSQL"), dependencies = TRUE)

# SQLite
install.packages(c("RSQLite", "sqliter", "dbflobr"), dependencies = TRUE)

# General SQL packages
install.packages(c("RSQL", "sqldf", "poplite", "queryparser"), dependencies = TRUE)
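As a quick illustration of the sqldf idea (my own minimal example, not from the original post), you can run plain SQL directly against an in-memory data frame:

library(sqldf)

# query the built-in mtcars data frame with ordinary SQL
sqldf("SELECT cyl, COUNT(*) AS n, AVG(mpg) AS avg_mpg
       FROM mtcars
       GROUP BY cyl")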

2. Manipulating Data

Data engineering, copying, wrangling and manipulation are the very next tasks in the journey.

2.1. Cleaning data

Data cleaning is essential for handling outliers, NULL and N/A values and wrong values, for doing imputation or replacement, for checking frequencies and descriptive statistics, and for applying univariate, bivariate and multivariate statistical analyses to tackle these issues. The list is by no means complete, but it is a good starting point:

install.packages(c("janitor", "outliers", "missForest", "frequency", "Amelia",
                   "diffobj", "mice", "VIM", "Bioconductor", "mi",
                   "wrangle"), dependencies = TRUE)

2.2. Dealing with R data types and formats

Working with correct data types and knowing your way around formatting your dataset is easily overlooked, yet important. A list of must-have packages:

install.packages(c("stringr", "lubridate", "glue",
                   "scales", "hablar", "readr"), dependencies = TRUE)

2.3. Wrangling, subsetting and aggregating data

There are many packages available for wrangling, engineering and aggregating data; in particular, the {base} R package should not be overlooked, since it offers a lot of great and powerful features. The following, though, are the packages most widely used in the R community for maneuvering data:

install.packages(c("dplyr", "tidyverse", "purrr", "magrittr",
                   "data.table", "plyr", "tidyr", "tibble",
                   "reshape2"), dependencies = TRUE)

3. Statistical tests and Sampling Data

3.1. Statistical tests

Many of the statistical tests (Shapiro, t-test, Wilcoxon, equality, …) are available in the base and stats packages that ship with the R engine. This is great, because R is primarily a statistical language and many of the tests are already included. In addition, here are packages I have used:

install.packages(c("stats", "ggpubr", "lme4", "MASS", "car"),
                 dependencies = TRUE)

3.2. Data Sampling

Data sampling, working with samples and populations, working with inference and weights, and the different types of statistical data sampling can all be found in these brilliant packages, including some that are great for survey data.

install.packages(c("sampling", "icarus", "sampler", "SamplingStrata",
                   "survey", "laeken", "stratification", "simPop"),
                 dependencies = TRUE)

4. Statistical Analysis

Depending on the type of variable, the type of analysis and the results a statistician wants to get, here is a list of packages that should be part of the daily R environment when it comes to statistical analysis.

4.1. Regression Analysis

Frankly, one of the most important kinds of analysis:

install.packages(c("stats", "Lars", "caret", "survival", "gam", "glmnet",
                   "quantreg", "sgd", "BLR", "MASS", "car", "mlogit", "earth",
                   "faraway", "nortest", "lmtest", "nlme", "splines",
                   "sem", "WLS", "OLS", "pls", "2SLS", "3SLS", "tree", "rpart"),
                 dependencies = TRUE)

4.2. Analysis of variance

Distribution and data dispersion are core to understanding the data. Many of the tests for variance are already built into the R engine (package stats), but here are some more that might be useful for analyzing variance.

install.packages(c("caret", "rio", "car", "MASS", "FuzzyNumbers",
                   "stats", "ez"), dependencies = TRUE)

4.3. Multivariate analysis

Analysis using more than two variables is considered multivariate analysis. Excluding regression analysis and analysis of variance (already introduced in sections 4.1. and 4.2.), this covers statistical analysis of many variables, such as factor analysis, principal component analysis, canonical analysis, discriminant analysis and others:

install.packages(c("psych", "CCA", "CCP", "MASS", "icapca", "gvlma", "smacof",
                   "MVN", "rpca", "gpca", "EFA.MRFA", "MFAg", "MVar", "fabMix",
                   "fad", "spBFA", "cate", "mnlfa", "CSFA", "GFA", "lmds", "SPCALDA",
                   "semds", "superMDS", "vcd", "vcdExtra"),
                 dependencies = TRUE)

4.4. Classification and Clustering

Based on the different types of clustering and classification, there are many packages covering both. Some of the essential packages for clustering:

install.packages(c("fpc", "cluster", "treeClust", "e1071", "NbClust", "skmeans",
                   "kml", "compHclust", "protoclust", "pvclust", "genie", "tclust",
                   "ClusterR", "dbscan", "CEC", "GMCM", "EMCluster", "randomLCA",
                   "MOCCA", "factoextra", "poLCA"), dependencies = TRUE)

and for classification:

install.packages(c("tree", "e1071"))

4.5. Analysis of Time-series

Analysing time series and time-series data is easier with the following packages:

install.packages(c("ts", "zoo", "xts", "timeSeries", "tsModel", "TSMining",
                   "TSA", "fma", "fpp2", "fpp3", "tsfa", "TSdist", "TSclust", "feasts",
                   "MTS", "dse", "sazedR", "kza", "fable", "forecast", "tseries",
                   "nnfor", "quantmod"), dependencies = TRUE)

4.6. Network analysis

Analyzing networks is also part of statistical analysis. Some of the relevant packages:

install.packages(c("fastnet", "tsna", "sna", "networkR", "InteractiveIGraph",
                   "SemNeT", "igraph", "NetworkToolbox", "dyads",
                   "staTools", "CINNA"), dependencies = TRUE)

4.7. Analysis of text

Besides open text, one can analyse any kind of text, including word corpora, semantics and much more. A couple of packages to start with:

install.packages(c("tm", "tau", "koRpus", "lexicon", "sylly", "textir",
                   "textmineR", "MediaNews", "lsa", "SemNeT", "ngram", "ngramrr",
                   "corpustools", "udpipe", "textstem", "tidytext", "text2vec"),
                 dependencies = TRUE)

5. Machine Learning

R has a variety of good machine learning packages that are powerful and cover the full machine learning cycle. I break this down into its natural sections.

5.1. Building and validating  the models

Once you build one or more models and compare their results, it is also important to validate the models against a test set or other datasets. Here are powerful packages for model validation.

install.packages(c("tree", "e1071", "crossval", "caret", "rpart", "bcv",
                   "klaR", "EnsembleCV", "gencve", "cvAUC", "CVThresh",
                   "cvTools", "dcv", "cvms", "blockCV"), dependencies = TRUE)

5.2. Random forests packages

Some of the most useful packages for random forests and tree-based ensembles:

install.packages(c("randomForest", "grf", "ipred", "party", "randomForestSRC",
                   "grf", "BART", "Boruta", "LTRCtrees", "REEMtree", "refr",
                   "binomialRF", "superml"), dependencies = TRUE)

5.3. Regression-type (regression, boosting, gradient descent) algorithm packages

There are many regression-type machine learning algorithms, with additional boosting or gradient variants. Some very usable packages:

install.packages(c("earth", "gbm", "GAMBoost", "GMMBoost", "bst", "superml",
                   "sboost"), dependencies = TRUE)

5.4. Classification algorithms

Classification problems are covered by many packages, and many of these are also great for machine learning cases. A handful:

install.packages(c("rpart", "tree", "C50", "RWeka", "klaR", "e1071",
                   "kernlab", "svmpath", "superml", "sboost"),
                 dependencies = TRUE)

5.5. Neural networks

There are many types of neural networks, and different packages cover all of them. Here are a couple of very useful R packages for tackling neural networks.

install.packages(c("nnet", "gnn", "rnn", "spnn", "brnn", "RSNNS", "AMORE",
                   "simpleNeural", "ANN2", "yap", "yager", "deep", "neuralnet",
                   "nnfor", "TeachNet"), dependencies = TRUE)

5.6. Deep Learning

R has embraced deep learning, and many of the powerful SDKs and packages have been ported to R, making it very usable for R developers and the R machine learning community.

install.packages(c("deepnet", "RcppDL", "tensorflow", "h2o", "kerasR",
                   "deepNN", "Buddle", "automl"), dependencies = TRUE)

5.7. Reinforcement Learning

Reinforcement learning is gaining popularity, and more and more packages are being developed in R as well. Some of the very useful packages:

devtools::install_github("nproellochs/ReinforcementLearning")
install.packages(c("RLT", "ReinforcementLearning", "MDPtoolbox"),
                 dependencies = TRUE)

5.8. Model interpretability and explainability

The results of machine learning models can be a black box. Many packages aim to turn the black box into more of a "glass box", making the models more understandable, interpretable and explainable. Here are very powerful packages that do just that for many different machine learning algorithms.

install.packages(c("lime", "localModel", "iml", "EIX", "flashlight",
                   "interpret", "outliertree", "breakDown"),
                 dependencies = TRUE)

6. Visualisation

Visualisation of the data is not only the final step to understanding the data; it can also bring clarity to interpretation and help build a mental model around the data. A couple of packages that will help boost the visualisation:

install.packages(c("ggvis", "htmlwidgets", "maps", "sunburstR", "lattice",
                   "predict3d", "rgl", "rglwidget", "plot3Drgl", "ggmap", "ggplot2",
                   "plotly", "RColorBrewer", "dygraphs", "canvasXpress", "qgraph",
                   "moveVis", "ggcharts", "igraph", "visNetwork", "visreg", "VIM",
                   "sjPlot", "plotKML", "squash", "statVisual", "mlr3viz", "klaR",
                   "DiagrammeR", "pavo", "rasterVis", "timelineR", "DataViz", "d3r",
                   "d3heatmap", "dashboard", "highcharter",
                   "rbokeh"), dependencies = TRUE)

7. Web Scraping

Many R packages are specifically designed to scrape (harvest) data from a particular website, API or archive. Here are just a couple of very generic ones:

install.packages(c("rvest", "Rcrawler", "ralger", "scrapeR"),
                 dependencies = TRUE)

8. Documents and books organisation

To organise your documents (files, code, packages, diagrams, pictures) into a readable document, dashboard or book view, there are a couple of packages for this purpose:

install.packages(c("devtools", "usethis", "roxygen2", "knitr",
                   "rmarkdown", "flexdashboard", "shiny",
                   "xtable", "httr", "profvis"), dependencies = TRUE)

Wrap up

The R script for loading and installing the packages is available on GitHub. Make sure to check the GitHub repository for the latest list updates. And as always, feel free to fork the code or commit updates, add essential packages to the list, comment, improve, and agree or disagree.

You can also run the following command to install all of the packages in a single run:

install.packages(c("Hmisc", "foreign", "protr", "readxl", "xlsx",
                   "csv", "readr", "tidyverse", "jsonlite", "rjson",
                   "RJSONIO", "jsonvalidate", "sparkavro", "arrow", "feather",
                   "XML", "odbc", "RODBC", "mssqlR", "RMySQL",
                   "dbConnect", "postGIStools", "RPostgreSQL", "ODBC",
                   "RSQLite", "sqliter", "dbflobr", "RSQL", "sqldf",
                   "poplite", "queryparser", "influxdbr", "janitor", "outliers",
                   "missForest", "frequency", "Amelia", "diffobj", "mice",
                   "VIM", "Bioconductor", "mi", "wrangle", "mitools",
                   "stringr", "lubridate", "glue", "scales", "hablar",
                   "dplyr", "purrr", "magrittr", "data.table", "plyr",
                   "tidyr", "tibble", "reshape2", "stats", "Lars",
                   "caret", "survival", "gam", "glmnet", "quantreg",
                   "sgd", "BLR", "MASS", "car", "mlogit", "RRedshiftSQL",
                   "earth", "faraway", "nortest", "lmtest", "nlme",
                   "splines", "sem", "WLS", "OLS", "pls",
                   "2SLS", "3SLS", "tree", "rpart", "rio",
                   "FuzzyNumbers", "ez", "psych", "CCA", "CCP",
                   "icapca", "gvlma", "smacof", "MVN", "rpca",
                   "gpca", "EFA.MRFA", "MFAg", "MVar", "fabMix",
                   "fad", "spBFA", "cate", "mnlfa", "CSFA",
                   "GFA", "lmds", "SPCALDA", "semds", "superMDS",
                   "vcd", "vcdExtra", "ks", "rrcov", "eRm",
                   "MNP", "bayesm", "ltm", "fpc", "cluster",
                   "treeClust", "e1071", "NbClust", "skmeans", "kml",
                   "compHclust", "protoclust", "pvclust", "genie", "tclust",
                   "ClusterR", "dbscan", "CEC", "GMCM", "EMCluster",
                   "randomLCA", "MOCCA", "factoextra", "poLCA", "ts",
                   "zoo", "xts", "timeSeries", "tsModel", "TSMining",
                   "TSA", "fma", "fpp2", "fpp3", "tsfa",
                   "TSdist", "TSclust", "feasts", "MTS", "dse",
                   "sazedR", "kza", "fable", "forecast", "tseries",
                   "nnfor", "quantmod", "fastnet", "tsna", "sna",
                   "networkR", "InteractiveIGraph", "SemNeT", "igraph",
                   "dyads", "staTools", "CINNA", "tm", "tau", "NetworkToolbox",
                   "koRpus", "lexicon", "sylly", "textir", "textmineR",
                   "MediaNews", "lsa", "ngram", "ngramrr", "corpustools",
                   "udpipe", "textstem", "tidytext", "text2vec", "crossval",
                   "bcv", "klaR", "EnsembleCV", "gencve", "cvAUC",
                   "CVThresh", "cvTools", "dcv", "cvms", "blockCV",
                   "randomForest", "grf", "ipred", "party", "randomForestSRC",
                   "BART", "Boruta", "LTRCtrees", "REEMtree", "refr",
                   "binomialRF", "superml", "gbm", "GAMBoost", "GMMBoost",
                   "bst", "sboost", "C50", "RWeka", "klaR",
                   "kernlab", "svmpath", "nnet", "gnn", "rnn",
                   "spnn", "brnn", "RSNNS", "AMORE", "simpleNeural",
                   "ANN2", "yap", "yager", "deep", "neuralnet",
                   "TeachNet", "deepnet", "RcppDL", "tensorflow", "h2o",
                   "kerasR", "deepNN", "Buddle", "automl", "RLT",
                   "ReinforcementLearning", "MDPtoolbox", "lime", "localModel",
                   "iml", "EIX", "flashlight", "interpret", "outliertree",
                   "dockerfiler", "azuremlsdk", "sparklyr", "cloudml", "ggvis",
                   "htmlwidgets", "maps", "sunburstR", "lattice", "predict3d",
                   "rgl", "rglwidget", "plot3Drgl", "ggmap", "ggplot2",
                   "plotly", "RColorBrewer", "dygraphs", "canvasXpress", "qgraph",
                   "moveVis", "ggcharts", "visNetwork", "visreg", "sjPlot",
                   "plotKML", "squash", "statVisual", "mlr3viz", "DiagrammeR",
                   "pavo", "rasterVis", "timelineR", "DataViz", "d3r", "breakDown",
                   "d3heatmap", "dashboard", "highcharter", "rbokeh", "rvest",
                   "Rcrawler", "ralger", "scrapeR", "devtools", "usethis",
                   "roxygen2", "knitr", "rmarkdown", "flexdashboard", "shiny",
                   "xtable", "httr", "profvis"), dependencies = TRUE)


Happy R-ing. 🙂



#26: Upgrading to R 4.0.0

Posted: 26 Apr 2020 05:18 PM PDT

[This article was first published on Thinking inside the box, and kindly contributed to R-bloggers].

Welcome to the 26th post in the rationally regularized R revelations series, or R4 for short.

R 4.0.0 was released two days ago, and a casual glance at some social media conversations appears to suggest quite a bit of confusion, almost certainly some misunderstandings, and possibly also a fair amount of fear, uncertainty, and doubt about the process. So I thought I could show how I upgrade my own main workstation, live and in colour, without a safety net. (Almost: I did upgrade my laptop yesterday, which went swimmingly, if more slowly.) So here is a fresh video about upgrading to R 4.0.0, with some support slides as usual:

The slides used in the video are at this link.

A few quick follow-ups on the 'live' nature of this. The pbdZMQ package did in fact install smoothly once the (Ubuntu) -dev packages for ZeroMQ were (re-)installed; IRkernel then followed. Bioconductor completed once I realized that GOSemSim needed the annotation package GO.db to be updated, which allowed MNF to install. So the only real bug was the circular dependency between pkgload and testthat. Overall, not bad at all for a quick afternoon session!

And as mentioned, if you are interested and have questions concerning the use of R on a .deb-based system like Debian or Ubuntu (or Mint or …), the r-sig-debian list is a very good and friendly place to ask them.

If you like this or other open-source work I do, you can now sponsor me at GitHub. For the first year, GitHub will match your contributions.

This post by Dirk Eddelbuettel originated on his Thinking inside the box blog. Please report excessive re-aggregation in third-party for-profit settings.


Get all your packages back on R 4.0.0

Posted: 26 Apr 2020 05:00 PM PDT

[This article was first published on Johannes B. Gruber on Johannes B. Gruber, and kindly contributed to R-bloggers].

R 4.0.0 was released on 2020-04-24.
Among the many news items, two stand out for me:
First, R now uses stringsAsFactors = FALSE by default, which is especially welcome when reading in data (e.g., via read.csv) and when constructing data.frames.
The second item that caught my eye was that all packages need to be reinstalled under the new version.
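A quick illustration of the first change (my own minimal example, not from the original post):

# under R 4.0.0, character columns stay character by default
df <- data.frame(x = c("a", "b"))
class(df$x)
## [1] "character"   # R 3.6.x returned "factor" by default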

This can be rather cumbersome if you have collected a large number of packages on your machine while using R 3.6.x and you don't want to spend the next weeks running into Error in library(x) : there is no package called 'x' errors.
But there is an easy way to solve this.

After you made the update, first get your old packages:

old_packages <- installed.packages(lib.loc = "/home/johannes/R/x86_64-pc-linux-gnu-library/3.6/")
head(old_packages[, 1])
##       abind     acepack        ade4         AER   animation   anomalize 
##     "abind"   "acepack"      "ade4"       "AER" "animation" "anomalize"

lib.loc should be the location where you installed your packages before updating to R 4.0.0.
If unsure, you can call .libPaths().
The first path is your new lib.loc, and the previous one should look the same except ending in 3.6.
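For example, the output might look like this (paths are illustrative, not from the original post):

.libPaths()
## [1] "/home/johannes/R/x86_64-pc-linux-gnu-library/4.0"
## [2] "/usr/lib/R/library"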

Then you can find the packages previously installed but currently missing:

new_packages <- installed.packages()
missing_df <- as.data.frame(old_packages[
  !old_packages[, "Package"] %in% new_packages[, "Package"],
])

missing_df now contains all packages you had previously installed that are not present now.
In an intermediate step you might want to clean up this list a bit, as you might not want all former packages back (I just used write.csv to export it, annotated the list and read it back in with read.csv).

Once this is done, you can install your packages back:

install.packages(missing_df$Package)

This can run for a while…

Once the installations are done, you can check the missing packages again:

missing_df <- as.data.frame(old_packages[
  !old_packages[, 1] %in% installed.packages()[, 1],
])

If you've got all your packages back, missing_df should have zero rows.
If not, you might have had some packages which are not currently on CRAN.
For me those are usually packages only available on GitHub so far.
I used a nice little piece of code I found in the available package to find the repositories of these packages:

library(dplyr, warn.conflicts = FALSE)

on_gh <- function(pkg) {
  repo <- jsonlite::fromJSON(paste0("http://rpkg-api.gepuro.net/rpkg?q=", pkg))
  repo[basename(repo$pkg_name) == pkg, ]
}

gh_pkgs <- lapply(c("quanteda.classifiers", "emo"), on_gh) %>%
  bind_rows()

as_tibble(gh_pkgs)
## # A tibble: 2 x 3
##   pkg_name             title                            url                       
##   <chr>                <chr>                            <chr>                     
## 1 quanteda/quanteda.…  quanteda textmodel extensions …  https://github.com/quante…
## 2 hadley/emo           Easily insert emoji into R and…  https://github.com/hadley…

Check if this grabbed the correct ones, then you can install them using remotes::install_github(gh_pkgs$pkg_name).

For me, that was it.
Your mileage may vary if some of your packages were removed from CRAN in the meantime or if you use other repos (e.g., Bioconductor).


Updating to 4.0.0 on MacOS

Posted: 26 Apr 2020 05:00 PM PDT

[This article was first published on Posts on R Lover ! a programmer, and kindly contributed to R-bloggers].

Mixed emotions

Wow! Has it been a year? Another major update from The R Foundation (the recent 4.0.0 release in April). I'm always happy to see the continuing progress and the combination of new features and bug fixes, but I also dread the upgrade because it means I have to address the issue of what to do about the burgeoning number of packages (libraries) I have installed. I wrote a fairly comprehensive post about it last year. I just took the plunge this year and almost everything seems to still work. Vindication!

The details are here in the old post, but since this is timely I republish the basics.

I'm aware that there are full-fledged package managers like packrat and checkpoint, and even a package designed to manage the upgrade for you on Windows, but I'm a Mac user, wanted to do things my own way, and don't need that level of sophistication.

So I set out to do the following:

  1. Capture a list of everything I had installed under R 3.6.x and, very importantly, as much as I could about where I got each package, e.g. CRAN or GitHub or ???
  2. Keep a copy for my own edification and potential future use.
  3. Do a clean R 4.0.0 install and not copy any library directories manually or create symlinks or any other thing at the OS level.
  4. Use the list I produced in #1 above mainly to download and install the exact same packages if I can find them.
  5. Make the process mainly scripted and automatic and available again for the future – it worked this year, let's hope it works again next.

Before you upgrade!

Let's load tidyverse to have access to all its various functions and features, and then build a dataframe called allmypackages with the basic information about the packages I currently have installed under R 3.6.3.

Note – I'm writing this after already upgrading, so there will be a few inconsistencies in the output

  • This could just as easily be a tibble but I chose as.data.frame
  • I am deliberately removing base packages from the dataframe by filter
  • I am eliminating columns I really don't care about with select
require(tidyverse)

allmypackages <- as.data.frame(installed.packages())

allmypackages <- allmypackages %>%
  filter(Priority != "base" | is.na(Priority)) %>%
  select(-c(Enhances:MD5sum, LinkingTo:Suggests)) %>%
  droplevels()

str(allmypackages)

A function to do the hard work

As I mentioned above, the Stack Overflow post was a good start, but I wanted more information from the function. Rather than TRUE/FALSE for whether it is GitHub, I would like as much information as possible about where I got the package. The package_source function will be applied to the Package column for each row of our dataframe. For example, as.character(packageDescription("ggplot2")$Repository) will return "CRAN", and as.character(packageDescription("CHAID")$Repository) will yield "R-Forge". For GitHub packages the result is character(0), which has a length of zero. So we'll test with an if else clause. If we get an answer like "CRAN", we'll just return it. If not, we'll see if there is a GitHub repo listed with as.character(packageDescription(pkg)$GithubRepo), as well as a GitHub username with as.character(packageDescription(pkg)$GithubUsername). If they exist, we'll concatenate and return them. If not, we'll return "Other". Besides being good defensive programming, this may catch a package you have built for yourself, as is the case for me.

package_source <- function(pkg) {
  x <- as.character(packageDescription(pkg)$Repository)
  if (length(x) == 0) {
    y <- as.character(packageDescription(pkg)$GithubRepo)
    z <- as.character(packageDescription(pkg)$GithubUsername)
    if (length(y) == 0) {
      return("Other")
    } else {
      return(str_c("GitHub repo = ", z, "/", y))
    }
  } else {
    return(x)
  }
}

# show the first 60 as an example
head(sapply(allmypackages$Package, package_source), 60)

What's in your libraries?

Now that we have the package_source function, we can add a column to our data frame and do a little looking around.

allmypackages$whereat <- sapply(allmypackages$Package, package_source)
str(allmypackages)

table(allmypackages$whereat)

allmypackages %>%
  filter(whereat == "Other") %>%
  select(Package, Version)

And just to be on the safe side, we'll also write a copy out as a CSV file so we have it around in case we ever need to refer back to it.

write.csv(allmypackages, "mypackagelistApril2020.csv")

Go ahead and install R 4.0.0

At this point we have what we need, so go ahead and download and install R
4.0.0. At the end of the installation process you'll have a pristine copy with a
new (mostly empty) library directory (on my system it's
/Library/Frameworks/R.framework/Versions/4.0/). When next you restart R and R
Studio you'll see a clean new version. Let's make use of our data frame to
automate most of the process of getting nice clean copies of the libraries we
want.

We'll start by getting the entire tidyverse since we need several parts and
because installing it will trigger the installation of quite a few dependencies
and bootstrap our work.

# post upgrade with output suppressed
install.packages("tidyverse")
library(tidyverse)

Now we have R 4.0.0 and some additional packages. Let's see what we can do. First let's create two dataframes, one with our old list and one with what we have right now. Then we can use anti_join to make a dataframe, thediff, that lists the differences. We can use filter and pull to generate a vector of just the packages on CRAN that we want to install.

oldpackages <- read.csv("mypackagelistApril2020.csv")

allmypackages <- as.data.frame(installed.packages())
allmypackages <- allmypackages %>%
  filter(Priority != "base" | is.na(Priority)) %>%
  select(-c(Enhances:MD5sum, LinkingTo:Suggests))

thediff <- anti_join(oldpackages, allmypackages, by = "Package")
thediff <- droplevels(thediff)

thediff %>%
  filter(whereat == "CRAN") %>%
  pull(Package) %>%
  as.character

Just do it!

Now that you have a nice automated list of every CRAN package, you can give it a final look and see if there is anything else you'd like to filter out. Once you are sure the list is right, one final pipe will set the process in motion.

thediff %>%
  filter(whereat == "CRAN") %>%
  pull(Package) %>%
  as.character %>%
  install.packages

Depending on the speed of your network connection and the number of packages you have, that will run for a few minutes.

That takes care of our CRAN packages. What about GitHub? Here's another chance to review what you have and whether you still want or need these packages. You can automate the process and once again feed the right vector to devtools::install_github().

# Manual peek
thediff %>%
  filter(str_detect(whereat, "GitHub repo")) %>%
  select(Package, Version, NeedsCompilation, whereat)

# if you want to automate
thediff %>%
  filter(str_detect(whereat, "GitHub repo")) %>%
  pull(whereat) %>%
  as.character %>%
  str_remove("GitHub repo = ") %>%
  devtools::install_github()

Same with the one package I get from R-Forge…

allmypackages %>%
  filter(str_detect(whereat, "R-Forge")) %>%
  select(Package, Version, NeedsCompilation, whereat)

install.packages("CHAID", repos = "http://R-Forge.R-project.org")

At the end of this process you should have a nice clean R install that has all
the packages you choose to maintain as well as a detailed listing of what those
are.

Done

Hope you enjoyed the post. Comments are always welcome. Especially please let me know if you actually use the tools and find them useful.

Chuck

CC BY-SA 4.0

This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License


An adventure in downloading books

Posted: 26 Apr 2020 05:00 PM PDT

[This article was first published on Anindya Mozumdar, and kindly contributed to R-bloggers].

Earlier today, I noticed a tweet from well-known R community member Jozef Hajnala. The tweet was about Springer releasing around 65 books related to data science and machine learning as free PDF downloads. Following the link in his tweet, I learned that Springer has released 408 books in total, of which 65 are related to the field of data science. The author of the blog post did a nice job of providing links to the Springer website for each of these books. While browsing through a couple of the links, it appeared to me that the links are all well structured, and it would be worth a try to write an R script to download all of the books.

My first impulse was to use the rvest package. However, I found it hard to scrape the page on the "Towards Data Science" website, as it is probably generated using JavaScript rather than simple HTML. After a few minutes of research, I discovered the Rcrawler package, which has some functions that suit my needs. While I have heard of headless browsers before, this was my first experience using one. Rcrawler itself installs PhantomJS, with which one can mimic 'visiting' a web page from code. The LinkExtractor function from Rcrawler is a nice function that gives you the internal and external links present in a page. It also provides some general information on the page, which was useful for extracting the name of each book.

Given the well-structured pages on the Springer website, it took only some simple string manipulation to find a way to generate the link to the actual PDF of each book. After that, it was a simple call to the R function download.file. As a result of this exercise, I also learned two new things (see the short illustration after the list):

  • Using a regular expression to remove the last 2 characters of a string.
  • The 'wb' mode in download.file. In my initial experiments, I was facing some issues with the downloaded PDFs, which were solved by using this mode.
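A minimal standalone illustration of both points (my own sketch, not code from the post):

# 1. A regex anchored at the end of the string drops the last two characters:
gsub(".{2}$", "", "12345")   # returns "123"

# 2. mode = "wb" writes the download in binary mode, which keeps PDFs intact;
#    the URL and file name here are placeholders:
# download.file("https://example.com/book.pdf", "book.pdf", mode = "wb")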

Overall, an hour of effort based on a tweet, and I learned a few things. I will most likely not have the time to read most (or any) of these books, but at least it helped me learn some new stuff in R. Time well spent.

library(Rcrawler)

install_browser() # One time only

br <- run_browser()
page <- LinkExtractor(url = "https://towardsdatascience.com/springer-has-released-65-machine-learning-and-data-books-for-free-961f8181f189",
                      Browser = br, ExternalLInks = TRUE)

el <- page$ExternalLinks
sprlnks <- el[grep("springer", el, fixed = TRUE)]

for (sprlnk in sprlnks) {
  spr_page <- LinkExtractor(sprlnk)
  il <- spr_page$InternalLinks
  ttl <- spr_page$Info$Title
  ttl <- trimws(strsplit(ttl, "|", fixed = TRUE)[[1]][1])
  chapter_link <- il[grep("chapter", il, fixed = TRUE)][1]
  chp_splits <- strsplit(chapter_link, "/", fixed = TRUE)
  n <- length(chp_splits[[1]])
  suff <- chp_splits[[1]][n]
  suff <- gsub(".{2}$", "", suff)
  pref <- chp_splits[[1]][n - 1]
  final_url <- paste0("https://link.springer.com/content/pdf/", pref, "/",
                      suff, ".pdf")
  print(final_url)
  download.file(final_url, paste0(ttl, ".pdf"), mode = "wb")
  Sys.sleep(5)
}

stop_browser(br)


ChemoSpecUtils Update

Posted: 26 Apr 2020 05:00 PM PDT

[This article was first published on R on Chemometrics & Spectroscopy using R, and kindly contributed to R-bloggers].

ChemoSpecUtils, a package that supports the common needs of ChemoSpec and ChemoSpec2D, has been updated to fix an unfortunate distance calculation error in version 0.4.38, released in January of this year. From the NEWS file for version 0.4.51:

  • Function rowDist, which supports a number of functions, was overhauled to address confusion in the documentation, and in my head, about distances vs. similarities. Also, different definitions found in the literature were documented more clearly. The Minkowski distance option was removed (ask if you want it back), code was cleaned up, documentation greatly improved, an example was added and unit tests were added. Plot scales were also corrected as necessary. Depending upon which distance option is chosen, this change affects hcaSpectra, plotSpectraDist, sampleDist and hcaScores in package ChemoSpec as well as hats_alignSpectra2D and hcaScores in package ChemoSpec2D.

This brings to mind a Karl Broman quote I think about frequently:

"Open source means everyone can see my stupid mistakes.
Version control means everyone can see every stupid mistake I've ever made."

Karl Broman

Karl Broman quote source


Proofs without Words using gganimate

Posted: 25 Apr 2020 05:00 PM PDT

[This article was first published on R on Notes of a Dabbler, and kindly contributed to R-bloggers].

I recently watched the two-part workshop (part 1, part 2) on ggplot2 and extensions given by Thomas Lin Pedersen. First off, it was really nice of Thomas to give the nearly four-hour workshop for the benefit of the community. I personally learnt a lot from it. I wanted to try out the gganimate extension that was covered during the workshop.

There are several resources on the web that show animations/illustrations of proofs of mathematical identities and theorems without words (or close to it). I wanted to take a few of those examples and use gganimate to recreate the illustration. This was a fun way for me to try out gganimate.

Example 1:

This example is taken from AoPS Online and the result is that the sum of the first \(n\) odd numbers equals \(n^2\): \[ 1 + 3 + 5 + \ldots + (2n - 1) = n^2 \] The gganimate version of the proof (using the method in AoPS Online) is shown below (R code, html file).
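
The finished animation lives at the links above; as a flavor of how gganimate can drive such a proof, here is a minimal standalone sketch (my own illustration, not the post's code) that reveals the \(n \times n\) square one L-shaped gnomon of \(2k - 1\) tiles at a time:

library(ggplot2)
library(gganimate)

n <- 6
tiles <- expand.grid(x = 1:n, y = 1:n)
tiles$k <- pmax(tiles$x, tiles$y) # k-th L-shaped gnomon has 2k - 1 tiles

p <- ggplot(tiles, aes(x, y, fill = factor(k))) +
  geom_tile(colour = "white", show.legend = FALSE) +
  coord_equal() +
  theme_void() +
  transition_states(k, wrap = FALSE) + # reveal one gnomon per state
  shadow_mark() # keep earlier gnomons on screen

animate(p, nframes = 60, fps = 10)

After the last state, all \(1 + 3 + \ldots + (2n - 1)\) tiles fill the \(n \times n\) square.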

Example 2:

This example is also taken from AoPS Online and the result is:

\[ 1^3 + 2^3 + \ldots + (n-1)^3 + n^3 = (1 + 2 + \ldots + n)^2 \] The gganimate version of the proof (using the method in AoPS Online) is shown below (R code, html file):
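
For reference, the identity amounts to the standard closed form for the sum of cubes: since \(1 + 2 + \ldots + n = n(n+1)/2\),

\[ 1^3 + 2^3 + \ldots + n^3 = \left( \frac{n(n+1)}{2} \right)^2 = \frac{n^2(n+1)^2}{4}, \]

which can be verified by induction on \(n\).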

Example 3:

This example from AoPS Online illustrates the result

\[ \frac{1}{2^2} + \frac{1}{2^4} + \frac{1}{2^6} + \frac{1}{2^8} + \ldots = \frac{1}{3} \] The gganimate version of the proof (using the method in AoPS Online) is shown below (R code, html file):
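
As a quick algebraic check (independent of the picture), the left-hand side is a geometric series with first term and common ratio \(1/4\):

\[ \sum_{k=1}^{\infty} \frac{1}{2^{2k}} = \sum_{k=1}^{\infty} \left( \frac{1}{4} \right)^k = \frac{1/4}{1 - 1/4} = \frac{1}{3}. \]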

Example 4:

According to Pythagoras' theorem, \[ a^2 + b^2 = c^2 \] where \(a\), \(b\), \(c\) are the sides of a right-angled triangle (with \(c\) being the side opposite the \(90^\circ\) angle).

There was an illustration of the proof of Pythagoras' theorem in a video from echalk.

The gganimate version of the proof is shown below (R code, html file).

In summary, it was great to use gganimate for these animations, since it does all the magic of making the transitions work nicely.


To leave a comment for the author, please follow the link and comment on their blog: R on Notes of a Dabbler.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.


A package to download free Springer books during Covid-19 quarantine

Posted: 25 Apr 2020 05:00 PM PDT

[This article was first published on R on Stats and R, and kindly contributed to R-bloggers]. (You can report issues about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Introduction

You have probably already seen that Springer released about 500 books for free following the COVID-19 pandemic. According to Springer, these textbooks will be available free of charge until at least the end of July.

Following this announcement, I already downloaded a couple of statistics and R programming textbooks from their website and I will probably download a few more in the coming weeks.

In this article, I present a package that saved me a lot of time and which may be of interest to many of us: the {springerQuarantineBooksR} package, developed by Renan Xavier Cortes.1

This package allows you to easily download all (or a selection of) Springer books made available free of charge during the COVID-19 quarantine.

With this large collection of high-quality resources, together with my collection of top R resources about the Coronavirus, we have no excuse not to read and learn during this quarantine.

Without further ado, here is how the package works in practice.

Installation

After having installed the {devtools} package, you can install the {springerQuarantineBooksR} package from GitHub with:

# install.packages("devtools")
devtools::install_github("renanxcortes/springerQuarantineBooksR")
library(springerQuarantineBooksR)

Download all books at once

First, set the path where you would like to save all books with the setwd() function, then download all of them at once with the download_springer_book_files() function. Note that it takes several minutes, since all books combined amount to almost 8 GB.

setwd("path_of_your_choice") # where you want to save the books
download_springer_book_files(parallel = TRUE)

You will find all downloaded books (in PDF format) in a folder named "springer_quarantine_books", organized by category.2
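
Once the download finishes, a quick way to inspect what arrived is base R's list.files() (a sketch; it assumes the default folder name mentioned above):

# List the downloaded PDFs across the per-category subfolders
pdfs <- list.files("springer_quarantine_books",
                   pattern = "\\.pdf$", recursive = TRUE)
length(pdfs) # how many books were downloaded
head(pdfs)   # a few file paths, prefixed by their category folder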

Create a table of Springer books

You can load into an R session a table containing all the titles made available by Springer, with the download_springer_table() function:

springer_table <- download_springer_table()
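
Before styling it, you can take a quick look at the table with base functions (the column names shown here, such as book_title, are the ones used in the filtering examples below):

dim(springer_table)             # number of titles and columns
names(springer_table)           # available columns
head(springer_table$book_title) # a few of the free titles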

This table can then be improved with the {DT} package to:

  • allow searching a book by its title or author
  • allow downloading the list of available books, and
  • make the Springer links clickable for instance
# install.packages("DT")
library(DT)

# Turn the link column into a clickable HTML anchor
springer_table$open_url <- paste0(
  '<a href="', springer_table$open_url, '" target="_blank">', # opening HTML tag
  "SpringerLink", # text to display
  "</a>" # closing HTML tag
)

datatable(springer_table,
  rownames = FALSE, # remove row numbers
  filter = "top", # add filter on top of columns
  extensions = "Buttons", # add download buttons
  options = list(
    autoWidth = TRUE,
    dom = "Blfrtip", # location of the download buttons
    buttons = c("copy", "csv", "excel", "pdf", "print"), # download buttons
    pageLength = 5, # show first 5 entries, default is 10
    order = list(0, "asc") # order the title column by ascending order
  ),
  escape = FALSE # make URLs clickable
)

Download only specific books

By title

Now, say that you are interested in downloading only one specific book and you know its title. For instance, suppose you want to download the book entitled "All of Statistics":

download_springer_book_files(springer_books_titles = "All of Statistics")

If you are interested in downloading all books with the word "Statistics" in the title, you can run:

springer_table <- download_springer_table()

library(dplyr)
library(stringr) # for str_detect()

specific_titles_list <- springer_table %>%
  filter(str_detect(
    book_title, # look for a pattern in the book_title column
    "Statistics" # specify the title
  )) %>%
  pull(book_title)

download_springer_book_files(springer_books_titles = specific_titles_list)

By author

If you want to download all books from a specific author, you can run:

springer_table <- download_springer_table()

# library(dplyr)
# library(stringr)

specific_titles_list <- springer_table %>%
  filter(str_detect(
    author, # look for a pattern in the author column
    "John Hunt" # specify the author
  )) %>%
  pull(book_title)

download_springer_book_files(springer_books_titles = specific_titles_list)

By subject

You can also download all books covering a specific subject:

springer_table <- download_springer_table()

# library(dplyr)
# library(stringr)

specific_titles_list <- springer_table %>%
  filter(str_detect(
    subject_classification, # look for a pattern in the subject_classification column
    "Statistics" # specify the subject
  )) %>%
  pull(book_title)

download_springer_book_files(springer_books_titles = specific_titles_list)
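
These filters can of course be combined. For instance, here is a sketch (with an arbitrary, hypothetical title pattern) restricting both the subject and the title:

# Sketch: statistics books whose title mentions regression (hypothetical pattern)
specific_titles_list <- springer_table %>%
  filter(
    str_detect(subject_classification, "Statistics"),
    str_detect(book_title, "Regression")
  ) %>%
  pull(book_title)

download_springer_book_files(springer_books_titles = specific_titles_list)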

Acknowledgments

I would like to thank:

  • Renan Xavier Cortes (and all contributors) for providing this package
  • The springer_free_books project, which served as inspiration for the {springerQuarantineBooksR} package
  • And last but not least, Springer, which offers many of its excellent books for free!

Thanks for reading. I hope this article will help you download and read more of the high-quality material made available by Springer during this Covid-19 quarantine.

As always, if you have a question or a suggestion related to the topic covered in this article, please add it as a comment so other readers can benefit from the discussion.

Get updates every time a new article is published by subscribing to this blog.


  1. I thank the author for allowing me to present his package in a blog post.

  2. Note that you can change the folder name by specifying the argument destination_folder = "folder_name".

To leave a comment for the author, please follow the link and comment on their blog: R on Stats and R.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

