读取Facebook的用户数据
getwd()
## [1] "C:/Users/HH/Desktop/R Data analyst"
list.files()
##  [1] "07-tidy-data.pdf"         "demystifying.R"
##  [3] "demystifyingR2_v3.html"   "demystifyingR2_v3.Rmd"
##  [5] "EDA_Course_Materials.zip" "lesson3_student.html"
##  [7] "lesson3_student.rmd"      "pseudo_facebook.tsv"
##  [9] "reddit.csv"               "stateData.csv"
## [11] "tidy-data.pdf"
pf <- read.delim('pseudo_facebook.tsv')
names(pf)
##  [1] "userid"                "age"
##  [3] "dob_day"               "dob_year"
##  [5] "dob_month"             "gender"
##  [7] "tenure"                "friend_count"
##  [9] "friendships_initiated" "likes"
## [11] "likes_received"        "mobile_likes"
## [13] "mobile_likes_received" "www_likes"
## [15] "www_likes_received"
data:image/s3,"s3://crabby-images/9c910/9c9106ea08a5050590597e423ab33da55e66bcfe" alt=""
用户生日直方图
library(ggplot2)
qplot(x = dob_day, data = pf) + scale_x_continuous(breaks = 1:31)
## `stat_bin()` 使用 `bins = 30`。用 `binwidth` 选择更好的值。
data:image/s3,"s3://crabby-images/9c910/9c9106ea08a5050590597e423ab33da55e66bcfe" alt=""
data:image/s3,"s3://crabby-images/99285/992850e8335b0e4a91fa548984455dfc1b59e090" alt=""
data:image/s3,"s3://crabby-images/9c910/9c9106ea08a5050590597e423ab33da55e66bcfe" alt=""
qplot(x = dob_day,data = pf)+ scale_x_continuous(breaks = 1:31)+ facet_wrap(~dob_month,ncol = 3)##`stat_bin()`使用`bins = 30`。用`binwidth`选择更好的价值。
data:image/s3,"s3://crabby-images/9c910/9c9106ea08a5050590597e423ab33da55e66bcfe" alt=""
data:image/s3,"s3://crabby-images/f3241/f3241f8cdc5f5141a1fc0c9d01100acba559abc0" alt=""
data:image/s3,"s3://crabby-images/9c910/9c9106ea08a5050590597e423ab33da55e66bcfe" alt=""
朋友数
qplot(friend_count,data = pf)##`stat_bin()`使用`bins = 30`。用`binwidth`选择更好的价值。
data:image/s3,"s3://crabby-images/9c910/9c9106ea08a5050590597e423ab33da55e66bcfe" alt=""
data:image/s3,"s3://crabby-images/f6163/f6163be3301f097aa6b002e61d1f61b5f4546a76" alt=""
data:image/s3,"s3://crabby-images/6f99b/6f99b3dc5283cdd769c2bb337e6b0c82f579818c" alt=""
qplot(friend_count,data = pf)+ scale_x_continuous(limits = c(0,1000))##`stat_bin()`使用`bins = 30`。用`binwidth`选择更好的值。##警告:删除了包含非有限值(stat_bin)的2951行。
data:image/s3,"s3://crabby-images/fde98/fde986053b9128364f398bc43c9f3f6aa43bfcb3" alt=""
data:image/s3,"s3://crabby-images/e0840/e0840804e180062f06f37b704b096ac394198afd" alt=""
data:image/s3,"s3://crabby-images/d39c5/d39c5cc68b290aa9a6111183aa5037a49961500d" alt=""
按性别分面显示朋友数量
# 你会添加什么代码来按性别创建直方图?
# 将它添加到下面的代码中。
qplot(x = friend_count, data = pf, binwidth = 10) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, 50)) +
  facet_wrap(~gender)
## 警告:删除了包含非有限值(stat_bin)的2951行。
data:image/s3,"s3://crabby-images/441da/441daa9669cf641af5a13a54c124c1e1b7ef8442" alt=""
data:image/s3,"s3://crabby-images/a941b/a941bcdb230f802899203124328de97bd03ddab8" alt=""
data:image/s3,"s3://crabby-images/baaa6/baaa66ee9de12d1e1684a37f5385e283ebfc47fc" alt=""
qplot(friend_count, data = subset(pf, !is.na(gender)), binwidth = 25) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, 50)) +
  facet_wrap(~gender)
## 警告:删除了包含非有限值(stat_bin)的2949行。
data:image/s3,"s3://crabby-images/72447/72447f03adbcaa8ee2f30bb6014564a1e6a94460" alt=""
table(pf$gender)
##
## female   male
##  40254  58574
by(pf$friend_count, pf$gender, summary)
## pf$gender: female
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##       0      37      96     242     244    4923
## --------------------------------------------------------
## pf$gender: male
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##       0      27      74     165     182    4917
data:image/s3,"s3://crabby-images/824a5/824a55f386be70b43e58bcd5efdfb15b55a18c99" alt=""
谁有更多的朋友:男人还是女人?
qplot(x = tenure,data = pf,binwidth = 30,color = I('black'),fill = I('#099DD9'))##警告:删除了包含非有限值(stat_bin)的2行。
data:image/s3,"s3://crabby-images/2fa82/2fa825e5aad71c8ebaa019f1e4cb0e8eca1da6d7" alt=""
data:image/s3,"s3://crabby-images/67cec/67cec627929be11abefbdd19875bab3430707a5b" alt=""
data:image/s3,"s3://crabby-images/11a82/11a82ca9fc053d1aa3de32acc44b8a59c97fa510" alt=""
qplot(x = tenure / 365,data = pf,binwidth = .25,color = I('black'),fill = I('#F79420'))+ scale_x_continuous(breaks = seq(1,7,1), limits = c(0,7))##警告:删除了包含非有限值(stat_bin)的26行。
data:image/s3,"s3://crabby-images/3bf0c/3bf0c8f51540a0924cd9a61495939facd6087d32" alt=""
data:image/s3,"s3://crabby-images/f6953/f695389d9426091050aec1125b481641f435fe08" alt=""
data:image/s3,"s3://crabby-images/c4781/c47811316e65f1fdc0a8060f6229491fc1671118" alt=""
笔记:
qplot(x = tenure / 365, data = pf,
      xlab = '使用FB的年数', ylab = '样本中的用户数',
      binwidth = .25, color = I('black'), fill = I('#F79420')) +
  scale_x_continuous(breaks = seq(1, 7, 1), limits = c(0, 7))
## 警告:删除了包含非有限值(stat_bin)的26行。
data:image/s3,"s3://crabby-images/54d3d/54d3d6b23a728a5a8afce494ae5a399d351e1c94" alt=""
用户年龄
qplot(x = age, data = pf,
      xlab = '用户年龄', ylab = '用户数',
      binwidth = 1, color = I('black'), fill = I('#5760AB')) +
  scale_x_continuous(breaks = seq(1, 113, 5))
data:image/s3,"s3://crabby-images/54d3d/54d3d6b23a728a5a8afce494ae5a399d351e1c94" alt=""
data:image/s3,"s3://crabby-images/78a52/78a5218fc664573e1b8b2c6fa1c9e1acd4635afa" alt=""
data:image/s3,"s3://crabby-images/104ea/104ea9a7fea301c310c051e0d1c4d54c4f86bf7c" alt=""
data:image/s3,"s3://crabby-images/3b51e/3b51ecee801773ef0e04471091f6ba0115d9e964" alt=""
data:image/s3,"s3://crabby-images/aa50c/aa50c19e0ddb794c589722c1fb23364ce32f00bb" alt=""
箱线图
data:image/s3,"s3://crabby-images/4f2bb/4f2bbde58d06dbe9d9246c4f0e7ddb4c47fdb814" alt=""
data:image/s3,"s3://crabby-images/8b059/8b05941848ed162e68e83052c511dcab04a3a573" alt=""
调整代码以关注朋友数在0到1000之间的用户。
非常感谢您阅读本文,有任何问题请在下面留言!
1
1
关于作者
Kaizong Ye是拓端研究室(TRL)的研究员。
本文借鉴了作者最近为《R语言数据分析挖掘必知必会 》课堂做的准备。