stringr包詳解

知識 07-25

# Install stringr only when it is not already available, so that re-running
# this script does not reinstall the package on every execution.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}

# Attach stringr so the str_*() helpers below can be called unqualified.
library(stringr)

# 1. str_c(): string concatenation
#
# stringr >= 1.5.0 only recycles length-1 vectors, so a length-6 vector can no
# longer be combined directly with a length-2 vector. The suffixes are
# therefore recycled explicitly with rep_len() to keep the intended pairing.

# `sep` is inserted between the paired elements; the result has length 6:
# "a,ab" "b,bb" "c,ab" "d,bb" "e,ab" "f,bb"
str_c(letters[1:6], rep_len(c("ab", "bb"), 6L), sep = ",")

# Without `sep` the paired elements are joined directly, and `collapse` then
# joins those six strings into a single string of length 1:
# "aab,bbb,cab,dbb,eab,fbb"
str_c(letters[1:6], rep_len(c("ab", "bb"), 6L), collapse = ",")

# 2. str_count(): count pattern matches in each string
str <- c("abcdedf", "aaa", "bbb")

# Number of times "a" occurs in each element of `str`.
str_count(string = str, pattern = "a")

# 3. str_detect(): test whether each string contains a pattern
str <- c("abcdedf", "aaa", "bbb")

# TRUE for every element of `str` that contains "a", FALSE otherwise.
str_detect(string = str, pattern = "a")

# 4. str_dup(): repeat strings

# A single count: every element of `str` is repeated 3 times.
str_dup(string = str, times = 3)

# One count per element: the elements are repeated 1, 2 and 3 times
# respectively.
str_dup(string = str, times = seq_len(3L))

# 5. str_extract() / str_extract_all(): pull matches out of strings

# First match of "a" in each element; NA where nothing matches.
str_extract(string = str, pattern = "a")

# Every match of "a" in each element, returned as a list with one character
# vector per input string.
str_extract_all(string = str, pattern = "a")

# 6. str_length(): number of characters in each string

# Show the input vector, then the length of each of its elements.
str
str_length(string = str)

# 7. str_locate() / str_locate_all(): find the position of a substring
str1 <- c("sfdf", "ulkdsf", "yellow")

# Start and end position of the first "f" in each element.
str_locate(string = str1, pattern = "f")

# Start and end positions of every "f" in each element.
str_locate_all(string = str1, pattern = "f")

# 8. str_pad(): pad a string to a fixed width
#
# "yellow" has 6 characters, so padding to width 10 adds 4 pad characters.
# `side` selects the side on which the padding is added.
str_pad(string = "yellow", width = 10, side = "left", pad = ",")
str_pad(string = "yellow", width = 10, side = "right", pad = "*")

# 9. str_replace() / str_replace_all(): replace matches
str3 <- "addsjhadsk"

# Replace only the first "a" with "$".
str_replace(string = str3, pattern = "a", replacement = "$")

# Replace every "a" with "$".
str_replace_all(string = str3, pattern = "a", replacement = "$")

# 10. str_split(): split strings into pieces
# Signature: str_split(string, pattern, n = Inf)
str3 <- "abcddsesdafs"

# `n` is the maximum number of pieces to produce. The result is a list with
# one character vector per input string.
str_split(string = str3, pattern = "a", n = 2)

# 11. str_split_fixed(): split strings into a fixed number of pieces
# Signature: str_split_fixed(string, pattern, n)

# The result is a character matrix with `n` columns.
str_split_fixed(string = str3, pattern = "a", n = 3)

# 12. str_sub(): extract a substring
# Signature: str_sub(string, start = 1L, end = -1L)

# Show the input, then its characters 1 through 4.
str3
str_sub(string = str3, start = 1, end = 4)

# 13. str_sub(): replacement form
# Signature: str_sub(string, start = 1L, end = -1L) <- replacement

# Value before the replacement.
str3

# Overwrite characters 1 through 3 of `str3` with "hello".
str_sub(str3, start = 1, end = 3) <- "hello"

# Value after the replacement.
str3

# 14. str_trim(): strip surrounding whitespace
# Signature: str_trim(string, side = "both")
a <- " sadjlsa "

# Remove whitespace on the left only.
str_trim(string = a, side = "left")

# Remove whitespace on both sides.
str_trim(string = a, side = "both")

# 15. word(): extract words from a sentence
# Signature: word(string, start = 1L, end = start, sep = fixed(" "))
# The default separator is a single space.
data <- c("a b c efg sadf sdf")

# From the second word through the last one (-1 means the last word).
word(string = data, start = 2, end = -1)

# Only the second word, because `end` defaults to `start`.
word(string = data, start = 2)

# 16. str_wrap(): reflow text into paragraphs
# Signature: str_wrap(string, width = 80, indent = 0, exdent = 0)
#   width:  target line width
#   indent: indentation of the first line of each paragraph
#   exdent: indentation of the remaining lines of each paragraph
#
# Line breaks are written as "\n" escapes rather than raw line breaks inside
# the string literals, so the values do not depend on how the source file is
# formatted.

# Read the THANKS file from R's doc directory and join its lines into one
# string.
thanks_path <- file.path(R.home("doc"), "THANKS")
thanks <- str_c(readLines(thanks_path), collapse = "\n")

# Keep pieces 1 through 3 of the text, splitting on blank lines ("\n\n").
thanks <- word(thanks, 1, 3, fixed("\n\n"))

# Print the same text with different wrapping settings.
cat(str_wrap(thanks), "\n")
cat(str_wrap(thanks, width = 40), "\n")
cat(str_wrap(thanks, width = 60, indent = 2), "\n")
cat(str_wrap(thanks, width = 60, exdent = 2), "\n")

# 17. Worked example 1: replacement
# Strip the "歲" (years-old) suffix to obtain numeric ages.
age <- c("21歲", "22歲", "23歲", "24歲")

# Replace the suffix with an empty string; the result is still character.
age_replace <- str_replace(string = age, pattern = "歲", replacement = "")
age_replace

# Convert the remaining digits to numbers.
age_numeric <- as.numeric(age_replace)
age_numeric

# 18. Worked example 2: split each string at a separator
vec <- c("a-b", "b-c", "d-e", "u-p")

# Integer matrix with one row per element of `vec` and columns "start" and
# "end" giving the position of the first "-".
sep_loc <- str_locate(vec, "-")
sep_loc

# Text before the separator: from the first character up to just before the
# start of the match.
left <- str_sub(vec, 1L, sep_loc[, "start"] - 1L)
left

# Text after the separator: from just past the end of the match to the end of
# the string (-1 means the last character). Using the "end" column keeps this
# correct for separators longer than one character.
right <- str_sub(vec, sep_loc[, "end"] + 1L, -1L)
right

stringr包詳解

喜歡這篇文章嗎？立刻分享出去讓更多人知道吧！

本站內容充實豐富，博大精深，小編精選每日熱門資訊，隨時更新，點擊「搶先收到最新資訊」瀏覽吧！

請您繼續閱讀更多來自 程序員小新人學習 的精彩文章:

※TensorFlow 數據讀取
※TensorFlow中的所有模型

TAG:程序員小新人學習 |