- r - 按第一个分隔符将每行拆分为两列

有一个dataframe:


# Example input: a single text column whose values contain ":" delimiters.
data.frame(text = c("separate1: and: more", "another 20: 42"))



每一行的预期输出示例:


# Desired result: each value split at its first ":" only, without the blank
# that follows the delimiter.
data.frame(
  text1 = c("separate1", "another 20"),
  text2 = c("and: more", "42")
)



已尝试:


library(reshape2)

# Sample data: a single text column whose values contain ":" delimiters.
df <- data.frame(text = c("separate1: and: more", "another 20: 42"))

# Attempt: split the text column on ":" into the two named columns.
colsplit(df$text, ":", c("text1", "text2"))



可以使用 stringr 包中的 str_split_fixed:把 n 设为 2 时,它只会在第一个分隔符处拆分,即:


# n = 2 caps the result at two pieces, so only the first ":" acts as a
# separator; "\\s*" also consumes the blanks that follow it, so the second
# column has no leading space.
stringr::str_split_fixed(df$text, ":\\s*", 2)
#      [,1]         [,2]
# [1,] "separate1"  "and: more"
# [2,] "another 20" "42"




# Sample data: a single text column whose values contain ":" delimiters.
df <- data.frame(text = c("separate1: and: more", "another 20: 42"))

# text1: delete everything from the first ":" through the end of the string.
df[["text1"]] <- gsub(':.*', '', df[["text"]])
# text2: delete the leading run of non-":" characters and the ": " after it.
df[["text2"]] <- gsub('^[^:]+: ', '', df[["text"]])

df
#                   text      text1     text2
# 1 separate1: and: more  separate1 and: more
# 2       another 20: 42 another 20        42



使用tidyr:


library(dplyr)
library(tidyr)

# extra = "merge" splits at most once per value here (two target columns), so
# everything after the first separator stays together in "b".
# The separator ":\\s*" also consumes the blanks after the ":", so "b" does
# not start with a space.
df %>%
  separate(text, c("a", "b"), sep = ":\\s*", extra = "merge")
#            a         b
# 1  separate1 and: more
# 2 another 20        42




# Sample input: character vector whose elements contain ":" delimiters.
x <- c("separate1: and: more", "another 20: 42")

# Position of the first ":" in each element.
i <- regexpr(":", x)

# Characters before the first ":" form text1, characters after it form text2;
# trimws() strips the blank that follows the delimiter.
data.frame(
  text1 = trimws(substring(x, 1, i - 1)),
  text2 = trimws(substring(x, i + 1, nchar(x)))
)
#        text1     text2
# 1  separate1 and: more
# 2 another 20        42



另一个基本R解决方案


# Split df$text at the first ":" of each value into columns text1 and text2.
#
# regexpr() and substr() are vectorised, so the whole column is processed at
# once instead of building one data.frame per row and rbind()-ing them.
# local() keeps the helper variables out of the calling environment.
df <- local({
  txt <- as.character(df$text)
  # Position of the first ":" per element (-1 when there is none).
  k <- as.integer(regexpr(":", txt, fixed = TRUE))
  # Values without a delimiter go entirely into text1, leaving text2 empty.
  no_delim <- !is.na(k) & k < 0L
  k[no_delim] <- nchar(txt[no_delim]) + 1L
  data.frame(
    # trimws() drops the blank that follows the delimiter.
    text1 = trimws(substr(txt, 1L, k - 1L)),
    text2 = trimws(substr(txt, k + 1L, nchar(txt)))
  )
})



结果如下:


> df


 text1 text2


1 separate1 and: more


2 another 20 42



下面是在第一次出现分隔符时拆分的另一个基本解决方案。


# Sample data: a single text column whose values contain ":" delimiters.
df <- data.frame(text = c("separate1: and: more", "another 20: 42"))

# regexpr() reports only the first match per string, so regmatches() with
# invert = TRUE returns the pieces before and after the first ":".
# Both functions are vectorised, so no apply() over the data frame is needed,
# and the result is not stored under the name of base::list().
txt <- as.character(df$text)
parts <- regmatches(txt, regexpr(":", txt), invert = TRUE)

# One row per input string; trimws() drops the blank after the delimiter.
df <- data.frame(
  matrix(trimws(unlist(parts)), nrow = length(parts), byrow = TRUE)
)

df
#>           X1        X2
#> 1  separate1 and: more
#> 2 another 20        42



...