Selecting rows from data based on unique conditions

dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 
 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 
 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 
 358, 359, 360, 361, 362, 363, 364, 365), 
 no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), 
 cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954, 
 0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461, 
 0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241, 
 0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434, 
 0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405, 
 0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778, 
 0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532, 
 0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531, 
 0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558, 
 0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534, 
 0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304, 
 0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375, 
 0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526, 
 0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354, 
 0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273, 
 0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)), 
 class = "data.frame", row.names = c(NA, -92L))


 delta <- 0.04991736

I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.

At the moment I am selecting n by trial and error, by first creating a sequence 1:n. For example, with 1:19:

qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE

If I change qt.vec to 1:20

qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE

This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.

# For each of the 19 thresholds, which.max() on the logical vector gives the
# index of the first row whose cum.value exceeds that threshold (it returns 1
# when no row exceeds it). unique() drops indices that repeat across thresholds.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
 which.max(cum.value > qt.vec.19[2]),
 which.max(cum.value > qt.vec.19[3]),
 which.max(cum.value > qt.vec.19[4]),
 which.max(cum.value > qt.vec.19[5]),
 which.max(cum.value > qt.vec.19[6]),
 which.max(cum.value > qt.vec.19[7]),
 which.max(cum.value > qt.vec.19[8]),
 which.max(cum.value > qt.vec.19[9]),
 which.max(cum.value > qt.vec.19[10]),
 which.max(cum.value > qt.vec.19[11]),
 which.max(cum.value > qt.vec.19[12]),
 which.max(cum.value > qt.vec.19[13]),
 which.max(cum.value > qt.vec.19[14]),
 which.max(cum.value > qt.vec.19[15]),
 which.max(cum.value > qt.vec.19[16]),
 which.max(cum.value > qt.vec.19[17]),
 which.max(cum.value > qt.vec.19[18]),
 which.max(cum.value > qt.vec.19[19])))) 


# Row(s) of dat with doy equal to 365; this is appended unconditionally below.
last.doy <- dat %>% dplyr::filter(doy == 365) 

# Stack the threshold rows and the doy-365 row into one plain data.frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))

 doy no.plant cum.value
 294 0 0.05378514
 298 0 0.10235418
 302 0 0.15144776
 307 0 0.21119126
 309 0 0.25643292
 311 0 0.30540858
 313 0 0.35743343
 315 0 0.41169177
 317 0 0.46726843
 319 0 0.52318789
 320 0 0.55096569
 322 0 0.60554831
 324 0 0.65806778
 326 0 0.70769464
 328 0 0.75373499
 334 0 0.81495992
 336 0 0.85010964
 341 0 0.90655389
 346 0 0.95328395
 365 1 0.99834714

I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?

asked Nov 13 '18 at 11:55

Crop89

98711122

Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59

No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10

add a comment |

dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 
 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 
 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 
 358, 359, 360, 361, 362, 363, 364, 365), 
 no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), 
 cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954, 
 0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461, 
 0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241, 
 0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434, 
 0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405, 
 0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778, 
 0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532, 
 0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531, 
 0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558, 
 0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534, 
 0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304, 
 0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375, 
 0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526, 
 0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354, 
 0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273, 
 0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)), 
 class = "data.frame", row.names = c(NA, -92L))


 delta <- 0.04991736

I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.

At the moment I am selecting n by trial and error, by first creating a sequence 1:n. For example, with 1:19:

qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE

If I change qt.vec to 1:20

qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE

This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.

# For each of the 19 thresholds, which.max() on the logical vector gives the
# index of the first row whose cum.value exceeds that threshold (it returns 1
# when no row exceeds it). unique() drops indices that repeat across thresholds.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
 which.max(cum.value > qt.vec.19[2]),
 which.max(cum.value > qt.vec.19[3]),
 which.max(cum.value > qt.vec.19[4]),
 which.max(cum.value > qt.vec.19[5]),
 which.max(cum.value > qt.vec.19[6]),
 which.max(cum.value > qt.vec.19[7]),
 which.max(cum.value > qt.vec.19[8]),
 which.max(cum.value > qt.vec.19[9]),
 which.max(cum.value > qt.vec.19[10]),
 which.max(cum.value > qt.vec.19[11]),
 which.max(cum.value > qt.vec.19[12]),
 which.max(cum.value > qt.vec.19[13]),
 which.max(cum.value > qt.vec.19[14]),
 which.max(cum.value > qt.vec.19[15]),
 which.max(cum.value > qt.vec.19[16]),
 which.max(cum.value > qt.vec.19[17]),
 which.max(cum.value > qt.vec.19[18]),
 which.max(cum.value > qt.vec.19[19])))) 


# Row(s) of dat with doy equal to 365; this is appended unconditionally below.
last.doy <- dat %>% dplyr::filter(doy == 365) 

# Stack the threshold rows and the doy-365 row into one plain data.frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))

 doy no.plant cum.value
 294 0 0.05378514
 298 0 0.10235418
 302 0 0.15144776
 307 0 0.21119126
 309 0 0.25643292
 311 0 0.30540858
 313 0 0.35743343
 315 0 0.41169177
 317 0 0.46726843
 319 0 0.52318789
 320 0 0.55096569
 322 0 0.60554831
 324 0 0.65806778
 326 0 0.70769464
 328 0 0.75373499
 334 0 0.81495992
 336 0 0.85010964
 341 0 0.90655389
 346 0 0.95328395
 365 1 0.99834714

I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?

asked Nov 13 '18 at 11:55

Crop89

98711122

Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59

No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10

add a comment |

dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 
 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 
 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 
 358, 359, 360, 361, 362, 363, 364, 365), 
 no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), 
 cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954, 
 0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461, 
 0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241, 
 0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434, 
 0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405, 
 0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778, 
 0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532, 
 0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531, 
 0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558, 
 0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534, 
 0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304, 
 0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375, 
 0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526, 
 0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354, 
 0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273, 
 0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)), 
 class = "data.frame", row.names = c(NA, -92L))


 delta <- 0.04991736

I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.

At the moment I am selecting n by trial and error, by first creating a sequence 1:n. For example, with 1:19:

qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE

If I change qt.vec to 1:20

qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE

This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.

# For each of the 19 thresholds, which.max() on the logical vector gives the
# index of the first row whose cum.value exceeds that threshold (it returns 1
# when no row exceeds it). unique() drops indices that repeat across thresholds.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
 which.max(cum.value > qt.vec.19[2]),
 which.max(cum.value > qt.vec.19[3]),
 which.max(cum.value > qt.vec.19[4]),
 which.max(cum.value > qt.vec.19[5]),
 which.max(cum.value > qt.vec.19[6]),
 which.max(cum.value > qt.vec.19[7]),
 which.max(cum.value > qt.vec.19[8]),
 which.max(cum.value > qt.vec.19[9]),
 which.max(cum.value > qt.vec.19[10]),
 which.max(cum.value > qt.vec.19[11]),
 which.max(cum.value > qt.vec.19[12]),
 which.max(cum.value > qt.vec.19[13]),
 which.max(cum.value > qt.vec.19[14]),
 which.max(cum.value > qt.vec.19[15]),
 which.max(cum.value > qt.vec.19[16]),
 which.max(cum.value > qt.vec.19[17]),
 which.max(cum.value > qt.vec.19[18]),
 which.max(cum.value > qt.vec.19[19])))) 


# Row(s) of dat with doy equal to 365; this is appended unconditionally below.
last.doy <- dat %>% dplyr::filter(doy == 365) 

# Stack the threshold rows and the doy-365 row into one plain data.frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))

 doy no.plant cum.value
 294 0 0.05378514
 298 0 0.10235418
 302 0 0.15144776
 307 0 0.21119126
 309 0 0.25643292
 311 0 0.30540858
 313 0 0.35743343
 315 0 0.41169177
 317 0 0.46726843
 319 0 0.52318789
 320 0 0.55096569
 322 0 0.60554831
 324 0 0.65806778
 326 0 0.70769464
 328 0 0.75373499
 334 0 0.81495992
 336 0 0.85010964
 341 0 0.90655389
 346 0 0.95328395
 365 1 0.99834714

I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?

asked Nov 13 '18 at 11:55

Crop89

98711122

dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 
 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 
 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 
 358, 359, 360, 361, 362, 363, 364, 365), 
 no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), 
 cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954, 
 0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461, 
 0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241, 
 0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434, 
 0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405, 
 0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778, 
 0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532, 
 0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531, 
 0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558, 
 0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534, 
 0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304, 
 0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375, 
 0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526, 
 0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354, 
 0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273, 
 0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)), 
 class = "data.frame", row.names = c(NA, -92L))


 delta <- 0.04991736

I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.

At the moment I am selecting n by trial and error, by first creating a sequence 1:n. For example, with 1:19:

qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE

If I change qt.vec to 1:20

qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE

This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.

# For each of the 19 thresholds, which.max() on the logical vector gives the
# index of the first row whose cum.value exceeds that threshold (it returns 1
# when no row exceeds it). unique() drops indices that repeat across thresholds.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
 which.max(cum.value > qt.vec.19[2]),
 which.max(cum.value > qt.vec.19[3]),
 which.max(cum.value > qt.vec.19[4]),
 which.max(cum.value > qt.vec.19[5]),
 which.max(cum.value > qt.vec.19[6]),
 which.max(cum.value > qt.vec.19[7]),
 which.max(cum.value > qt.vec.19[8]),
 which.max(cum.value > qt.vec.19[9]),
 which.max(cum.value > qt.vec.19[10]),
 which.max(cum.value > qt.vec.19[11]),
 which.max(cum.value > qt.vec.19[12]),
 which.max(cum.value > qt.vec.19[13]),
 which.max(cum.value > qt.vec.19[14]),
 which.max(cum.value > qt.vec.19[15]),
 which.max(cum.value > qt.vec.19[16]),
 which.max(cum.value > qt.vec.19[17]),
 which.max(cum.value > qt.vec.19[18]),
 which.max(cum.value > qt.vec.19[19])))) 


# Row(s) of dat with doy equal to 365; this is appended unconditionally below.
last.doy <- dat %>% dplyr::filter(doy == 365) 

# Stack the threshold rows and the doy-365 row into one plain data.frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))

 doy no.plant cum.value
 294 0 0.05378514
 298 0 0.10235418
 302 0 0.15144776
 307 0 0.21119126
 309 0 0.25643292
 311 0 0.30540858
 313 0 0.35743343
 315 0 0.41169177
 317 0 0.46726843
 319 0 0.52318789
 320 0 0.55096569
 322 0 0.60554831
 324 0 0.65806778
 326 0 0.70769464
 328 0 0.75373499
 334 0 0.81495992
 336 0 0.85010964
 341 0 0.90655389
 346 0 0.95328395
 365 1 0.99834714

I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?

r dplyr data.table

asked Nov 13 '18 at 11:55

Crop89

98711122

asked Nov 13 '18 at 11:55

Crop89

98711122

asked Nov 13 '18 at 11:55

Crop89

98711122

asked Nov 13 '18 at 11:55

Crop89

98711122

asked Nov 13 '18 at 11:55

Crop89

98711122

Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59

No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10

add a comment |

Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59

No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10

Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59

No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10

add a comment |

2 Answers
2

active

oldest

votes

It is a matter of taste and context, and you will read a lot about how "loops are frowned upon in R" - but they deliver results, are easy to read, and are base R, with no extra packages needed and no new syntax to learn:

# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), without knowing in advance how many
# multiples fit, then make sure the last row of the data is included.
options(scipen = 10, digits = 15)  # display all digits
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold
n <- 1                             # multiplier of delta for the next threshold
k <- 0                             # number of rows written to all.doy so far
all.doy <- dat[1, ]                # initiate receiving data.frame

for (i in seq_len(nrow(dat))) {                 # loop through dat rows
  if (dat[i, "cum.value"] >= n * delta) {       # as soon as threshold is passed
    k <- k + 1
    all.doy[k, ] <- dat[i, ]                    # write the line to the target data.frame
    # Advance past every threshold this row already covers, so that a jump of
    # more than one delta does not make the following rows match stale thresholds.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1                                # increment multiplier
    }
  }
}

# Add the last row of dat, unless the loop has already selected it.
if (k == 0 || all.doy[k, "doy"] != dat[nrow(dat), "doy"]) {
  all.doy[k + 1, ] <- dat[nrow(dat), ]
}

all.doy
> all.doy
 doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710

edited Nov 15 '18 at 0:12

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

add a comment |

The main point is the cut function here:

# Pick the first row of each delta-wide band of cum.value using cut(), with the
# number of bands derived from the data rather than guessed by hand.
# Expects `dat` (columns doy, cum.value) and `delta` to exist already.
library(data.table)
DT <- as.data.table(dat)
# Largest n for which n * delta does not exceed the maximum cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
# cut() puts each row into a band (k * delta, (k + 1) * delta]; after the -1,
# group 0 holds the rows that have not yet passed the first threshold.
DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf), ordered_result = TRUE)) - 1]
# Within each band, rank 1 (ties broken by order of appearance) marks the row to keep.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the last day when the selection is empty or stops short of the maximum.
if (nrow(DT) == 0L || max(DT$cum.value) != max(dat$cum.value)) {
  DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
}

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

1

From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

– vaettchen
Nov 14 '18 at 3:09

I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

– Vladimir Volokhonsky
Nov 14 '18 at 10:46

I have not run your code, but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

– vaettchen
Nov 14 '18 at 11:00

@vaettchen yes I want to avoid guessing the n = 19 part

– Crop89
Nov 15 '18 at 11:31

add a comment |

Your Answer

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "1"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader:
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
,
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53280517%2fselecting-rows-from-data-based-on-unique-conditions%23new-answer', 'question_page');

);

Post as a guest

Name

Required, but never shown

2 Answers
2

active

oldest

votes

2 Answers
2

active

oldest

votes

# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), without knowing in advance how many
# multiples fit, then make sure the last row of the data is included.
options(scipen = 10, digits = 15)  # display all digits
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold
n <- 1                             # multiplier of delta for the next threshold
k <- 0                             # number of rows written to all.doy so far
all.doy <- dat[1, ]                # initiate receiving data.frame

for (i in seq_len(nrow(dat))) {                 # loop through dat rows
  if (dat[i, "cum.value"] >= n * delta) {       # as soon as threshold is passed
    k <- k + 1
    all.doy[k, ] <- dat[i, ]                    # write the line to the target data.frame
    # Advance past every threshold this row already covers, so that a jump of
    # more than one delta does not make the following rows match stale thresholds.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1                                # increment multiplier
    }
  }
}

# Add the last row of dat, unless the loop has already selected it.
if (k == 0 || all.doy[k, "doy"] != dat[nrow(dat), "doy"]) {
  all.doy[k + 1, ] <- dat[nrow(dat), ]
}

all.doy
> all.doy
 doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710

edited Nov 15 '18 at 0:12

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

add a comment |

# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), without knowing in advance how many
# multiples fit, then make sure the last row of the data is included.
options(scipen = 10, digits = 15)  # display all digits
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold
n <- 1                             # multiplier of delta for the next threshold
k <- 0                             # number of rows written to all.doy so far
all.doy <- dat[1, ]                # initiate receiving data.frame

for (i in seq_len(nrow(dat))) {                 # loop through dat rows
  if (dat[i, "cum.value"] >= n * delta) {       # as soon as threshold is passed
    k <- k + 1
    all.doy[k, ] <- dat[i, ]                    # write the line to the target data.frame
    # Advance past every threshold this row already covers, so that a jump of
    # more than one delta does not make the following rows match stale thresholds.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1                                # increment multiplier
    }
  }
}

# Add the last row of dat, unless the loop has already selected it.
if (k == 0 || all.doy[k, "doy"] != dat[nrow(dat), "doy"]) {
  all.doy[k + 1, ] <- dat[nrow(dat), ]
}

all.doy
> all.doy
 doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710

edited Nov 15 '18 at 0:12

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

add a comment |

# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), without knowing in advance how many
# multiples fit, then make sure the last row of the data is included.
options(scipen = 10, digits = 15)  # display all digits
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold
n <- 1                             # multiplier of delta for the next threshold
k <- 0                             # number of rows written to all.doy so far
all.doy <- dat[1, ]                # initiate receiving data.frame

for (i in seq_len(nrow(dat))) {                 # loop through dat rows
  if (dat[i, "cum.value"] >= n * delta) {       # as soon as threshold is passed
    k <- k + 1
    all.doy[k, ] <- dat[i, ]                    # write the line to the target data.frame
    # Advance past every threshold this row already covers, so that a jump of
    # more than one delta does not make the following rows match stale thresholds.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1                                # increment multiplier
    }
  }
}

# Add the last row of dat, unless the loop has already selected it.
if (k == 0 || all.doy[k, "doy"] != dat[nrow(dat), "doy"]) {
  all.doy[k + 1, ] <- dat[nrow(dat), ]
}

all.doy
> all.doy
 doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710

edited Nov 15 '18 at 0:12

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), without knowing in advance how many
# multiples fit, then make sure the last row of the data is included.
options(scipen = 10, digits = 15)  # display all digits
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold
n <- 1                             # multiplier of delta for the next threshold
k <- 0                             # number of rows written to all.doy so far
all.doy <- dat[1, ]                # initiate receiving data.frame

for (i in seq_len(nrow(dat))) {                 # loop through dat rows
  if (dat[i, "cum.value"] >= n * delta) {       # as soon as threshold is passed
    k <- k + 1
    all.doy[k, ] <- dat[i, ]                    # write the line to the target data.frame
    # Advance past every threshold this row already covers, so that a jump of
    # more than one delta does not make the following rows match stale thresholds.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1                                # increment multiplier
    }
  }
}

# Add the last row of dat, unless the loop has already selected it.
if (k == 0 || all.doy[k, "doy"] != dat[nrow(dat), "doy"]) {
  all.doy[k + 1, ] <- dat[nrow(dat), ]
}

all.doy
> all.doy
 doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710

edited Nov 15 '18 at 0:12

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

edited Nov 15 '18 at 0:12

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

answered Nov 14 '18 at 3:02

vaettchen

5,2201332

add a comment |

The main point is the cut function here:

# Pick the first row of each delta-wide band of cum.value using cut(), with the
# number of bands derived from the data rather than guessed by hand.
# Expects `dat` (columns doy, cum.value) and `delta` to exist already.
library(data.table)
DT <- as.data.table(dat)
# Largest n for which n * delta does not exceed the maximum cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
# cut() puts each row into a band (k * delta, (k + 1) * delta]; after the -1,
# group 0 holds the rows that have not yet passed the first threshold.
DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf), ordered_result = TRUE)) - 1]
# Within each band, rank 1 (ties broken by order of appearance) marks the row to keep.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the last day when the selection is empty or stops short of the maximum.
if (nrow(DT) == 0L || max(DT$cum.value) != max(dat$cum.value)) {
  DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
}

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

1

From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

– vaettchen
Nov 14 '18 at 3:09

I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

– Vladimir Volokhonsky
Nov 14 '18 at 10:46

I have not run your code, but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

– vaettchen
Nov 14 '18 at 11:00

@vaettchen yes I want to avoid guessing the n = 19 part

– Crop89
Nov 15 '18 at 11:31

add a comment |

The main point is the cut function here:

# Pick the first row of each delta-wide band of cum.value using cut(), with the
# number of bands derived from the data rather than guessed by hand.
# Expects `dat` (columns doy, cum.value) and `delta` to exist already.
library(data.table)
DT <- as.data.table(dat)
# Largest n for which n * delta does not exceed the maximum cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
# cut() puts each row into a band (k * delta, (k + 1) * delta]; after the -1,
# group 0 holds the rows that have not yet passed the first threshold.
DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf), ordered_result = TRUE)) - 1]
# Within each band, rank 1 (ties broken by order of appearance) marks the row to keep.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the last day when the selection is empty or stops short of the maximum.
if (nrow(DT) == 0L || max(DT$cum.value) != max(dat$cum.value)) {
  DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
}

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

1

From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

– vaettchen
Nov 14 '18 at 3:09

I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

– Vladimir Volokhonsky
Nov 14 '18 at 10:46

I have not run your code, but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

– vaettchen
Nov 14 '18 at 11:00

@vaettchen yes I want to avoid guessing the n = 19 part

– Crop89
Nov 15 '18 at 11:31

add a comment |

The main point is the cut function here:

# Pick the first row of each delta-wide band of cum.value using cut(), with the
# number of bands derived from the data rather than guessed by hand.
# Expects `dat` (columns doy, cum.value) and `delta` to exist already.
library(data.table)
DT <- as.data.table(dat)
# Largest n for which n * delta does not exceed the maximum cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
# cut() puts each row into a band (k * delta, (k + 1) * delta]; after the -1,
# group 0 holds the rows that have not yet passed the first threshold.
DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf), ordered_result = TRUE)) - 1]
# Within each band, rank 1 (ties broken by order of appearance) marks the row to keep.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the last day when the selection is empty or stops short of the maximum.
if (nrow(DT) == 0L || max(DT$cum.value) != max(dat$cum.value)) {
  DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
}

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

The main point is the cut function here:

# Pick the first row of each delta-wide band of cum.value using cut(), with the
# number of bands derived from the data rather than guessed by hand.
# Expects `dat` (columns doy, cum.value) and `delta` to exist already.
library(data.table)
DT <- as.data.table(dat)
# Largest n for which n * delta does not exceed the maximum cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
# cut() puts each row into a band (k * delta, (k + 1) * delta]; after the -1,
# group 0 holds the rows that have not yet passed the first threshold.
DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf), ordered_result = TRUE)) - 1]
# Within each band, rank 1 (ties broken by order of appearance) marks the row to keep.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the last day when the selection is empty or stops short of the maximum.
if (nrow(DT) == 0L || max(DT$cum.value) != max(dat$cum.value)) {
  DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
}

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

answered Nov 13 '18 at 13:17

Vladimir Volokhonsky

1206

1

From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

– vaettchen
Nov 14 '18 at 3:09

I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

– Vladimir Volokhonsky
Nov 14 '18 at 10:46

I have not run your code, but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

– vaettchen
Nov 14 '18 at 11:00

@vaettchen yes I want to avoid guessing the n = 19 part

– Crop89
Nov 15 '18 at 11:31

add a comment |

1

From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

– vaettchen
Nov 14 '18 at 3:09

I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

– Vladimir Volokhonsky
Nov 14 '18 at 10:46

I have not run your code, but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

– vaettchen
Nov 14 '18 at 11:00

@vaettchen yes I want to avoid guessing the n = 19 part

– Crop89
Nov 15 '18 at 11:31

From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

– vaettchen
Nov 14 '18 at 3:09

I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

– Vladimir Volokhonsky
Nov 14 '18 at 10:46

I have not run your code, but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

– vaettchen
Nov 14 '18 at 11:00

@vaettchen yes I want to avoid guessing the n = 19 part

– Crop89
Nov 15 '18 at 11:31

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference. If you need detailed information, please check here.

搜尋此網誌

Pfthb