Selecting rows from data based on unique conditions










1















dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))


delta <- 0.04991736


I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.



At the moment I am selecting n by trial and error, by first creating a sequence 1:n, e.g. 1:19:



qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE


If I change qt.vec to 1:20



qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE


This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.



sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))


last.doy <- dat %>% dplyr::filter(doy == 365)

all.doy <- as.data.frame(rbind(sample.dat, last.doy))

doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714


I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?










share|improve this question






















  • Any reason to shun Base R? You only want dplyr or data.table?

    – vaettchen
    Nov 13 '18 at 12:59











  • No particular reason. I could also use base R

    – Crop89
    Nov 13 '18 at 13:10















1















dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))


delta <- 0.04991736


I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.



At the moment I am selecting n by trial and error, by first creating a sequence 1:n, e.g. 1:19:



qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE


If I change qt.vec to 1:20



qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE


This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.



sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))


last.doy <- dat %>% dplyr::filter(doy == 365)

all.doy <- as.data.frame(rbind(sample.dat, last.doy))

doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714


I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?










share|improve this question






















  • Any reason to shun Base R? You only want dplyr or data.table?

    – vaettchen
    Nov 13 '18 at 12:59











  • No particular reason. I could also use base R

    – Crop89
    Nov 13 '18 at 13:10













1












1








1








dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))


delta <- 0.04991736


I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.



At the moment I am selecting n by trial and error, by first creating a sequence 1:n, e.g. 1:19:



qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE


If I change qt.vec to 1:20



qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE


This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.



sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))


last.doy <- dat %>% dplyr::filter(doy == 365)

all.doy <- as.data.frame(rbind(sample.dat, last.doy))

doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714


I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?










share|improve this question














dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))


delta <- 0.04991736


I need to select those doy where the cum.value reaches 1*delta, 2*delta, 3*delta, 4*delta ....n*delta and also
include last doy which is 365 if n*delta does not reach the doy 365.



At the moment I am selecting n by trial and error, by first creating a sequence 1:n, e.g. 1:19:



qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE


If I change qt.vec to 1:20



qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE


This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.



sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]), 
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))


last.doy <- dat %>% dplyr::filter(doy == 365)

all.doy <- as.data.frame(rbind(sample.dat, last.doy))

doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714


I was wondering if there's any better way to do this like selecting what my n value should be or avoid the long slice(unique(... part?







r dplyr data.table






share|improve this question













share|improve this question











share|improve this question




share|improve this question










asked Nov 13 '18 at 11:55









Crop89Crop89

98711122




98711122












  • Any reason to shun Base R? You only want dplyr or data.table?

    – vaettchen
    Nov 13 '18 at 12:59











  • No particular reason. I could also use base R

    – Crop89
    Nov 13 '18 at 13:10

















  • Any reason to shun Base R? You only want dplyr or data.table?

    – vaettchen
    Nov 13 '18 at 12:59











  • No particular reason. I could also use base R

    – Crop89
    Nov 13 '18 at 13:10
















Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59





Any reason to shun Base R? You only want dplyr or data.table?

– vaettchen
Nov 13 '18 at 12:59













No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10





No particular reason. I could also use base R

– Crop89
Nov 13 '18 at 13:10












2 Answers
2






active

oldest

votes


















1














A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:



# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), then append the final row of dat.
# n does not have to be guessed: it counts up as each threshold is reached.
options( scipen = 10, digits = 15 ) # display all digits
dat <- read.csv( "crop89.csv" ) # load your data from a file
delta <- 0.04991736 # selected threshold
n <- 1 # multiplier of the next threshold to reach
all.doy <- dat[ 1, ] # initiate receiving data.frame

# The braces matter: both the row copy and the increment of n must happen
# inside the if, and the if must be the body of the loop.
for( i in seq_len( nrow( dat ) ) ) { # loop through dat rows
  if( dat[ i, "cum.value" ] >= n * delta ) { # as soon as threshold is reached
    all.doy[ n, ] <- dat[ i, ] # write the line to the target data.frame
    n <- n + 1 # increment multiplier
  }
}

all.doy[ n, ] <- dat[ nrow( dat ), ] # add the last row anyway

all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710





share|improve this answer
































    0














    The main point is the cut function here:



    library(data.table)

    # Derive the thresholds from the data rather than hard-coding a guessed n:
    # n is the number of whole multiples of delta that fit within max(cum.value).
    n <- floor(max(dat$cum.value) / delta)
    qt.vec <- seq_len(n) * delta

    DT <- as.data.table(dat)
    # cut() builds right-closed intervals, so after the "- 1" shift group 0 holds
    # cum.value <= delta and group k holds cum.value > k * delta (up to the next
    # threshold; the last group is open-ended).
    DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf),
                                 ordered_result = TRUE)) - 1]
    # Rank rows within each group; rank 1 is the smallest cum.value of the group
    # (first occurrence on ties).
    DT[, position := frank(cum.value, ties.method = "first"), by = group]
    DT <- DT[position == 1 & group > 0]
    DT[, position := NULL]
    DT[, group := NULL]
    # Always finish with the last doy. Test on doy, not cum.value: cum.value can
    # be tied between the final rows, which would wrongly skip the append.
    if (nrow(DT) == 0L || max(DT$doy) != max(dat$doy)) {
      DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
    }





    share|improve this answer


















    • 1





      From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

      – vaettchen
      Nov 14 '18 at 3:09












    • I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

      – Vladimir Volokhonsky
      Nov 14 '18 at 10:46











    • I have not run your code but I see a reference to qt.vec.19, which I believe is the step the OP wants to avoid. Not sure about my understanding though...

      – vaettchen
      Nov 14 '18 at 11:00











    • @vaettchen yes I want to avoid guessing the n = 19 part

      – Crop89
      Nov 15 '18 at 11:31










    Your Answer






    StackExchange.ifUsing("editor", function ()
    StackExchange.using("externalEditor", function ()
    StackExchange.using("snippets", function ()
    StackExchange.snippets.init();
    );
    );
    , "code-snippets");

    StackExchange.ready(function()
    var channelOptions =
    tags: "".split(" "),
    id: "1"
    ;
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function()
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled)
    StackExchange.using("snippets", function()
    createEditor();
    );

    else
    createEditor();

    );

    function createEditor()
    StackExchange.prepareEditor(
    heartbeatType: 'answer',
    autoActivateHeartbeat: false,
    convertImagesToLinks: true,
    noModals: true,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: 10,
    bindNavPrevention: true,
    postfix: "",
    imageUploader:
    brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
    contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
    allowUrls: true
    ,
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    );



    );













    draft saved

    draft discarded


















    StackExchange.ready(
    function ()
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53280517%2fselecting-rows-from-data-based-on-unique-conditions%23new-answer', 'question_page');

    );

    Post as a guest















    Required, but never shown

























    2 Answers
    2






    active

    oldest

    votes








    2 Answers
    2






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes









    1














    A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:



    # Select the first row at which cum.value reaches each successive multiple of
    # delta (1 * delta, 2 * delta, ...), then append the final row of dat.
    # n does not have to be guessed: it counts up as each threshold is reached.
    options( scipen = 10, digits = 15 ) # display all digits
    dat <- read.csv( "crop89.csv" ) # load your data from a file
    delta <- 0.04991736 # selected threshold
    n <- 1 # multiplier of the next threshold to reach
    all.doy <- dat[ 1, ] # initiate receiving data.frame

    # The braces matter: both the row copy and the increment of n must happen
    # inside the if, and the if must be the body of the loop.
    for( i in seq_len( nrow( dat ) ) ) { # loop through dat rows
      if( dat[ i, "cum.value" ] >= n * delta ) { # as soon as threshold is reached
        all.doy[ n, ] <- dat[ i, ] # write the line to the target data.frame
        n <- n + 1 # increment multiplier
      }
    }

    all.doy[ n, ] <- dat[ nrow( dat ), ] # add the last row anyway

    all.doy
    > all.doy
    doy no.plant cum.value
    1 294 0 0.0537851355741434
    25 298 0 0.1023541813796280
    29 302 0 0.1514477576471970
    34 307 0 0.2111912638362250
    36 309 0 0.2564329207940940
    38 311 0 0.3054085770125320
    40 313 0 0.3574334259923490
    42 315 0 0.4116917684996510
    44 317 0 0.4672684335375310
    46 319 0 0.5231878880819389
    47 320 0 0.5509656885500590
    49 322 0 0.6055483125156320
    51 324 0 0.6580677801598390
    53 326 0 0.7076946395653940
    55 328 0 0.7537349900695340
    61 334 0 0.8149599164678990
    63 336 0 0.8501096427718370
    68 341 0 0.9065538898023749
    73 346 0 0.9532839518796200
    92 365 1 0.9983471394300710





    share|improve this answer





























      1














      A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:



      # Select the first row at which cum.value reaches each successive multiple of
      # delta (1 * delta, 2 * delta, ...), then append the final row of dat.
      # n does not have to be guessed: it counts up as each threshold is reached.
      options( scipen = 10, digits = 15 ) # display all digits
      dat <- read.csv( "crop89.csv" ) # load your data from a file
      delta <- 0.04991736 # selected threshold
      n <- 1 # multiplier of the next threshold to reach
      all.doy <- dat[ 1, ] # initiate receiving data.frame

      # The braces matter: both the row copy and the increment of n must happen
      # inside the if, and the if must be the body of the loop.
      for( i in seq_len( nrow( dat ) ) ) { # loop through dat rows
        if( dat[ i, "cum.value" ] >= n * delta ) { # as soon as threshold is reached
          all.doy[ n, ] <- dat[ i, ] # write the line to the target data.frame
          n <- n + 1 # increment multiplier
        }
      }

      all.doy[ n, ] <- dat[ nrow( dat ), ] # add the last row anyway

      all.doy
      > all.doy
      doy no.plant cum.value
      1 294 0 0.0537851355741434
      25 298 0 0.1023541813796280
      29 302 0 0.1514477576471970
      34 307 0 0.2111912638362250
      36 309 0 0.2564329207940940
      38 311 0 0.3054085770125320
      40 313 0 0.3574334259923490
      42 315 0 0.4116917684996510
      44 317 0 0.4672684335375310
      46 319 0 0.5231878880819389
      47 320 0 0.5509656885500590
      49 322 0 0.6055483125156320
      51 324 0 0.6580677801598390
      53 326 0 0.7076946395653940
      55 328 0 0.7537349900695340
      61 334 0 0.8149599164678990
      63 336 0 0.8501096427718370
      68 341 0 0.9065538898023749
      73 346 0 0.9532839518796200
      92 365 1 0.9983471394300710





      share|improve this answer



























        1












        1








        1







        A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:



        # Select the first row at which cum.value reaches each successive multiple of
        # delta (1 * delta, 2 * delta, ...), then append the final row of dat.
        # n does not have to be guessed: it counts up as each threshold is reached.
        options( scipen = 10, digits = 15 ) # display all digits
        dat <- read.csv( "crop89.csv" ) # load your data from a file
        delta <- 0.04991736 # selected threshold
        n <- 1 # multiplier of the next threshold to reach
        all.doy <- dat[ 1, ] # initiate receiving data.frame

        # The braces matter: both the row copy and the increment of n must happen
        # inside the if, and the if must be the body of the loop.
        for( i in seq_len( nrow( dat ) ) ) { # loop through dat rows
          if( dat[ i, "cum.value" ] >= n * delta ) { # as soon as threshold is reached
            all.doy[ n, ] <- dat[ i, ] # write the line to the target data.frame
            n <- n + 1 # increment multiplier
          }
        }

        all.doy[ n, ] <- dat[ nrow( dat ), ] # add the last row anyway

        all.doy
        > all.doy
        doy no.plant cum.value
        1 294 0 0.0537851355741434
        25 298 0 0.1023541813796280
        29 302 0 0.1514477576471970
        34 307 0 0.2111912638362250
        36 309 0 0.2564329207940940
        38 311 0 0.3054085770125320
        40 313 0 0.3574334259923490
        42 315 0 0.4116917684996510
        44 317 0 0.4672684335375310
        46 319 0 0.5231878880819389
        47 320 0 0.5509656885500590
        49 322 0 0.6055483125156320
        51 324 0 0.6580677801598390
        53 326 0 0.7076946395653940
        55 328 0 0.7537349900695340
        61 334 0 0.8149599164678990
        63 336 0 0.8501096427718370
        68 341 0 0.9065538898023749
        73 346 0 0.9532839518796200
        92 365 1 0.9983471394300710





        share|improve this answer















        A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:



        # Select the first row at which cum.value reaches each successive multiple of
        # delta (1 * delta, 2 * delta, ...), then append the final row of dat.
        # n does not have to be guessed: it counts up as each threshold is reached.
        options( scipen = 10, digits = 15 ) # display all digits
        dat <- read.csv( "crop89.csv" ) # load your data from a file
        delta <- 0.04991736 # selected threshold
        n <- 1 # multiplier of the next threshold to reach
        all.doy <- dat[ 1, ] # initiate receiving data.frame

        # The braces matter: both the row copy and the increment of n must happen
        # inside the if, and the if must be the body of the loop.
        for( i in seq_len( nrow( dat ) ) ) { # loop through dat rows
          if( dat[ i, "cum.value" ] >= n * delta ) { # as soon as threshold is reached
            all.doy[ n, ] <- dat[ i, ] # write the line to the target data.frame
            n <- n + 1 # increment multiplier
          }
        }

        all.doy[ n, ] <- dat[ nrow( dat ), ] # add the last row anyway

        all.doy
        > all.doy
        doy no.plant cum.value
        1 294 0 0.0537851355741434
        25 298 0 0.1023541813796280
        29 302 0 0.1514477576471970
        34 307 0 0.2111912638362250
        36 309 0 0.2564329207940940
        38 311 0 0.3054085770125320
        40 313 0 0.3574334259923490
        42 315 0 0.4116917684996510
        44 317 0 0.4672684335375310
        46 319 0 0.5231878880819389
        47 320 0 0.5509656885500590
        49 322 0 0.6055483125156320
        51 324 0 0.6580677801598390
        53 326 0 0.7076946395653940
        55 328 0 0.7537349900695340
        61 334 0 0.8149599164678990
        63 336 0 0.8501096427718370
        68 341 0 0.9065538898023749
        73 346 0 0.9532839518796200
        92 365 1 0.9983471394300710






        share|improve this answer














        share|improve this answer



        share|improve this answer








        edited Nov 15 '18 at 0:12

























        answered Nov 14 '18 at 3:02









        vaettchenvaettchen

        5,2201332




        5,2201332























            0














            The main point is the cut function here:



            library(data.table)

            # Derive the thresholds from the data rather than hard-coding a guessed n:
            # n is the number of whole multiples of delta that fit within max(cum.value).
            n <- floor(max(dat$cum.value) / delta)
            qt.vec <- seq_len(n) * delta

            DT <- as.data.table(dat)
            # cut() builds right-closed intervals, so after the "- 1" shift group 0 holds
            # cum.value <= delta and group k holds cum.value > k * delta (up to the next
            # threshold; the last group is open-ended).
            DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec, Inf),
                                         ordered_result = TRUE)) - 1]
            # Rank rows within each group; rank 1 is the smallest cum.value of the group
            # (first occurrence on ties).
            DT[, position := frank(cum.value, ties.method = "first"), by = group]
            DT <- DT[position == 1 & group > 0]
            DT[, position := NULL]
            DT[, group := NULL]
            # Always finish with the last doy. Test on doy, not cum.value: cum.value can
            # be tied between the final rows, which would wrongly skip the append.
            if (nrow(DT) == 0L || max(DT$doy) != max(dat$doy)) {
              DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
            }





            share|improve this answer


















            • 1





              From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

              – vaettchen
              Nov 14 '18 at 3:09












            • I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

              – Vladimir Volokhonsky
              Nov 14 '18 at 10:46











            • I have not run your code but I see a reference to qt.vec.19 which I believe is the step the OP wants to avoid. Not sure about my understanding though...

              – vaettchen
              Nov 14 '18 at 11:00











            • @vaettchen yes I want to avoid guessing the n = 19 part

              – Crop89
              Nov 15 '18 at 11:31















            0














            The main point is the cut function here:



            # Keep, for every threshold interval, the row with the smallest cum.value.
            # qt.vec.19 (the vector of thresholds) is not defined in this snippet and
            # must already exist in the workspace.
            library(data.table)
            DT <- as.data.table(dat)
            # cut() uses right-closed intervals by default, so group 0 holds the values
            # at or below the first threshold; those rows are dropped further down.
            DT[, group := as.numeric(
              cut(cum.value, c(-Inf, qt.vec.19, Inf), ordered_result = TRUE)
            ) - 1]
            # Rank rows inside each group by cum.value; ties go to the row seen first.
            DT[, position := frank(cum.value, ties.method = "first"), by = group]
            DT <- DT[position == 1 & group > 0]
            # Drop the helper columns again.
            DT[, position := NULL]
            DT[, group := NULL]
            # Append the row with the largest doy unless the overall maximum is already in.
            if (max(DT$cum.value) != max(dat$cum.value)) {
              DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
            }





            share|improve this answer


















            • 1





              From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

              – vaettchen
              Nov 14 '18 at 3:09












            • I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

              – Vladimir Volokhonsky
              Nov 14 '18 at 10:46











            • I have not run your code but I see a reference to qt.vec.19 which I believe is the step the OP wants to avoid. Not sure about my understanding though...

              – vaettchen
              Nov 14 '18 at 11:00











            • @vaettchen yes I want to avoid guessing the n = 19 part

              – Crop89
              Nov 15 '18 at 11:31













            0












            0








            0







            The main point is the cut function here:



            # Keep, for every threshold interval, the row with the smallest cum.value.
            # qt.vec.19 (the vector of thresholds) is not defined in this snippet and
            # must already exist in the workspace.
            library(data.table)
            DT <- as.data.table(dat)
            # cut() uses right-closed intervals by default, so group 0 holds the values
            # at or below the first threshold; those rows are dropped further down.
            DT[, group := as.numeric(
              cut(cum.value, c(-Inf, qt.vec.19, Inf), ordered_result = TRUE)
            ) - 1]
            # Rank rows inside each group by cum.value; ties go to the row seen first.
            DT[, position := frank(cum.value, ties.method = "first"), by = group]
            DT <- DT[position == 1 & group > 0]
            # Drop the helper columns again.
            DT[, position := NULL]
            DT[, group := NULL]
            # Append the row with the largest doy unless the overall maximum is already in.
            if (max(DT$cum.value) != max(dat$cum.value)) {
              DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
            }





            share|improve this answer













            The main point is the cut function here:



            # Keep, for every threshold interval, the row with the smallest cum.value.
            # qt.vec.19 (the vector of thresholds) is not defined in this snippet and
            # must already exist in the workspace.
            library(data.table)
            DT <- as.data.table(dat)
            # cut() uses right-closed intervals by default, so group 0 holds the values
            # at or below the first threshold; those rows are dropped further down.
            DT[, group := as.numeric(
              cut(cum.value, c(-Inf, qt.vec.19, Inf), ordered_result = TRUE)
            ) - 1]
            # Rank rows inside each group by cum.value; ties go to the row seen first.
            DT[, position := frank(cum.value, ties.method = "first"), by = group]
            DT <- DT[position == 1 & group > 0]
            # Drop the helper columns again.
            DT[, position := NULL]
            DT[, group := NULL]
            # Append the row with the largest doy unless the overall maximum is already in.
            if (max(DT$cum.value) != max(dat$cum.value)) {
              DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
            }






            share|improve this answer












            share|improve this answer



            share|improve this answer










            answered Nov 13 '18 at 13:17









            Vladimir VolokhonskyVladimir Volokhonsky

            1206




            1206







            • 1





              From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

              – vaettchen
              Nov 14 '18 at 3:09












            • I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

              – Vladimir Volokhonsky
              Nov 14 '18 at 10:46











            • I have not run your code but I see a reference to qt.vec.19 which I believe is the step the OP wants to avoid. Not sure about my understanding though...

              – vaettchen
              Nov 14 '18 at 11:00











            • @vaettchen yes I want to avoid guessing the n = 19 part

              – Crop89
              Nov 15 '18 at 11:31












            • 1





              From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

              – vaettchen
              Nov 14 '18 at 3:09












            • I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

              – Vladimir Volokhonsky
              Nov 14 '18 at 10:46











            • I have not run your code but I see a reference to qt.vec.19 which I believe is the step the OP wants to avoid. Not sure about my understanding though...

              – vaettchen
              Nov 14 '18 at 11:00











            • @vaettchen yes I want to avoid guessing the n = 19 part

              – Crop89
              Nov 15 '18 at 11:31







            1




            1





            From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

            – vaettchen
            Nov 14 '18 at 3:09






            From my understanding, the OP wants to avoid guessing the n=19 so cut does not deliver the best result here as you need to know the value beforehand.

            – vaettchen
            Nov 14 '18 at 3:09














            I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

            – Vladimir Volokhonsky
            Nov 14 '18 at 10:46





            I don't see any problem here, n<-floor(max(dat$cum.value)/delta)...

            – Vladimir Volokhonsky
            Nov 14 '18 at 10:46













            I have not run your code but I see a reference to qt.vec.19 which I believe is the step the OP wants to avoid. Not sure about my understanding though...

            – vaettchen
            Nov 14 '18 at 11:00





            I have not run your code but I see a reference to qt.vec.19 which I believe is the step the OP wants to avoid. Not sure about my understanding though...

            – vaettchen
            Nov 14 '18 at 11:00













            @vaettchen yes I want to avoid guessing the n = 19 part

            – Crop89
            Nov 15 '18 at 11:31





            @vaettchen yes I want to avoid guessing the n = 19 part

            – Crop89
            Nov 15 '18 at 11:31

















            draft saved

            draft discarded
















































            Thanks for contributing an answer to Stack Overflow!


            • Please be sure to answer the question. Provide details and share your research!

            But avoid


            • Asking for help, clarification, or responding to other answers.

            • Making statements based on opinion; back them up with references or personal experience.

            To learn more, see our tips on writing great answers.




            draft saved


            draft discarded














            // Once the page is ready, initialise the post-login handling for the
            // '.new-post-login' element, passing the return URL for this question.
            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53280517%2fselecting-rows-from-data-based-on-unique-conditions%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown





















































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown

































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown







            Popular posts from this blog

            Use pre created SQLite database for Android project in kotlin

            Darth Vader #20

            Ondo