Selecting rows from data based on unique conditions
dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))
delta <- 0.04991736
I need to select those doy
where the cum.value
reaches 1*delta
, 2*delta
, 3*delta
, 4*delta
....n*delta
and also
include last doy which is 365
if n*delta
does not reach the doy 365.
At the moment I am selecting n
by trial and error, which is by first creating a sequence of 1:n
. For e.g 1:19:
qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE
If I change qt.vec
to 1:20
qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE
This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.
# For each threshold qt.vec.19[k], which.max() on the logical vector gives the
# position of the first row whose cum.value exceeds that threshold; unique()
# drops positions that repeat, and slice() keeps the corresponding rows of dat.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]),
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))
# Row whose doy equals 365 (the last doy in dat).
last.doy <- dat %>% dplyr::filter(doy == 365)
# Stack the threshold rows and the doy-365 row into one data frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))
doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714
I was wondering if there's any better way to do this like selecting what my n
value should be or avoid the long slice(unique(...
part?
r dplyr data.table
add a comment |
dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))
delta <- 0.04991736
I need to select those doy
where the cum.value
reaches 1*delta
, 2*delta
, 3*delta
, 4*delta
....n*delta
and also
include last doy which is 365
if n*delta
does not reach the doy 365.
At the moment I am selecting n
by trial and error, which is by first creating a sequence of 1:n
. For e.g 1:19:
qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE
If I change qt.vec
to 1:20
qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE
This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.
# For each threshold qt.vec.19[k], which.max() on the logical vector gives the
# position of the first row whose cum.value exceeds that threshold; unique()
# drops positions that repeat, and slice() keeps the corresponding rows of dat.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]),
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))
# Row whose doy equals 365 (the last doy in dat).
last.doy <- dat %>% dplyr::filter(doy == 365)
# Stack the threshold rows and the doy-365 row into one data frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))
doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714
I was wondering if there's any better way to do this like selecting what my n
value should be or avoid the long slice(unique(...
part?
r dplyr data.table
Any reason to shun Base R? You only want dplyr
or data.table
?
– vaettchen
Nov 13 '18 at 12:59
No particular reason. I could also use base R
– Crop89
Nov 13 '18 at 13:10
add a comment |
dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))
delta <- 0.04991736
I need to select those doy
where the cum.value
reaches 1*delta
, 2*delta
, 3*delta
, 4*delta
....n*delta
and also
include last doy which is 365
if n*delta
does not reach the doy 365.
At the moment I am selecting n
by trial and error, which is by first creating a sequence of 1:n
. For e.g 1:19:
qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE
If I change qt.vec
to 1:20
qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE
This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.
# For each threshold qt.vec.19[k], which.max() on the logical vector gives the
# position of the first row whose cum.value exceeds that threshold; unique()
# drops positions that repeat, and slice() keeps the corresponding rows of dat.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]),
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))
# Row whose doy equals 365 (the last doy in dat).
last.doy <- dat %>% dplyr::filter(doy == 365)
# Stack the threshold rows and the doy-365 row into one data frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))
doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714
I was wondering if there's any better way to do this like selecting what my n
value should be or avoid the long slice(unique(...
part?
r dplyr data.table
dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))
delta <- 0.04991736
I need to select those doy
where the cum.value
reaches 1*delta
, 2*delta
, 3*delta
, 4*delta
....n*delta
and also
include last doy which is 365
if n*delta
does not reach the doy 365.
At the moment I am selecting n
by trial and error, which is by first creating a sequence of 1:n
. For e.g 1:19:
qt.vec.19 <- 1:19 * delta
max(qt.vec.19) >= max(dat$cum.value)
FALSE
If I change qt.vec
to 1:20
qt.vec.20 <- 1:20 * delta
max(qt.vec.20) >= max(dat$cum.value)
TRUE
This means that I can do 1*delta, 2*delta....19*delta and then also select the last doy.
# For each threshold qt.vec.19[k], which.max() on the logical vector gives the
# position of the first row whose cum.value exceeds that threshold; unique()
# drops positions that repeat, and slice() keeps the corresponding rows of dat.
sample.dat <- dat %>% dplyr::slice(unique(c(which.max(cum.value > qt.vec.19[1]),
which.max(cum.value > qt.vec.19[2]),
which.max(cum.value > qt.vec.19[3]),
which.max(cum.value > qt.vec.19[4]),
which.max(cum.value > qt.vec.19[5]),
which.max(cum.value > qt.vec.19[6]),
which.max(cum.value > qt.vec.19[7]),
which.max(cum.value > qt.vec.19[8]),
which.max(cum.value > qt.vec.19[9]),
which.max(cum.value > qt.vec.19[10]),
which.max(cum.value > qt.vec.19[11]),
which.max(cum.value > qt.vec.19[12]),
which.max(cum.value > qt.vec.19[13]),
which.max(cum.value > qt.vec.19[14]),
which.max(cum.value > qt.vec.19[15]),
which.max(cum.value > qt.vec.19[16]),
which.max(cum.value > qt.vec.19[17]),
which.max(cum.value > qt.vec.19[18]),
which.max(cum.value > qt.vec.19[19]))))
# Row whose doy equals 365 (the last doy in dat).
last.doy <- dat %>% dplyr::filter(doy == 365)
# Stack the threshold rows and the doy-365 row into one data frame.
all.doy <- as.data.frame(rbind(sample.dat, last.doy))
doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714
I was wondering if there's any better way to do this like selecting what my n
value should be or avoid the long slice(unique(...
part?
r dplyr data.table
r dplyr data.table
asked Nov 13 '18 at 11:55
Crop89Crop89
98711122
98711122
Any reason to shun Base R? You only want dplyr
or data.table
?
– vaettchen
Nov 13 '18 at 12:59
No particular reason. I could also use base R
– Crop89
Nov 13 '18 at 13:10
add a comment |
Any reason to shun Base R? You only want dplyr
or data.table
?
– vaettchen
Nov 13 '18 at 12:59
No particular reason. I could also use base R
– Crop89
Nov 13 '18 at 13:10
Any reason to shun Base R? You only want
dplyr
or data.table
?– vaettchen
Nov 13 '18 at 12:59
Any reason to shun Base R? You only want
dplyr
or data.table
?– vaettchen
Nov 13 '18 at 12:59
No particular reason. I could also use base R
– Crop89
Nov 13 '18 at 13:10
No particular reason. I could also use base R
– Crop89
Nov 13 '18 at 13:10
add a comment |
2 Answers
2
active
oldest
votes
A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:
# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), plus the last row of the data, without
# having to know in advance how many multiples fit.
options(scipen = 10, digits = 15)  # display all digits (changes global options)
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold step
stopifnot(delta > 0)               # a non-positive step would never terminate
n <- 1                             # multiplier of the next threshold to reach
keep <- logical(nrow(dat))         # preallocated marker: TRUE for rows to keep
for (i in seq_len(nrow(dat))) {    # loop through dat rows (safe if dat is empty)
  if (dat[i, "cum.value"] >= n * delta) {  # as soon as the threshold is reached
    keep[i] <- TRUE                # mark the row for the result
    # Move on to the first threshold this row has NOT reached yet, so that a
    # row jumping over several multiples of delta is selected only once and
    # the following row is not picked up by mistake.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1
    }
  }
}
keep[nrow(dat)] <- TRUE            # add the last row anyway (never duplicated)
all.doy <- dat[keep, ]             # selected rows keep their row names from dat
all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710
add a comment |
The main point is the cut
function here:
library(data.table)
# Thresholds 1 * delta, ..., n * delta, with n derived from the data instead of
# being guessed: n is the number of whole multiples of delta that fit below the
# largest cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
DT <- as.data.table(dat)
# group = number of thresholds strictly below cum.value, i.e. the same bins as
# cut(cum.value, c(-Inf, qt.vec, Inf)) with its default right-closed intervals.
DT[, group := findInterval(cum.value, qt.vec, left.open = TRUE)]
# Rank rows inside each bin by cum.value; ties go to the earlier row.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
# Keep the first row of every bin that lies above the first threshold.
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the row of the last doy unless it has already been selected.
last.row <- dat[dat$doy == max(dat$doy), ]
if (!all(last.row$doy %in% DT$doy)) DT <- rbind(DT, last.row)
1
From my understanding, the OP wants to avoid guessing the n=19
so cut
does not deliver the best result here as you need to know the value beforehand.
– vaettchen
Nov 14 '18 at 3:09
I don't see any problem here, n<-floor(max(dat$cum.value)/delta)
...
– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I have not run your code but I see a reference to qt.vec.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...
– vaettchen
Nov 14 '18 at 11:00
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
add a comment |
Your Answer
StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");
StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "1"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);
else
createEditor();
);
function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader:
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
,
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);
);
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53280517%2fselecting-rows-from-data-based-on-unique-conditions%23new-answer', 'question_page');
);
Post as a guest
Required, but never shown
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:
# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), plus the last row of the data, without
# having to know in advance how many multiples fit.
options(scipen = 10, digits = 15)  # display all digits (changes global options)
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold step
stopifnot(delta > 0)               # a non-positive step would never terminate
n <- 1                             # multiplier of the next threshold to reach
keep <- logical(nrow(dat))         # preallocated marker: TRUE for rows to keep
for (i in seq_len(nrow(dat))) {    # loop through dat rows (safe if dat is empty)
  if (dat[i, "cum.value"] >= n * delta) {  # as soon as the threshold is reached
    keep[i] <- TRUE                # mark the row for the result
    # Move on to the first threshold this row has NOT reached yet, so that a
    # row jumping over several multiples of delta is selected only once and
    # the following row is not picked up by mistake.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1
    }
  }
}
keep[nrow(dat)] <- TRUE            # add the last row anyway (never duplicated)
all.doy <- dat[keep, ]             # selected rows keep their row names from dat
all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710
add a comment |
A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:
# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), plus the last row of the data, without
# having to know in advance how many multiples fit.
options(scipen = 10, digits = 15)  # display all digits (changes global options)
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold step
stopifnot(delta > 0)               # a non-positive step would never terminate
n <- 1                             # multiplier of the next threshold to reach
keep <- logical(nrow(dat))         # preallocated marker: TRUE for rows to keep
for (i in seq_len(nrow(dat))) {    # loop through dat rows (safe if dat is empty)
  if (dat[i, "cum.value"] >= n * delta) {  # as soon as the threshold is reached
    keep[i] <- TRUE                # mark the row for the result
    # Move on to the first threshold this row has NOT reached yet, so that a
    # row jumping over several multiples of delta is selected only once and
    # the following row is not picked up by mistake.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1
    }
  }
}
keep[nrow(dat)] <- TRUE            # add the last row anyway (never duplicated)
all.doy <- dat[keep, ]             # selected rows keep their row names from dat
all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710
add a comment |
A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:
# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), plus the last row of the data, without
# having to know in advance how many multiples fit.
options(scipen = 10, digits = 15)  # display all digits (changes global options)
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold step
stopifnot(delta > 0)               # a non-positive step would never terminate
n <- 1                             # multiplier of the next threshold to reach
keep <- logical(nrow(dat))         # preallocated marker: TRUE for rows to keep
for (i in seq_len(nrow(dat))) {    # loop through dat rows (safe if dat is empty)
  if (dat[i, "cum.value"] >= n * delta) {  # as soon as the threshold is reached
    keep[i] <- TRUE                # mark the row for the result
    # Move on to the first threshold this row has NOT reached yet, so that a
    # row jumping over several multiples of delta is selected only once and
    # the following row is not picked up by mistake.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1
    }
  }
}
keep[nrow(dat)] <- TRUE            # add the last row anyway (never duplicated)
all.doy <- dat[keep, ]             # selected rows keep their row names from dat
all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710
A matter of taste and context and you read a lot about "loops are frowned upon in R" - but they deliver results and are easy to read, and they are Base R - no extra packages needed or new syntax to learn:
# Select the first row at which cum.value reaches each successive multiple of
# delta (1 * delta, 2 * delta, ...), plus the last row of the data, without
# having to know in advance how many multiples fit.
options(scipen = 10, digits = 15)  # display all digits (changes global options)
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold step
stopifnot(delta > 0)               # a non-positive step would never terminate
n <- 1                             # multiplier of the next threshold to reach
keep <- logical(nrow(dat))         # preallocated marker: TRUE for rows to keep
for (i in seq_len(nrow(dat))) {    # loop through dat rows (safe if dat is empty)
  if (dat[i, "cum.value"] >= n * delta) {  # as soon as the threshold is reached
    keep[i] <- TRUE                # mark the row for the result
    # Move on to the first threshold this row has NOT reached yet, so that a
    # row jumping over several multiples of delta is selected only once and
    # the following row is not picked up by mistake.
    while (dat[i, "cum.value"] >= n * delta) {
      n <- n + 1
    }
  }
}
keep[nrow(dat)] <- TRUE            # add the last row anyway (never duplicated)
all.doy <- dat[keep, ]             # selected rows keep their row names from dat
all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710
edited Nov 15 '18 at 0:12
answered Nov 14 '18 at 3:02
vaettchenvaettchen
5,2201332
5,2201332
add a comment |
add a comment |
The main point is the cut
function here:
library(data.table)
# Thresholds 1 * delta, ..., n * delta, with n derived from the data instead of
# being guessed: n is the number of whole multiples of delta that fit below the
# largest cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
DT <- as.data.table(dat)
# group = number of thresholds strictly below cum.value, i.e. the same bins as
# cut(cum.value, c(-Inf, qt.vec, Inf)) with its default right-closed intervals.
DT[, group := findInterval(cum.value, qt.vec, left.open = TRUE)]
# Rank rows inside each bin by cum.value; ties go to the earlier row.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
# Keep the first row of every bin that lies above the first threshold.
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the row of the last doy unless it has already been selected.
last.row <- dat[dat$doy == max(dat$doy), ]
if (!all(last.row$doy %in% DT$doy)) DT <- rbind(DT, last.row)
1
From my understanding, the OP wants to avoid guessing the n=19
so cut
does not deliver the best result here as you need to know the value beforehand.
– vaettchen
Nov 14 '18 at 3:09
I don't see any problem here, n<-floor(max(dat$cum.value)/delta)
...
– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I have not run your code but I see a reference to qt.vec.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...
– vaettchen
Nov 14 '18 at 11:00
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
add a comment |
The main point is the cut
function here:
library(data.table)
# Thresholds 1 * delta, ..., n * delta, with n derived from the data instead of
# being guessed: n is the number of whole multiples of delta that fit below the
# largest cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
DT <- as.data.table(dat)
# group = number of thresholds strictly below cum.value, i.e. the same bins as
# cut(cum.value, c(-Inf, qt.vec, Inf)) with its default right-closed intervals.
DT[, group := findInterval(cum.value, qt.vec, left.open = TRUE)]
# Rank rows inside each bin by cum.value; ties go to the earlier row.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
# Keep the first row of every bin that lies above the first threshold.
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the row of the last doy unless it has already been selected.
last.row <- dat[dat$doy == max(dat$doy), ]
if (!all(last.row$doy %in% DT$doy)) DT <- rbind(DT, last.row)
1
From my understanding, the OP wants to avoid guessing the n=19
so cut
does not deliver the best result here as you need to know the value beforehand.
– vaettchen
Nov 14 '18 at 3:09
I don't see any problem here, n<-floor(max(dat$cum.value)/delta)
...
– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I have not run your code but I see a reference to qt.vec.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...
– vaettchen
Nov 14 '18 at 11:00
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
add a comment |
The main point is the cut
function here:
library(data.table)
# Thresholds 1 * delta, ..., n * delta, with n derived from the data instead of
# being guessed: n is the number of whole multiples of delta that fit below the
# largest cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
DT <- as.data.table(dat)
# group = number of thresholds strictly below cum.value, i.e. the same bins as
# cut(cum.value, c(-Inf, qt.vec, Inf)) with its default right-closed intervals.
DT[, group := findInterval(cum.value, qt.vec, left.open = TRUE)]
# Rank rows inside each bin by cum.value; ties go to the earlier row.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
# Keep the first row of every bin that lies above the first threshold.
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the row of the last doy unless it has already been selected.
last.row <- dat[dat$doy == max(dat$doy), ]
if (!all(last.row$doy %in% DT$doy)) DT <- rbind(DT, last.row)
The main point is the cut
function here:
library(data.table)
# Thresholds 1 * delta, ..., n * delta, with n derived from the data instead of
# being guessed: n is the number of whole multiples of delta that fit below the
# largest cum.value.
n <- floor(max(dat$cum.value) / delta)
qt.vec <- seq_len(n) * delta
DT <- as.data.table(dat)
# group = number of thresholds strictly below cum.value, i.e. the same bins as
# cut(cum.value, c(-Inf, qt.vec, Inf)) with its default right-closed intervals.
DT[, group := findInterval(cum.value, qt.vec, left.open = TRUE)]
# Rank rows inside each bin by cum.value; ties go to the earlier row.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
# Keep the first row of every bin that lies above the first threshold.
DT <- DT[position == 1 & group > 0]
DT[, c("position", "group") := NULL]
# Append the row of the last doy unless it has already been selected.
last.row <- dat[dat$doy == max(dat$doy), ]
if (!all(last.row$doy %in% DT$doy)) DT <- rbind(DT, last.row)
answered Nov 13 '18 at 13:17
Vladimir VolokhonskyVladimir Volokhonsky
1206
1206
1
From my understanding, the OP wants to avoid guessing the n=19
so cut
does not deliver the best result here as you need to know the value beforehand.
– vaettchen
Nov 14 '18 at 3:09
I don't see any problem here, n<-floor(max(dat$cum.value)/delta)
...
– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I have not run your code but I see a reference to qt.vec.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...
– vaettchen
Nov 14 '18 at 11:00
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
add a comment |
1
From my understanding, the OP wants to avoid guessing the n=19
so cut
does not deliver the best result here as you need to know the value beforehand.
– vaettchen
Nov 14 '18 at 3:09
I don't see any problem here, n <- floor(max(dat$cum.value)/delta)
...
– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I have not run your code but I see a reference to qt.vect.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...
– vaettchen
Nov 14 '18 at 11:00
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
1
1
From my understanding, the OP wants to avoid guessing the
n=19
so cut
does not deliver the best result here as you need to know the value beforehand.– vaettchen
Nov 14 '18 at 3:09
From my understanding, the OP wants to avoid guessing the
n=19
so cut
does not deliver the best result here as you need to know the value beforehand.– vaettchen
Nov 14 '18 at 3:09
I don't see any problem here,
n<-floor(max(dat$cum.value)/delta)
...– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I don't see any problem here,
n<-floor(max(dat$cum.value)/delta)
...– Vladimir Volokhonsky
Nov 14 '18 at 10:46
I have not run your code but I see a reference to
qt.vect.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...– vaettchen
Nov 14 '18 at 11:00
I have not run your code but I see a reference to
qt.vect.19
which I believe is the step the OP wants to avoid. Not sure about my understanding though...– vaettchen
Nov 14 '18 at 11:00
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
@vaettchen yes I want to avoid guessing the n = 19 part
– Crop89
Nov 15 '18 at 11:31
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53280517%2fselecting-rows-from-data-based-on-unique-conditions%23new-answer', 'question_page');
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Any reason to shun Base R? You only want
dplyr
or data.table
?– vaettchen
Nov 13 '18 at 12:59
No particular reason. I could also use base R
– Crop89
Nov 13 '18 at 13:10