Spark Scala code works for one serial number; how to run it for multiple serial number values?
I have a dataset with 10 million records.
It has a column named serial_number, and the code below calculates the data for one serial number.
How can I calculate the data for all serial numbers? There are 5 serial numbers in total, with approximately 2 million records each, all in the same file.
Simply creating a table from df does not solve the problem: I want the code below to run for every serial number, whichever one comes in the next record.
val df = spark.read.format("csv").option("header", "true").schema(schema).load("/home/file.csv")
val s_n181 = df.selectExpr("*").where("serial_number = 1222")
// others are 2111, 3555, 4555 etc.
val s_n181s = s_n181.na.fill(0)
s_n181s.createOrReplaceTempView("table81")

// mutable state carried across rows while scanning for a trip
var loop1 = false
var start = ""
var starttime = ""
var startnew = ""
var endtime = ""

for (row <- s_n181s.collect) {
  val fields = row.mkString(",").split(",")
  val ts = fields(4)            // timestamp column
  val nd = fields(9).toDouble   // compare numerically, not as strings
  val ms = fields(10).toDouble
  val tp = fields(11).toDouble
  //----- trip starts from --------------
  if (nd <= 0.5 || ms <= 0.5 || tp <= 0.5) {
    if (!loop1)
      starttime = ts
    println("timestamp at which the trip starts is " + starttime)
    if (nd <= 0.5) start = "nd"
    if (ms <= 0.5) start = "ms"
    if (tp <= 0.5) start = "tp"
    println(" Starts from : " + start)
    startnew = starttime
    val table1 = spark.sql(s"""select * from table81 where timestamp >= $startnew""")
    table1.createOrReplaceTempView("view1")
    . . . . . .
    . . . . . .
    endtime = endn // endn is computed in the elided part above
    val end_df = spark.sql(s"""select * from view1 where timestamp <= $endtime""")
    end_df.show()
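One approach I am considering is to collect the distinct serial numbers and re-run the same logic once for each value, along the lines of the untested sketch below (the per-serial view name and where the trip logic goes are placeholders, not working code):

// untested sketch: loop the existing single-serial logic over every serial number
// (relies on there being only ~5 distinct serial_number values, as described above)
val all = spark.read.format("csv").option("header", "true").schema(schema).load("/home/file.csv").na.fill(0)

// collect the handful of distinct serial numbers to the driver
val serialNumbers = all.select("serial_number").distinct().collect().map(_.get(0).toString)

for (sn <- serialNumbers) {
  val perSerial = all.where(s"serial_number = $sn")
  perSerial.createOrReplaceTempView(s"table_$sn") // placeholder view name per serial
  // ... run the trip start/end code here against s"table_$sn" instead of "table81" ...
}

Is this the right direction, or is there a more idiomatic way to apply the per-serial-number computation to all serial numbers at once?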
apache-spark apache-spark-sql
asked Nov 9 at 13:52 by stackoverflow (217)