TA的每日心情 | 开心 2021-12-13 21:45 |
---|
签到天数: 15 天 [LV.4]偶尔看看III
|
1、POM.xml
2、样例代码
- 1 package com.hansight.spark.utils;
- 2
- 3 import org.apache.spark.SparkConf;
- 4 import org.apache.spark.api.java.JavaSparkContext;
- 5
- 6 public class SparkUtils {
- 7
- 8 public static JavaSparkContext get(String name) {
- 9 SparkConf conf = new SparkConf();
- 10 // conf.setMaster("local[1]");
- 11 // conf.setMaster("spark://hdp125:7077");
- 12 conf.setAppName(name);
- 13 return new JavaSparkContext(conf);
- 14 }
- 15 }
复制代码
- 1 package com.hansight.spark.streaming;
- 2
- 3 import java.util.Iterator;
- 4
- 5 import org.apache.spark.api.java.JavaRDD;
- 6 import org.apache.spark.api.java.JavaSparkContext;
- 7 import org.apache.spark.api.java.function.Function;
- 8 import org.apache.spark.api.java.function.VoidFunction;
- 9
- 10 import com.hansight.spark.utils.SparkUtils;
- 11
- 12 public class HttpParser {
- 13 @SuppressWarnings({ "unchecked", "serial" })
- 14 public static void main(String[] args) {
- 15 if (args.length == 0) {
- 16 System.out.println("Usage: <file path>");
- 17 return;
- 18 }
- 19 System.setProperty("hadoop.home.dir", "E:/tools/hadoop-2.4.1");
- 20 JavaSparkContext sc = SparkUtils.get("HttpLog");
- 21 String path = args[0];
- 22 JavaRDD<String> rdd = sc
- 23 .textFile(path);
- 24 JavaRDD<HttpLog> parsed = rdd.map(new Function<String, HttpLog>() {
- 25 public HttpLog call(String line) throws Exception {
- 26 return HttpLog.parser(line);
- 27 }
- 28 });
- 29 System.out.println(parsed.count());
- 30 parsed.foreachPartition(new VoidFunction<Iterator<HttpLog>>() {
- 31 @Override
- 32 public void call(Iterator<HttpLog> t) throws Exception {
- 33 HttpLog.save(t);
- 34 }
- 35 });
- 36 }
- 37 }
复制代码
|
|