{"id":1713,"date":"2019-07-30T12:10:18","date_gmt":"2019-07-30T03:10:18","guid":{"rendered":"http:\/\/idealive.jp\/blog\/?p=1713"},"modified":"2019-07-30T11:10:29","modified_gmt":"2019-07-30T02:10:29","slug":"spark","status":"publish","type":"post","link":"https:\/\/idealive.jp\/blog\/2019\/07\/30\/spark\/","title":{"rendered":"Spark"},"content":{"rendered":"<p>AWS\u306eGlue\u3068\u3044\u3046\u30b5\u30fc\u30d3\u30b9\u3067Python\u306eSpark\u3092\u4f7f\u3046\u3053\u3068\u304c\u3042\u3063\u305f\u306e\u3067Spark\u306b\u3064\u3044\u3066\u8a18\u8f09\u3057\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002<br \/>\n\uff08Glue\u306fAWS\u306e\u5206\u6790\u30fb\u96c6\u8a08\u306a\u3069\u3092\u3057\u3066\u304f\u308c\u308b\u30b5\u30fc\u30d3\u30b9\u3067\u3059\u3002\uff09<br \/>\n\u4ee5\u524d\u304b\u3089\u8a18\u8f09\u3057\u3066\u3044\u305f\u300cpandas\u300d\u300cNumPy\u300d\u306b\u8fd1\u3044\u3082\u306e\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u3069\u3093\u306a\u3053\u3068\u304c\u3067\u304d\u308b\u304b\u3068\u8a00\u3046\u3068<br \/>\n\u30c7\u30fc\u30bf\u306b\u5bfe\u3057\u3066SQL\u304c\u4f7f\u7528\u53ef\u80fd\u3068\u306a\u3063\u3066\u304a\u308a\u3001\u6a5f\u68b0\u5b66\u7fd2\u3084\u30b0\u30e9\u30d5\u7406\u8ad6\u304c\u53ef\u80fd\u3068\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u4eca\u56de\u8a2d\u5b9a\u3057\u305f\u30d5\u30ed\u30fc\u3067\u7d39\u4ecb\u3057\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002<br \/>\n\uff08AWS\u306e\u30b5\u30fc\u30d3\u30b9\u3092\u5229\u7528\u3057\u3066\u3044\u308b\u306e\u3067\u74b0\u5883\u306f\u6574\u3063\u305f\u524d\u63d0\u3067\u8a18\u8f09\u3057\u307e\u3059\uff09<\/p>\n<p>#\u7279\u5b9a\u306eCSV\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u3080<br \/>\ndf = spark.read.csv([CSV\u30d1\u30b9], header=True, sep=&#8217;,&#8217;, inferSchema=True)<\/p>\n<p>#\u30c6\u30f3\u30dd\u30e9\u30ea\u30c6\u30fc\u30d6\u30eb\u4f5c\u6210<br 
\/>\ndf.createTempView(&#8216;sample_table&#8217;)<\/p>\n<p>#\u30af\u30a8\u30ea\u5b9f\u884c\uff08\u6761\u4ef6\u3082\u8a2d\u5b9a\u53ef\u80fd\uff09<br \/>\ndf2 = spark.sql(&#8216;select id, sum(num) from sample_table group by id&#8217;)<br \/>\n#\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3067\u8a18\u8f09\u3059\u308b\u3068<br \/>\ndf2 = df.groupBy(&#8216;id&#8217;).agg({&#8216;num&#8217;: &#8216;sum&#8217;})<\/p>\n<p>\u4e0a\u8a18\u306e\u3088\u3046\u306a\u611f\u3058\u3067sql\u304c\u4f7f\u7528\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<br \/>\nCSV\u30d5\u30a1\u30a4\u30eb\u3092\u30c6\u30fc\u30d6\u30eb\u3068\u3057\u3066\u6271\u3048\u3001\u305d\u306e\u30c6\u30f3\u30dd\u30e9\u30ea\u30c6\u30fc\u30d6\u30eb\u306b\u5bfe\u3057\u3066SQL\u3067\u5b9f\u884c\u3067\u304d\u308b\u3068\u3044\u3063\u305f\u611f\u3058\u3067\u3059\u3002<\/p>\n<p>\u8aad\u307f\u8fbc\u3080\u30d5\u30a1\u30a4\u30eb\u306b\u3088\u3063\u3066\u306f\u30a8\u30e9\u30fc\u304c\u3067\u308b\u304b\u3082\u3002<br \/>\n\u307e\u305f\u3001\u30d0\u30fc\u30b8\u30e7\u30f3\u306b\u3088\u3063\u3066\u8a18\u8f09\u65b9\u6cd5\u304c\u7570\u306a\u308b\u304b\u3082\u3067\u3059\u3002<br \/>\n(\u307e\u3060\u3001\u8a73\u7d30\u306a\u90e8\u5206\u306b\u95a2\u3057\u3066\u306f\u672a\u8abf\u67fb)<\/p>\n<p>\u3053\u308c\u306fSQL\u3092\u7406\u89e3\u3057\u3066\u3044\u308b\u4eba\u306b\u3068\u3063\u3066\u306f\u4fbf\u5229\u3067\u306f\u306a\u3044\u304b\u3068\u611f\u3058\u307e\u3057\u305f\u304c\u2026<br \/>\n\u554f\u984c\u306f\u901f\u5ea6\u3067\u3059\u306d\u3002<br \/>\n\u5b9f\u969b\u306b\u5b9f\u88c5\u3057\u305f\u74b0\u5883\u304cAWS\u306eGlue\u3067\u3053\u306e\u30b5\u30fc\u30d3\u30b9\u306f\u30d0\u30c3\u30c1\u7684\u306a\u30eb\u30fc\u30eb\u3067\u88cf\u3067\u52d5\u304f\u3082\u306e\u3067\u3059\u3002<br \/>\n\u30d5\u30ed\u30f3\u30c8\u5074\u3067\u3053\u306espark\u3092\u5229\u7528\u3057\u3066\u3044\u306a\u3044\u306e\u3067\u308f\u304b\u3089\u306a\u3044\u3067\u3059\u304c<br 
\/>\nPython\u306e\u96c6\u8a08\u51e6\u7406\u30c4\u30fc\u30eb\u3092\u6bd4\u8f03\u3057\u3066\u3044\u308b\u30b5\u30a4\u30c8\u3092\u307f\u305f\u611f\u3058\u3067\u306fspark\u304c\u4e00\u756a\u65e9\u305d\u3046\u306a\u8a18\u8f09\u3092\u3057\u3066\u3044\u307e\u3057\u305f\u3002<\/p>\n<p>\u4ee5\u4e0a\u3001Spark\u306b\u3064\u3044\u3066\u3067\u3057\u305f\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>AWS\u306eGlue\u3068\u3044\u3046\u30b5\u30fc\u30d3\u30b9\u3067Python\u306eSpark\u3092\u4f7f\u3046\u3053\u3068\u304c\u3042\u3063\u305f\u306e\u3067Spark\u306b\u3064\u3044\u3066\u8a18\u8f09\u3057\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002 \uff08Glue\u306fAWS\u306e\u5206\u6790\u30fb\u96c6\u8a08\u306a\u3069\u3092\u3057\u3066\u304f\u308c\u308b\u30b5\u30fc\u30d3\u30b9\u3067\u3059\u3002\uff09 \u4ee5\u524d\u304b\u3089\u8a18\u8f09\u3057\u3066\u3044\u305f\u300cpandas\u300d&#8230;<a class=\"read-more-link button\" href=\"https:\/\/idealive.jp\/blog\/2019\/07\/30\/spark\/\">\u7d9a\u304d\u3092\u8aad\u3080<\/a><\/p>\n","protected":false},"author":7,"featured_media":1715,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-1713","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/posts\/1713"}],"collection":[{"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/users\/7"}],"replies":[{"embeddable":true,"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/comments?post=1713"}],"version-history":[{"count":3,"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/posts\/1713\/revisions"}],"predecessor-version":[{"id":1717,"href":"https:\/\/idealive.jp\/blog\/wp-json\
/wp\/v2\/posts\/1713\/revisions\/1717"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/media\/1715"}],"wp:attachment":[{"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/media?parent=1713"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/categories?post=1713"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/idealive.jp\/blog\/wp-json\/wp\/v2\/tags?post=1713"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}