>데이터 베이스 >MySQL 튜토리얼 >newlisp跟踪jobtracker状态

newlisp跟踪jobtracker状态

WBOY
WBOY원래의
2016-06-07 16:06:49 1070검색

基本思路,通过newlisp定时下载jobtracker页面,用正则表达式解析html中的table元素,然后获得最新的mapreduce的状态。 每次获得状态数据后,存入mysql数据库,然后用tableau将mapreduce集群状态用报表呈现。 这是jobtracker站点的数据。 这是Tableau绘制的报表。

基本思路,通过newlisp定时下载jobtracker页面,用正则表达式解析html中的table元素,然后获得最新的mapreduce的状态。

每次获得状态数据后,存入mysql数据库,然后用tableau将mapreduce集群状态用报表呈现。

这是jobtracker站点的数据

\

这是Tableau绘制的报表

\

这样就可以用数据可视化的方式展示Hadoop集群计算的压力状态。

 

下面是newlisp代码,主要就是用正则表达式解析html,用mysql模块写入数据库。

 

#!/usr/bin/newlisp
(load "mysql.lsp")

;; Validate the command line before anything else runs.
;;
;; Echoes (main-args), stores its length in the global `args-length`
;; (read later by parse-args) and terminates the script with a usage
;; message and a non-zero exit status unless exactly 3 or 4 entries are
;; present. Index 2 is the job tracker host and index 3 the optional port.
(define (check-args)
  (print "args: ")
  (println (main-args))
  (set 'args-length (length (main-args)))
  ;; Enforce both bounds promised by the usage message; extra arguments
  ;; used to slip through and leave the port undefined.
  (if (or (< args-length 3) (> args-length 4))
      (begin
        (println "the number of args must be 3 or 4, e.g. ./job.lsp jobtracker.bigdata.cn 8080")
        (exit 1))))

;; Build the job tracker URL from the command line.
;;
;; Sets the globals `domain`, `port` and `url`. The port is taken from
;; (main-args 3) when it was supplied and defaults to "80" otherwise.
;; Relies on `args-length` having been set by check-args.
;; Returns the announcement string that is printed.
(define (parse-args)
  (set 'domain (main-args 2))
  ;; A single expression guarantees `port` is always defined.
  (set 'port (if (>= args-length 4) (main-args 3) "80"))
  (set 'url (string "http://" domain ":" port "/jobtracker.jsp"))
  (println (string "job tracker site is located at " url)))

;; Download the job tracker page and parse its summary table.
;;
;; Reads the global `url`, stores the downloaded text in the global
;; `page-content` and returns whatever extract-tables returns.
;; Exits with status 1 when the download fails.
(define (access-job-tracker-site)
  (set 'page-content (get-url url))
  ;; get-url reports failures as a string beginning with "ERR:" instead of
  ;; raising an error, so it has to be checked before parsing it as HTML.
  (if (starts-with page-content "ERR:")
      (begin
        (println "failed to download " url ": " page-content)
        (exit 1)))
  (extract-tables page-content))

;; Identify the cluster summary table.
;;
;; table - HTML text of one <table> element.
;; Returns `table` itself when it contains the text "Running Map Tasks",
;; otherwise nil.
(define (extract-summary-table table)
  (and (regex "Running Map Tasks" table) table))

;; Extract the displayed value from one <td>...</td> fragment.
;;
;; td - HTML text of a single table cell.
;; Returns the cell content with surrounding whitespace removed. When the
;; content contains an "<a href" link, the link text is returned instead.
;; Returns "" when `td` is not a <td>...</td> fragment.
(define (get-number td)
  ;; [\s\S] is used instead of "." so that cells spanning several lines
  ;; still match; index 3 of a regex result is the first sub-expression.
  (letn (cell  (regex "<td>([\\s\\S]*)</td>" td)
         inner (if cell (cell 3) "")
         link  (if (find "<a href" inner)
                   (regex "<a[\\s\\S]*?>([\\s\\S]*)</a>" inner)))
    (trim (if link (link 3) inner))))

;; Strip the markup from a list of <td> fragments.
;;
;; tds - list of HTML strings, each a single table cell.
;; Returns a list with get-number applied to every cell, in the same order.
(define (remove-td tds)
  (map get-number tds))

;; Pull the cell values out of the summary table.
;;
;; table - HTML text of the summary <table> element.
;; Stores the raw cells in the global `all-tds` and the cleaned values in
;; the global `all-summary-values`, which is also the return value.
;; Only attribute-less <td> cells are matched by the pattern.
(define (parse-summary-table table)
  (set 'all-tds (find-all "<td>[\\s\\S]*?</td>" table))
  (set 'all-summary-values (remove-td all-tds)))

;; Locate the summary table in the page and parse it.
;;
;; html-content - full HTML text of the job tracker page.
;; Stores every <table> element in the global `all-tables` and the last
;; one recognised by extract-summary-table in the global `summary-table`.
;; Returns the result of parse-summary-table; exits with status 1 when the
;; page contains no summary table.
(define (extract-tables html-content)
  (set 'all-tables (find-all "<table[^>]+>[\\s\\S]*?</table>" html-content))
  ;; Reset first so a missing table is detected instead of parsing nil or
  ;; a value left over from an earlier call.
  (set 'summary-table nil)
  (dolist (table all-tables)
    (if (extract-summary-table table)
        (set 'summary-table table)))
  (if summary-table
      (parse-summary-table summary-table)
      (begin
        (println "no summary table found in the job tracker page")
        (exit 1))))

;; Insert one row of cluster summary values into MySQL.
;;
;; all-summary-values - list of at least 14 strings, in the column order
;;                      of the insert statement below.
;; Connects with the settings hard-coded here, prints the generated SQL
;; and executes it. Exits with status 1 when fewer than 14 values are
;; supplied. Returns the result of the final :query call.
(define (write-summary-to-mysql all-summary-values)
  (println all-summary-values)
  ;; Fail with a clear message instead of an index error further down.
  (if (< (length all-summary-values) 14)
      (begin
        (println "expected 14 summary values, got " (length all-summary-values))
        (exit 1)))
  (set 'mysql-instance (Mysql))
  (println "mysql-instance: " mysql-instance)
  ;; NOTE(review): connection settings and credentials are hard-coded.
  (set 'mysql-host "10.100.10.10")
  (set 'mysql-port 3306)
  (set 'mysql-user "user")
  (set 'mysql-pwd "123456")
  (set 'mysql-db "bigdata_data_market")
  (set 'job-tracker-summary-table "hadoop_job_tracker_summary")
  (:connect mysql-instance mysql-host mysql-user mysql-pwd mysql-db mysql-port)
  (:query mysql-instance "SET character_set_client = utf8;")
  ;; NOTE(review): the values come straight from the scraped page and are
  ;; spliced into the statement unescaped; consider validating that each
  ;; one is numeric before building the SQL.
  ;; format accepts its arguments as one list: the table name followed by
  ;; the first 14 summary values.
  (set 'insert-summary-sql
       (format "insert into %s (collect_time,running_map_tasks,running_reduce_tasks,total_submissions,nodes,occupied_map_slots,occupied_reduce_slots,reserved_map_slots,reserved_reduce_slots,map_task_capacity,reduce_task_capacity,average_tasks_per_node,blacked_listed_nodes,gray_listed_nodes,exclueded_nodes) values (now(),%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
               (cons job-tracker-summary-table (0 14 all-summary-values))))
  (println insert-summary-sql)
  (:query mysql-instance insert-summary-sql))

;; Main sequence. The order matters because each step sets globals that
;; the next one reads.
;; 1. validate the command line (sets args-length)
(check-args)
;; 2. derive domain, port and url from the arguments
(parse-args)
;; 3. download the page and fill all-summary-values
(access-job-tracker-site)
;; 4. store the collected values in MySQL
(write-summary-to-mysql all-summary-values)

;; leave the interpreter explicitly once the script is done
(exit)
성명:
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.