Home  >  Article  >  Database  >  newlisp跟踪jobtracker状态

newlisp跟踪jobtracker状态

WBOY
WBOYOriginal
2016-06-07 16:06:49 · 994 browse

基本思路:通过 newLISP 定时下载 jobtracker 页面,用正则表达式解析 HTML 中的 table 元素,从而获得最新的 MapReduce 状态。每次获得状态数据后,存入 MySQL 数据库,然后用 Tableau 将 MapReduce 集群状态用报表呈现。这是 jobtracker 站点的数据;这是 Tableau 绘制的报表……

基本思路:通过 newLISP 定时下载 jobtracker 页面,用正则表达式解析 HTML 中的 table 元素,从而获得最新的 MapReduce 状态。

每次获得状态数据后,将其存入 MySQL 数据库,然后用 Tableau 将 MapReduce 集群状态以报表形式呈现。

这是jobtracker站点的数据

[图片缺失:jobtracker 页面截图]

这是Tableau绘制的报表

[图片缺失:Tableau 报表截图]

这样就可以用数据可视化的方式展示Hadoop集群计算的压力状态。

 

下面是 newLISP 代码,主要是用正则表达式解析 HTML,再用 mysql 模块写入数据库。

 

#!/usr/bin/newlisp
(load "mysql.lsp")

;; Validate the command line before anything else runs.
;;
;; Prints the raw (main-args) list, stores its length in the global
;; `args-length`, and terminates the script with exit status 1 unless the
;; length is exactly 3 (host only) or 4 (host and port).
(define (check-args)
  (print "args: ")
  (println (main-args))
  (set 'args-length (length (main-args)))
  ;; The upper bound matters too: with more than 4 entries neither the
  ;; "port given" nor the "default port" case applies downstream.
  (if (or (< args-length 3) (> args-length 4))
      (begin
        (println "the number of args must be 3 or 4, e.g. ./job.lsp jobtracker.bigdata.cn 8080")
        ;; Non-zero status so cron/shell wrappers can detect the failure.
        (exit 1))))

;; Derive the jobtracker URL from the command line.
;;
;; Sets three globals:
;;   domain - (main-args 2)
;;   port   - (main-args 3) when present, otherwise the default "80"
;;   url    - "http://<domain>:<port>/jobtracker.jsp"
;; and prints the resulting URL.
(define (parse-args)
  ;; Count the arguments here instead of relying on a global set elsewhere,
  ;; so `port` is always assigned.
  (let (argc (length (main-args)))
    (set 'domain (main-args 2))
    (set 'port (if (>= argc 4) (main-args 3) "80"))
    (set 'url (string "http://" domain ":" port "/jobtracker.jsp"))
    (println (string "job tracker site is located at " url))))

;; Download the jobtracker page named by the global `url` and hand the HTML
;; to extract-tables.
;;
;; Stores the raw response in the global `page-content`. Returns whatever
;; extract-tables returns. Exits with status 1 when the download fails.
(define (access-job-tracker-site)
  (set 'page-content (get-url url))
  ;; get-url reports failures in-band as a string beginning with "ERR:";
  ;; parsing that as HTML would only fail later with a misleading error.
  (if (or (not (string? page-content)) (starts-with page-content "ERR:"))
      (begin
        (println "failed to download " url ": " page-content)
        (exit 1)))
  (extract-tables page-content))

;; Return `table` unchanged when its text contains "Running Map Tasks",
;; otherwise nil. Used as a filter to pick the cluster summary table.
(define (extract-summary-table table)
  (and (regex "Running Map Tasks" table) table))

;; Extract the text of one "<td>...</td>" cell.
;;
;; td - string holding a single table cell.
;; Returns the cell content with surrounding whitespace removed. When the
;; cell wraps its value in an <a ...> link, the link text is returned
;; instead. Returns nil when `td` contains no <td> element.
(define (get-number td)
  ;; [\s\S] is used instead of "." so cells spanning several lines still
  ;; match; a failed match would otherwise make the index lookup blow up.
  (let (cell (regex "<td>\\s*([\\s\\S]*?)\\s*</td>" td))
    (if cell
        (let (inner (cell 3)
              link (regex "<a\\b[^>]*>\\s*([\\s\\S]*?)\\s*</a>" (cell 3)))
          ;; Element 3 of a regex result is the first captured group.
          (if (and link (find "<a href" inner))
              (link 3)
              inner)))))

;; Strip the <td> markup from every cell in `tds`.
;;
;; tds - list of "<td>...</td>" strings.
;; Returns a new list, in the same order, holding get-number's result for
;; each cell.
(define (remove-td tds)
  (map get-number tds))

;; Pull every cell value out of the summary table.
;;
;; table - HTML of the summary <table> element.
;; Sets the globals `all-tds` (raw "<td>...</td>" strings) and
;; `all-summary-values` (their contents via remove-td), and returns the
;; latter. A non-string `table` yields empty lists instead of an error.
(define (parse-summary-table table)
  (set 'all-tds (if (string? table)
                    ;; Non-greedy [\s\S]*? keeps each match to a single cell,
                    ;; even when the cell spans several lines.
                    (find-all "<td>[\\s\\S]*?</td>" table)
                    '()))
  (set 'all-summary-values (remove-td all-tds)))

;; Locate the cluster summary table in the jobtracker page and parse it.
;;
;; html-content - full HTML of the jobtracker page.
;; Sets the globals `all-tables` (every <table ...>...</table> fragment) and
;; `summary-table` (the last fragment accepted by extract-summary-table).
;; Returns parse-summary-table's result. Raises an error when no table
;; qualifies.
(define (extract-tables html-content)
  (set 'all-tables (find-all "<table[^>]+>[\\s\\S]*?</table>" html-content))
  ;; Reset first so a value left over from an earlier call can never be
  ;; mistaken for a fresh match.
  (set 'summary-table nil)
  (dolist (table all-tables)
    (if (extract-summary-table table)
        (set 'summary-table table)))
  (if summary-table
      (parse-summary-table summary-table)
      (throw-error "no table containing \"Running Map Tasks\" found in the jobtracker page")))

;; Render one scraped value as a SQL literal.
;; Plain integers/decimals pass through (whitespace stripped); anything else
;; becomes NULL so page text can never be spliced into the statement.
(define (summary-value-to-sql value)
  (let (m (regex "^\\s*(-?[0-9]+(\\.[0-9]+)?)\\s*$" (string value)))
    (if m (m 3) "NULL")))

;; Insert one row of jobtracker summary figures into MySQL.
;;
;; all-summary-values - list whose first 14 elements are the summary cells,
;;                      in the column order used by the INSERT below.
;; Prints the values, the connection object and the final SQL. Sets the
;; globals mysql-instance, mysql-host, mysql-port, mysql-user, mysql-pwd,
;; mysql-db, job-tracker-summary-table and insert-summary-sql.
;; Raises an error when fewer than 14 values are supplied.
;; NOTE(review): connection settings and credentials are hard-coded here.
;; NOTE(review): column names (including their spelling) are kept exactly as
;; they were; they must match the existing table definition.
(define (write-summary-to-mysql all-summary-values)
  (println all-summary-values)
  (if (or (not (list? all-summary-values)) (< (length all-summary-values) 14))
      (throw-error "expected at least 14 summary values from the jobtracker page"))
  (set 'mysql-instance (Mysql))
  (println "mysql-instance: " mysql-instance)
  (set 'mysql-host  "10.100.10.10")
  (set 'mysql-port 3306)
  (set 'mysql-user "user")
  (set 'mysql-pwd "123456")
  (set 'mysql-db "bigdata_data_market")
  (set 'job-tracker-summary-table "hadoop_job_tracker_summary")
  (:connect mysql-instance mysql-host mysql-user mysql-pwd mysql-db mysql-port)
  (:query mysql-instance "SET character_set_client = utf8;")
  ;; format accepts its data as a list: table name first, then the 14
  ;; sanitised values in column order.
  (set 'insert-summary-sql
       (format "insert into %s (collect_time,running_map_tasks,running_reduce_tasks,total_submissions,nodes,occupied_map_slots,occupied_reduce_slots,reserved_map_slots,reserved_reduce_slots,map_task_capacity,reduce_task_capacity,average_tasks_per_node,blacked_listed_nodes,gray_listed_nodes,exclueded_nodes) values (now(),%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
               (cons job-tracker-summary-table
                     (map summary-value-to-sql (slice all-summary-values 0 14)))))
  (println insert-summary-sql)
  (:query mysql-instance insert-summary-sql))

;; main logic starts now
;; The steps communicate through globals, so their order matters:
;; validate the argument count, build `url`, download and parse the page
;; (which fills `all-summary-values`), then store those values in MySQL.
(check-args)
(parse-args)
(access-job-tracker-site)
(write-summary-to-mysql all-summary-values)

;; Leave the interpreter explicitly once the script has finished.
(exit)
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn